-rw-r--r--  Documentation/admin-guide/bug-hunting.rst | 2
-rw-r--r--  Documentation/devicetree/bindings/net/ethernet-controller.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml | 2
-rw-r--r--  Documentation/netlink/specs/ovpn.yaml | 367
-rw-r--r--  Documentation/netlink/specs/rt-addr.yaml (renamed from Documentation/netlink/specs/rt_addr.yaml) | 24
-rw-r--r--  Documentation/netlink/specs/rt-link.yaml (renamed from Documentation/netlink/specs/rt_link.yaml) | 16
-rw-r--r--  Documentation/netlink/specs/rt-neigh.yaml (renamed from Documentation/netlink/specs/rt_neigh.yaml) | 0
-rw-r--r--  Documentation/netlink/specs/rt-route.yaml (renamed from Documentation/netlink/specs/rt_route.yaml) | 22
-rw-r--r--  Documentation/netlink/specs/rt-rule.yaml (renamed from Documentation/netlink/specs/rt_rule.yaml) | 0
-rw-r--r--  Documentation/networking/dccp.rst | 219
-rw-r--r--  Documentation/networking/device_drivers/ethernet/meta/fbnic.rst | 49
-rw-r--r--  Documentation/networking/devlink/devlink-info.rst | 4
-rw-r--r--  Documentation/networking/devlink/index.rst | 1
-rw-r--r--  Documentation/networking/devlink/ixgbe.rst | 122
-rw-r--r--  Documentation/networking/index.rst | 1
-rw-r--r--  Documentation/networking/ip-sysctl.rst | 4
-rw-r--r--  Documentation/networking/net_cachelines/net_device.rst | 2
-rw-r--r--  Documentation/networking/net_cachelines/snmp.rst | 2
-rw-r--r--  Documentation/networking/netdevices.rst | 65
-rw-r--r--  Documentation/networking/rxrpc.rst | 15
-rw-r--r--  Documentation/translations/zh_CN/admin-guide/bug-hunting.rst | 2
-rw-r--r--  Documentation/translations/zh_TW/admin-guide/bug-hunting.rst | 2
-rw-r--r--  Documentation/userspace-api/netlink/netlink-raw.rst | 2
-rw-r--r--  MAINTAINERS | 20
-rw-r--r--  arch/m68k/configs/amiga_defconfig | 2
-rw-r--r--  arch/m68k/configs/apollo_defconfig | 2
-rw-r--r--  arch/m68k/configs/atari_defconfig | 2
-rw-r--r--  arch/m68k/configs/bvme6000_defconfig | 2
-rw-r--r--  arch/m68k/configs/hp300_defconfig | 2
-rw-r--r--  arch/m68k/configs/mac_defconfig | 2
-rw-r--r--  arch/m68k/configs/multi_defconfig | 2
-rw-r--r--  arch/m68k/configs/mvme147_defconfig | 2
-rw-r--r--  arch/m68k/configs/mvme16x_defconfig | 2
-rw-r--r--  arch/m68k/configs/q40_defconfig | 2
-rw-r--r--  arch/m68k/configs/sun3_defconfig | 2
-rw-r--r--  arch/m68k/configs/sun3x_defconfig | 2
-rw-r--r--  arch/mips/configs/bigsur_defconfig | 1
-rw-r--r--  arch/mips/configs/gpr_defconfig | 1
-rw-r--r--  arch/mips/configs/mtx1_defconfig | 1
-rw-r--r--  arch/powerpc/configs/pmac32_defconfig | 1
-rw-r--r--  arch/powerpc/configs/ppc6xx_defconfig | 1
-rw-r--r--  drivers/net/Kconfig | 15
-rw-r--r--  drivers/net/Makefile | 1
-rw-r--r--  drivers/net/bareudp.c | 16
-rw-r--r--  drivers/net/bonding/bond_main.c | 23
-rw-r--r--  drivers/net/dsa/microchip/ksz_common.c | 97
-rw-r--r--  drivers/net/dsa/microchip/ksz_common.h | 19
-rw-r--r--  drivers/net/dsa/mt7530.c | 246
-rw-r--r--  drivers/net/dsa/mt7530.h | 42
-rw-r--r--  drivers/net/dsa/mv88e6xxx/ptp.c | 11
-rw-r--r--  drivers/net/dsa/sja1105/sja1105_ptp.c | 14
-rw-r--r--  drivers/net/ethernet/airoha/airoha_eth.c | 275
-rw-r--r--  drivers/net/ethernet/airoha/airoha_eth.h | 30
-rw-r--r--  drivers/net/ethernet/airoha/airoha_ppe.c | 233
-rw-r--r--  drivers/net/ethernet/airoha/airoha_regs.h | 7
-rw-r--r--  drivers/net/ethernet/amazon/ena/ena_netdev.c | 2
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-common.h | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-dcb.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-desc.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-dev.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-i2c.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-main.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-platform.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe-ptp.c | 117
-rw-r--r--  drivers/net/ethernet/amd/xgbe/xgbe.h | 117
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 201
-rw-r--r--  drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3
-rw-r--r--  drivers/net/ethernet/brocade/bna/bnad.c | 2
-rw-r--r--  drivers/net/ethernet/cortina/gemini.c | 37
-rw-r--r--  drivers/net/ethernet/google/gve/gve_main.c | 2
-rw-r--r--  drivers/net/ethernet/intel/Kconfig | 2
-rw-r--r--  drivers/net/ethernet/intel/e1000e/e1000.h | 2
-rw-r--r--  drivers/net/ethernet/intel/e1000e/netdev.c | 75
-rw-r--r--  drivers/net/ethernet/intel/i40e/i40e_common.c | 7
-rw-r--r--  drivers/net/ethernet/intel/ice/ice.h | 61
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_common.c | 22
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_common.h | 3
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_eswitch.c | 6
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ethtool.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 2
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lib.c | 71
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_lib.h | 3
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_main.c | 63
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp.c | 65
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp_hw.c | 82
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_ptp_hw.h | 5
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_repr.c | 10
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sbq_cmd.h | 11
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_sriov.c | 4
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_switch.c | 4
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_tc_lib.c | 266
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_tc_lib.h | 11
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_txrx.c | 17
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_type.h | 1
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_lib.c | 26
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_vf_lib.h | 12
-rw-r--r--  drivers/net/ethernet/intel/ice/ice_virtchnl.c | 53
-rw-r--r--  drivers/net/ethernet/intel/igb/igb_ptp.c | 20
-rw-r--r--  drivers/net/ethernet/intel/igc/igc_main.c | 3
-rw-r--r--  drivers/net/ethernet/intel/igc/igc_ptp.c | 14
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/Makefile | 3
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/devlink/devlink.c | 557
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/devlink/devlink.h | 10
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe.h | 21
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c | 1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c | 1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c | 56
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 1310
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h | 18
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 86
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 12
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.c | 707
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.h | 12
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c | 10
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 269
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 4
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 16
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 5
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h | 161
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c | 1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 1
-rw-r--r--  drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c | 2
-rw-r--r--  drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c | 2
-rw-r--r--  drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h | 2
-rw-r--r--  drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/Makefile | 3
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 26
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c | 56
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c | 467
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h | 69
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c | 98
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h | 9
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c | 8
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c | 71
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h | 2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h | 1
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c | 420
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h | 26
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c | 515
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h | 103
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c | 69
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h | 12
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic.h | 3
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 34
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c | 178
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c | 335
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h | 48
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic_netdev.c | 47
-rw-r--r--  drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 1
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_main.c | 3
-rw-r--r--  drivers/net/ethernet/microchip/lan743x_ptp.c | 14
-rw-r--r--  drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c | 14
-rw-r--r--  drivers/net/ethernet/mscc/ocelot_ptp.c | 5
-rw-r--r--  drivers/net/ethernet/mscc/ocelot_vsc7514.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfd3/dp.c | 2
-rw-r--r--  drivers/net/ethernet/netronome/nfp/nfdk/dp.c | 2
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed.h | 1
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h | 31
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_debug.c | 25
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_dev.c | 19
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_hsi.h | 52
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_hw.c | 11
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_hw.h | 9
-rw-r--r--  drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c | 138
-rw-r--r--  drivers/net/ethernet/realtek/r8169_main.c | 26
-rw-r--r--  drivers/net/ethernet/realtek/r8169_phy_config.c | 36
-rw-r--r--  drivers/net/ethernet/renesas/ravb_ptp.c | 11
-rw-r--r--  drivers/net/ethernet/rocker/rocker_ofdpa.c | 2
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c | 25
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 42
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 14
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c | 46
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c | 9
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 11
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 43
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c | 88
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 57
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 10
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/dwmac4.h | 1
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 11
-rw-r--r--  drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h | 3
-rw-r--r--  drivers/net/ethernet/ti/am65-cpsw-nuss.c | 28
-rw-r--r--  drivers/net/ethernet/wangxun/Kconfig | 4
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/Makefile | 2
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_hw.c | 302
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_hw.h | 4
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_lib.c | 128
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_mbx.c | 176
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_mbx.h | 77
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_sriov.c | 909
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_sriov.h | 18
-rw-r--r--  drivers/net/ethernet/wangxun/libwx/wx_type.h | 93
-rw-r--r--  drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 93
-rw-r--r--  drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c | 5
-rw-r--r--  drivers/net/ethernet/wangxun/ngbe/ngbe_type.h | 3
-rw-r--r--  drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c | 21
-rw-r--r--  drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 29
-rw-r--r--  drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c | 6
-rw-r--r--  drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 7
-rw-r--r--  drivers/net/geneve.c | 16
-rw-r--r--  drivers/net/gtp.c | 18
-rw-r--r--  drivers/net/hamradio/baycom_epp.c | 5
-rw-r--r--  drivers/net/ipvlan/ipvlan_core.c | 2
-rw-r--r--  drivers/net/macvlan.c | 2
-rw-r--r--  drivers/net/mdio/Kconfig | 7
-rw-r--r--  drivers/net/mdio/Makefile | 1
-rw-r--r--  drivers/net/mdio/mdio-realtek-rtl9300.c | 522
-rw-r--r--  drivers/net/ovpn/Makefile | 22
-rw-r--r--  drivers/net/ovpn/bind.c | 55
-rw-r--r--  drivers/net/ovpn/bind.h | 101
-rw-r--r--  drivers/net/ovpn/crypto.c | 210
-rw-r--r--  drivers/net/ovpn/crypto.h | 145
-rw-r--r--  drivers/net/ovpn/crypto_aead.c | 383
-rw-r--r--  drivers/net/ovpn/crypto_aead.h | 29
-rw-r--r--  drivers/net/ovpn/io.c | 446
-rw-r--r--  drivers/net/ovpn/io.h | 34
-rw-r--r--  drivers/net/ovpn/main.c | 274
-rw-r--r--  drivers/net/ovpn/main.h | 14
-rw-r--r--  drivers/net/ovpn/netlink-gen.c | 213
-rw-r--r--  drivers/net/ovpn/netlink-gen.h | 41
-rw-r--r--  drivers/net/ovpn/netlink.c | 1258
-rw-r--r--  drivers/net/ovpn/netlink.h | 18
-rw-r--r--  drivers/net/ovpn/ovpnpriv.h | 55
-rw-r--r--  drivers/net/ovpn/peer.c | 1365
-rw-r--r--  drivers/net/ovpn/peer.h | 163
-rw-r--r--  drivers/net/ovpn/pktid.c | 129
-rw-r--r--  drivers/net/ovpn/pktid.h | 86
-rw-r--r--  drivers/net/ovpn/proto.h | 118
-rw-r--r--  drivers/net/ovpn/skb.h | 61
-rw-r--r--  drivers/net/ovpn/socket.c | 233
-rw-r--r--  drivers/net/ovpn/socket.h | 49
-rw-r--r--  drivers/net/ovpn/stats.c | 21
-rw-r--r--  drivers/net/ovpn/stats.h | 47
-rw-r--r--  drivers/net/ovpn/tcp.c | 598
-rw-r--r--  drivers/net/ovpn/tcp.h | 36
-rw-r--r--  drivers/net/ovpn/udp.c | 439
-rw-r--r--  drivers/net/ovpn/udp.h | 25
-rw-r--r--  drivers/net/phy/Kconfig | 2
-rw-r--r--  drivers/net/phy/air_en8811h.c | 103
-rw-r--r--  drivers/net/phy/dp83640.c | 13
-rw-r--r--  drivers/net/phy/mediatek/Kconfig | 5
-rw-r--r--  drivers/net/phy/mediatek/mtk-ge-soc.c | 61
-rw-r--r--  drivers/net/phy/micrel.c | 17
-rw-r--r--  drivers/net/phy/microchip_rds_ptp.c | 5
-rw-r--r--  drivers/net/phy/nxp-c45-tja11xx.c | 13
-rw-r--r--  drivers/net/phy/phy_device.c | 12
-rw-r--r--  drivers/net/usb/asix.h | 1
-rw-r--r--  drivers/net/usb/asix_common.c | 22
-rw-r--r--  drivers/net/usb/asix_devices.c | 17
-rw-r--r--  drivers/net/vxlan/vxlan_core.c | 18
-rw-r--r--  drivers/net/vxlan/vxlan_vnifilter.c | 5
-rw-r--r--  drivers/ptp/ptp_chardev.c | 16
-rw-r--r--  drivers/ptp/ptp_clockmatrix.c | 14
-rw-r--r--  drivers/ptp/ptp_fc3.c | 1
-rw-r--r--  drivers/ptp/ptp_idt82p33.c | 15
-rw-r--r--  fs/afs/Kconfig | 1
-rw-r--r--  fs/afs/Makefile | 1
-rw-r--r--  fs/afs/cm_security.c | 340
-rw-r--r--  fs/afs/internal.h | 20
-rw-r--r--  fs/afs/main.c | 1
-rw-r--r--  fs/afs/misc.c | 27
-rw-r--r--  fs/afs/rxrpc.c | 40
-rw-r--r--  fs/afs/server.c | 2
-rw-r--r--  include/crypto/krb5.h | 5
-rw-r--r--  include/keys/rxrpc-type.h | 17
-rw-r--r--  include/linux/dccp.h | 289
-rw-r--r--  include/linux/ethtool.h | 9
-rw-r--r--  include/linux/mm.h | 58
-rw-r--r--  include/linux/netdevice.h | 22
-rw-r--r--  include/linux/phy.h | 6
-rw-r--r--  include/linux/poison.h | 4
-rw-r--r--  include/linux/ptp_clock_kernel.h | 18
-rw-r--r--  include/linux/skbuff.h | 5
-rw-r--r--  include/linux/socket.h | 2
-rw-r--r--  include/linux/stmmac.h | 1
-rw-r--r--  include/linux/tfrc.h | 51
-rw-r--r--  include/linux/udp.h | 16
-rw-r--r--  include/net/af_rxrpc.h | 51
-rw-r--r--  include/net/dropreason-core.h | 7
-rw-r--r--  include/net/inet_hashtables.h | 7
-rw-r--r--  include/net/ip_tunnels.h | 7
-rw-r--r--  include/net/mptcp.h | 13
-rw-r--r--  include/net/net_namespace.h | 4
-rw-r--r--  include/net/netdev_lock.h | 16
-rw-r--r--  include/net/netdev_queues.h | 4
-rw-r--r--  include/net/netdev_rx_queue.h | 6
-rw-r--r--  include/net/netlink.h | 16
-rw-r--r--  include/net/netns/ipv4.h | 11
-rw-r--r--  include/net/page_pool/helpers.h | 11
-rw-r--r--  include/net/page_pool/types.h | 6
-rw-r--r--  include/net/rps.h | 5
-rw-r--r--  include/net/rstreason.h | 2
-rw-r--r--  include/net/secure_seq.h | 4
-rw-r--r--  include/net/sock.h | 1
-rw-r--r--  include/net/tcp.h | 3
-rw-r--r--  include/net/udp.h | 1
-rw-r--r--  include/net/udp_tunnel.h | 15
-rw-r--r--  include/net/xdp.h | 1
-rw-r--r--  include/trace/events/afs.h | 11
-rw-r--r--  include/trace/events/rxrpc.h | 163
-rw-r--r--  include/trace/events/sock.h | 1
-rw-r--r--  include/trace/events/sunrpc.h | 2
-rw-r--r--  include/trace/events/tcp.h | 24
-rw-r--r--  include/uapi/linux/if_bridge.h | 10
-rw-r--r--  include/uapi/linux/if_link.h | 15
-rw-r--r--  include/uapi/linux/ovpn.h | 109
-rw-r--r--  include/uapi/linux/rxrpc.h | 77
-rw-r--r--  include/uapi/linux/snmp.h | 1
-rw-r--r--  include/uapi/linux/udp.h | 1
-rw-r--r--  kernel/bpf/btf.c | 1
-rw-r--r--  kernel/configs/debug.config | 5
-rw-r--r--  mm/page_alloc.c | 8
-rw-r--r--  net/Kconfig | 1
-rw-r--r--  net/Makefile | 1
-rw-r--r--  net/bridge/br.c | 22
-rw-r--r--  net/bridge/br_arp_nd_proxy.c | 7
-rw-r--r--  net/bridge/br_input.c | 3
-rw-r--r--  net/bridge/br_mdb.c | 28
-rw-r--r--  net/bridge/br_private.h | 30
-rw-r--r--  net/bridge/br_switchdev.c | 13
-rw-r--r--  net/core/datagram.c | 5
-rw-r--r--  net/core/dev.c | 93
-rw-r--r--  net/core/dev.h | 22
-rw-r--r--  net/core/fib_rules.c | 14
-rw-r--r--  net/core/lock_debug.c | 4
-rw-r--r--  net/core/neighbour.c | 8
-rw-r--r--  net/core/net-procfs.c | 9
-rw-r--r--  net/core/net_namespace.c | 181
-rw-r--r--  net/core/netdev-genl.c | 73
-rw-r--r--  net/core/netmem_priv.h | 33
-rw-r--r--  net/core/page_pool.c | 81
-rw-r--r--  net/core/pktgen.c | 103
-rw-r--r--  net/core/rtnetlink.c | 19
-rw-r--r--  net/core/secure_seq.c | 42
-rw-r--r--  net/core/skbuff.c | 34
-rw-r--r--  net/core/sock.c | 36
-rw-r--r--  net/core/sock_diag.c | 2
-rw-r--r--  net/core/sysctl_net_core.c | 6
-rw-r--r--  net/core/xdp.c | 16
-rw-r--r--  net/dccp/Kconfig | 46
-rw-r--r--  net/dccp/Makefile | 30
-rw-r--r--  net/dccp/ackvec.c | 403
-rw-r--r--  net/dccp/ackvec.h | 136
-rw-r--r--  net/dccp/ccid.c | 219
-rw-r--r--  net/dccp/ccid.h | 262
-rw-r--r--  net/dccp/ccids/Kconfig | 55
-rw-r--r--  net/dccp/ccids/ccid2.c | 794
-rw-r--r--  net/dccp/ccids/ccid2.h | 121
-rw-r--r--  net/dccp/ccids/ccid3.c | 866
-rw-r--r--  net/dccp/ccids/ccid3.h | 148
-rw-r--r--  net/dccp/ccids/lib/loss_interval.c | 184
-rw-r--r--  net/dccp/ccids/lib/loss_interval.h | 69
-rw-r--r--  net/dccp/ccids/lib/packet_history.c | 439
-rw-r--r--  net/dccp/ccids/lib/packet_history.h | 142
-rw-r--r--  net/dccp/ccids/lib/tfrc.c | 46
-rw-r--r--  net/dccp/ccids/lib/tfrc.h | 73
-rw-r--r--  net/dccp/ccids/lib/tfrc_equation.c | 702
-rw-r--r--  net/dccp/dccp.h | 483
-rw-r--r--  net/dccp/diag.c | 85
-rw-r--r--  net/dccp/feat.c | 1581
-rw-r--r--  net/dccp/feat.h | 133
-rw-r--r--  net/dccp/input.c | 739
-rw-r--r--  net/dccp/ipv4.c | 1101
-rw-r--r--  net/dccp/ipv6.c | 1174
-rw-r--r--  net/dccp/ipv6.h | 27
-rw-r--r--  net/dccp/minisocks.c | 266
-rw-r--r--  net/dccp/options.c | 609
-rw-r--r--  net/dccp/output.c | 708
-rw-r--r--  net/dccp/proto.c | 1293
-rw-r--r--  net/dccp/qpolicy.c | 136
-rw-r--r--  net/dccp/sysctl.c | 107
-rw-r--r--  net/dccp/timer.c | 272
-rw-r--r--  net/dccp/trace.h | 82
-rw-r--r--  net/devlink/dev.c | 2
-rw-r--r--  net/hsr/hsr_device.c | 5
-rw-r--r--  net/hsr/hsr_main.c | 9
-rw-r--r--  net/hsr/hsr_main.h | 1
-rw-r--r--  net/hsr/hsr_slave.c | 2
-rw-r--r--  net/ipv4/Kconfig | 2
-rw-r--r--  net/ipv4/af_inet.c | 5
-rw-r--r--  net/ipv4/devinet.c | 4
-rw-r--r--  net/ipv4/fib_frontend.c | 4
-rw-r--r--  net/ipv4/inet_connection_sock.c | 23
-rw-r--r--  net/ipv4/inet_diag.c | 2
-rw-r--r--  net/ipv4/inet_hashtables.c | 30
-rw-r--r--  net/ipv4/inet_timewait_sock.c | 4
-rw-r--r--  net/ipv4/ip_gre.c | 27
-rw-r--r--  net/ipv4/ip_tunnel.c | 25
-rw-r--r--  net/ipv4/ip_vti.c | 9
-rw-r--r--  net/ipv4/ipip.c | 9
-rw-r--r--  net/ipv4/ipmr.c | 8
-rw-r--r--  net/ipv4/nexthop.c | 13
-rw-r--r--  net/ipv4/proc.c | 1
-rw-r--r--  net/ipv4/route.c | 7
-rw-r--r--  net/ipv4/tcp.c | 2
-rw-r--r--  net/ipv4/tcp_ipv4.c | 3
-rw-r--r--  net/ipv4/tcp_minisocks.c | 9
-rw-r--r--  net/ipv4/udp.c | 52
-rw-r--r--  net/ipv4/udp_offload.c | 172
-rw-r--r--  net/ipv4/udp_tunnel_core.c | 15
-rw-r--r--  net/ipv6/addrconf.c | 12
-rw-r--r--  net/ipv6/addrlabel.c | 8
-rw-r--r--  net/ipv6/af_inet6.c | 2
-rw-r--r--  net/ipv6/inet6_connection_sock.c | 2
-rw-r--r--  net/ipv6/ioam6_iptunnel.c | 76
-rw-r--r--  net/ipv6/ip6_gre.c | 22
-rw-r--r--  net/ipv6/ip6_output.c | 2
-rw-r--r--  net/ipv6/ip6_tunnel.c | 24
-rw-r--r--  net/ipv6/ip6_vti.c | 27
-rw-r--r--  net/ipv6/route.c | 4
-rw-r--r--  net/ipv6/sit.c | 23
-rw-r--r--  net/ipv6/tcp_ipv6.c | 3
-rw-r--r--  net/ipv6/udp.c | 2
-rw-r--r--  net/ipv6/udp_offload.c | 5
-rw-r--r--  net/mpls/af_mpls.c | 8
-rw-r--r--  net/mptcp/mib.c | 1
-rw-r--r--  net/mptcp/mib.h | 1
-rw-r--r--  net/mptcp/pm.c | 5
-rw-r--r--  net/mptcp/protocol.c | 4
-rw-r--r--  net/mptcp/protocol.h | 1
-rw-r--r--  net/mptcp/sched.c | 35
-rw-r--r--  net/mptcp/subflow.c | 12
-rw-r--r--  net/ncsi/internal.h | 21
-rw-r--r--  net/ncsi/ncsi-pkt.h | 23
-rw-r--r--  net/ncsi/ncsi-rsp.c | 21
-rw-r--r--  net/rxrpc/Kconfig | 23
-rw-r--r--  net/rxrpc/Makefile | 6
-rw-r--r--  net/rxrpc/af_rxrpc.c | 93
-rw-r--r--  net/rxrpc/ar-internal.h | 82
-rw-r--r--  net/rxrpc/call_accept.c | 34
-rw-r--r--  net/rxrpc/call_object.c | 24
-rw-r--r--  net/rxrpc/conn_event.c | 134
-rw-r--r--  net/rxrpc/conn_object.c | 2
-rw-r--r--  net/rxrpc/insecure.c | 13
-rw-r--r--  net/rxrpc/io_thread.c | 12
-rw-r--r--  net/rxrpc/key.c | 187
-rw-r--r--  net/rxrpc/oob.c | 379
-rw-r--r--  net/rxrpc/output.c | 60
-rw-r--r--  net/rxrpc/peer_object.c | 22
-rw-r--r--  net/rxrpc/protocol.h | 20
-rw-r--r--  net/rxrpc/recvmsg.c | 132
-rw-r--r--  net/rxrpc/rxgk.c | 1367
-rw-r--r--  net/rxrpc/rxgk_app.c | 285
-rw-r--r--  net/rxrpc/rxgk_common.h | 139
-rw-r--r--  net/rxrpc/rxgk_kdf.c | 288
-rw-r--r--  net/rxrpc/rxkad.c | 296
-rw-r--r--  net/rxrpc/rxperf.c | 78
-rw-r--r--  net/rxrpc/security.c | 3
-rw-r--r--  net/rxrpc/sendmsg.c | 25
-rw-r--r--  net/rxrpc/server_key.c | 42
-rw-r--r--  net/sched/act_api.c | 16
-rw-r--r--  net/tipc/link.c | 2
-rw-r--r--  net/tipc/node.c | 2
-rw-r--r--  net/unix/af_unix.c | 8
-rw-r--r--  net/xdp/xsk_buff_pool.c | 6
-rw-r--r--  net/xfrm/xfrm_interface_core.c | 34
-rw-r--r--  samples/bpf/sockex2_kern.c | 1
-rwxr-xr-x  scripts/checkpatch.pl | 2
-rw-r--r--  security/lsm_audit.c | 19
-rw-r--r--  security/selinux/hooks.c | 41
-rw-r--r--  security/selinux/include/classmap.h | 2
-rw-r--r--  security/selinux/nlmsgtab.c | 1
-rw-r--r--  security/smack/smack_lsm.c | 9
-rw-r--r--  tools/net/ynl/Makefile.deps | 2
-rw-r--r--  tools/net/ynl/generated/Makefile | 2
-rw-r--r--  tools/net/ynl/lib/ynl-priv.h | 3
-rw-r--r--  tools/net/ynl/lib/ynl.c | 59
-rw-r--r--  tools/net/ynl/lib/ynl.h | 3
-rwxr-xr-x  tools/net/ynl/pyynl/ynl_gen_c.py | 55
-rw-r--r--  tools/net/ynl/samples/.gitignore | 4
-rw-r--r--  tools/net/ynl/samples/rt-addr.c | 80
-rw-r--r--  tools/net/ynl/samples/rt-route.c | 80
-rw-r--r--  tools/testing/selftests/Makefile | 1
-rw-r--r--  tools/testing/selftests/net/config | 1
-rw-r--r--  tools/testing/selftests/net/lib/py/ynl.py | 4
-rw-r--r--  tools/testing/selftests/net/mptcp/mptcp_connect.c | 21
-rw-r--r--  tools/testing/selftests/net/mptcp/mptcp_diag.c | 7
-rwxr-xr-x  tools/testing/selftests/net/mptcp/mptcp_join.sh | 26
-rw-r--r--  tools/testing/selftests/net/ovpn/.gitignore | 2
-rw-r--r--  tools/testing/selftests/net/ovpn/Makefile | 31
-rw-r--r--  tools/testing/selftests/net/ovpn/common.sh | 92
-rw-r--r--  tools/testing/selftests/net/ovpn/config | 10
-rw-r--r--  tools/testing/selftests/net/ovpn/data64.key | 5
-rw-r--r--  tools/testing/selftests/net/ovpn/ovpn-cli.c | 2376
-rw-r--r--  tools/testing/selftests/net/ovpn/tcp_peers.txt | 5
-rwxr-xr-x  tools/testing/selftests/net/ovpn/test-chachapoly.sh | 9
-rwxr-xr-x  tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh | 9
-rwxr-xr-x  tools/testing/selftests/net/ovpn/test-close-socket.sh | 45
-rwxr-xr-x  tools/testing/selftests/net/ovpn/test-float.sh | 9
-rwxr-xr-x  tools/testing/selftests/net/ovpn/test-tcp.sh | 9
-rwxr-xr-x  tools/testing/selftests/net/ovpn/test.sh | 113
-rw-r--r--  tools/testing/selftests/net/ovpn/udp_peers.txt | 5
-rw-r--r--  tools/testing/selftests/net/reuseport_addr_any.c | 36
-rwxr-xr-x  tools/testing/selftests/net/test_bridge_neigh_suppress.sh | 125
-rwxr-xr-x  tools/testing/selftests/tc-testing/tdc.sh | 4
509 files changed, 25517 insertions, 19555 deletions
diff --git a/Documentation/admin-guide/bug-hunting.rst b/Documentation/admin-guide/bug-hunting.rst
index ce6f4e8ca487..30858757c9f2 100644
--- a/Documentation/admin-guide/bug-hunting.rst
+++ b/Documentation/admin-guide/bug-hunting.rst
@@ -196,7 +196,7 @@ will see the assembler code for the routine shown, but if your kernel has
debug symbols the C code will also be available. (Debug symbols can be enabled
in the kernel hacking menu of the menu configuration.) For example::
- $ objdump -r -S -l --disassemble net/dccp/ipv4.o
+ $ objdump -r -S -l --disassemble net/ipv4/tcp.o
.. note::
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 45819b235800..61e51ea58a98 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -197,7 +197,7 @@ properties:
description:
Link speed.
$ref: /schemas/types.yaml#/definitions/uint32
- enum: [10, 100, 1000, 2500, 10000]
+ enum: [10, 100, 1000, 2500, 5000, 10000]
full-duplex:
$ref: /schemas/types.yaml#/definitions/flag
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index b11894fbaec4..7b3d948f187d 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -143,6 +143,8 @@ properties:
label:
description: label associated with this port
+ fixed-link: true
+
ti,mac-only:
$ref: /schemas/types.yaml#/definitions/flag
description:
diff --git a/Documentation/netlink/specs/ovpn.yaml b/Documentation/netlink/specs/ovpn.yaml
new file mode 100644
index 000000000000..096c51f0c69a
--- /dev/null
+++ b/Documentation/netlink/specs/ovpn.yaml
@@ -0,0 +1,367 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+#
+# Copyright (c) 2024-2025, OpenVPN Inc.
+#
+
+name: ovpn
+
+protocol: genetlink
+
+doc: Netlink protocol to control OpenVPN network devices
+
+definitions:
+ -
+ type: const
+ name: nonce-tail-size
+ value: 8
+ -
+ type: enum
+ name: cipher-alg
+ entries: [ none, aes-gcm, chacha20-poly1305 ]
+ -
+ type: enum
+ name: del-peer-reason
+ entries:
+ - teardown
+ - userspace
+ - expired
+ - transport-error
+ - transport-disconnect
+ -
+ type: enum
+ name: key-slot
+ entries: [ primary, secondary ]
+
+attribute-sets:
+ -
+ name: peer
+ attributes:
+ -
+ name: id
+ type: u32
+ doc: >-
+ The unique ID of the peer in the device context. To be used to identify
+ peers during operations for a specific device
+ checks:
+ max: 0xFFFFFF
+ -
+ name: remote-ipv4
+ type: u32
+ doc: The remote IPv4 address of the peer
+ byte-order: big-endian
+ display-hint: ipv4
+ -
+ name: remote-ipv6
+ type: binary
+ doc: The remote IPv6 address of the peer
+ display-hint: ipv6
+ checks:
+ exact-len: 16
+ -
+ name: remote-ipv6-scope-id
+ type: u32
+ doc: The scope id of the remote IPv6 address of the peer (RFC2553)
+ -
+ name: remote-port
+ type: u16
+ doc: The remote port of the peer
+ byte-order: big-endian
+ checks:
+ min: 1
+ -
+ name: socket
+ type: u32
+ doc: The socket to be used to communicate with the peer
+ -
+ name: socket-netnsid
+ type: s32
+ doc: The ID of the netns the socket assigned to this peer lives in
+ -
+ name: vpn-ipv4
+ type: u32
+ doc: The IPv4 address assigned to the peer by the server
+ byte-order: big-endian
+ display-hint: ipv4
+ -
+ name: vpn-ipv6
+ type: binary
+ doc: The IPv6 address assigned to the peer by the server
+ display-hint: ipv6
+ checks:
+ exact-len: 16
+ -
+ name: local-ipv4
+ type: u32
+ doc: The local IPv4 to be used to send packets to the peer (UDP only)
+ byte-order: big-endian
+ display-hint: ipv4
+ -
+ name: local-ipv6
+ type: binary
+ doc: The local IPv6 to be used to send packets to the peer (UDP only)
+ display-hint: ipv6
+ checks:
+ exact-len: 16
+ -
+ name: local-port
+ type: u16
+ doc: The local port to be used to send packets to the peer (UDP only)
+ byte-order: big-endian
+ checks:
+ min: 1
+ -
+ name: keepalive-interval
+ type: u32
+ doc: >-
+ The number of seconds after which a keep alive message is sent to the
+ peer
+ -
+ name: keepalive-timeout
+ type: u32
+ doc: >-
+ The number of seconds from the last activity after which the peer is
+ assumed dead
+ -
+ name: del-reason
+ type: u32
+ doc: The reason why a peer was deleted
+ enum: del-peer-reason
+ -
+ name: vpn-rx-bytes
+ type: uint
+ doc: Number of bytes received over the tunnel
+ -
+ name: vpn-tx-bytes
+ type: uint
+ doc: Number of bytes transmitted over the tunnel
+ -
+ name: vpn-rx-packets
+ type: uint
+ doc: Number of packets received over the tunnel
+ -
+ name: vpn-tx-packets
+ type: uint
+ doc: Number of packets transmitted over the tunnel
+ -
+ name: link-rx-bytes
+ type: uint
+ doc: Number of bytes received at the transport level
+ -
+ name: link-tx-bytes
+ type: uint
+ doc: Number of bytes transmitted at the transport level
+ -
+ name: link-rx-packets
+ type: uint
+ doc: Number of packets received at the transport level
+ -
+ name: link-tx-packets
+ type: uint
+ doc: Number of packets transmitted at the transport level
+ -
+ name: keyconf
+ attributes:
+ -
+ name: peer-id
+ type: u32
+ doc: >-
+ The unique ID of the peer in the device context. To be used to
+ identify peers during key operations
+ checks:
+ max: 0xFFFFFF
+ -
+ name: slot
+ type: u32
+ doc: The slot where the key should be stored
+ enum: key-slot
+ -
+ name: key-id
+ doc: >-
+ The unique ID of the key in the peer context. Used to fetch the
+ correct key upon decryption
+ type: u32
+ checks:
+ max: 7
+ -
+ name: cipher-alg
+ type: u32
+ doc: The cipher to be used when communicating with the peer
+ enum: cipher-alg
+ -
+ name: encrypt-dir
+ type: nest
+ doc: Key material for encrypt direction
+ nested-attributes: keydir
+ -
+ name: decrypt-dir
+ type: nest
+ doc: Key material for decrypt direction
+ nested-attributes: keydir
+ -
+ name: keydir
+ attributes:
+ -
+ name: cipher-key
+ type: binary
+ doc: The actual key to be used by the cipher
+ checks:
+ max-len: 256
+ -
+ name: nonce-tail
+ type: binary
+ doc: >-
+ Random nonce to be concatenated to the packet ID, in order to
+ obtain the actual cipher IV
+ checks:
+ exact-len: nonce-tail-size
+ -
+ name: ovpn
+ attributes:
+ -
+ name: ifindex
+ type: u32
+ doc: Index of the ovpn interface to operate on
+ -
+ name: peer
+ type: nest
+ doc: >-
+ The peer object containing the attributes of interest for the specific
+ operation
+ nested-attributes: peer
+ -
+ name: keyconf
+ type: nest
+ doc: Peer specific cipher configuration
+ nested-attributes: keyconf
+
+operations:
+ list:
+ -
+ name: peer-new
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Add a remote peer
+ do:
+ pre: ovpn-nl-pre-doit
+ post: ovpn-nl-post-doit
+ request:
+ attributes:
+ - ifindex
+ - peer
+ -
+ name: peer-set
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Modify a remote peer
+ do:
+ pre: ovpn-nl-pre-doit
+ post: ovpn-nl-post-doit
+ request:
+ attributes:
+ - ifindex
+ - peer
+ -
+ name: peer-get
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Retrieve data about existing remote peers (or a specific one)
+ do:
+ pre: ovpn-nl-pre-doit
+ post: ovpn-nl-post-doit
+ request:
+ attributes:
+ - ifindex
+ - peer
+ reply:
+ attributes:
+ - peer
+ dump:
+ request:
+ attributes:
+ - ifindex
+ reply:
+ attributes:
+ - peer
+ -
+ name: peer-del
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Delete existing remote peer
+ do:
+ pre: ovpn-nl-pre-doit
+ post: ovpn-nl-post-doit
+ request:
+ attributes:
+ - ifindex
+ - peer
+ -
+ name: peer-del-ntf
+ doc: Notification about a peer being deleted
+ notify: peer-get
+ mcgrp: peers
+
+ -
+ name: key-new
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Add a cipher key for a specific peer
+ do:
+ pre: ovpn-nl-pre-doit
+ post: ovpn-nl-post-doit
+ request:
+ attributes:
+ - ifindex
+ - keyconf
+ -
+ name: key-get
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Retrieve non-sensitive data about peer key and cipher
+ do:
+ pre: ovpn-nl-pre-doit
+ post: ovpn-nl-post-doit
+ request:
+ attributes:
+ - ifindex
+ - keyconf
+ reply:
+ attributes:
+ - keyconf
+ -
+ name: key-swap
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Swap primary and secondary session keys for a specific peer
+ do:
+ pre: ovpn-nl-pre-doit
+ post: ovpn-nl-post-doit
+ request:
+ attributes:
+ - ifindex
+ - keyconf
+ -
+ name: key-swap-ntf
+ notify: key-get
+ doc: >-
+ Notification about key having exhausted its IV space and requiring
+ renegotiation
+ mcgrp: peers
+ -
+ name: key-del
+ attribute-set: ovpn
+ flags: [ admin-perm ]
+ doc: Delete cipher key for a specific peer
+ do:
+ pre: ovpn-nl-pre-doit
+ post: ovpn-nl-post-doit
+ request:
+ attributes:
+ - ifindex
+ - keyconf
+
+mcast-groups:
+ list:
+ -
+ name: peers
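As a quick illustration of driving this family from userspace, below is a
minimal libnl-3 sketch that dumps all peers of an ovpn interface. The
``OVPN_*`` constant values are assumptions derived from the attribute and
operation order in the spec above; the authoritative definitions live in the
generated ``include/uapi/linux/ovpn.h`` added by this series.

.. code:: c

   /* Hedged sketch: dump the peers of an ovpn interface via the "ovpn"
    * generic netlink family using libnl-3. Error handling trimmed.
    * Build: cc peer-dump.c $(pkg-config --cflags --libs libnl-genl-3.0)
    */
   #include <net/if.h>
   #include <netlink/netlink.h>
   #include <netlink/genl/genl.h>
   #include <netlink/genl/ctrl.h>

   #define OVPN_A_IFINDEX    1  /* assumed values, see the generated uapi header */
   #define OVPN_CMD_PEER_GET 3

   int main(int argc, char **argv)
   {
       struct nl_sock *sk = nl_socket_alloc();
       struct nl_msg *msg;
       int family;

       if (argc < 2 || !sk)
           return 1;

       genl_connect(sk);
       family = genl_ctrl_resolve(sk, "ovpn");
       if (family < 0)
           return 1;

       msg = nlmsg_alloc();
       /* NLM_F_DUMP selects the "dump" op above: only ifindex is
        * accepted in the request, each reply carries one peer nest. */
       genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0,
                   NLM_F_REQUEST | NLM_F_DUMP, OVPN_CMD_PEER_GET, 1);
       nla_put_u32(msg, OVPN_A_IFINDEX, if_nametoindex(argv[1]));

       nl_send_auto(sk, msg);
       nl_recvmsgs_default(sk); /* register a callback to parse the replies */

       nlmsg_free(msg);
       nl_socket_free(sk);
       return 0;
   }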
diff --git a/Documentation/netlink/specs/rt_addr.yaml b/Documentation/netlink/specs/rt-addr.yaml
index df6b23f06a22..4f86aa1075da 100644
--- a/Documentation/netlink/specs/rt_addr.yaml
+++ b/Documentation/netlink/specs/rt-addr.yaml
@@ -2,6 +2,7 @@
name: rt-addr
protocol: netlink-raw
+uapi-header: linux/rtnetlink.h
protonum: 0
doc:
@@ -49,6 +50,8 @@ definitions:
-
name: ifa-flags
type: flags
+ name-prefix: ifa-f-
+ enum-name:
entries:
-
name: secondary
@@ -124,6 +127,7 @@ attribute-sets:
operations:
fixed-header: ifaddrmsg
enum-model: directional
+ name-prefix: rtm-
list:
-
name: newaddr
@@ -133,11 +137,6 @@ operations:
request:
value: 20
attributes: &ifaddr-all
- - ifa-family
- - ifa-flags
- - ifa-prefixlen
- - ifa-scope
- - ifa-index
- address
- label
- local
@@ -150,11 +149,6 @@ operations:
request:
value: 21
attributes:
- - ifa-family
- - ifa-flags
- - ifa-prefixlen
- - ifa-scope
- - ifa-index
- address
- local
-
@@ -164,8 +158,7 @@ operations:
dump:
request:
value: 22
- attributes:
- - ifa-index
+ attributes: []
reply:
value: 20
attributes: *ifaddr-all
@@ -177,9 +170,7 @@ operations:
do:
request:
value: 58
- attributes:
- - ifa-family
- - ifa-index
+ attributes: []
reply:
value: 58
attributes: &mcaddr-attrs
@@ -188,8 +179,7 @@ operations:
dump:
request:
value: 58
- attributes:
- - ifa-family
+ attributes: []
reply:
value: 58
attributes: *mcaddr-attrs
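With the filtering attributes dropped, the wire format of the ``getaddr``
dump described by this spec is nothing more than an ``nlmsghdr`` plus the
``ifaddrmsg`` fixed header, and each ``RTM_NEWADDR`` reply carries the
``ifa-*`` attributes. A minimal raw-netlink sketch, using only the stock
rtnetlink uapi (error handling trimmed for brevity):

.. code:: c

   #include <linux/netlink.h>
   #include <linux/rtnetlink.h>
   #include <stdio.h>
   #include <sys/socket.h>
   #include <unistd.h>

   int main(void)
   {
       struct {
           struct nlmsghdr nlh;
           struct ifaddrmsg ifa;
       } req = {
           .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg)),
           .nlh.nlmsg_type = RTM_GETADDR,
           .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
           .ifa.ifa_family = AF_UNSPEC, /* dump all address families */
       };
       char buf[16384];
       int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

       send(fd, &req, req.nlh.nlmsg_len, 0);

       for (;;) {
           ssize_t len = recv(fd, buf, sizeof(buf), 0);
           struct nlmsghdr *nlh = (struct nlmsghdr *)buf;

           for (; NLMSG_OK(nlh, len); nlh = NLMSG_NEXT(nlh, len)) {
               if (nlh->nlmsg_type == NLMSG_DONE) {
                   close(fd);
                   return 0;
               }
               if (nlh->nlmsg_type == RTM_NEWADDR) {
                   struct ifaddrmsg *ifa = NLMSG_DATA(nlh);

                   /* the ifa-* attributes follow the fixed header */
                   printf("ifindex %u prefixlen %u\n",
                          ifa->ifa_index, ifa->ifa_prefixlen);
               }
           }
       }
   }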
diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt-link.yaml
index 6b9d5ee87d93..726dfa083d14 100644
--- a/Documentation/netlink/specs/rt_link.yaml
+++ b/Documentation/netlink/specs/rt-link.yaml
@@ -938,6 +938,12 @@ definitions:
entries:
- name: none
- name: default
+ -
+ name: ovpn-mode
+ type: enum
+ entries:
+ - p2p
+ - mp
attribute-sets:
-
@@ -2279,6 +2285,13 @@ attribute-sets:
-
name: tailroom
type: u16
+ -
+ name: linkinfo-ovpn-attrs
+ attributes:
+ -
+ name: mode
+ type: u8
+ enum: ovpn-mode
sub-messages:
-
@@ -2329,6 +2342,9 @@ sub-messages:
-
value: netkit
attribute-set: linkinfo-netkit-attrs
+ -
+ value: ovpn
+ attribute-set: linkinfo-ovpn-attrs
-
name: linkinfo-member-data-msg
formats:
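The new ``linkinfo-ovpn-attrs`` sub-message is what an
``ip link add ovpn0 type ovpn mode p2p`` request ends up encoding. Below is
a hedged raw-rtnetlink sketch of that request; the ``IFLA_OVPN_MODE`` and
``OVPN_MODE_P2P`` values are assumptions based on this series' uapi
additions and should be taken from the real headers.

.. code:: c

   #include <linux/netlink.h>
   #include <linux/rtnetlink.h>
   #include <linux/if_link.h>
   #include <string.h>
   #include <sys/socket.h>
   #include <unistd.h>

   #define IFLA_OVPN_MODE 1 /* assumed: first attr of linkinfo-ovpn-attrs */
   #define OVPN_MODE_P2P  0 /* assumed: first entry of the ovpn-mode enum */

   #define NLMSG_TAIL(n) \
       ((struct rtattr *)((char *)(n) + NLMSG_ALIGN((n)->nlmsg_len)))

   /* iproute2-style attribute packing */
   static void add_attr(struct nlmsghdr *n, unsigned short type,
                        const void *data, unsigned short alen)
   {
       struct rtattr *rta = NLMSG_TAIL(n);

       rta->rta_type = type;
       rta->rta_len = RTA_LENGTH(alen);
       if (alen)
           memcpy(RTA_DATA(rta), data, alen);
       n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(rta->rta_len);
   }

   int main(void)
   {
       struct {
           struct nlmsghdr nlh;
           struct ifinfomsg ifi;
           char attrs[256];
       } req = {
           .nlh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
           .nlh.nlmsg_type = RTM_NEWLINK,
           .nlh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL,
       };
       struct rtattr *linkinfo, *data;
       unsigned char mode = OVPN_MODE_P2P; /* "mode" is a u8 in the spec */
       int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

       add_attr(&req.nlh, IFLA_IFNAME, "ovpn0", 6);

       linkinfo = NLMSG_TAIL(&req.nlh);            /* open IFLA_LINKINFO */
       add_attr(&req.nlh, IFLA_LINKINFO, NULL, 0);
       add_attr(&req.nlh, IFLA_INFO_KIND, "ovpn", 5);

       data = NLMSG_TAIL(&req.nlh);                /* open IFLA_INFO_DATA */
       add_attr(&req.nlh, IFLA_INFO_DATA, NULL, 0);
       add_attr(&req.nlh, IFLA_OVPN_MODE, &mode, sizeof(mode));
       data->rta_len = (char *)NLMSG_TAIL(&req.nlh) - (char *)data;

       linkinfo->rta_len = (char *)NLMSG_TAIL(&req.nlh) - (char *)linkinfo;

       send(fd, &req, req.nlh.nlmsg_len, 0); /* ACK handling omitted */
       close(fd);
       return 0;
   }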
diff --git a/Documentation/netlink/specs/rt_neigh.yaml b/Documentation/netlink/specs/rt-neigh.yaml
index a843caa72259..a843caa72259 100644
--- a/Documentation/netlink/specs/rt_neigh.yaml
+++ b/Documentation/netlink/specs/rt-neigh.yaml
diff --git a/Documentation/netlink/specs/rt_route.yaml b/Documentation/netlink/specs/rt-route.yaml
index 292469c7d4b9..800f3a823d47 100644
--- a/Documentation/netlink/specs/rt_route.yaml
+++ b/Documentation/netlink/specs/rt-route.yaml
@@ -2,6 +2,7 @@
name: rt-route
protocol: netlink-raw
+uapi-header: linux/rtnetlink.h
protonum: 0
doc:
@@ -11,6 +12,7 @@ definitions:
-
name: rtm-type
name-prefix: rtn-
+ enum-name:
type: enum
entries:
- unspec
@@ -245,21 +247,19 @@ attribute-sets:
operations:
enum-model: directional
+ fixed-header: rtmsg
+ name-prefix: rtm-
list:
-
name: getroute
doc: Dump route information.
attribute-set: route-attrs
- fixed-header: rtmsg
do:
request:
value: 26
attributes:
- - rtm-family
- src
- - rtm-src-len
- dst
- - rtm-dst-len
- iif
- oif
- ip-proto
@@ -271,15 +271,6 @@ operations:
reply:
value: 24
attributes: &all-route-attrs
- - rtm-family
- - rtm-dst-len
- - rtm-src-len
- - rtm-tos
- - rtm-table
- - rtm-protocol
- - rtm-scope
- - rtm-type
- - rtm-flags
- dst
- src
- iif
@@ -311,8 +302,7 @@ operations:
dump:
request:
value: 26
- attributes:
- - rtm-family
+ attributes: []
reply:
value: 24
attributes: *all-route-attrs
@@ -320,7 +310,6 @@ operations:
name: newroute
doc: Create a new route
attribute-set: route-attrs
- fixed-header: rtmsg
do:
request:
value: 24
@@ -329,7 +318,6 @@ operations:
name: delroute
doc: Delete an existing route
attribute-set: route-attrs
- fixed-header: rtmsg
do:
request:
value: 25
diff --git a/Documentation/netlink/specs/rt_rule.yaml b/Documentation/netlink/specs/rt-rule.yaml
index de0938d36541..de0938d36541 100644
--- a/Documentation/netlink/specs/rt_rule.yaml
+++ b/Documentation/netlink/specs/rt-rule.yaml
diff --git a/Documentation/networking/dccp.rst b/Documentation/networking/dccp.rst
deleted file mode 100644
index 91e5c33ba3ff..000000000000
--- a/Documentation/networking/dccp.rst
+++ /dev/null
@@ -1,219 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=============
-DCCP protocol
-=============
-
-
-.. Contents
- - Introduction
- - Missing features
- - Socket options
- - Sysctl variables
- - IOCTLs
- - Other tunables
- - Notes
-
-
-Introduction
-============
-Datagram Congestion Control Protocol (DCCP) is an unreliable, connection
-oriented protocol designed to solve issues present in UDP and TCP, particularly
-for real-time and multimedia (streaming) traffic.
-It divides into a base protocol (RFC 4340) and pluggable congestion control
-modules called CCIDs. Like pluggable TCP congestion control, at least one CCID
-needs to be enabled in order for the protocol to function properly. In the Linux
-implementation, this is the TCP-like CCID2 (RFC 4341). Additional CCIDs, such as
-the TCP-friendly CCID3 (RFC 4342), are optional.
-For a brief introduction to CCIDs and suggestions for choosing a CCID to match
-given applications, see section 10 of RFC 4340.
-
-It has a base protocol and pluggable congestion control IDs (CCIDs).
-
-DCCP is a Proposed Standard (RFC 2026), and the homepage for DCCP as a protocol
-is at http://www.ietf.org/html.charters/dccp-charter.html
-
-
-Missing features
-================
-The Linux DCCP implementation does not currently support all the features that are
-specified in RFCs 4340...42.
-
-The known bugs are at:
-
- http://www.linuxfoundation.org/collaborate/workgroups/networking/todo#DCCP
-
-For more up-to-date versions of the DCCP implementation, please consider using
-the experimental DCCP test tree; instructions for checking this out are on:
-http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp_testing#Experimental_DCCP_source_tree
-
-
-Socket options
-==============
-DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
-a policy ID as argument and can only be set before the connection (i.e. changes
-during an established connection are not supported). Currently, two policies are
-defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
-and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an
-u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
-a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
-be formatted using a cmsg(3) message header filled in as follows::
-
- cmsg->cmsg_level = SOL_DCCP;
- cmsg->cmsg_type = DCCP_SCM_PRIORITY;
- cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */
-
-DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
-value is always interpreted as unbounded queue length. If different from zero,
-the interpretation of this parameter depends on the current dequeuing policy
-(see above): the "simple" policy will enforce a fixed queue size by returning
-EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
-lowest-priority packet first. The default value for this parameter is
-initialised from /proc/sys/net/dccp/default/tx_qlen.
-
-DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
-service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
-the socket will fall back to 0 (which means that no meaningful service code
-is present). On active sockets this is set before connect(); specifying more
-than one code has no effect (all subsequent service codes are ignored). The
-case is different for passive sockets, where multiple service codes (up to 32)
-can be set before calling bind().
-
-DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
-size (application payload size) in bytes, see RFC 4340, section 14.
-
-DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
-supported by the endpoint. The option value is an array of type uint8_t whose
-size is passed as option length. The minimum array size is 4 elements, the
-value returned in the optlen argument always reflects the true number of
-built-in CCIDs.
-
-DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same
-time, combining the operation of the next two socket options. This option is
-preferable over the latter two, since often applications will use the same
-type of CCID for both directions; and mixed use of CCIDs is not currently well
-understood. This socket option takes as argument at least one uint8_t value, or
-an array of uint8_t values, which must match available CCIDS (see above). CCIDs
-must be registered on the socket before calling connect() or listen().
-
-DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets
-the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID.
-Please note that the getsockopt argument type here is ``int``, not uint8_t.
-
-DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID.
-
-DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
-timewait state when closing the connection (RFC 4340, 8.3). The usual case is
-that the closing server sends a CloseReq, whereupon the client holds timewait
-state. When this boolean socket option is on, the server sends a Close instead
-and will enter TIMEWAIT. This option must be set after accept() returns.
-
-DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the
-partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums
-always cover the entire packet and that only fully covered application data is
-accepted by the receiver. Hence, when using this feature on the sender, it must
-be enabled at the receiver, too with suitable choice of CsCov.
-
-DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the
- range 0..15 are acceptable. The default setting is 0 (full coverage),
- values between 1..15 indicate partial coverage.
-
-DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it
- sets a threshold, where again values 0..15 are acceptable. The default
- of 0 means that all packets with a partial coverage will be discarded.
- Values in the range 1..15 indicate that packets with minimally such a
- coverage value are also acceptable. The higher the number, the more
- restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage
- settings are inherited to the child socket after accept().
-
-The following two options apply to CCID 3 exclusively and are getsockopt()-only.
-In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
-
-DCCP_SOCKOPT_CCID_RX_INFO
- Returns a ``struct tfrc_rx_info`` in optval; the buffer for optval and
- optlen must be set to at least sizeof(struct tfrc_rx_info).
-
-DCCP_SOCKOPT_CCID_TX_INFO
- Returns a ``struct tfrc_tx_info`` in optval; the buffer for optval and
- optlen must be set to at least sizeof(struct tfrc_tx_info).
-
-On unidirectional connections it is useful to close the unused half-connection
-via shutdown (SHUT_WR or SHUT_RD): this will reduce per-packet processing costs.
-
-
-Sysctl variables
-================
-Several DCCP default parameters can be managed by the following sysctls
-(sysctl net.dccp.default or /proc/sys/net/dccp/default):
-
-request_retries
- The number of active connection initiation retries (the number of
- Requests minus one) before timing out. In addition, it also governs
- the behaviour of the other, passive side: this variable also sets
- the number of times DCCP repeats sending a Response when the initial
- handshake does not progress from RESPOND to OPEN (i.e. when no Ack
- is received after the initial Request). This value should be greater
- than 0, suggested is less than 10. Analogue of tcp_syn_retries.
-
-retries1
- How often a DCCP Response is retransmitted until the listening DCCP
- side considers its connecting peer dead. Analogue of tcp_retries1.
-
-retries2
- The number of times a general DCCP packet is retransmitted. This has
- importance for retransmitted acknowledgments and feature negotiation,
- data packets are never retransmitted. Analogue of tcp_retries2.
-
-tx_ccid = 2
- Default CCID for the sender-receiver half-connection. Depending on the
- choice of CCID, the Send Ack Vector feature is enabled automatically.
-
-rx_ccid = 2
- Default CCID for the receiver-sender half-connection; see tx_ccid.
-
-seq_window = 100
- The initial sequence window (sec. 7.5.2) of the sender. This influences
- the local ackno validity and the remote seqno validity windows (7.5.1).
- Values in the range Wmin = 32 (RFC 4340, 7.5.2) up to 2^32-1 can be set.
-
-tx_qlen = 5
- The size of the transmit buffer in packets. A value of 0 corresponds
- to an unbounded transmit buffer.
-
-sync_ratelimit = 125 ms
- The timeout between subsequent DCCP-Sync packets sent in response to
- sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
- of this parameter is milliseconds; a value of 0 disables rate-limiting.
-
-
-IOCTLS
-======
-FIONREAD
- Works as in udp(7): returns in the ``int`` argument pointer the size of
- the next pending datagram in bytes, or 0 when no datagram is pending.
-
-SIOCOUTQ
- Returns the number of unsent data bytes in the socket send queue as ``int``
- into the buffer specified by the argument pointer.
-
-Other tunables
-==============
-Per-route rto_min support
- CCID-2 supports the RTAX_RTO_MIN per-route setting for the minimum value
- of the RTO timer. This setting can be modified via the 'rto_min' option
- of iproute2; for example::
-
- > ip route change 10.0.0.0/24 rto_min 250j dev wlan0
- > ip route add 10.0.0.254/32 rto_min 800j dev wlan0
- > ip route show dev wlan0
-
- CCID-3 also supports the rto_min setting: it is used to define the lower
- bound for the expiry of the nofeedback timer. This can be useful on LANs
- with very low RTTs (e.g., loopback, Gbit ethernet).
-
-
-Notes
-=====
-DCCP does not travel through NAT successfully at present on many boxes. This is
-because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
-support for DCCP has been added.
diff --git a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst
index 04e0595bb0a7..3483e498c08e 100644
--- a/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst
+++ b/Documentation/networking/device_drivers/ethernet/meta/fbnic.rst
@@ -31,6 +31,46 @@ separate entry.
Statistics
----------
+TX MAC Interface
+~~~~~~~~~~~~~~~~
+
+ - ``ptp_illegal_req``: packets sent to the NIC with PTP request bit set but routed to BMC/FW
+ - ``ptp_good_ts``: packets successfully routed to MAC with PTP request bit set
+ - ``ptp_bad_ts``: packets destined for MAC with PTP request bit set but aborted because of some error (e.g., DMA read error)
+
+TX Extension (TEI) Interface (TTI)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ - ``tti_cm_drop``: control messages dropped at the TX Extension (TEI) Interface because of credit starvation
+ - ``tti_frame_drop``: packets dropped at the TX Extension (TEI) Interface because of credit starvation
+ - ``tti_tbi_drop``: packets dropped at the TX BMC Interface (TBI) because of credit starvation
+
+RXB (RX Buffer) Enqueue
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ - ``rxb_integrity_err[i]``: frames enqueued with integrity errors (e.g., multi-bit ECC errors) on RXB input i
+ - ``rxb_mac_err[i]``: frames enqueued with MAC end-of-frame errors (e.g., bad FCS) on RXB input i
+ - ``rxb_parser_err[i]``: frames experienced RPC parser errors
+ - ``rxb_frm_err[i]``: frames experienced signaling errors (e.g., missing end-of-packet/start-of-packet) on RXB input i
+ - ``rxb_drbo[i]_frames``: frames received at RXB input i
+ - ``rxb_drbo[i]_bytes``: bytes received at RXB input i
+
+RXB (RX Buffer) FIFO
+~~~~~~~~~~~~~~~~~~~~
+
+ - ``rxb_fifo[i]_drop``: transitions into the drop state on RXB pool i
+ - ``rxb_fifo[i]_dropped_frames``: frames dropped on RXB pool i
+ - ``rxb_fifo[i]_ecn``: transitions into the ECN mark state on RXB pool i
+ - ``rxb_fifo[i]_level``: current occupancy of RXB pool i
+
+RXB (RX Buffer) Dequeue
+~~~~~~~~~~~~~~~~~~~~~~~
+
+ - ``rxb_intf[i]_frames``: frames sent to the output i
+ - ``rxb_intf[i]_bytes``: bytes sent to the output i
+ - ``rxb_pbuf[i]_frames``: frames sent to output i from the perspective of internal packet buffer
+ - ``rxb_pbuf[i]_bytes``: bytes sent to output i from the perspective of internal packet buffer
+
RPC (Rx parser)
~~~~~~~~~~~~~~~
@@ -44,6 +84,15 @@ RPC (Rx parser)
- ``rpc_out_of_hdr_err``: frames where header was larger than parsable region
- ``ovr_size_err``: oversized frames
+Hardware Queues
+~~~~~~~~~~~~~~~
+
+1. RX DMA Engine:
+
+ - ``rde_[i]_pkt_err``: packets with MAC EOP, RPC parser, RXB truncation, or RDE frame truncation errors. These errors are flagged in the packet metadata because of cut-through support, but the actual drop happens once PCIE/RDE is reached.
+ - ``rde_[i]_pkt_cq_drop``: packets dropped because RCQ is full
+ - ``rde_[i]_pkt_bdq_drop``: packets dropped because HPQ or PPQ ran out of host buffer
+
PCIe
~~~~
diff --git a/Documentation/networking/devlink/devlink-info.rst b/Documentation/networking/devlink/devlink-info.rst
index 23073bc219d8..dd6adc4d0559 100644
--- a/Documentation/networking/devlink/devlink-info.rst
+++ b/Documentation/networking/devlink/devlink-info.rst
@@ -86,6 +86,10 @@ In case software/firmware components are loaded from the disk (e.g.
``/lib/firmware``) only the running version should be reported via
the kernel API.
+Please note that any security versions reported via devlink are purely
+informational. Devlink does not use a secure channel to communicate with
+the device.
+
Generic Versions
================
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index 948c8c44e233..8319f43b5933 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -84,6 +84,7 @@ parameters, info versions, and other features it supports.
i40e
ionic
ice
+ ixgbe
mlx4
mlx5
mlxsw
diff --git a/Documentation/networking/devlink/ixgbe.rst b/Documentation/networking/devlink/ixgbe.rst
new file mode 100644
index 000000000000..3fce291348fa
--- /dev/null
+++ b/Documentation/networking/devlink/ixgbe.rst
@@ -0,0 +1,122 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+ixgbe devlink support
+=====================
+
+This document describes the devlink features implemented by the ``ixgbe``
+device driver.
+
+Info versions
+=============
+
+Any of the security versions reported by ``devlink-info`` are purely
+informational. Devlink does not use a secure channel to communicate
+with the device.
+
+The ``ixgbe`` driver reports the following versions:
+
+.. list-table:: devlink info versions implemented
+ :widths: 5 5 5 90
+
+ * - Name
+ - Type
+ - Example
+ - Description
+ * - ``board.id``
+ - fixed
+ - H49289-000
+ - The Product Board Assembly (PBA) identifier of the board.
+ * - ``fw.undi``
+ - running
+ - 1.1937.0
+ - Version of the Option ROM containing the UEFI driver. The version is
+ reported in ``major.minor.patch`` format. The major version is
+ incremented whenever a major breaking change occurs, or when the
+ minor version would overflow. The minor version is incremented for
+ non-breaking changes and reset to 1 when the major version is
+ incremented. The patch version is normally 0 but is incremented when
+ a fix is delivered as a patch against an older base Option ROM.
+ * - ``fw.undi.srev``
+ - running
+ - 4
+ - Number indicating the security revision of the Option ROM.
+ * - ``fw.bundle_id``
+ - running
+ - 0x80000d0d
+ - Unique identifier of the firmware image file that was loaded onto
+ the device. Also referred to as the EETRACK identifier of the NVM.
+ * - ``fw.mgmt.api``
+ - running
+ - 1.5.1
+ - 3-digit version number (major.minor.patch) of the API exported over
+ the AdminQ by the management firmware. Used by the driver to
+ identify what commands are supported. Historical versions of the
+ kernel only displayed a 2-digit version number (major.minor).
+ * - ``fw.mgmt.build``
+ - running
+ - 0x305d955f
+ - Unique identifier of the source for the management firmware.
+ * - ``fw.mgmt.srev``
+ - running
+ - 3
+ - Number indicating the security revision of the firmware.
+ * - ``fw.psid.api``
+ - running
+ - 0.80
+ - Version defining the format of the flash contents.
+ * - ``fw.netlist``
+ - running
+ - 1.1.2000-6.7.0
+ - The version of the netlist module. This module defines the device's
+ Ethernet capabilities and default settings, and is used by the
+ management firmware as part of managing link and device
+ connectivity.
+ * - ``fw.netlist.build``
+ - running
+ - 0xee16ced7
+ - The first 4 bytes of the hash of the netlist module contents.
+
+Flash Update
+============
+
+The ``ixgbe`` driver implements support for flash update using the
+``devlink-flash`` interface. It supports updating the device flash using a
+combined flash image that contains the ``fw.mgmt``, ``fw.undi``, and
+``fw.netlist`` components.
+
+.. list-table:: List of supported overwrite modes
+ :widths: 5 95
+
+ * - Bits
+ - Behavior
+ * - ``DEVLINK_FLASH_OVERWRITE_SETTINGS``
+ - Do not preserve settings stored in the flash components being
+ updated. This includes overwriting the port configuration that
+ determines the number of physical functions the device will
+ initialize with.
+ * - ``DEVLINK_FLASH_OVERWRITE_SETTINGS`` and ``DEVLINK_FLASH_OVERWRITE_IDENTIFIERS``
+ - Do not preserve either settings or identifiers. Overwrite everything
+ in the flash with the contents from the provided image, without
+ performing any preservation. This includes overwriting device
+ identifying fields such as the MAC address, Vital Product Data (VPD) area,
+ and device serial number. It is expected that this combination be used with an
+ image customized for the specific device.
+
+Reload
+======
+
+The ``ixgbe`` driver supports activating new firmware after a flash update
+using ``DEVLINK_CMD_RELOAD`` with the ``DEVLINK_RELOAD_ACTION_FW_ACTIVATE``
+action.
+
+.. code:: shell
+
+ $ devlink dev reload pci/0000:01:00.0 action fw_activate
+
+The new firmware is activated by issuing a device specific Embedded
+Management Processor reset which requests the device to reset and reload the
+EMP firmware image.
+
+The driver does not currently support reloading the driver via
+``DEVLINK_RELOAD_ACTION_DRIVER_REINIT``.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index c64133d309bf..ac90b82f3ce9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -48,7 +48,6 @@ Contents:
ax25
bonding
cdc_mbim
- dccp
dctcp
devmem
dns_resolver
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 5c63ab928b97..b43222ee57cf 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -37,8 +37,8 @@ ip_no_pmtu_disc - INTEGER
Mode 3 is a hardened pmtu discover mode. The kernel will only
accept fragmentation-needed errors if the underlying protocol
can verify them besides a plain socket lookup. Current
- protocols for which pmtu events will be honored are TCP, SCTP
- and DCCP as they verify e.g. the sequence number or the
+ protocols for which pmtu events will be honored are TCP and
+ SCTP as they verify e.g. the sequence number or the
association. This mode should not be enabled globally but is
only intended to secure e.g. name servers in namespaces where
TCP path mtu must still work but path MTU information of other
diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index 6327e689e8a8..ca8605eb82ff 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -131,7 +131,7 @@ struct ref_tracker_dir refcnt_tracker
struct list_head link_watch_list
enum:8 reg_state
bool dismantle
-enum:16 rtnl_link_state
+bool rtnl_link_initializing
bool needs_free_netdev
void*priv_destructor struct net_device
struct netpoll_info* npinfo read_mostly napi_poll/napi_poll_lock
diff --git a/Documentation/networking/net_cachelines/snmp.rst b/Documentation/networking/net_cachelines/snmp.rst
index bc96efc92cf5..bd44b3eebbef 100644
--- a/Documentation/networking/net_cachelines/snmp.rst
+++ b/Documentation/networking/net_cachelines/snmp.rst
@@ -37,6 +37,8 @@ unsigned_long LINUX_MIB_TIMEWAITKILLED
unsigned_long LINUX_MIB_PAWSACTIVEREJECTED
unsigned_long LINUX_MIB_PAWSESTABREJECTED
unsigned_long LINUX_MIB_TSECR_REJECTED
+unsigned_long LINUX_MIB_PAWS_OLD_ACK
+unsigned_long LINUX_MIB_PAWS_TW_REJECTED
unsigned_long LINUX_MIB_DELAYEDACKLOST
unsigned_long LINUX_MIB_LISTENOVERFLOWS
unsigned_long LINUX_MIB_LISTENDROPS
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index eab601ab2db0..77fe1f7b93be 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -8,7 +8,7 @@ Network Devices, the Kernel, and You!
Introduction
============
The following is a random collection of documentation regarding
-network devices.
+network devices. It is intended for driver developers.
struct net_device lifetime rules
================================
@@ -314,13 +314,8 @@ napi->poll:
softirq
will be called with interrupts disabled by netconsole.
-struct netdev_queue_mgmt_ops synchronization rules
-==================================================
-
-All queue management ndo callbacks are holding netdev instance lock.
-
-RTNL and netdev instance lock
-=============================
+netdev instance lock
+====================
Historically, all networking control operations were protected by a single
global lock known as ``rtnl_lock``. There is an ongoing effort to replace this
@@ -328,10 +323,13 @@ global lock with separate locks for each network namespace. Additionally,
properties of individual netdev are increasingly protected by per-netdev locks.
For device drivers that implement shaping or queue management APIs, all control
-operations will be performed under the netdev instance lock. Currently, this
-instance lock is acquired within the context of ``rtnl_lock``. The drivers
-can also explicitly request instance lock to be acquired via
-``request_ops_lock``. In the future, there will be an option for individual
+operations will be performed under the netdev instance lock.
+Drivers can also explicitly request the instance lock to be held during ops
+by setting ``request_ops_lock`` to true. Code comments and docs refer
+to drivers whose ops are called under the instance lock as "ops locked".
+See also the documentation of the ``lock`` member of struct net_device.
+
+In the future, there will be an option for individual
drivers to opt out of using ``rtnl_lock`` and instead perform their control
operations directly under the netdev instance lock.
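+
+A minimal sketch of a driver opting in to "ops locking" (``foo_probe`` and
+``struct foo_device`` are hypothetical; ``request_ops_lock`` and
+``register_netdev()`` are the real entry points):
+
+.. code:: c
+
+   static int foo_probe(struct foo_device *fdev)
+   {
+           struct net_device *netdev = fdev->netdev;
+
+           /* Ask the core to hold the instance lock around ops. */
+           netdev->request_ops_lock = true;
+
+           return register_netdev(netdev);
+   }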
@@ -344,8 +342,46 @@ functions handle acquiring the instance lock themselves, while the
``netif_xxx`` functions assume that the driver has already acquired
the instance lock.
+struct net_device_ops
+---------------------
+
+``ndos`` are called without holding the instance lock for most drivers.
+
+"Ops locked" drivers will have most of the ``ndos`` invoked under
+the instance lock.
+
+struct ethtool_ops
+------------------
+
+Similarly to ``ndos``, the instance lock is only held for select drivers.
+For "ops locked" drivers, all ethtool ops, without exception, are
+called under the instance lock.
+
+struct netdev_stat_ops
+----------------------
+
+"qstat" ops are invoked under the instance lock for "ops locked" drivers,
+and under rtnl_lock for all other drivers.
+
+struct net_shaper_ops
+---------------------
+
+All net shaper callbacks are invoked while holding the netdev instance
+lock. ``rtnl_lock`` may or may not be held.
+
+Note that supporting net shapers automatically enables "ops locking".
+
+struct netdev_queue_mgmt_ops
+----------------------------
+
+All queue management callbacks are invoked while holding the netdev instance
+lock. ``rtnl_lock`` may or may not be held.
+
+Note that supporting struct netdev_queue_mgmt_ops automatically enables
+"ops locking".
+
Notifiers and netdev instance lock
-==================================
+----------------------------------
For device drivers that implement shaping or queue management APIs,
some of the notifiers (``enum netdev_cmd``) are running under the netdev
@@ -353,9 +389,10 @@ instance lock.
For devices with locked ops, currently only the following notifiers are
running under the lock:
+* ``NETDEV_CHANGE``
* ``NETDEV_REGISTER``
* ``NETDEV_UP``
-* ``NETDEV_CHANGE``
+* ``NETDEV_XDP_FEAT_CHANGE``
The following notifiers are running without the lock:
* ``NETDEV_UNREGISTER``
diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst
index e807e18ba32a..fe2ea73be441 100644
--- a/Documentation/networking/rxrpc.rst
+++ b/Documentation/networking/rxrpc.rst
@@ -1172,3 +1172,18 @@ adjusted through sysctls in /proc/net/rxrpc/:
header plus exactly 1412 bytes of data. The terminal packet must contain
a four byte header plus any amount of data. In any event, a jumbo packet
may not exceed rxrpc_rx_mtu in size.
+
+
+API Function Reference
+======================
+
+.. kernel-doc:: net/rxrpc/af_rxrpc.c
+.. kernel-doc:: net/rxrpc/call_object.c
+.. kernel-doc:: net/rxrpc/key.c
+.. kernel-doc:: net/rxrpc/oob.c
+.. kernel-doc:: net/rxrpc/peer_object.c
+.. kernel-doc:: net/rxrpc/recvmsg.c
+.. kernel-doc:: net/rxrpc/rxgk.c
+.. kernel-doc:: net/rxrpc/rxkad.c
+.. kernel-doc:: net/rxrpc/sendmsg.c
+.. kernel-doc:: net/rxrpc/server_key.c
diff --git a/Documentation/translations/zh_CN/admin-guide/bug-hunting.rst b/Documentation/translations/zh_CN/admin-guide/bug-hunting.rst
index c3f6a83294dc..4b3432753eb9 100644
--- a/Documentation/translations/zh_CN/admin-guide/bug-hunting.rst
+++ b/Documentation/translations/zh_CN/admin-guide/bug-hunting.rst
@@ -188,7 +188,7 @@ objdump
编行。如果没有调试符号,您将看到所示例程的汇编程序代码,但是如果内核有调试
符号,C代码也将可见(调试符号可以在内核配置菜单的hacking项中启用)。例如::
- $ objdump -r -S -l --disassemble net/dccp/ipv4.o
+ $ objdump -r -S -l --disassemble net/ipv4/tcp.o
.. note::
diff --git a/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst b/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst
index b25ecc44d735..80ea5677ee52 100644
--- a/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst
+++ b/Documentation/translations/zh_TW/admin-guide/bug-hunting.rst
@@ -191,7 +191,7 @@ objdump
編行。如果沒有調試符號,您將看到所示例程的彙編程序代碼,但是如果內核有調試
符號,C代碼也將可見(調試符號可以在內核配置菜單的hacking項中啓用)。例如::
- $ objdump -r -S -l --disassemble net/dccp/ipv4.o
+ $ objdump -r -S -l --disassemble net/ipv4/tcp.o
.. note::
diff --git a/Documentation/userspace-api/netlink/netlink-raw.rst b/Documentation/userspace-api/netlink/netlink-raw.rst
index 1990eea772d0..31fc91020eb3 100644
--- a/Documentation/userspace-api/netlink/netlink-raw.rst
+++ b/Documentation/userspace-api/netlink/netlink-raw.rst
@@ -62,7 +62,7 @@ Sub-messages
------------
Several raw netlink families such as
-:doc:`rt_link<../../networking/netlink_spec/rt_link>` and
+:doc:`rt-link<../../networking/netlink_spec/rt-link>` and
:doc:`tc<../../networking/netlink_spec/tc>` use attribute nesting as an
abstraction to carry module specific information.
diff --git a/MAINTAINERS b/MAINTAINERS
index 8c7d796131a8..657a67f9031e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6547,15 +6547,6 @@ S: Maintained
F: Documentation/scsi/dc395x.rst
F: drivers/scsi/dc395x.*
-DCCP PROTOCOL
-L: dccp@vger.kernel.org
-S: Orphan
-W: http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp
-F: include/linux/dccp.h
-F: include/linux/tfrc.h
-F: include/uapi/linux/dccp.h
-F: net/dccp/
-
DEBUGOBJECTS:
M: Thomas Gleixner <tglx@linutronix.de>
L: linux-kernel@vger.kernel.org
@@ -18140,6 +18131,17 @@ F: arch/openrisc/
F: drivers/irqchip/irq-ompic.c
F: drivers/irqchip/irq-or1k-*
+OPENVPN DATA CHANNEL OFFLOAD
+M: Antonio Quartulli <antonio@openvpn.net>
+L: openvpn-devel@lists.sourceforge.net (subscribers-only)
+L: netdev@vger.kernel.org
+S: Supported
+T: git https://github.com/OpenVPN/linux-kernel-ovpn.git
+F: Documentation/netlink/specs/ovpn.yaml
+F: drivers/net/ovpn/
+F: include/uapi/linux/ovpn.h
+F: tools/testing/selftests/net/ovpn/
+
OPENVSWITCH
M: Aaron Conole <aconole@redhat.com>
M: Eelco Chaudron <echaudro@redhat.com>
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 31ecb8b7b9f1..85bd696b0e90 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -267,8 +267,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index 1f57514624d5..93003ceb51bb 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -263,8 +263,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 02db7a48e57e..fa6ed1bb01e3 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -270,8 +270,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index f0e673cb17eb..80ca001d5eab 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -260,8 +260,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index e8ca5a50b86d..5e35e4b1fe16 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -262,8 +262,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index b3a270441bb1..11bc3f3c7576 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -261,8 +261,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index d215dba006ce..343f702210df 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -281,8 +281,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index a888ed93ff82..6104a5a24594 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -259,8 +259,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index b481782375f6..a99dd6c7c9c8 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -260,8 +260,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index 6eba743d8eb5..0d87c119d4c4 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -261,8 +261,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 9bdbb418ffa8..841f9615b6ab 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -256,8 +256,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index e1cf20fa5343..12c5dc92ea4c 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -257,8 +257,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
-# CONFIG_IP_DCCP_CCID3 is not set
CONFIG_SCTP_COOKIE_HMAC_SHA1=y
CONFIG_RDS=m
CONFIG_RDS_TCP=m
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index 8f7c36868204..97d2cd997285 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -81,7 +81,6 @@ CONFIG_IP_VS_SH=m
CONFIG_IP_VS_SED=m
CONFIG_IP_VS_NQ=m
CONFIG_IP_VS_FTP=m
-CONFIG_IP_DCCP=m
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
diff --git a/arch/mips/configs/gpr_defconfig b/arch/mips/configs/gpr_defconfig
index 12f3eed8a946..df55fe6df8ba 100644
--- a/arch/mips/configs/gpr_defconfig
+++ b/arch/mips/configs/gpr_defconfig
@@ -84,7 +84,6 @@ CONFIG_BRIDGE_EBT_MARK_T=m
CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_IP_DCCP=m
CONFIG_IP_SCTP=m
CONFIG_TIPC=m
CONFIG_ATM=y
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index 06b7a0b97eca..487bd84043f7 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -130,7 +130,6 @@ CONFIG_BRIDGE_EBT_MARK_T=m
CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
-CONFIG_IP_DCCP=m
CONFIG_IP_SCTP=m
CONFIG_TIPC=m
CONFIG_ATM=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 1bc3466bc909..ae45f70b29f0 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -87,7 +87,6 @@ CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_IP_DCCP=m
CONFIG_BT=m
CONFIG_BT_RFCOMM=m
CONFIG_BT_RFCOMM_TTY=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index a91a766b71a4..5e0f2cd7e62b 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -225,7 +225,6 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_SNAT=m
CONFIG_BRIDGE_EBT_LOG=m
CONFIG_BRIDGE_EBT_NFLOG=m
-CONFIG_IP_DCCP=m
CONFIG_TIPC=m
CONFIG_ATM=m
CONFIG_ATM_CLIP=m
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 271520510b5f..b29628d46be9 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -115,6 +115,21 @@ config WIREGUARD_DEBUG
Say N here unless you know what you're doing.
+config OVPN
+ tristate "OpenVPN data channel offload"
+ depends on NET && INET
+ depends on IPV6 || !IPV6
+ select DST_CACHE
+ select NET_UDP_TUNNEL
+ select CRYPTO
+ select CRYPTO_AES
+ select CRYPTO_GCM
+ select CRYPTO_CHACHA20POLY1305
+ select STREAM_PARSER
+ help
+ This module enhances the performance of the OpenVPN userspace software
+ by offloading the data channel processing to kernelspace.
+
config EQUALIZER
tristate "EQL (serial line load balancing) support"
help
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 75333251a01a..73bc63ecd65f 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_IPVLAN) += ipvlan/
obj-$(CONFIG_IPVTAP) += ipvlan/
obj-$(CONFIG_DUMMY) += dummy.o
obj-$(CONFIG_WIREGUARD) += wireguard/
+obj-$(CONFIG_OVPN) += ovpn/
obj-$(CONFIG_EQUALIZER) += eql.o
obj-$(CONFIG_IFB) += ifb.o
obj-$(CONFIG_MACSEC) += macsec.o
diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index d1473c5f8eef..a9dffdcac805 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -777,27 +777,19 @@ static __net_init int bareudp_init_net(struct net *net)
return 0;
}
-static void bareudp_destroy_tunnels(struct net *net, struct list_head *head)
+static void __net_exit bareudp_exit_rtnl_net(struct net *net,
+ struct list_head *dev_kill_list)
{
struct bareudp_net *bn = net_generic(net, bareudp_net_id);
struct bareudp_dev *bareudp, *next;
list_for_each_entry_safe(bareudp, next, &bn->bareudp_list, next)
- unregister_netdevice_queue(bareudp->dev, head);
-}
-
-static void __net_exit bareudp_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_kill_list)
-{
- struct net *net;
-
- list_for_each_entry(net, net_list, exit_list)
- bareudp_destroy_tunnels(net, dev_kill_list);
+ bareudp_dellink(bareudp->dev, dev_kill_list);
}
static struct pernet_operations bareudp_net_ops = {
.init = bareudp_init_net,
- .exit_batch_rtnl = bareudp_exit_batch_rtnl,
+ .exit_rtnl = bareudp_exit_rtnl_net,
.id = &bareudp_net_id,
.size = sizeof(struct bareudp_net),
};
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8ea183da8d53..665b9af684fc 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -6563,7 +6563,7 @@ static int __net_init bond_net_init(struct net *net)
/* According to commit 69b0216ac255 ("bonding: fix bonding_masters
* race condition in bond unloading") we need to remove sysfs files
- * before we remove our devices (done later in bond_net_exit_batch_rtnl())
+ * before we remove our devices (done later in bond_net_exit_rtnl())
*/
static void __net_exit bond_net_pre_exit(struct net *net)
{
@@ -6572,25 +6572,20 @@ static void __net_exit bond_net_pre_exit(struct net *net)
bond_destroy_sysfs(bn);
}
-static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_kill_list)
+static void __net_exit bond_net_exit_rtnl(struct net *net,
+ struct list_head *dev_kill_list)
{
- struct bond_net *bn;
- struct net *net;
+ struct bond_net *bn = net_generic(net, bond_net_id);
+ struct bonding *bond, *tmp_bond;
/* Kill off any bonds created after unregistering bond rtnl ops */
- list_for_each_entry(net, net_list, exit_list) {
- struct bonding *bond, *tmp_bond;
-
- bn = net_generic(net, bond_net_id);
- list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
- unregister_netdevice_queue(bond->dev, dev_kill_list);
- }
+ list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
+ unregister_netdevice_queue(bond->dev, dev_kill_list);
}
/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory
* only after all bonds are gone") bond_destroy_proc_dir() is called
- * after bond_net_exit_batch_rtnl() has completed.
+ * after bond_net_exit_rtnl() has completed.
*/
static void __net_exit bond_net_exit_batch(struct list_head *net_list)
{
@@ -6606,7 +6601,7 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list)
static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
.pre_exit = bond_net_pre_exit,
- .exit_batch_rtnl = bond_net_exit_batch_rtnl,
+ .exit_rtnl = bond_net_exit_rtnl,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 89f0796894af..b45052497f8a 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -3999,6 +3999,89 @@ static int ksz_ets_band_to_queue(struct tc_ets_qopt_offload_replace_params *p,
return p->bands - 1 - band;
}
+/**
+ * ksz88x3_tc_ets_add - Configure ETS (Enhanced Transmission Selection)
+ * for a port on KSZ88x3 switch
+ * @dev: Pointer to the KSZ switch device structure
+ * @port: Port number to configure
+ * @p: Pointer to offload replace parameters describing ETS bands and mapping
+ *
+ * The KSZ88x3 supports two scheduling modes: Strict Priority and
+ * Weighted Fair Queuing (WFQ). Both modes have fixed behavior:
+ * - No configurable queue-to-priority mapping
+ * - No weight adjustment in WFQ mode
+ *
+ * This function configures the switch to use strict priority mode by
+ * clearing the WFQ enable bit for all queues associated with ETS bands.
+ * If strict priority is not explicitly requested, the switch will default
+ * to WFQ mode.
+ *
+ * Return: 0 on success, or a negative error code on failure
+ */
+static int ksz88x3_tc_ets_add(struct ksz_device *dev, int port,
+ struct tc_ets_qopt_offload_replace_params *p)
+{
+ int ret, band;
+
+ /* Only strict priority mode is supported for now.
+ * WFQ is implicitly enabled when strict mode is disabled.
+ */
+ for (band = 0; band < p->bands; band++) {
+ int queue = ksz_ets_band_to_queue(p, band);
+ u8 reg;
+
+ /* Calculate TXQ Split Control register address for this
+ * port/queue
+ */
+ reg = KSZ8873_TXQ_SPLIT_CTRL_REG(port, queue);
+
+ /* Clear WFQ enable bit to select strict priority scheduling */
+ ret = ksz_rmw8(dev, reg, KSZ8873_TXQ_WFQ_ENABLE, 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
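+/* Illustrative (not authoritative) command that maps to
+ * ksz88x3_tc_ets_add() with strict-priority bands; the interface name
+ * is an example:
+ *   tc qdisc replace dev lan1 root handle 1: ets strict 4 priomap 3 2 1 0
+ */
+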
+/**
+ * ksz88x3_tc_ets_del - Reset ETS (Enhanced Transmission Selection) config
+ * for a port on KSZ88x3 switch
+ * @dev: Pointer to the KSZ switch device structure
+ * @port: Port number to reset
+ *
+ * The KSZ88x3 supports only fixed scheduling modes: Strict Priority or
+ * Weighted Fair Queuing (WFQ), with no reconfiguration of weights or
+ * queue mapping. This function resets the port's scheduling mode to
+ * the default, which is WFQ, by enabling the WFQ bit for all queues.
+ *
+ * Return: 0 on success, or a negative error code on failure
+ */
+static int ksz88x3_tc_ets_del(struct ksz_device *dev, int port)
+{
+ int ret, queue;
+
+ /* Iterate over all transmit queues for this port */
+ for (queue = 0; queue < dev->info->num_tx_queues; queue++) {
+ u8 reg;
+
+ /* Calculate TXQ Split Control register address for this
+ * port/queue
+ */
+ reg = KSZ8873_TXQ_SPLIT_CTRL_REG(port, queue);
+
+ /* Set WFQ enable bit to revert back to default scheduling
+ * mode
+ */
+ ret = ksz_rmw8(dev, reg, KSZ8873_TXQ_WFQ_ENABLE,
+ KSZ8873_TXQ_WFQ_ENABLE);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int ksz_queue_set_strict(struct ksz_device *dev, int port, int queue)
{
int ret;
@@ -4080,6 +4163,7 @@ static int ksz_tc_ets_del(struct ksz_device *dev, int port)
for (queue = 0; queue < dev->info->num_tx_queues; queue++) {
ret = ksz_queue_set_wrr(dev, port, queue,
KSZ9477_DEFAULT_WRR_WEIGHT);
+
if (ret)
return ret;
}
@@ -4132,7 +4216,7 @@ static int ksz_tc_setup_qdisc_ets(struct dsa_switch *ds, int port,
struct ksz_device *dev = ds->priv;
int ret;
- if (is_ksz8(dev))
+ if (is_ksz8(dev) && !ksz_is_ksz88x3(dev))
return -EOPNOTSUPP;
if (qopt->parent != TC_H_ROOT) {
@@ -4146,9 +4230,16 @@ static int ksz_tc_setup_qdisc_ets(struct dsa_switch *ds, int port,
if (ret)
return ret;
- return ksz_tc_ets_add(dev, port, &qopt->replace_params);
+ if (ksz_is_ksz88x3(dev))
+ return ksz88x3_tc_ets_add(dev, port,
+ &qopt->replace_params);
+ else
+ return ksz_tc_ets_add(dev, port, &qopt->replace_params);
case TC_ETS_DESTROY:
- return ksz_tc_ets_del(dev, port);
+ if (ksz_is_ksz88x3(dev))
+ return ksz88x3_tc_ets_del(dev, port);
+ else
+ return ksz_tc_ets_del(dev, port);
case TC_ETS_STATS:
case TC_ETS_GRAFT:
return -EOPNOTSUPP;
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index af17a9c030d4..dd5429ff16ee 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -836,6 +836,25 @@ static inline bool is_lan937x_tx_phy(struct ksz_device *dev, int port)
#define SW_HI_SPEED_DRIVE_STRENGTH_S 4
#define SW_LO_SPEED_DRIVE_STRENGTH_S 0
+/* TXQ Split Control Register for per-port, per-queue configuration.
+ * Register 0xAF is TXQ Split for Q3 on Port 1.
+ * Register offset formula: 0xAF + (port * 4) + (3 - queue)
+ * where: port = 0..2, queue = 0..3
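+ * Example: port 1, queue 2 -> 0xAF + (1 * 4) + (3 - 2) = 0xB4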
+ */
+#define KSZ8873_TXQ_SPLIT_CTRL_REG(port, queue) \
+ (0xAF + ((port) * 4) + (3 - (queue)))
+
+/* Bit 7 selects between:
+ * 0 = Strict priority mode (highest-priority queue first)
+ * 1 = Weighted Fair Queuing (WFQ) mode:
+ * Queue weights: Q3:Q2:Q1:Q0 = 8:4:2:1
+ * If any queues are empty, weight is redistributed.
+ *
+ * Note: This is referred to as "Weighted Fair Queuing" (WFQ) in KSZ8863/8873
+ * documentation, and as "Weighted Round Robin" (WRR) in KSZ9477 family docs.
+ */
+#define KSZ8873_TXQ_WFQ_ENABLE BIT(7)
+
#define KSZ9477_REG_PORT_OUT_RATE_0 0x0420
#define KSZ9477_OUT_RATE_NO_LIMIT 0
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index d70399bce5b9..0a33ca1dd7ca 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -32,47 +32,15 @@ static struct mt753x_pcs *pcs_to_mt753x_pcs(struct phylink_pcs *pcs)
/* String, offset, and register size in bytes if different from 4 bytes */
static const struct mt7530_mib_desc mt7530_mib[] = {
- MIB_DESC(1, 0x00, "TxDrop"),
- MIB_DESC(1, 0x04, "TxCrcErr"),
- MIB_DESC(1, 0x08, "TxUnicast"),
- MIB_DESC(1, 0x0c, "TxMulticast"),
- MIB_DESC(1, 0x10, "TxBroadcast"),
- MIB_DESC(1, 0x14, "TxCollision"),
- MIB_DESC(1, 0x18, "TxSingleCollision"),
- MIB_DESC(1, 0x1c, "TxMultipleCollision"),
- MIB_DESC(1, 0x20, "TxDeferred"),
- MIB_DESC(1, 0x24, "TxLateCollision"),
- MIB_DESC(1, 0x28, "TxExcessiveCollistion"),
- MIB_DESC(1, 0x2c, "TxPause"),
- MIB_DESC(1, 0x30, "TxPktSz64"),
- MIB_DESC(1, 0x34, "TxPktSz65To127"),
- MIB_DESC(1, 0x38, "TxPktSz128To255"),
- MIB_DESC(1, 0x3c, "TxPktSz256To511"),
- MIB_DESC(1, 0x40, "TxPktSz512To1023"),
- MIB_DESC(1, 0x44, "Tx1024ToMax"),
- MIB_DESC(2, 0x48, "TxBytes"),
- MIB_DESC(1, 0x60, "RxDrop"),
- MIB_DESC(1, 0x64, "RxFiltering"),
- MIB_DESC(1, 0x68, "RxUnicast"),
- MIB_DESC(1, 0x6c, "RxMulticast"),
- MIB_DESC(1, 0x70, "RxBroadcast"),
- MIB_DESC(1, 0x74, "RxAlignErr"),
- MIB_DESC(1, 0x78, "RxCrcErr"),
- MIB_DESC(1, 0x7c, "RxUnderSizeErr"),
- MIB_DESC(1, 0x80, "RxFragErr"),
- MIB_DESC(1, 0x84, "RxOverSzErr"),
- MIB_DESC(1, 0x88, "RxJabberErr"),
- MIB_DESC(1, 0x8c, "RxPause"),
- MIB_DESC(1, 0x90, "RxPktSz64"),
- MIB_DESC(1, 0x94, "RxPktSz65To127"),
- MIB_DESC(1, 0x98, "RxPktSz128To255"),
- MIB_DESC(1, 0x9c, "RxPktSz256To511"),
- MIB_DESC(1, 0xa0, "RxPktSz512To1023"),
- MIB_DESC(1, 0xa4, "RxPktSz1024ToMax"),
- MIB_DESC(2, 0xa8, "RxBytes"),
- MIB_DESC(1, 0xb0, "RxCtrlDrop"),
- MIB_DESC(1, 0xb4, "RxIngressDrop"),
- MIB_DESC(1, 0xb8, "RxArlDrop"),
+ MIB_DESC(1, MT7530_PORT_MIB_TX_DROP, "TxDrop"),
+ MIB_DESC(1, MT7530_PORT_MIB_TX_CRC_ERR, "TxCrcErr"),
+ MIB_DESC(1, MT7530_PORT_MIB_TX_COLLISION, "TxCollision"),
+ MIB_DESC(1, MT7530_PORT_MIB_RX_DROP, "RxDrop"),
+ MIB_DESC(1, MT7530_PORT_MIB_RX_FILTERING, "RxFiltering"),
+ MIB_DESC(1, MT7530_PORT_MIB_RX_CRC_ERR, "RxCrcErr"),
+ MIB_DESC(1, MT7530_PORT_MIB_RX_CTRL_DROP, "RxCtrlDrop"),
+ MIB_DESC(1, MT7530_PORT_MIB_RX_INGRESS_DROP, "RxIngressDrop"),
+ MIB_DESC(1, MT7530_PORT_MIB_RX_ARL_DROP, "RxArlDrop"),
};
static void
@@ -790,23 +758,33 @@ mt7530_get_strings(struct dsa_switch *ds, int port, u32 stringset,
}
static void
+mt7530_read_port_stats(struct mt7530_priv *priv, int port,
+ u32 offset, u8 size, uint64_t *data)
+{
+ u32 val, reg = MT7530_PORT_MIB_COUNTER(port) + offset;
+
+ val = mt7530_read(priv, reg);
+ *data = val;
+
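+	/* 64-bit MIB counters span two consecutive 32-bit registers. */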
+ if (size == 2) {
+ val = mt7530_read(priv, reg + 4);
+ *data |= (u64)val << 32;
+ }
+}
+
+static void
mt7530_get_ethtool_stats(struct dsa_switch *ds, int port,
uint64_t *data)
{
struct mt7530_priv *priv = ds->priv;
const struct mt7530_mib_desc *mib;
- u32 reg, i;
- u64 hi;
+ int i;
for (i = 0; i < ARRAY_SIZE(mt7530_mib); i++) {
mib = &mt7530_mib[i];
- reg = MT7530_PORT_MIB_COUNTER(port) + mib->offset;
- data[i] = mt7530_read(priv, reg);
- if (mib->size == 2) {
- hi = mt7530_read(priv, reg + 4);
- data[i] |= hi << 32;
- }
+ mt7530_read_port_stats(priv, port, mib->offset, mib->size,
+ data + i);
}
}
@@ -819,6 +797,172 @@ mt7530_get_sset_count(struct dsa_switch *ds, int port, int sset)
return ARRAY_SIZE(mt7530_mib);
}
+static void mt7530_get_eth_mac_stats(struct dsa_switch *ds, int port,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+	/* The MIB counters do not provide a FramesTransmittedOK counter but
+	 * instead provide stats for Unicast, Broadcast and Multicast frames
+	 * separately. To simulate a global frame counter, read the Unicast
+	 * counter here and add the Multicast and Broadcast counters later.
+	 */
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_UNICAST, 1,
+ &mac_stats->FramesTransmittedOK);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_SINGLE_COLLISION, 1,
+ &mac_stats->SingleCollisionFrames);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_MULTIPLE_COLLISION, 1,
+ &mac_stats->MultipleCollisionFrames);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_UNICAST, 1,
+ &mac_stats->FramesReceivedOK);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_BYTES, 2,
+ &mac_stats->OctetsTransmittedOK);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_ALIGN_ERR, 1,
+ &mac_stats->AlignmentErrors);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_DEFERRED, 1,
+ &mac_stats->FramesWithDeferredXmissions);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_LATE_COLLISION, 1,
+ &mac_stats->LateCollisions);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_EXCESSIVE_COLLISION, 1,
+ &mac_stats->FramesAbortedDueToXSColls);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_BYTES, 2,
+ &mac_stats->OctetsReceivedOK);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_MULTICAST, 1,
+ &mac_stats->MulticastFramesXmittedOK);
+ mac_stats->FramesTransmittedOK += mac_stats->MulticastFramesXmittedOK;
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_BROADCAST, 1,
+ &mac_stats->BroadcastFramesXmittedOK);
+ mac_stats->FramesTransmittedOK += mac_stats->BroadcastFramesXmittedOK;
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_MULTICAST, 1,
+ &mac_stats->MulticastFramesReceivedOK);
+ mac_stats->FramesReceivedOK += mac_stats->MulticastFramesReceivedOK;
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_BROADCAST, 1,
+ &mac_stats->BroadcastFramesReceivedOK);
+ mac_stats->FramesReceivedOK += mac_stats->BroadcastFramesReceivedOK;
+}
+
+static const struct ethtool_rmon_hist_range mt7530_rmon_ranges[] = {
+ { 0, 64 },
+ { 65, 127 },
+ { 128, 255 },
+ { 256, 511 },
+ { 512, 1023 },
+ { 1024, MT7530_MAX_MTU },
+ {}
+};
+
+static void mt7530_get_rmon_stats(struct dsa_switch *ds, int port,
+ struct ethtool_rmon_stats *rmon_stats,
+ const struct ethtool_rmon_hist_range **ranges)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_UNDER_SIZE_ERR, 1,
+ &rmon_stats->undersize_pkts);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_OVER_SZ_ERR, 1,
+ &rmon_stats->oversize_pkts);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_FRAG_ERR, 1,
+ &rmon_stats->fragments);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_JABBER_ERR, 1,
+ &rmon_stats->jabbers);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_64, 1,
+ &rmon_stats->hist[0]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_65_TO_127, 1,
+ &rmon_stats->hist[1]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_128_TO_255, 1,
+ &rmon_stats->hist[2]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_256_TO_511, 1,
+ &rmon_stats->hist[3]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_512_TO_1023, 1,
+ &rmon_stats->hist[4]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PKT_SZ_1024_TO_MAX, 1,
+ &rmon_stats->hist[5]);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_64, 1,
+ &rmon_stats->hist_tx[0]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_65_TO_127, 1,
+ &rmon_stats->hist_tx[1]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_128_TO_255, 1,
+ &rmon_stats->hist_tx[2]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_256_TO_511, 1,
+ &rmon_stats->hist_tx[3]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_512_TO_1023, 1,
+ &rmon_stats->hist_tx[4]);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PKT_SZ_1024_TO_MAX, 1,
+ &rmon_stats->hist_tx[5]);
+
+ *ranges = mt7530_rmon_ranges;
+}
+
+static void mt7530_get_stats64(struct dsa_switch *ds, int port,
+ struct rtnl_link_stats64 *storage)
+{
+ struct mt7530_priv *priv = ds->priv;
+ uint64_t data;
+
+	/* The MIB counters do not provide global rx/tx packet counters but
+	 * instead provide stats for Unicast, Broadcast and Multicast frames
+	 * separately. To simulate global packet counters, read the Unicast
+	 * counters here and add the Multicast and Broadcast counters later.
+	 */
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_UNICAST, 1,
+ &storage->rx_packets);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_MULTICAST, 1,
+ &storage->multicast);
+ storage->rx_packets += storage->multicast;
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_BROADCAST, 1,
+ &data);
+ storage->rx_packets += data;
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_UNICAST, 1,
+ &storage->tx_packets);
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_MULTICAST, 1,
+ &data);
+ storage->tx_packets += data;
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_BROADCAST, 1,
+ &data);
+ storage->tx_packets += data;
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_BYTES, 2,
+ &storage->rx_bytes);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_BYTES, 2,
+ &storage->tx_bytes);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_DROP, 1,
+ &storage->rx_dropped);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_DROP, 1,
+ &storage->tx_dropped);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_CRC_ERR, 1,
+ &storage->rx_crc_errors);
+}
+
+static void mt7530_get_eth_ctrl_stats(struct dsa_switch *ds, int port,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ struct mt7530_priv *priv = ds->priv;
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_TX_PAUSE, 1,
+ &ctrl_stats->MACControlFramesTransmitted);
+
+ mt7530_read_port_stats(priv, port, MT7530_PORT_MIB_RX_PAUSE, 1,
+ &ctrl_stats->MACControlFramesReceived);
+}
+
static int
mt7530_set_ageing_time(struct dsa_switch *ds, unsigned int msecs)
{
@@ -3105,6 +3249,10 @@ const struct dsa_switch_ops mt7530_switch_ops = {
.get_strings = mt7530_get_strings,
.get_ethtool_stats = mt7530_get_ethtool_stats,
.get_sset_count = mt7530_get_sset_count,
+ .get_eth_mac_stats = mt7530_get_eth_mac_stats,
+ .get_rmon_stats = mt7530_get_rmon_stats,
+ .get_eth_ctrl_stats = mt7530_get_eth_ctrl_stats,
+ .get_stats64 = mt7530_get_stats64,
.set_ageing_time = mt7530_set_ageing_time,
.port_enable = mt7530_port_enable,
.port_disable = mt7530_port_disable,
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index c3ea403d7acf..d4b838a055ad 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -423,6 +423,48 @@ enum mt7530_vlan_port_acc_frm {
/* Register for MIB */
#define MT7530_PORT_MIB_COUNTER(x) (0x4000 + (x) * 0x100)
+/* Each define is an offset of MT7530_PORT_MIB_COUNTER */
+#define MT7530_PORT_MIB_TX_DROP 0x00
+#define MT7530_PORT_MIB_TX_CRC_ERR 0x04
+#define MT7530_PORT_MIB_TX_UNICAST 0x08
+#define MT7530_PORT_MIB_TX_MULTICAST 0x0c
+#define MT7530_PORT_MIB_TX_BROADCAST 0x10
+#define MT7530_PORT_MIB_TX_COLLISION 0x14
+#define MT7530_PORT_MIB_TX_SINGLE_COLLISION 0x18
+#define MT7530_PORT_MIB_TX_MULTIPLE_COLLISION 0x1c
+#define MT7530_PORT_MIB_TX_DEFERRED 0x20
+#define MT7530_PORT_MIB_TX_LATE_COLLISION 0x24
+#define MT7530_PORT_MIB_TX_EXCESSIVE_COLLISION 0x28
+#define MT7530_PORT_MIB_TX_PAUSE 0x2c
+#define MT7530_PORT_MIB_TX_PKT_SZ_64 0x30
+#define MT7530_PORT_MIB_TX_PKT_SZ_65_TO_127 0x34
+#define MT7530_PORT_MIB_TX_PKT_SZ_128_TO_255 0x38
+#define MT7530_PORT_MIB_TX_PKT_SZ_256_TO_511 0x3c
+#define MT7530_PORT_MIB_TX_PKT_SZ_512_TO_1023 0x40
+#define MT7530_PORT_MIB_TX_PKT_SZ_1024_TO_MAX 0x44
+#define MT7530_PORT_MIB_TX_BYTES 0x48 /* 64-bit counter */
+#define MT7530_PORT_MIB_RX_DROP 0x60
+#define MT7530_PORT_MIB_RX_FILTERING 0x64
+#define MT7530_PORT_MIB_RX_UNICAST 0x68
+#define MT7530_PORT_MIB_RX_MULTICAST 0x6c
+#define MT7530_PORT_MIB_RX_BROADCAST 0x70
+#define MT7530_PORT_MIB_RX_ALIGN_ERR 0x74
+#define MT7530_PORT_MIB_RX_CRC_ERR 0x78
+#define MT7530_PORT_MIB_RX_UNDER_SIZE_ERR 0x7c
+#define MT7530_PORT_MIB_RX_FRAG_ERR 0x80
+#define MT7530_PORT_MIB_RX_OVER_SZ_ERR 0x84
+#define MT7530_PORT_MIB_RX_JABBER_ERR 0x88
+#define MT7530_PORT_MIB_RX_PAUSE 0x8c
+#define MT7530_PORT_MIB_RX_PKT_SZ_64 0x90
+#define MT7530_PORT_MIB_RX_PKT_SZ_65_TO_127 0x94
+#define MT7530_PORT_MIB_RX_PKT_SZ_128_TO_255 0x98
+#define MT7530_PORT_MIB_RX_PKT_SZ_256_TO_511 0x9c
+#define MT7530_PORT_MIB_RX_PKT_SZ_512_TO_1023 0xa0
+#define MT7530_PORT_MIB_RX_PKT_SZ_1024_TO_MAX 0xa4
+#define MT7530_PORT_MIB_RX_BYTES 0xa8 /* 64-bit counter */
+#define MT7530_PORT_MIB_RX_CTRL_DROP 0xb0
+#define MT7530_PORT_MIB_RX_INGRESS_DROP 0xb4
+#define MT7530_PORT_MIB_RX_ARL_DROP 0xb8
#define MT7530_MIB_CCR 0x4fe0
#define CCR_MIB_ENABLE BIT(31)
#define CCR_RX_OCT_CNT_GOOD BIT(7)
diff --git a/drivers/net/dsa/mv88e6xxx/ptp.c b/drivers/net/dsa/mv88e6xxx/ptp.c
index aed4a4b07f34..1d3b2c94c53e 100644
--- a/drivers/net/dsa/mv88e6xxx/ptp.c
+++ b/drivers/net/dsa/mv88e6xxx/ptp.c
@@ -332,13 +332,6 @@ static int mv88e6352_ptp_enable_extts(struct mv88e6xxx_chip *chip,
int pin;
int err;
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
/* Reject requests to enable time stamping on both edges. */
if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
(rq->extts.flags & PTP_ENABLE_FEATURE) &&
@@ -566,6 +559,10 @@ int mv88e6xxx_ptp_setup(struct mv88e6xxx_chip *chip)
chip->ptp_clock_info.verify = ptp_ops->ptp_verify;
chip->ptp_clock_info.do_aux_work = mv88e6xxx_hwtstamp_work;
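+
+	/* Advertise the extts flags we accept; the PTP core rejects the
+	 * rest, replacing the driver-side validation removed above.
+	 */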
+ chip->ptp_clock_info.supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
+
if (ptp_ops->set_ptp_cpu_port) {
struct dsa_port *dp;
int upstream = 0;
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 198e787e8560..3abc64aec411 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -737,10 +737,6 @@ static int sja1105_per_out_enable(struct sja1105_private *priv,
if (perout->index != 0)
return -EOPNOTSUPP;
- /* Reject requests with unsupported flags */
- if (perout->flags)
- return -EOPNOTSUPP;
-
mutex_lock(&ptp_data->lock);
rc = sja1105_change_ptp_clk_pin_func(priv, PTP_PF_PEROUT);
@@ -820,13 +816,6 @@ static int sja1105_extts_enable(struct sja1105_private *priv,
if (extts->index != 0)
return -EOPNOTSUPP;
- /* Reject requests with unsupported flags */
- if (extts->flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
/* We can only enable time stamping on both edges, sadly. */
if ((extts->flags & PTP_STRICT_FLAGS) &&
(extts->flags & PTP_ENABLE_FEATURE) &&
@@ -912,6 +901,9 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds)
.n_pins = 1,
.n_ext_ts = 1,
.n_per_out = 1,
+ .supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS,
};
/* Only used on SJA1105 */
diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c
index d748dc6de923..c773b5ea9c05 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.c
+++ b/drivers/net/ethernet/airoha/airoha_eth.c
@@ -527,6 +527,25 @@ static int airoha_fe_init(struct airoha_eth *eth)
/* disable IFC by default */
airoha_fe_clear(eth, REG_FE_CSR_IFC_CFG, FE_IFC_EN_MASK);
+ airoha_fe_wr(eth, REG_PPE_DFT_CPORT0(0),
+ FIELD_PREP(DFT_CPORT_MASK(7), FE_PSE_PORT_CDM1) |
+ FIELD_PREP(DFT_CPORT_MASK(6), FE_PSE_PORT_CDM1) |
+ FIELD_PREP(DFT_CPORT_MASK(5), FE_PSE_PORT_CDM1) |
+ FIELD_PREP(DFT_CPORT_MASK(4), FE_PSE_PORT_CDM1) |
+ FIELD_PREP(DFT_CPORT_MASK(3), FE_PSE_PORT_CDM1) |
+ FIELD_PREP(DFT_CPORT_MASK(2), FE_PSE_PORT_CDM1) |
+ FIELD_PREP(DFT_CPORT_MASK(1), FE_PSE_PORT_CDM1) |
+ FIELD_PREP(DFT_CPORT_MASK(0), FE_PSE_PORT_CDM1));
+ airoha_fe_wr(eth, REG_PPE_DFT_CPORT0(1),
+ FIELD_PREP(DFT_CPORT_MASK(7), FE_PSE_PORT_CDM2) |
+ FIELD_PREP(DFT_CPORT_MASK(6), FE_PSE_PORT_CDM2) |
+ FIELD_PREP(DFT_CPORT_MASK(5), FE_PSE_PORT_CDM2) |
+ FIELD_PREP(DFT_CPORT_MASK(4), FE_PSE_PORT_CDM2) |
+ FIELD_PREP(DFT_CPORT_MASK(3), FE_PSE_PORT_CDM2) |
+ FIELD_PREP(DFT_CPORT_MASK(2), FE_PSE_PORT_CDM2) |
+ FIELD_PREP(DFT_CPORT_MASK(1), FE_PSE_PORT_CDM2) |
+ FIELD_PREP(DFT_CPORT_MASK(0), FE_PSE_PORT_CDM2));
+
/* enable 1:N vlan action, init vlan table */
airoha_fe_set(eth, REG_MC_VLAN_EN, MC_VLAN_EN_MASK);
@@ -694,7 +713,7 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget)
reason = FIELD_GET(AIROHA_RXD4_PPE_CPU_REASON, msg1);
if (reason == PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED)
- airoha_ppe_check_skb(eth->ppe, hash);
+ airoha_ppe_check_skb(eth->ppe, q->skb, hash);
done++;
napi_gro_receive(&q->napi, q->skb);
@@ -1631,7 +1650,6 @@ static void airhoha_set_gdm2_loopback(struct airoha_gdm_port *port)
if (port->id == 3) {
/* FIXME: handle XSI_PCE1_PORT */
- airoha_fe_wr(eth, REG_PPE_DFT_CPORT0(0), 0x5500);
airoha_fe_rmw(eth, REG_FE_WAN_PORT,
WAN1_EN_MASK | WAN1_MASK | WAN0_MASK,
FIELD_PREP(WAN0_MASK, HSGMII_LAN_PCIE0_SRCPORT));
@@ -2109,6 +2127,125 @@ static int airoha_tc_setup_qdisc_ets(struct airoha_gdm_port *port,
}
}
+static int airoha_qdma_get_rl_param(struct airoha_qdma *qdma, int queue_id,
+ u32 addr, enum trtcm_param_type param,
+ u32 *val_low, u32 *val_high)
+{
+ u32 idx = QDMA_METER_IDX(queue_id), group = QDMA_METER_GROUP(queue_id);
+ u32 val, config = FIELD_PREP(RATE_LIMIT_PARAM_TYPE_MASK, param) |
+ FIELD_PREP(RATE_LIMIT_METER_GROUP_MASK, group) |
+ FIELD_PREP(RATE_LIMIT_PARAM_INDEX_MASK, idx);
+
+ airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
+ if (read_poll_timeout(airoha_qdma_rr, val,
+ val & RATE_LIMIT_PARAM_RW_DONE_MASK,
+ USEC_PER_MSEC, 10 * USEC_PER_MSEC, true, qdma,
+ REG_TRTCM_CFG_PARAM(addr)))
+ return -ETIMEDOUT;
+
+ *val_low = airoha_qdma_rr(qdma, REG_TRTCM_DATA_LOW(addr));
+ if (val_high)
+ *val_high = airoha_qdma_rr(qdma, REG_TRTCM_DATA_HIGH(addr));
+
+ return 0;
+}
+
+static int airoha_qdma_set_rl_param(struct airoha_qdma *qdma, int queue_id,
+ u32 addr, enum trtcm_param_type param,
+ u32 val)
+{
+ u32 idx = QDMA_METER_IDX(queue_id), group = QDMA_METER_GROUP(queue_id);
+ u32 config = RATE_LIMIT_PARAM_RW_MASK |
+ FIELD_PREP(RATE_LIMIT_PARAM_TYPE_MASK, param) |
+ FIELD_PREP(RATE_LIMIT_METER_GROUP_MASK, group) |
+ FIELD_PREP(RATE_LIMIT_PARAM_INDEX_MASK, idx);
+
+ airoha_qdma_wr(qdma, REG_TRTCM_DATA_LOW(addr), val);
+ airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
+
+ return read_poll_timeout(airoha_qdma_rr, val,
+ val & RATE_LIMIT_PARAM_RW_DONE_MASK,
+ USEC_PER_MSEC, 10 * USEC_PER_MSEC, true,
+ qdma, REG_TRTCM_CFG_PARAM(addr));
+}
+
+static int airoha_qdma_set_rl_config(struct airoha_qdma *qdma, int queue_id,
+ u32 addr, bool enable, u32 enable_mask)
+{
+ u32 val;
+ int err;
+
+ err = airoha_qdma_get_rl_param(qdma, queue_id, addr, TRTCM_MISC_MODE,
+ &val, NULL);
+ if (err)
+ return err;
+
+ val = enable ? val | enable_mask : val & ~enable_mask;
+
+ return airoha_qdma_set_rl_param(qdma, queue_id, addr, TRTCM_MISC_MODE,
+ val);
+}
+
+static int airoha_qdma_set_rl_token_bucket(struct airoha_qdma *qdma,
+ int queue_id, u32 rate_val,
+ u32 bucket_size)
+{
+ u32 val, config, tick, unit, rate, rate_frac;
+ int err;
+
+ err = airoha_qdma_get_rl_param(qdma, queue_id, REG_INGRESS_TRTCM_CFG,
+ TRTCM_MISC_MODE, &config, NULL);
+ if (err)
+ return err;
+
+ val = airoha_qdma_rr(qdma, REG_INGRESS_TRTCM_CFG);
+ tick = FIELD_GET(INGRESS_FAST_TICK_MASK, val);
+ if (config & TRTCM_TICK_SEL)
+ tick *= FIELD_GET(INGRESS_SLOW_TICK_RATIO_MASK, val);
+ if (!tick)
+ return -EINVAL;
+
+ unit = (config & TRTCM_PKT_MODE) ? 1000000 / tick : 8000 / tick;
+ if (!unit)
+ return -EINVAL;
+
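+	/* Program the integer part of the token rate plus a fractional
+	 * remainder scaled into the token-rate field width.
+	 */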
+ rate = rate_val / unit;
+ rate_frac = rate_val % unit;
+ rate_frac = FIELD_PREP(TRTCM_TOKEN_RATE_MASK, rate_frac) / unit;
+ rate = FIELD_PREP(TRTCM_TOKEN_RATE_MASK, rate) |
+ FIELD_PREP(TRTCM_TOKEN_RATE_FRACTION_MASK, rate_frac);
+
+ err = airoha_qdma_set_rl_param(qdma, queue_id, REG_INGRESS_TRTCM_CFG,
+ TRTCM_TOKEN_RATE_MODE, rate);
+ if (err)
+ return err;
+
+ val = bucket_size;
+ if (!(config & TRTCM_PKT_MODE))
+ val = max_t(u32, val, MIN_TOKEN_SIZE);
+ val = min_t(u32, __fls(val), MAX_TOKEN_SIZE_OFFSET);
+
+ return airoha_qdma_set_rl_param(qdma, queue_id, REG_INGRESS_TRTCM_CFG,
+ TRTCM_BUCKETSIZE_SHIFT_MODE, val);
+}
+
+static int airoha_qdma_init_rl_config(struct airoha_qdma *qdma, int queue_id,
+ bool enable, enum trtcm_unit_type unit)
+{
+ bool tick_sel = queue_id == 0 || queue_id == 2 || queue_id == 8;
+ enum trtcm_param mode = TRTCM_METER_MODE;
+ int err;
+
+ mode |= unit == TRTCM_PACKET_UNIT ? TRTCM_PKT_MODE : 0;
+ err = airoha_qdma_set_rl_config(qdma, queue_id, REG_INGRESS_TRTCM_CFG,
+ enable, mode);
+ if (err)
+ return err;
+
+ return airoha_qdma_set_rl_config(qdma, queue_id, REG_INGRESS_TRTCM_CFG,
+ tick_sel, TRTCM_TICK_SEL);
+}
+
static int airoha_qdma_get_trtcm_param(struct airoha_qdma *qdma, int channel,
u32 addr, enum trtcm_param_type param,
enum trtcm_mode_type mode,
@@ -2273,10 +2410,142 @@ static int airoha_tc_htb_alloc_leaf_queue(struct airoha_gdm_port *port,
return 0;
}
+static int airoha_qdma_set_rx_meter(struct airoha_gdm_port *port,
+ u32 rate, u32 bucket_size,
+ enum trtcm_unit_type unit_type)
+{
+ struct airoha_qdma *qdma = port->qdma;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) {
+ int err;
+
+ if (!qdma->q_rx[i].ndesc)
+ continue;
+
+ err = airoha_qdma_init_rl_config(qdma, i, !!rate, unit_type);
+ if (err)
+ return err;
+
+ err = airoha_qdma_set_rl_token_bucket(qdma, i, rate,
+ bucket_size);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int airoha_tc_matchall_act_validate(struct tc_cls_matchall_offload *f)
+{
+ const struct flow_action *actions = &f->rule->action;
+ const struct flow_action_entry *act;
+
+ if (!flow_action_has_entries(actions)) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "filter run with no actions");
+ return -EINVAL;
+ }
+
+ if (!flow_offload_has_one_action(actions)) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "only once action per filter is supported");
+ return -EOPNOTSUPP;
+ }
+
+ act = &actions->entries[0];
+ if (act->id != FLOW_ACTION_POLICE) {
+ NL_SET_ERR_MSG_MOD(f->common.extack, "unsupported action");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "invalid exceed action id");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "invalid notexceed action id");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
+ !flow_action_is_last_entry(actions, act)) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "action accept must be last");
+ return -EOPNOTSUPP;
+ }
+
+ if (act->police.peakrate_bytes_ps || act->police.avrate ||
+ act->police.overhead || act->police.mtu) {
+ NL_SET_ERR_MSG_MOD(f->common.extack,
+ "peakrate/avrate/overhead/mtu unsupported");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
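+/* Illustrative (not authoritative) userspace commands that reach this
+ * handler through a matchall police offload; the interface name is an
+ * example:
+ *   tc qdisc add dev eth0 clsact
+ *   tc filter add dev eth0 ingress matchall \
+ *           action police rate 100mbit burst 16k conform-exceed drop/ok
+ */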
+static int airoha_dev_tc_matchall(struct net_device *dev,
+ struct tc_cls_matchall_offload *f)
+{
+ enum trtcm_unit_type unit_type = TRTCM_BYTE_UNIT;
+ struct airoha_gdm_port *port = netdev_priv(dev);
+ u32 rate = 0, bucket_size = 0;
+
+ switch (f->command) {
+ case TC_CLSMATCHALL_REPLACE: {
+ const struct flow_action_entry *act;
+ int err;
+
+ err = airoha_tc_matchall_act_validate(f);
+ if (err)
+ return err;
+
+ act = &f->rule->action.entries[0];
+ if (act->police.rate_pkt_ps) {
+ rate = act->police.rate_pkt_ps;
+ bucket_size = act->police.burst_pkt;
+ unit_type = TRTCM_PACKET_UNIT;
+ } else {
+ rate = div_u64(act->police.rate_bytes_ps, 1000);
+ rate = rate << 3; /* Kbps */
+ bucket_size = act->police.burst;
+ }
+ fallthrough;
+ }
+ case TC_CLSMATCHALL_DESTROY:
+ return airoha_qdma_set_rx_meter(port, rate, bucket_size,
+ unit_type);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int airoha_dev_setup_tc_block_cb(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ struct net_device *dev = cb_priv;
+
+ if (!tc_can_offload(dev))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return airoha_ppe_setup_tc_block_cb(dev, type_data);
+ case TC_SETUP_CLSMATCHALL:
+ return airoha_dev_tc_matchall(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int airoha_dev_setup_tc_block(struct airoha_gdm_port *port,
struct flow_block_offload *f)
{
- flow_setup_cb_t *cb = airoha_ppe_setup_tc_block_cb;
+ flow_setup_cb_t *cb = airoha_dev_setup_tc_block_cb;
static LIST_HEAD(block_cb_list);
struct flow_block_cb *block_cb;
diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h
index ec8908f904c6..da5371bcd147 100644
--- a/drivers/net/ethernet/airoha/airoha_eth.h
+++ b/drivers/net/ethernet/airoha/airoha_eth.h
@@ -127,6 +127,11 @@ enum tx_sched_mode {
TC_SCH_WRR2,
};
+enum trtcm_unit_type {
+ TRTCM_BYTE_UNIT,
+ TRTCM_PACKET_UNIT,
+};
+
enum trtcm_param_type {
TRTCM_MISC_MODE, /* meter_en, pps_mode, tick_sel */
TRTCM_TOKEN_RATE_MODE,
@@ -422,12 +427,27 @@ struct airoha_flow_data {
} pppoe;
};
+enum airoha_flow_entry_type {
+ FLOW_TYPE_L4,
+ FLOW_TYPE_L2,
+ FLOW_TYPE_L2_SUBFLOW,
+};
+
struct airoha_flow_table_entry {
- struct hlist_node list;
+ union {
+ struct hlist_node list; /* PPE L3 flow entry */
+ struct {
+ struct rhash_head l2_node; /* L2 flow entry */
+ struct hlist_head l2_flows; /* PPE L2 subflows list */
+ };
+ };
struct airoha_foe_entry data;
+ struct hlist_node l2_subflow_node; /* PPE L2 subflow entry */
u32 hash;
+ enum airoha_flow_entry_type type;
+
struct rhash_head node;
unsigned long cookie;
};
@@ -480,6 +500,8 @@ struct airoha_ppe {
void *foe;
dma_addr_t foe_dma;
+ struct rhashtable l2_flows;
+
struct hlist_head *foe_flow;
u16 foe_check_time[PPE_NUM_ENTRIES];
@@ -535,9 +557,9 @@ u32 airoha_rmw(void __iomem *base, u32 offset, u32 mask, u32 val);
bool airoha_is_valid_gdm_port(struct airoha_eth *eth,
struct airoha_gdm_port *port);
-void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash);
-int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv);
+void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
+ u16 hash);
+int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data);
int airoha_ppe_init(struct airoha_eth *eth);
void airoha_ppe_deinit(struct airoha_eth *eth);
struct airoha_foe_entry *airoha_ppe_foe_get_entry(struct airoha_ppe *ppe,
diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c
index f10dab935cab..6e9787c2843b 100644
--- a/drivers/net/ethernet/airoha/airoha_ppe.c
+++ b/drivers/net/ethernet/airoha/airoha_ppe.c
@@ -24,6 +24,13 @@ static const struct rhashtable_params airoha_flow_table_params = {
.automatic_shrinking = true,
};
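+
+/* L2 flow entries are keyed on the destination+source MAC pair
+ * (2 * ETH_ALEN bytes) at the start of the bridge FOE entry.
+ */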
+static const struct rhashtable_params airoha_l2_flow_table_params = {
+ .head_offset = offsetof(struct airoha_flow_table_entry, l2_node),
+ .key_offset = offsetof(struct airoha_flow_table_entry, data.bridge),
+ .key_len = 2 * ETH_ALEN,
+ .automatic_shrinking = true,
+};
+
static bool airoha_ppe2_is_enabled(struct airoha_eth *eth)
{
return airoha_fe_rr(eth, REG_PPE_GLO_CFG(1)) & PPE_GLO_CFG_EN_MASK;
@@ -197,6 +204,15 @@ static int airoha_get_dsa_port(struct net_device **dev)
#endif
}
+static void airoha_ppe_foe_set_bridge_addrs(struct airoha_foe_bridge *br,
+ struct ethhdr *eh)
+{
+ br->dest_mac_hi = get_unaligned_be32(eh->h_dest);
+ br->dest_mac_lo = get_unaligned_be16(eh->h_dest + 4);
+ br->src_mac_hi = get_unaligned_be16(eh->h_source);
+ br->src_mac_lo = get_unaligned_be32(eh->h_source + 2);
+}
+
static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
struct airoha_foe_entry *hwe,
struct net_device *dev, int type,
@@ -247,13 +263,7 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth,
qdata = FIELD_PREP(AIROHA_FOE_SHAPER_ID, 0x7f);
if (type == PPE_PKT_TYPE_BRIDGE) {
- hwe->bridge.dest_mac_hi = get_unaligned_be32(data->eth.h_dest);
- hwe->bridge.dest_mac_lo =
- get_unaligned_be16(data->eth.h_dest + 4);
- hwe->bridge.src_mac_hi =
- get_unaligned_be16(data->eth.h_source);
- hwe->bridge.src_mac_lo =
- get_unaligned_be32(data->eth.h_source + 2);
+ airoha_ppe_foe_set_bridge_addrs(&hwe->bridge, &data->eth);
hwe->bridge.data = qdata;
hwe->bridge.ib2 = val;
l2 = &hwe->bridge.l2.common;
@@ -378,6 +388,19 @@ static u32 airoha_ppe_foe_get_entry_hash(struct airoha_foe_entry *hwe)
hv3 = hwe->ipv6.src_ip[1] ^ hwe->ipv6.dest_ip[1];
hv3 ^= hwe->ipv6.src_ip[0];
break;
+ case PPE_PKT_TYPE_BRIDGE: {
+ struct airoha_foe_mac_info *l2 = &hwe->bridge.l2;
+
+ hv1 = l2->common.src_mac_hi & 0xffff;
+ hv1 = hv1 << 16 | l2->src_mac_lo;
+
+ hv2 = l2->common.dest_mac_lo;
+ hv2 = hv2 << 16;
+ hv2 = hv2 | ((l2->common.src_mac_hi & 0xffff0000) >> 16);
+
+ hv3 = l2->common.dest_mac_hi;
+ break;
+ }
case PPE_PKT_TYPE_IPV4_DSLITE:
case PPE_PKT_TYPE_IPV6_6RD:
default:
@@ -476,10 +499,102 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe,
return 0;
}
-static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, u32 hash)
+static void airoha_ppe_foe_remove_flow(struct airoha_ppe *ppe,
+ struct airoha_flow_table_entry *e)
+{
+ lockdep_assert_held(&ppe_lock);
+
+ hlist_del_init(&e->list);
+ if (e->hash != 0xffff) {
+ e->data.ib1 &= ~AIROHA_FOE_IB1_BIND_STATE;
+ e->data.ib1 |= FIELD_PREP(AIROHA_FOE_IB1_BIND_STATE,
+ AIROHA_FOE_STATE_INVALID);
+ airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash);
+ e->hash = 0xffff;
+ }
+ if (e->type == FLOW_TYPE_L2_SUBFLOW) {
+ hlist_del_init(&e->l2_subflow_node);
+ kfree(e);
+ }
+}
+
+static void airoha_ppe_foe_remove_l2_flow(struct airoha_ppe *ppe,
+ struct airoha_flow_table_entry *e)
+{
+ struct hlist_head *head = &e->l2_flows;
+ struct hlist_node *n;
+
+ lockdep_assert_held(&ppe_lock);
+
+ rhashtable_remove_fast(&ppe->l2_flows, &e->l2_node,
+ airoha_l2_flow_table_params);
+ hlist_for_each_entry_safe(e, n, head, l2_subflow_node)
+ airoha_ppe_foe_remove_flow(ppe, e);
+}
+
+static void airoha_ppe_foe_flow_remove_entry(struct airoha_ppe *ppe,
+ struct airoha_flow_table_entry *e)
+{
+ spin_lock_bh(&ppe_lock);
+
+ if (e->type == FLOW_TYPE_L2)
+ airoha_ppe_foe_remove_l2_flow(ppe, e);
+ else
+ airoha_ppe_foe_remove_flow(ppe, e);
+
+ spin_unlock_bh(&ppe_lock);
+}
+
+static int
+airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe,
+ struct airoha_flow_table_entry *e,
+ u32 hash)
+{
+ u32 mask = AIROHA_FOE_IB1_BIND_PACKET_TYPE | AIROHA_FOE_IB1_BIND_UDP;
+ struct airoha_foe_entry *hwe_p, hwe;
+ struct airoha_flow_table_entry *f;
+ struct airoha_foe_mac_info *l2;
+ int type;
+
+ hwe_p = airoha_ppe_foe_get_entry(ppe, hash);
+ if (!hwe_p)
+ return -EINVAL;
+
+ f = kzalloc(sizeof(*f), GFP_ATOMIC);
+ if (!f)
+ return -ENOMEM;
+
+ hlist_add_head(&f->l2_subflow_node, &e->l2_flows);
+ f->type = FLOW_TYPE_L2_SUBFLOW;
+ f->hash = hash;
+
+ memcpy(&hwe, hwe_p, sizeof(*hwe_p));
+ hwe.ib1 = (hwe.ib1 & mask) | (e->data.ib1 & ~mask);
+ l2 = &hwe.bridge.l2;
+ memcpy(l2, &e->data.bridge.l2, sizeof(*l2));
+
+ type = FIELD_GET(AIROHA_FOE_IB1_BIND_PACKET_TYPE, hwe.ib1);
+ if (type == PPE_PKT_TYPE_IPV4_HNAPT)
+ memcpy(&hwe.ipv4.new_tuple, &hwe.ipv4.orig_tuple,
+ sizeof(hwe.ipv4.new_tuple));
+ else if (type >= PPE_PKT_TYPE_IPV6_ROUTE_3T &&
+ l2->common.etype == ETH_P_IP)
+ l2->common.etype = ETH_P_IPV6;
+
+ hwe.bridge.ib2 = e->data.bridge.ib2;
+ airoha_ppe_foe_commit_entry(ppe, &hwe, hash);
+
+ return 0;
+}
+
+static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe,
+ struct sk_buff *skb,
+ u32 hash)
{
struct airoha_flow_table_entry *e;
+ struct airoha_foe_bridge br = {};
struct airoha_foe_entry *hwe;
+ bool commit_done = false;
struct hlist_node *n;
u32 index, state;
@@ -495,21 +610,68 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, u32 hash)
index = airoha_ppe_foe_get_entry_hash(hwe);
hlist_for_each_entry_safe(e, n, &ppe->foe_flow[index], list) {
- if (airoha_ppe_foe_compare_entry(e, hwe)) {
- airoha_ppe_foe_commit_entry(ppe, &e->data, hash);
- e->hash = hash;
- break;
+ if (e->type == FLOW_TYPE_L2_SUBFLOW) {
+ state = FIELD_GET(AIROHA_FOE_IB1_BIND_STATE, hwe->ib1);
+ if (state != AIROHA_FOE_STATE_BIND) {
+ e->hash = 0xffff;
+ airoha_ppe_foe_remove_flow(ppe, e);
+ }
+ continue;
}
+
+ if (commit_done || !airoha_ppe_foe_compare_entry(e, hwe)) {
+ e->hash = 0xffff;
+ continue;
+ }
+
+ airoha_ppe_foe_commit_entry(ppe, &e->data, hash);
+ commit_done = true;
+ e->hash = hash;
}
+
+ if (commit_done)
+ goto unlock;
+
+ airoha_ppe_foe_set_bridge_addrs(&br, eth_hdr(skb));
+ e = rhashtable_lookup_fast(&ppe->l2_flows, &br,
+ airoha_l2_flow_table_params);
+ if (e)
+ airoha_ppe_foe_commit_subflow_entry(ppe, e, hash);
unlock:
spin_unlock_bh(&ppe_lock);
}
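
The rewritten bucket walk above binds at most one software flow to the hardware slot: stale L2 subflows are torn down, the first matching entry is committed, and every other colliding entry is marked unbound. A standalone sketch of that commit-once traversal over a toy bucket list:

	#include <stdbool.h>
	#include <stddef.h>
	#include <stdio.h>

	struct flow {
		struct flow *next;
		int hash;	/* 0xffff == not bound to a hardware slot */
		bool matches;	/* stand-in for airoha_ppe_foe_compare_entry() */
	};

	/* Bind at most one flow in the bucket to hardware slot `hash`;
	 * everything else is marked unbound, mirroring the walk above. */
	static bool bind_first_match(struct flow *bucket, int hash)
	{
		bool commit_done = false;

		for (struct flow *e = bucket; e; e = e->next) {
			if (commit_done || !e->matches) {
				e->hash = 0xffff;
				continue;
			}
			e->hash = hash;	/* hardware commit would happen here */
			commit_done = true;
		}
		return commit_done;
	}

	int main(void)
	{
		struct flow c = { NULL, 0xffff, true };
		struct flow b = { &c, 0xffff, true };
		struct flow a = { &b, 0xffff, false };

		printf("committed: %d, b.hash=%#x c.hash=%#x\n",
		       bind_first_match(&a, 0x42), b.hash, c.hash);
		return 0;
	}
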
+static int
+airoha_ppe_foe_l2_flow_commit_entry(struct airoha_ppe *ppe,
+ struct airoha_flow_table_entry *e)
+{
+ struct airoha_flow_table_entry *prev;
+
+ e->type = FLOW_TYPE_L2;
+ prev = rhashtable_lookup_get_insert_fast(&ppe->l2_flows, &e->l2_node,
+ airoha_l2_flow_table_params);
+ if (!prev)
+ return 0;
+
+ if (IS_ERR(prev))
+ return PTR_ERR(prev);
+
+ return rhashtable_replace_fast(&ppe->l2_flows, &prev->l2_node,
+ &e->l2_node,
+ airoha_l2_flow_table_params);
+}
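
rhashtable_lookup_get_insert_fast() returns NULL when the new node was inserted, an ERR_PTR() on failure, or the already-present node, in which case the code above swaps the old node out with rhashtable_replace_fast() so the newest flow wins. A userspace sketch of the same insert-or-replace contract over a toy single-bucket table (the real key would be the bridge MAC pair; the toy insert, unlike the kernel helper, cannot fail):

	#include <errno.h>
	#include <stdio.h>
	#include <string.h>

	struct node {
		char key[16];
		struct node *next;
	};

	static struct node *bucket;	/* toy one-bucket "table" */

	/* Insert `n`; return NULL on success or the existing same-key node. */
	static struct node *lookup_get_insert(struct node *n)
	{
		for (struct node *it = bucket; it; it = it->next)
			if (!strcmp(it->key, n->key))
				return it;
		n->next = bucket;
		bucket = n;
		return NULL;
	}

	/* Replace `old` with `new` in place, keeping the chain intact. */
	static int replace(struct node *old, struct node *new)
	{
		for (struct node **pp = &bucket; *pp; pp = &(*pp)->next) {
			if (*pp == old) {
				new->next = old->next;
				*pp = new;
				return 0;
			}
		}
		return -ENOENT;
	}

	static int commit(struct node *n)
	{
		struct node *prev = lookup_get_insert(n);

		if (!prev)
			return 0;
		return replace(prev, n);	/* newer flow wins, as above */
	}

	int main(void)
	{
		struct node a = { "aa:bb:cc" }, b = { "aa:bb:cc" };

		printf("%d %d b_is_head=%d\n", commit(&a), commit(&b),
		       bucket == &b);
		return 0;
	}
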
+
static int airoha_ppe_foe_flow_commit_entry(struct airoha_ppe *ppe,
struct airoha_flow_table_entry *e)
{
- u32 hash = airoha_ppe_foe_get_entry_hash(&e->data);
+ int type = FIELD_GET(AIROHA_FOE_IB1_BIND_PACKET_TYPE, e->data.ib1);
+ u32 hash;
+
+ if (type == PPE_PKT_TYPE_BRIDGE)
+ return airoha_ppe_foe_l2_flow_commit_entry(ppe, e);
+ hash = airoha_ppe_foe_get_entry_hash(&e->data);
+ e->type = FLOW_TYPE_L4;
e->hash = 0xffff;
spin_lock_bh(&ppe_lock);
@@ -519,23 +681,6 @@ static int airoha_ppe_foe_flow_commit_entry(struct airoha_ppe *ppe,
return 0;
}
-static void airoha_ppe_foe_flow_remove_entry(struct airoha_ppe *ppe,
- struct airoha_flow_table_entry *e)
-{
- spin_lock_bh(&ppe_lock);
-
- hlist_del_init(&e->list);
- if (e->hash != 0xffff) {
- e->data.ib1 &= ~AIROHA_FOE_IB1_BIND_STATE;
- e->data.ib1 |= FIELD_PREP(AIROHA_FOE_IB1_BIND_STATE,
- AIROHA_FOE_STATE_INVALID);
- airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash);
- e->hash = 0xffff;
- }
-
- spin_unlock_bh(&ppe_lock);
-}
-
static int airoha_ppe_flow_offload_replace(struct airoha_gdm_port *port,
struct flow_cls_offload *f)
{
@@ -822,18 +967,13 @@ error_npu_put:
return err;
}
-int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
- void *cb_priv)
+int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data)
{
- struct flow_cls_offload *cls = type_data;
- struct net_device *dev = cb_priv;
struct airoha_gdm_port *port = netdev_priv(dev);
+ struct flow_cls_offload *cls = type_data;
struct airoha_eth *eth = port->qdma->eth;
int err = 0;
- if (!tc_can_offload(dev) || type != TC_SETUP_CLSFLOWER)
- return -EOPNOTSUPP;
-
mutex_lock(&flow_offload_mutex);
if (!eth->npu)
@@ -846,7 +986,8 @@ int airoha_ppe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
return err;
}
-void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash)
+void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb,
+ u16 hash)
{
u16 now, diff;
@@ -859,7 +1000,7 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, u16 hash)
return;
ppe->foe_check_time[hash] = now;
- airoha_ppe_foe_insert_entry(ppe, hash);
+ airoha_ppe_foe_insert_entry(ppe, skb, hash);
}
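
airoha_ppe_check_skb() rate-limits the FOE lookup per hash bucket with a 16-bit time snapshot; unsigned wraparound makes the `now - then` delta correct even across the u16 rollover. A self-contained sketch of that debounce, assuming a 10-tick window (the real interval is driver-defined):

	#include <stdint.h>
	#include <stdio.h>

	#define CHECK_INTERVAL 10	/* assumed window, in ticks */

	static uint16_t last_check[64];

	/* Return 1 when the expensive per-hash work should run. */
	static int should_check(uint16_t now, unsigned int hash)
	{
		uint16_t diff = now - last_check[hash];	/* wraps on u16 */

		if (diff < CHECK_INTERVAL)
			return 0;
		last_check[hash] = now;
		return 1;
	}

	int main(void)
	{
		/* 0xfffe -> 0x0003 crosses the rollover but reads as 5. */
		printf("%d %d %d\n", should_check(0xfffe, 7),
		       should_check(0x0003, 7), should_check(0x0100, 7));
		return 0;
	}
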
int airoha_ppe_init(struct airoha_eth *eth)
@@ -890,9 +1031,20 @@ int airoha_ppe_init(struct airoha_eth *eth)
if (err)
return err;
+ err = rhashtable_init(&ppe->l2_flows, &airoha_l2_flow_table_params);
+ if (err)
+ goto error_flow_table_destroy;
+
err = airoha_ppe_debugfs_init(ppe);
if (err)
- rhashtable_destroy(&eth->flow_table);
+ goto error_l2_flow_table_destroy;
+
+ return 0;
+
+error_l2_flow_table_destroy:
+ rhashtable_destroy(&ppe->l2_flows);
+error_flow_table_destroy:
+ rhashtable_destroy(&eth->flow_table);
return err;
}
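
The reworked init path above follows the usual kernel pattern: each successfully initialized resource gets a matching error label, and failures unwind in reverse order of setup. A minimal standalone sketch of the same shape (resource names hypothetical):

	#include <stdio.h>

	static int init_a(void) { puts("init a"); return 0; }
	static int init_b(void) { puts("init b"); return 0; }
	static int init_c(void) { puts("init c"); return -1; } /* force fail */
	static void fini_a(void) { puts("fini a"); }
	static void fini_b(void) { puts("fini b"); }

	static int setup(void)
	{
		int err;

		err = init_a();
		if (err)
			return err;

		err = init_b();
		if (err)
			goto error_fini_a;

		err = init_c();
		if (err)
			goto error_fini_b;

		return 0;

	error_fini_b:
		fini_b();
	error_fini_a:
		fini_a();
		return err;
	}

	int main(void)
	{
		printf("setup() = %d\n", setup());
		return 0;
	}
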
@@ -909,6 +1061,7 @@ void airoha_ppe_deinit(struct airoha_eth *eth)
}
rcu_read_unlock();
+ rhashtable_destroy(&eth->ppe->l2_flows);
rhashtable_destroy(&eth->flow_table);
debugfs_remove(eth->ppe->debugfs_dir);
}
diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h
index 8146cde4e8ba..29c8f046b991 100644
--- a/drivers/net/ethernet/airoha/airoha_regs.h
+++ b/drivers/net/ethernet/airoha/airoha_regs.h
@@ -283,6 +283,7 @@
#define PPE_HASH_SEED 0x12345678
#define REG_PPE_DFT_CPORT0(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x248)
+#define DFT_CPORT_MASK(_n) GENMASK(3 + ((_n) << 2), ((_n) << 2))
#define REG_PPE_DFT_CPORT1(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x24c)
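
DFT_CPORT_MASK(_n) selects the 4-bit default-CPU-port field for index _n inside the 32-bit register: GENMASK(3 + (_n << 2), (_n) << 2) covers bits [4n+3:4n], eight nibbles per register. A userspace check of that arithmetic, with GENMASK re-derived locally:

	#include <stdint.h>
	#include <stdio.h>

	/* Userspace re-derivation of kernel GENMASK(h, l) for 32-bit words. */
	#define GENMASK32(h, l) \
		(((~0u) >> (31 - (h))) & ((~0u) << (l)))

	#define DFT_CPORT_MASK(n) GENMASK32(3 + ((n) << 2), (n) << 2)

	int main(void)
	{
		for (int n = 0; n < 8; n++)	/* eight 4-bit fields */
			printf("index %d: %08x\n", n, DFT_CPORT_MASK(n));
		return 0;
	}
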
@@ -691,6 +692,12 @@
#define REG_TRTCM_DATA_LOW(_n) ((_n) + 0x8)
#define REG_TRTCM_DATA_HIGH(_n) ((_n) + 0xc)
+#define RATE_LIMIT_PARAM_RW_MASK BIT(31)
+#define RATE_LIMIT_PARAM_RW_DONE_MASK BIT(30)
+#define RATE_LIMIT_PARAM_TYPE_MASK GENMASK(29, 28)
+#define RATE_LIMIT_METER_GROUP_MASK GENMASK(27, 26)
+#define RATE_LIMIT_PARAM_INDEX_MASK GENMASK(23, 16)
+
#define REG_TXWRR_MODE_CFG 0x1020
#define TWRR_WEIGHT_SCALE_MASK BIT(31)
#define TWRR_WEIGHT_BASE_MASK BIT(3)
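
The new RATE_LIMIT_* masks describe a single command word: a read/write bit, a completion bit, a 2-bit parameter type, a 2-bit meter group and an 8-bit index. A hedged sketch of composing such a word with a hand-rolled FIELD_PREP; the meaning assigned to each field is an assumption from the names, since the hardware contract is not shown in this hunk.

	#include <stdint.h>
	#include <stdio.h>

	#define BIT(n)		(1u << (n))
	#define RW_MASK		BIT(31)			/* GENMASK(31, 31) */
	#define RW_DONE_MASK	BIT(30)			/* GENMASK(30, 30) */
	#define TYPE_MASK	(0x3u << 28)		/* GENMASK(29, 28) */
	#define GROUP_MASK	(0x3u << 26)		/* GENMASK(27, 26) */
	#define INDEX_MASK	(0xffu << 16)		/* GENMASK(23, 16) */

	static uint32_t field_prep(uint32_t mask, uint32_t val)
	{
		return (val << __builtin_ctz(mask)) & mask;
	}

	int main(void)
	{
		/* Hypothetical "write parameter type 2, group 1, index 0x42";
		 * software would then poll the DONE bit for completion. */
		uint32_t cmd = RW_MASK |
			       field_prep(TYPE_MASK, 2) |
			       field_prep(GROUP_MASK, 1) |
			       field_prep(INDEX_MASK, 0x42);

		printf("cmd = %08x, done = %u\n", cmd,
		       !!(cmd & RW_DONE_MASK));
		return 0;
	}
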
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 897720fdf5d8..7d9ee9867a40 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1777,7 +1777,7 @@ static void ena_init_napi_in_range(struct ena_adapter *adapter,
if (ENA_IS_XDP_INDEX(adapter, i))
napi_handler = ena_xdp_io_poll;
- netif_napi_add(adapter->netdev, &napi->napi, napi_handler);
+ netif_napi_add_config(adapter->netdev, &napi->napi, napi_handler, i);
if (!ENA_IS_XDP_INDEX(adapter, i))
napi->rx_ring = rx_ring;
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index 3b70f6737633..e3d33f5b9642 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#ifndef __XGBE_COMMON_H__
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c b/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c
index c68ace804e37..1474df5544fa 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dcb.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
index b35808d3d07f..d9157c4acde9 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/debugfs.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
index 230726d7b74f..7dba849bcd6b 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-desc.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include "xgbe.h"
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index f393228d41c7..b51a3666dddb 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/phy.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index d84a310dfcd4..0697eed1cb72 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/module.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 4431ab1c18b3..be0d2c7d08dc 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/spinlock.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
index 7a833894f52a..d40011e8ddf2 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/module.h>
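Every xgbe file below receives the same conversion: roughly 115 lines of duplicated GPLv2/Modified-BSD boilerplate collapse into a single machine-readable SPDX expression plus a condensed copyright block. The OR in the expression preserves the original choice-of-license terms (GPLv2-or-later, or 3-clause BSD, at the licensee's option). For reference, the resulting header as it appears at the top of each converted source file:

	// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
	/*
	 * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
	 * Copyright (c) 2014, Synopsys, Inc.
	 * All rights reserved
	 */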
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 0e8698928e4d..4ebdd123c435 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/module.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 07f4f3418d01..71449edbb76d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/interrupt.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
index c636999a6a84..3e9f31256dce 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/module.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
index d16eae415f72..2e6b8ffe785c 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/module.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
index 268399dfcf22..7a4dfa4e19c7 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/module.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
index 4365bd62942c..47d53e59ccf6 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/module.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
index 7051bd7cf6dc..978c4dd01fa0 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#include <linux/clk.h>
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index d85386cac8d1..e5f5104342aa 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -1,117 +1,8 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause)
/*
- * AMD 10Gb Ethernet driver
- *
- * This file is available to you under your choice of the following two
- * licenses:
- *
- * License 1: GPLv2
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- *
- * This file is free software; you may copy, redistribute and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or (at
- * your option) any later version.
- *
- * This file is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * License 2: Modified BSD
- *
- * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Advanced Micro Devices, Inc. nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
- * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * This file incorporates work covered by the following copyright and
- * permission notice:
- * The Synopsys DWC ETHER XGMAC Software Driver and documentation
- * (hereinafter "Software") is an unsupported proprietary work of Synopsys,
- * Inc. unless otherwise expressly agreed to in writing between Synopsys
- * and you.
- *
- * The Software IS NOT an item of Licensed Software or Licensed Product
- * under any End User Software License Agreement or Agreement for Licensed
- * Product with Synopsys or any supplement thereto. Permission is hereby
- * granted, free of charge, to any person obtaining a copy of this software
- * annotated with this license and the Software, to deal in the Software
- * without restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished
- * to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
- * BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
- * PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
+ * Copyright (c) 2014-2025, Advanced Micro Devices, Inc.
+ * Copyright (c) 2014, Synopsys, Inc.
+ * All rights reserved
*/
#ifndef __XGBE_H__
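One note on form: the kernel's license-rules documentation reserves the // tag style for .c sources and uses the C89 comment style for SPDX tags in header files, so a header-file tag carrying the same expression would conventionally be written as sketched below; the license expression itself is unchanged.

	/* SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) */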
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index c8e3468eee61..3d9205b9a8c3 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -893,9 +893,9 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
bnapi->events &= ~BNXT_TX_CMP_EVENT;
}
-static bool bnxt_separate_head_pool(void)
+static bool bnxt_separate_head_pool(struct bnxt_rx_ring_info *rxr)
{
- return PAGE_SIZE > BNXT_RX_PAGE_SIZE;
+ return rxr->need_head_pool || PAGE_SIZE > BNXT_RX_PAGE_SIZE;
}
static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
@@ -919,6 +919,20 @@ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
return page;
}
+static netmem_ref __bnxt_alloc_rx_netmem(struct bnxt *bp, dma_addr_t *mapping,
+ struct bnxt_rx_ring_info *rxr,
+ gfp_t gfp)
+{
+ netmem_ref netmem;
+
+ netmem = page_pool_alloc_netmems(rxr->page_pool, gfp);
+ if (!netmem)
+ return 0;
+
+ *mapping = page_pool_get_dma_addr_netmem(netmem);
+ return netmem;
+}
+
static inline u8 *__bnxt_alloc_rx_frag(struct bnxt *bp, dma_addr_t *mapping,
struct bnxt_rx_ring_info *rxr,
gfp_t gfp)
@@ -999,21 +1013,19 @@ static inline u16 bnxt_find_next_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx)
return next;
}
-static inline int bnxt_alloc_rx_page(struct bnxt *bp,
- struct bnxt_rx_ring_info *rxr,
- u16 prod, gfp_t gfp)
+static int bnxt_alloc_rx_netmem(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+ u16 prod, gfp_t gfp)
{
struct rx_bd *rxbd =
&rxr->rx_agg_desc_ring[RX_AGG_RING(bp, prod)][RX_IDX(prod)];
struct bnxt_sw_rx_agg_bd *rx_agg_buf;
- struct page *page;
- dma_addr_t mapping;
u16 sw_prod = rxr->rx_sw_agg_prod;
unsigned int offset = 0;
+ dma_addr_t mapping;
+ netmem_ref netmem;
- page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);
-
- if (!page)
+ netmem = __bnxt_alloc_rx_netmem(bp, &mapping, rxr, gfp);
+ if (!netmem)
return -ENOMEM;
if (unlikely(test_bit(sw_prod, rxr->rx_agg_bmap)))
@@ -1023,7 +1035,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
rx_agg_buf = &rxr->rx_agg_ring[sw_prod];
rxr->rx_sw_agg_prod = RING_RX_AGG(bp, NEXT_RX_AGG(sw_prod));
- rx_agg_buf->page = page;
+ rx_agg_buf->netmem = netmem;
rx_agg_buf->offset = offset;
rx_agg_buf->mapping = mapping;
rxbd->rx_bd_haddr = cpu_to_le64(mapping);
@@ -1067,11 +1079,11 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
p5_tpa = true;
for (i = 0; i < agg_bufs; i++) {
- u16 cons;
- struct rx_agg_cmp *agg;
struct bnxt_sw_rx_agg_bd *cons_rx_buf, *prod_rx_buf;
+ struct rx_agg_cmp *agg;
struct rx_bd *prod_bd;
- struct page *page;
+ netmem_ref netmem;
+ u16 cons;
if (p5_tpa)
agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i);
@@ -1088,11 +1100,11 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx,
cons_rx_buf = &rxr->rx_agg_ring[cons];
/* It is possible for sw_prod to be equal to cons, so
- * set cons_rx_buf->page to NULL first.
+ * set cons_rx_buf->netmem to 0 first.
*/
- page = cons_rx_buf->page;
- cons_rx_buf->page = NULL;
- prod_rx_buf->page = page;
+ netmem = cons_rx_buf->netmem;
+ cons_rx_buf->netmem = 0;
+ prod_rx_buf->netmem = netmem;
prod_rx_buf->offset = cons_rx_buf->offset;
prod_rx_buf->mapping = cons_rx_buf->mapping;
@@ -1218,29 +1230,35 @@ static struct sk_buff *bnxt_rx_skb(struct bnxt *bp,
return skb;
}
-static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
- struct bnxt_cp_ring_info *cpr,
- struct skb_shared_info *shinfo,
- u16 idx, u32 agg_bufs, bool tpa,
- struct xdp_buff *xdp)
+static u32 __bnxt_rx_agg_netmems(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr,
+ u16 idx, u32 agg_bufs, bool tpa,
+ struct sk_buff *skb,
+ struct xdp_buff *xdp)
{
struct bnxt_napi *bnapi = cpr->bnapi;
- struct pci_dev *pdev = bp->pdev;
- struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
- u16 prod = rxr->rx_agg_prod;
+ struct skb_shared_info *shinfo;
+ struct bnxt_rx_ring_info *rxr;
u32 i, total_frag_len = 0;
bool p5_tpa = false;
+ u16 prod;
+
+ rxr = bnapi->rx_ring;
+ prod = rxr->rx_agg_prod;
if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && tpa)
p5_tpa = true;
+ if (skb)
+ shinfo = skb_shinfo(skb);
+ else
+ shinfo = xdp_get_shared_info_from_buff(xdp);
+
for (i = 0; i < agg_bufs; i++) {
- skb_frag_t *frag = &shinfo->frags[i];
- u16 cons, frag_len;
- struct rx_agg_cmp *agg;
struct bnxt_sw_rx_agg_bd *cons_rx_buf;
- struct page *page;
- dma_addr_t mapping;
+ struct rx_agg_cmp *agg;
+ u16 cons, frag_len;
+ netmem_ref netmem;
if (p5_tpa)
agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, i);
@@ -1251,27 +1269,41 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT;
cons_rx_buf = &rxr->rx_agg_ring[cons];
- skb_frag_fill_page_desc(frag, cons_rx_buf->page,
- cons_rx_buf->offset, frag_len);
- shinfo->nr_frags = i + 1;
+ if (skb) {
+ skb_add_rx_frag_netmem(skb, i, cons_rx_buf->netmem,
+ cons_rx_buf->offset,
+ frag_len, BNXT_RX_PAGE_SIZE);
+ } else {
+ skb_frag_t *frag = &shinfo->frags[i];
+
+ skb_frag_fill_netmem_desc(frag, cons_rx_buf->netmem,
+ cons_rx_buf->offset,
+ frag_len);
+ shinfo->nr_frags = i + 1;
+ }
__clear_bit(cons, rxr->rx_agg_bmap);
- /* It is possible for bnxt_alloc_rx_page() to allocate
+ /* It is possible for bnxt_alloc_rx_netmem() to allocate
* a sw_prod index that equals the cons index, so we
* need to clear the cons entry now.
*/
- mapping = cons_rx_buf->mapping;
- page = cons_rx_buf->page;
- cons_rx_buf->page = NULL;
+ netmem = cons_rx_buf->netmem;
+ cons_rx_buf->netmem = 0;
- if (xdp && page_is_pfmemalloc(page))
+ if (xdp && netmem_is_pfmemalloc(netmem))
xdp_buff_set_frag_pfmemalloc(xdp);
- if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_ATOMIC) != 0) {
+ if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_ATOMIC) != 0) {
+ if (skb) {
+ skb->len -= frag_len;
+ skb->data_len -= frag_len;
+ skb->truesize -= BNXT_RX_PAGE_SIZE;
+ }
+
--shinfo->nr_frags;
- cons_rx_buf->page = page;
+ cons_rx_buf->netmem = netmem;
- /* Update prod since possibly some pages have been
+ /* Update prod since possibly some netmems have been
* allocated already.
*/
rxr->rx_agg_prod = prod;
@@ -1279,8 +1311,8 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
return 0;
}
- dma_sync_single_for_cpu(&pdev->dev, mapping, BNXT_RX_PAGE_SIZE,
- bp->rx_dir);
+ page_pool_dma_sync_netmem_for_cpu(rxr->page_pool, netmem, 0,
+ BNXT_RX_PAGE_SIZE);
total_frag_len += frag_len;
prod = NEXT_RX_AGG(prod);
@@ -1289,32 +1321,28 @@ static u32 __bnxt_rx_agg_pages(struct bnxt *bp,
return total_frag_len;
}
-static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp,
- struct bnxt_cp_ring_info *cpr,
- struct sk_buff *skb, u16 idx,
- u32 agg_bufs, bool tpa)
+static struct sk_buff *bnxt_rx_agg_netmems_skb(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr,
+ struct sk_buff *skb, u16 idx,
+ u32 agg_bufs, bool tpa)
{
- struct skb_shared_info *shinfo = skb_shinfo(skb);
u32 total_frag_len = 0;
- total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo, idx,
- agg_bufs, tpa, NULL);
+ total_frag_len = __bnxt_rx_agg_netmems(bp, cpr, idx, agg_bufs, tpa,
+ skb, NULL);
if (!total_frag_len) {
skb_mark_for_recycle(skb);
dev_kfree_skb(skb);
return NULL;
}
- skb->data_len += total_frag_len;
- skb->len += total_frag_len;
- skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs;
return skb;
}
-static u32 bnxt_rx_agg_pages_xdp(struct bnxt *bp,
- struct bnxt_cp_ring_info *cpr,
- struct xdp_buff *xdp, u16 idx,
- u32 agg_bufs, bool tpa)
+static u32 bnxt_rx_agg_netmems_xdp(struct bnxt *bp,
+ struct bnxt_cp_ring_info *cpr,
+ struct xdp_buff *xdp, u16 idx,
+ u32 agg_bufs, bool tpa)
{
struct skb_shared_info *shinfo = xdp_get_shared_info_from_buff(xdp);
u32 total_frag_len = 0;
@@ -1322,8 +1350,8 @@ static u32 bnxt_rx_agg_pages_xdp(struct bnxt *bp,
if (!xdp_buff_has_frags(xdp))
shinfo->nr_frags = 0;
- total_frag_len = __bnxt_rx_agg_pages(bp, cpr, shinfo,
- idx, agg_bufs, tpa, xdp);
+ total_frag_len = __bnxt_rx_agg_netmems(bp, cpr, idx, agg_bufs, tpa,
+ NULL, xdp);
if (total_frag_len) {
xdp_buff_set_frags_flag(xdp);
shinfo->nr_frags = agg_bufs;
@@ -1895,7 +1923,8 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
}
if (agg_bufs) {
- skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, idx, agg_bufs, true);
+ skb = bnxt_rx_agg_netmems_skb(bp, cpr, skb, idx, agg_bufs,
+ true);
if (!skb) {
/* Page reuse already handled by bnxt_rx_pages(). */
cpr->sw_stats->rx.rx_oom_discards += 1;
@@ -2175,9 +2204,10 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
if (bnxt_xdp_attached(bp, rxr)) {
bnxt_xdp_buff_init(bp, rxr, cons, data_ptr, len, &xdp);
if (agg_bufs) {
- u32 frag_len = bnxt_rx_agg_pages_xdp(bp, cpr, &xdp,
- cp_cons, agg_bufs,
- false);
+ u32 frag_len = bnxt_rx_agg_netmems_xdp(bp, cpr, &xdp,
+ cp_cons,
+ agg_bufs,
+ false);
if (!frag_len)
goto oom_next_rx;
@@ -2229,7 +2259,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
if (agg_bufs) {
if (!xdp_active) {
- skb = bnxt_rx_agg_pages_skb(bp, cpr, skb, cp_cons, agg_bufs, false);
+ skb = bnxt_rx_agg_netmems_skb(bp, cpr, skb, cp_cons,
+ agg_bufs, false);
if (!skb)
goto oom_next_rx;
} else {
@@ -3445,15 +3476,15 @@ static void bnxt_free_one_rx_agg_ring(struct bnxt *bp, struct bnxt_rx_ring_info
for (i = 0; i < max_idx; i++) {
struct bnxt_sw_rx_agg_bd *rx_agg_buf = &rxr->rx_agg_ring[i];
- struct page *page = rx_agg_buf->page;
+ netmem_ref netmem = rx_agg_buf->netmem;
- if (!page)
+ if (!netmem)
continue;
- rx_agg_buf->page = NULL;
+ rx_agg_buf->netmem = 0;
__clear_bit(i, rxr->rx_agg_bmap);
- page_pool_recycle_direct(rxr->page_pool, page);
+ page_pool_recycle_direct_netmem(rxr->page_pool, netmem);
}
}
@@ -3746,7 +3777,7 @@ static void bnxt_free_rx_rings(struct bnxt *bp)
xdp_rxq_info_unreg(&rxr->xdp_rxq);
page_pool_destroy(rxr->page_pool);
- if (bnxt_separate_head_pool())
+ if (bnxt_separate_head_pool(rxr))
page_pool_destroy(rxr->head_pool);
rxr->page_pool = rxr->head_pool = NULL;
@@ -3777,15 +3808,19 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
pp.dev = &bp->pdev->dev;
pp.dma_dir = bp->rx_dir;
pp.max_len = PAGE_SIZE;
- pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
+ pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV |
+ PP_FLAG_ALLOW_UNREADABLE_NETMEM;
+ pp.queue_idx = rxr->bnapi->index;
pool = page_pool_create(&pp);
if (IS_ERR(pool))
return PTR_ERR(pool);
rxr->page_pool = pool;
- if (bnxt_separate_head_pool()) {
+ rxr->need_head_pool = page_pool_is_unreadable(pool);
+ if (bnxt_separate_head_pool(rxr)) {
pp.pool_size = max(bp->rx_ring_size, 1024);
+ pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
pool = page_pool_create(&pp);
if (IS_ERR(pool))
goto err_destroy_pp;
@@ -4197,6 +4232,8 @@ static void bnxt_reset_rx_ring_struct(struct bnxt *bp,
rxr->page_pool->p.napi = NULL;
rxr->page_pool = NULL;
+ rxr->head_pool->p.napi = NULL;
+ rxr->head_pool = NULL;
memset(&rxr->xdp_rxq, 0, sizeof(struct xdp_rxq_info));
ring = &rxr->rx_ring_struct;
@@ -4321,16 +4358,16 @@ static void bnxt_alloc_one_rx_ring_skb(struct bnxt *bp,
rxr->rx_prod = prod;
}
-static void bnxt_alloc_one_rx_ring_page(struct bnxt *bp,
- struct bnxt_rx_ring_info *rxr,
- int ring_nr)
+static void bnxt_alloc_one_rx_ring_netmem(struct bnxt *bp,
+ struct bnxt_rx_ring_info *rxr,
+ int ring_nr)
{
u32 prod;
int i;
prod = rxr->rx_agg_prod;
for (i = 0; i < bp->rx_agg_ring_size; i++) {
- if (bnxt_alloc_rx_page(bp, rxr, prod, GFP_KERNEL)) {
+ if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_KERNEL)) {
netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d pages only\n",
ring_nr, i, bp->rx_ring_size);
break;
@@ -4371,7 +4408,7 @@ static int bnxt_alloc_one_rx_ring(struct bnxt *bp, int ring_nr)
if (!(bp->flags & BNXT_FLAG_AGG_RINGS))
return 0;
- bnxt_alloc_one_rx_ring_page(bp, rxr, ring_nr);
+ bnxt_alloc_one_rx_ring_netmem(bp, rxr, ring_nr);
if (rxr->rx_tpa) {
rc = bnxt_alloc_one_tpa_info_data(bp, rxr);
@@ -15708,6 +15745,7 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
clone->rx_agg_prod = 0;
clone->rx_sw_agg_prod = 0;
clone->rx_next_cons = 0;
+ clone->need_head_pool = false;
rc = bnxt_alloc_rx_page_pool(bp, clone, rxr->page_pool->p.nid);
if (rc)
@@ -15750,7 +15788,7 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
bnxt_alloc_one_rx_ring_skb(bp, clone, idx);
if (bp->flags & BNXT_FLAG_AGG_RINGS)
- bnxt_alloc_one_rx_ring_page(bp, clone, idx);
+ bnxt_alloc_one_rx_ring_netmem(bp, clone, idx);
if (bp->flags & BNXT_FLAG_TPA)
bnxt_alloc_one_tpa_info_data(bp, clone);
@@ -15766,7 +15804,7 @@ err_rxq_info_unreg:
xdp_rxq_info_unreg(&clone->xdp_rxq);
err_page_pool_destroy:
page_pool_destroy(clone->page_pool);
- if (bnxt_separate_head_pool())
+ if (bnxt_separate_head_pool(clone))
page_pool_destroy(clone->head_pool);
clone->page_pool = NULL;
clone->head_pool = NULL;
@@ -15785,7 +15823,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem)
xdp_rxq_info_unreg(&rxr->xdp_rxq);
page_pool_destroy(rxr->page_pool);
- if (bnxt_separate_head_pool())
+ if (bnxt_separate_head_pool(rxr))
page_pool_destroy(rxr->head_pool);
rxr->page_pool = NULL;
rxr->head_pool = NULL;
@@ -15876,6 +15914,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx)
rxr->page_pool = clone->page_pool;
rxr->head_pool = clone->head_pool;
rxr->xdp_rxq = clone->xdp_rxq;
+ rxr->need_head_pool = clone->need_head_pool;
bnxt_copy_rx_ring(bp, rxr, clone);
@@ -15961,7 +16000,7 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
bnxt_hwrm_rx_ring_free(bp, rxr, false);
bnxt_hwrm_rx_agg_ring_free(bp, rxr, false);
page_pool_disable_direct_recycling(rxr->page_pool);
- if (bnxt_separate_head_pool())
+ if (bnxt_separate_head_pool(rxr))
page_pool_disable_direct_recycling(rxr->head_pool);
if (bp->flags & BNXT_FLAG_SHARED_RINGS)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 21726cf56586..868a2e5a5b02 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -903,7 +903,7 @@ struct bnxt_sw_rx_bd {
};
struct bnxt_sw_rx_agg_bd {
- struct page *page;
+ netmem_ref netmem;
unsigned int offset;
dma_addr_t mapping;
};
@@ -1106,6 +1106,7 @@ struct bnxt_rx_ring_info {
unsigned long *rx_agg_bmap;
u16 rx_agg_bmap_size;
+ bool need_head_pool;
dma_addr_t rx_desc_mapping[MAX_RX_PAGES];
dma_addr_t rx_agg_desc_mapping[MAX_RX_AGG_PAGES];
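This bnxt.h hunk is the core of the conversion: the aggregation ring now stores opaque netmem_ref handles instead of struct page pointers, so buffers may come from a pool created with PP_FLAG_ALLOW_UNREADABLE_NETMEM (e.g. device memory) whose contents the CPU must never touch. A minimal kernel-style sketch of the resulting allocation pattern, using only the page_pool calls visible in this series (the function name is illustrative):

static netmem_ref rx_agg_fill_one(struct page_pool *pool, dma_addr_t *dma)
{
	netmem_ref netmem;

	/* netmem_ref may wrap a normal page or an unreadable buffer,
	 * so the driver never dereferences it directly.
	 */
	netmem = page_pool_alloc_netmems(pool, GFP_ATOMIC);
	if (!netmem)
		return 0;

	/* The pool owns the DMA mapping; read it back rather than
	 * calling the DMA API in the driver.
	 */
	*dma = page_pool_get_dma_addr_netmem(netmem);
	return netmem;
}

Head buffers must stay CPU-readable, which is why bnxt_alloc_rx_page_pool() above checks page_pool_is_unreadable() and, when it returns true, creates a separate readable head_pool tracked per ring via the new need_head_pool flag.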
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index a03eff3d4425..50eb54ecf1ba 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -1735,7 +1735,7 @@ bnad_iocpf_sem_timeout(struct timer_list *t)
* Time CPU m CPU n
* 0 1 = test_bit
* 1 clear_bit
- * 2 del_timer_sync
+ * 2 timer_delete_sync
* 3 mod_timer
*/
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 517a15904fb0..6a2004bbe87f 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -1144,6 +1144,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
struct gmac_txdesc *txd;
skb_frag_t *skb_frag;
dma_addr_t mapping;
+ bool tcp = false;
void *buffer;
u16 mss;
int ret;
@@ -1151,6 +1152,13 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
word1 = skb->len;
word3 = SOF_BIT;
+ /* Determine if we are doing TCP */
+ if (skb->protocol == htons(ETH_P_IP))
+ tcp = (ip_hdr(skb)->protocol == IPPROTO_TCP);
+ else
+ /* IPv6 */
+ tcp = (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP);
+
mss = skb_shinfo(skb)->gso_size;
if (mss) {
/* This means we are dealing with TCP and skb->len is the
@@ -1163,8 +1171,26 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
mss, skb->len);
word1 |= TSS_MTU_ENABLE_BIT;
word3 |= mss;
+ } else if (tcp) {
+ /* Even if we are not using TSO, use the hardware offloader
+ * for transferring the TCP frame: this hardware has partial
+ * TCP awareness (called TOE - TCP Offload Engine) and will
+ * according to the datasheet put packets belonging to the
+ * same TCP connection in the same queue for the TOE/TSO
+ * engine to process. The engine will deal with chopping
+ * up frames that exceed ETH_DATA_LEN which the
+ * checksumming engine cannot handle (see below) into
+ * manageable chunks. It flawlessly deals with quite big
+ * frames and frames containing custom DSA EtherTypes.
+ */
+ mss = netdev->mtu + skb_tcp_all_headers(skb);
+ mss = min(mss, skb->len);
+ netdev_dbg(netdev, "TOE/TSO len %04x mtu %04x mss %04x\n",
+ skb->len, netdev->mtu, mss);
+ word1 |= TSS_MTU_ENABLE_BIT;
+ word3 |= mss;
} else if (skb->len >= ETH_FRAME_LEN) {
- /* Hardware offloaded checksumming isn't working on frames
+ /* Hardware offloaded checksumming isn't working on non-TCP frames
* bigger than 1514 bytes. A hypothesis about this is that the
* checksum buffer is only 1518 bytes, so when the frames get
* bigger they get truncated, or the last few bytes get
@@ -1181,21 +1207,16 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
}
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- int tcp = 0;
-
/* We do not switch off the checksumming on non TCP/UDP
* frames: as is shown from tests, the checksumming engine
* is smart enough to see that a frame is not actually TCP
* or UDP and then just pass it through without any changes
* to the frame.
*/
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (skb->protocol == htons(ETH_P_IP))
word1 |= TSS_IP_CHKSUM_BIT;
- tcp = ip_hdr(skb)->protocol == IPPROTO_TCP;
- } else { /* IPv6 */
+ else
word1 |= TSS_IPV6_ENABLE_BIT;
- tcp = ipv6_hdr(skb)->nexthdr == IPPROTO_TCP;
- }
word1 |= tcp ? TSS_TCP_CHKSUM_BIT : TSS_UDP_CHKSUM_BIT;
}
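The TOE comment in this hunk is the heart of the gemini change: even without GSO, programming TSS_MTU_ENABLE_BIT plus an MSS lets the TCP Offload Engine split frames that exceed the ~1518-byte checksum buffer. A standalone model of the MSS computation (the 54-byte header size stands in for skb_tcp_all_headers() and is an assumption for illustration only):

#include <stdio.h>

static unsigned int toe_mss(unsigned int mtu, unsigned int skb_len)
{
	unsigned int mss = mtu + 54;	/* mtu + Ethernet/IP/TCP headers */

	return mss < skb_len ? mss : skb_len;	/* min(mss, skb->len) */
}

int main(void)
{
	/* A 9000-byte TCP frame on a 1500-byte MTU link gets MSS 1554,
	 * so the TOE engine chops it into checksummable chunks instead
	 * of the driver bypassing the offloader entirely.
	 */
	printf("mss=%u\n", toe_mss(1500, 9000));
	return 0;
}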
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index c3791cf23c87..8aaac9101377 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -2185,7 +2185,7 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv)
xdp_features = 0;
}
- xdp_set_features_flag(priv->dev, xdp_features);
+ xdp_set_features_flag_locked(priv->dev, xdp_features);
}
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index 1640d2f27833..b2adb40a5921 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -147,6 +147,8 @@ config IXGBE
depends on PCI
depends on PTP_1588_CLOCK_OPTIONAL
select MDIO
+ select NET_DEVLINK
+ select PLDMFW
select PHYLIB
help
This driver supports Intel(R) 10GbE PCI Express family of
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index ba9c19e6994c..952898151565 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -319,7 +319,7 @@ struct e1000_adapter {
u16 tx_ring_count;
u16 rx_ring_count;
- struct hwtstamp_config hwtstamp_config;
+ struct kernel_hwtstamp_config hwtstamp_config;
struct delayed_work systim_overflow_work;
struct sk_buff *tx_hwtstamp_skb;
unsigned long tx_hwtstamp_start;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 8ebcb6a7d608..e0f492a6723f 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -3574,6 +3574,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
* e1000e_config_hwtstamp - configure the hwtstamp registers and enable/disable
* @adapter: board private structure
* @config: timestamp configuration
+ * @extack: netlink extended ACK for error reporting
*
* Outgoing time stamping can be enabled and disabled. Play nice and
* disable it when requested, although it shouldn't cause any overhead
@@ -3587,7 +3588,8 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca)
* exception of "all V2 events regardless of level 2 or 4".
**/
static int e1000e_config_hwtstamp(struct e1000_adapter *adapter,
- struct hwtstamp_config *config)
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
struct e1000_hw *hw = &adapter->hw;
u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
@@ -3598,8 +3600,10 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter,
bool is_l2 = false;
u32 regval;
- if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP))
+ if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP)) {
+ NL_SET_ERR_MSG(extack, "No HW timestamp support");
return -EINVAL;
+ }
switch (config->tx_type) {
case HWTSTAMP_TX_OFF:
@@ -3608,6 +3612,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter,
case HWTSTAMP_TX_ON:
break;
default:
+ NL_SET_ERR_MSG(extack, "Unsupported TX HW timestamp type");
return -ERANGE;
}
@@ -3681,6 +3686,7 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter,
config->rx_filter = HWTSTAMP_FILTER_ALL;
break;
default:
+ NL_SET_ERR_MSG(extack, "Unsupported RX HW timestamp filter");
return -ERANGE;
}
@@ -3693,7 +3699,8 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter,
ew32(TSYNCTXCTL, regval);
if ((er32(TSYNCTXCTL) & E1000_TSYNCTXCTL_ENABLED) !=
(regval & E1000_TSYNCTXCTL_ENABLED)) {
- e_err("Timesync Tx Control register not set as expected\n");
+ NL_SET_ERR_MSG(extack,
+ "Timesync Tx Control register not set as expected");
return -EAGAIN;
}
@@ -3706,7 +3713,8 @@ static int e1000e_config_hwtstamp(struct e1000_adapter *adapter,
E1000_TSYNCRXCTL_TYPE_MASK)) !=
(regval & (E1000_TSYNCRXCTL_ENABLED |
E1000_TSYNCRXCTL_TYPE_MASK))) {
- e_err("Timesync Rx Control register not set as expected\n");
+ NL_SET_ERR_MSG(extack,
+ "Timesync Rx Control register not set as expected");
return -EAGAIN;
}
@@ -3901,6 +3909,7 @@ static void e1000e_systim_reset(struct e1000_adapter *adapter)
{
struct ptp_clock_info *info = &adapter->ptp_clock_info;
struct e1000_hw *hw = &adapter->hw;
+ struct netlink_ext_ack extack = {};
unsigned long flags;
u32 timinca;
s32 ret_val;
@@ -3932,7 +3941,12 @@ static void e1000e_systim_reset(struct e1000_adapter *adapter)
spin_unlock_irqrestore(&adapter->systim_lock, flags);
/* restore the previous hwtstamp configuration settings */
- e1000e_config_hwtstamp(adapter, &adapter->hwtstamp_config);
+ ret_val = e1000e_config_hwtstamp(adapter, &adapter->hwtstamp_config,
+ &extack);
+ if (ret_val && extack._msg)
+ e_err("%s\n", extack._msg);
}
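The hunk above shows the idiom for calling an extack-reporting helper from a non-netlink context: pass a zero-initialized stack extack, then forward any ._msg to the driver log if the call fails. A condensed kernel-style sketch of that pattern (function name illustrative):

static void restore_hwtstamp(struct e1000_adapter *adapter)
{
	struct netlink_ext_ack extack = {};

	/* No netlink requester here, so text set via NL_SET_ERR_MSG()
	 * is routed to the driver log instead.
	 */
	if (e1000e_config_hwtstamp(adapter, &adapter->hwtstamp_config,
				   &extack) && extack._msg)
		e_err("%s\n", extack._msg);
}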
/**
@@ -6079,8 +6093,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
return 0;
}
-static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
- int cmd)
+static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct mii_ioctl_data *data = if_mii(ifr);
@@ -6140,7 +6153,8 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
/**
* e1000e_hwtstamp_set - control hardware time stamping
* @netdev: network interface device structure
- * @ifr: interface request
+ * @config: timestamp configuration
+ * @extack: netlink extended ACK for error reporting
*
* Outgoing time stamping can be enabled and disabled. Play nice and
* disable it when requested, although it shouldn't cause any overhead
@@ -6153,20 +6167,18 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
* specified. Matching the kind of event packet is not supported, with the
* exception of "all V2 events regardless of level 2 or 4".
**/
-static int e1000e_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
+static int e1000e_hwtstamp_set(struct net_device *netdev,
+ struct kernel_hwtstamp_config *config,
+ struct netlink_ext_ack *extack)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
- struct hwtstamp_config config;
int ret_val;
- if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
- return -EFAULT;
-
- ret_val = e1000e_config_hwtstamp(adapter, &config);
+ ret_val = e1000e_config_hwtstamp(adapter, config, extack);
if (ret_val)
return ret_val;
- switch (config.rx_filter) {
+ switch (config->rx_filter) {
case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
case HWTSTAMP_FILTER_PTP_V2_SYNC:
@@ -6178,38 +6190,23 @@ static int e1000e_hwtstamp_set(struct net_device *netdev, struct ifreq *ifr)
* by hardware so notify the caller the requested packets plus
* some others are time stamped.
*/
- config.rx_filter = HWTSTAMP_FILTER_SOME;
+ config->rx_filter = HWTSTAMP_FILTER_SOME;
break;
default:
break;
}
- return copy_to_user(ifr->ifr_data, &config,
- sizeof(config)) ? -EFAULT : 0;
+ return 0;
}
-static int e1000e_hwtstamp_get(struct net_device *netdev, struct ifreq *ifr)
+static int e1000e_hwtstamp_get(struct net_device *netdev,
+ struct kernel_hwtstamp_config *kernel_config)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
- return copy_to_user(ifr->ifr_data, &adapter->hwtstamp_config,
- sizeof(adapter->hwtstamp_config)) ? -EFAULT : 0;
-}
+ *kernel_config = adapter->hwtstamp_config;
-static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
-{
- switch (cmd) {
- case SIOCGMIIPHY:
- case SIOCGMIIREG:
- case SIOCSMIIREG:
- return e1000_mii_ioctl(netdev, ifr, cmd);
- case SIOCSHWTSTAMP:
- return e1000e_hwtstamp_set(netdev, ifr);
- case SIOCGHWTSTAMP:
- return e1000e_hwtstamp_get(netdev, ifr);
- default:
- return -EOPNOTSUPP;
- }
+ return 0;
}
static int e1000_init_phy_wakeup(struct e1000_adapter *adapter, u32 wufc)
@@ -7346,9 +7343,11 @@ static const struct net_device_ops e1000e_netdev_ops = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = e1000_netpoll,
#endif
- .ndo_set_features = e1000_set_features,
- .ndo_fix_features = e1000_fix_features,
+ .ndo_set_features = e1000_set_features,
+ .ndo_fix_features = e1000_fix_features,
.ndo_features_check = passthru_features_check,
+ .ndo_hwtstamp_get = e1000e_hwtstamp_get,
+ .ndo_hwtstamp_set = e1000e_hwtstamp_set,
};
/**
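Moving SIOCSHWTSTAMP/SIOCGHWTSTAMP out of ndo_ioctl and into the dedicated ndo_hwtstamp_set/ndo_hwtstamp_get callbacks lets the core handle ifreq parsing and the user-space copies; the driver only fills or consumes a kernel_hwtstamp_config. A minimal sketch of the get-side shape (driver names are hypothetical):

static int foo_hwtstamp_get(struct net_device *netdev,
			    struct kernel_hwtstamp_config *cfg)
{
	struct foo_priv *priv = netdev_priv(netdev);

	/* Plain struct copy; no copy_to_user() in the driver. */
	*cfg = priv->hwtstamp_config;
	return 0;
}

/* Wired up next to the other callbacks:
 *	.ndo_hwtstamp_get = foo_hwtstamp_get,
 *	.ndo_hwtstamp_set = foo_hwtstamp_set,
 */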
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 370b4bddee44..b11c35e307ca 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -817,10 +817,11 @@ int i40e_pf_reset(struct i40e_hw *hw)
void i40e_clear_hw(struct i40e_hw *hw)
{
u32 num_queues, base_queue;
- u32 num_pf_int;
- u32 num_vf_int;
+ s32 num_pf_int;
+ s32 num_vf_int;
u32 num_vfs;
- u32 i, j;
+ s32 i;
+ u32 j;
u32 val;
u32 eol = 0x7ff;
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index fd083647c14a..e42572ae7631 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -193,8 +193,6 @@
#define ice_pf_to_dev(pf) (&((pf)->pdev->dev))
-#define ice_pf_src_tmr_owned(pf) ((pf)->hw.func_caps.ts_func_info.src_tmr_owned)
-
enum ice_feature {
ICE_F_DSCP,
ICE_F_PHY_RCLK,
@@ -515,6 +513,7 @@ enum ice_pf_flags {
ICE_FLAG_MTU_CHANGED,
ICE_FLAG_GNSS, /* GNSS successfully initialized */
ICE_FLAG_DPLL, /* SyncE/PTP dplls initialized */
+ ICE_FLAG_LLDP_AQ_FLTR,
ICE_PF_FLAGS_NBITS /* must be last */
};
@@ -1045,4 +1044,62 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf)
}
extern const struct xdp_metadata_ops ice_xdp_md_ops;
+
+/**
+ * ice_is_dual - Check if given config is multi-NAC
+ * @hw: pointer to HW structure
+ *
+ * Return: true if the device is running in multi-NAC (Network
+ * Acceleration Complex) configuration variant, false otherwise
+ * (always false for non-E825 devices).
+ */
+static inline bool ice_is_dual(struct ice_hw *hw)
+{
+ return hw->mac_type == ICE_MAC_GENERIC_3K_E825 &&
+ (hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_DUAL_M);
+}
+
+/**
+ * ice_is_primary - Check if given device belongs to the primary complex
+ * @hw: pointer to HW structure
+ *
+ * Check if given PF/HW is running on primary complex in multi-NAC
+ * configuration.
+ *
+ * Return: true if the device is primary, false otherwise (always true
+ * for non-E825 devices).
+ */
+static inline bool ice_is_primary(struct ice_hw *hw)
+{
+ return hw->mac_type != ICE_MAC_GENERIC_3K_E825 ||
+ !ice_is_dual(hw) ||
+ (hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M);
+}
+
+/**
+ * ice_pf_src_tmr_owned - Check if a primary timer is owned by PF
+ * @pf: pointer to PF structure
+ *
+ * Return: true if PF owns primary timer, false otherwise.
+ */
+static inline bool ice_pf_src_tmr_owned(struct ice_pf *pf)
+{
+ return pf->hw.func_caps.ts_func_info.src_tmr_owned &&
+ ice_is_primary(&pf->hw);
+}
+
+/**
+ * ice_get_primary_hw - Get pointer to primary ice_hw structure
+ * @pf: pointer to PF structure
+ *
+ * Return: A pointer to ice_hw structure with access to timesync
+ * register space.
+ */
+static inline struct ice_hw *ice_get_primary_hw(struct ice_pf *pf)
+{
+ if (!pf->adapter->ctrl_pf)
+ return &pf->hw;
+ else
+ return &pf->adapter->ctrl_pf->hw;
+}
#endif /* _ICE_H_ */
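These inlines centralize the multi-NAC topology checks so both common code and PTP can gate timesync register access on complex ownership. The ice_ptp.c and ice_ptp_hw.c hunks later in this diff apply them as a "redirect to the primary complex" pattern, sketched here:

static struct ice_hw *timesync_hw(struct ice_pf *pf)
{
	/* Only the primary complex owns the GLTSYN_* register space on
	 * dual E825C parts; secondary PFs borrow the control PF's hw.
	 */
	if (ice_is_primary(&pf->hw))
		return &pf->hw;
	return ice_get_primary_hw(pf);
}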
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 59df31c2c83f..4fedf0181c4e 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1135,6 +1135,8 @@ int ice_init_hw(struct ice_hw *hw)
}
}
+ hw->lane_num = ice_get_phy_lane_number(hw);
+
return 0;
err_unroll_fltr_mgmt_struct:
ice_cleanup_fltr_mgmt_struct(hw);
@@ -3434,7 +3436,7 @@ int ice_aq_get_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port,
msg.msg_addr_low = lower_16_bits(reg_offset);
msg.msg_addr_high = receiver_id;
msg.opcode = ice_sbq_msg_rd;
- msg.dest_dev = rmn_0;
+ msg.dest_dev = ice_sbq_dev_phy_0;
err = ice_sbq_rw_reg(hw, &msg, flag);
if (err)
@@ -4082,10 +4084,12 @@ int ice_get_phy_lane_number(struct ice_hw *hw)
continue;
if (hw->pf_id == lport) {
+ if (hw->mac_type == ICE_MAC_GENERIC_3K_E825 &&
+ ice_is_dual(hw) && !ice_is_primary(hw))
+ lane += ICE_PORTS_PER_QUAD;
kfree(options);
return lane;
}
-
lport++;
}
@@ -6011,15 +6015,21 @@ bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw)
/**
* ice_lldp_fltr_add_remove - add or remove a LLDP Rx switch filter
* @hw: pointer to HW struct
- * @vsi_num: absolute HW index for VSI
+ * @vsi: VSI to add the filter to
* @add: boolean for if adding or removing a filter
+ *
+ * Return: 0 on success, -EOPNOTSUPP if the operation cannot be performed
+ * with this HW or VSI, otherwise an error corresponding to
+ * the AQ transaction result.
*/
-int
-ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add)
+int ice_lldp_fltr_add_remove(struct ice_hw *hw, struct ice_vsi *vsi, bool add)
{
struct ice_aqc_lldp_filter_ctrl *cmd;
struct ice_aq_desc desc;
+ if (vsi->type != ICE_VSI_PF || !ice_fw_supports_lldp_fltr_ctrl(hw))
+ return -EOPNOTSUPP;
+
cmd = &desc.params.lldp_filter_ctrl;
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_lldp_filter_ctrl);
@@ -6029,7 +6039,7 @@ ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add)
else
cmd->cmd_flags = ICE_AQC_LLDP_FILTER_ACTION_DELETE;
- cmd->vsi_num = cpu_to_le16(vsi_num);
+ cmd->vsi_num = cpu_to_le16(vsi->vsi_num);
return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 9b00aa0ddf10..64c530b39191 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -290,8 +290,7 @@ int
ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
struct ice_sq_cd *cd);
bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw);
-int
-ice_lldp_fltr_add_remove(struct ice_hw *hw, u16 vsi_num, bool add);
+int ice_lldp_fltr_add_remove(struct ice_hw *hw, struct ice_vsi *vsi, bool add);
int ice_lldp_execute_pending_mib(struct ice_hw *hw);
int
ice_aq_read_i2c(struct ice_hw *hw, struct ice_aqc_link_topo_addr topo_addr,
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index a7c510832824..67988c7ab08e 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -846,7 +846,7 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
goto dcb_init_err;
}
- ice_cfg_sw_lldp(pf_vsi, false, true);
+ ice_cfg_sw_rx_lldp(pf, true);
pf->dcbx_cap = ice_dcb_get_mode(port_info, true);
return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index ed21d7f55ac1..6aae03771746 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -29,6 +29,7 @@ static int ice_eswitch_setup_env(struct ice_pf *pf)
return -ENODEV;
ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx);
+ ice_vsi_cfg_sw_lldp(uplink_vsi, true, false);
netif_addr_lock_bh(netdev);
__dev_uc_unsync(netdev, NULL);
@@ -245,6 +246,10 @@ ice_eswitch_set_target_vsi(struct sk_buff *skb,
u64 cd_cmd, dst_vsi;
if (!dst) {
+ struct ethhdr *eth = (struct ethhdr *)skb_mac_header(skb);
+
+ if (unlikely(eth->h_proto == htons(ETH_P_LLDP)))
+ return;
cd_cmd = ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S;
off->cd_qw1 |= (cd_cmd | ICE_TX_DESC_DTYPE_CTX);
} else {
@@ -278,6 +283,7 @@ static void ice_eswitch_release_env(struct ice_pf *pf)
ice_fltr_add_mac_and_broadcast(uplink_vsi,
uplink_vsi->port_info->mac.perm_addr,
ICE_FWD_TO_VSI);
+ ice_vsi_cfg_sw_lldp(uplink_vsi, true, true);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 7c2dc347e4e5..648815170477 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -1818,7 +1818,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
/* Remove rule to direct LLDP packets to default VSI.
* The FW LLDP engine will now be consuming them.
*/
- ice_cfg_sw_lldp(vsi, false, false);
+ ice_cfg_sw_rx_lldp(vsi->back, false);
/* AQ command to start FW LLDP agent will return an
* error if the agent is already started
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 1d118171de37..aceec184e89b 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -1605,7 +1605,7 @@ void ice_fdir_replay_fltrs(struct ice_pf *pf)
*/
int ice_fdir_create_dflt_rules(struct ice_pf *pf)
{
- const enum ice_fltr_ptype dflt_rules[] = {
+ static const enum ice_fltr_ptype dflt_rules[] = {
ICE_FLTR_PTYPE_NONF_IPV4_TCP, ICE_FLTR_PTYPE_NONF_IPV4_UDP,
ICE_FLTR_PTYPE_NONF_IPV6_TCP, ICE_FLTR_PTYPE_NONF_IPV6_UDP,
};
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 0bcf9d127ac9..03bb16191237 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2065,12 +2065,15 @@ static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi)
}
/**
- * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
+ * ice_vsi_cfg_sw_lldp - Config switch rules for LLDP packet handling
* @vsi: the VSI being configured
* @tx: bool to determine Tx or Rx rule
* @create: bool to determine create or remove Rule
+ *
+ * Adding an ethtype Tx rule to the uplink VSI results in it being applied
+ * to the whole port, so LLDP transmission for VFs will be blocked too.
*/
-void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
+void ice_vsi_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
{
int (*eth_fltr)(struct ice_vsi *v, u16 type, u16 flag,
enum ice_sw_fwd_act_type act);
@@ -2085,19 +2088,59 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX,
ICE_DROP_PACKET);
} else {
- if (ice_fw_supports_lldp_fltr_ctrl(&pf->hw)) {
- status = ice_lldp_fltr_add_remove(&pf->hw, vsi->vsi_num,
- create);
- } else {
+ if (!test_bit(ICE_FLAG_LLDP_AQ_FLTR, pf->flags)) {
status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX,
ICE_FWD_TO_VSI);
+ if (!status || !create)
+ goto report;
+
+ dev_info(dev,
+ "Failed to add generic LLDP Rx filter on VSI %i error: %d, falling back to specialized AQ control\n",
+ vsi->vsi_num, status);
}
+
+ status = ice_lldp_fltr_add_remove(&pf->hw, vsi, create);
+ if (!status)
+ set_bit(ICE_FLAG_LLDP_AQ_FLTR, pf->flags);
}
+report:
if (status)
- dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n",
- create ? "adding" : "removing", tx ? "TX" : "RX",
- vsi->vsi_num, status);
+ dev_warn(dev, "Failed to %s %s LLDP rule on VSI %i error: %d\n",
+ create ? "add" : "remove", tx ? "Tx" : "Rx",
+ vsi->vsi_num, status);
+}
+
+/**
+ * ice_cfg_sw_rx_lldp - Enable/disable software handling of LLDP
+ * @pf: the PF being configured
+ * @enable: enable or disable
+ *
+ * Configure switch rules to enable/disable LLDP handling by software
+ * across PF.
+ */
+void ice_cfg_sw_rx_lldp(struct ice_pf *pf, bool enable)
+{
+ struct ice_vsi *vsi;
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ vsi = ice_get_main_vsi(pf);
+ ice_vsi_cfg_sw_lldp(vsi, false, enable);
+
+ if (!test_bit(ICE_FLAG_SRIOV_ENA, pf->flags))
+ return;
+
+ ice_for_each_vf(pf, bkt, vf) {
+ vsi = ice_get_vf_vsi(vf);
+
+ if (WARN_ON(!vsi))
+ continue;
+
+ if (ice_vf_is_lldp_ena(vf))
+ ice_vsi_cfg_sw_lldp(vsi, false, enable);
+ }
}
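ice_vsi_cfg_sw_lldp() now tries the generic switch rule first and falls back to the dedicated AQ filter, latching the working method in ICE_FLAG_LLDP_AQ_FLTR, while ice_cfg_sw_rx_lldp() fans the Rx rule out across the PF (main VSI plus LLDP-enabled VF VSIs). A standalone model of the latch-and-fallback flow (simplified: the real code only falls back while creating a rule, not removing one):

#include <stdbool.h>
#include <stdio.h>

static bool use_aq;	/* models ICE_FLAG_LLDP_AQ_FLTR */

static int add_rx_lldp_rule(bool generic_ok, bool aq_ok)
{
	if (!use_aq) {
		if (generic_ok)
			return 0;
		printf("generic rule failed, trying AQ filter\n");
	}
	if (aq_ok) {
		use_aq = true;	/* later calls skip the generic path */
		return 0;
	}
	return -1;
}

int main(void)
{
	add_rx_lldp_rule(false, true);	/* falls back, latches AQ */
	add_rx_lldp_rule(true, true);	/* goes straight to AQ now */
	return 0;
}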
/**
@@ -2528,7 +2571,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_vsi_cfg_params *params)
if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF) {
ice_fltr_add_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
ICE_DROP_PACKET);
- ice_cfg_sw_lldp(vsi, true, true);
+ ice_vsi_cfg_sw_lldp(vsi, true, true);
}
if (!vsi->agg_node)
@@ -2825,9 +2868,11 @@ int ice_vsi_release(struct ice_vsi *vsi)
/* The Rx rule will only exist to remove if the LLDP FW
* engine is currently stopped
*/
- if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
- !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
- ice_cfg_sw_lldp(vsi, false, false);
+ if (!ice_is_safe_mode(pf) &&
+ !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags) &&
+ (vsi->type == ICE_VSI_PF || (vsi->type == ICE_VSI_VF &&
+ ice_vf_is_lldp_ena(vsi->vf))))
+ ice_vsi_cfg_sw_lldp(vsi, false, false);
ice_vsi_decfg(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index b4c9cb28a016..654516c5fc3e 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -29,7 +29,8 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi);
-void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
+void ice_vsi_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
+void ice_cfg_sw_rx_lldp(struct ice_pf *pf, bool enable);
int ice_set_link(struct ice_vsi *vsi, bool ena);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index d390157b59fe..1fbe13ee93a8 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -8330,11 +8330,16 @@ void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
* @np: net device to configure
* @filter_dev: device on which filter is added
* @cls_flower: offload data
+ * @ingress: if the rule is added to an ingress block
+ *
+ * Return: 0 if the flower was successfully added or deleted,
+ * negative error code otherwise.
*/
static int
ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
struct net_device *filter_dev,
- struct flow_cls_offload *cls_flower)
+ struct flow_cls_offload *cls_flower,
+ bool ingress)
{
struct ice_vsi *vsi = np->vsi;
@@ -8343,7 +8348,7 @@ ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
switch (cls_flower->command) {
case FLOW_CLS_REPLACE:
- return ice_add_cls_flower(filter_dev, vsi, cls_flower);
+ return ice_add_cls_flower(filter_dev, vsi, cls_flower, ingress);
case FLOW_CLS_DESTROY:
return ice_del_cls_flower(vsi, cls_flower);
default:
@@ -8352,20 +8357,46 @@ ice_setup_tc_cls_flower(struct ice_netdev_priv *np,
}
/**
- * ice_setup_tc_block_cb - callback handler registered for TC block
+ * ice_setup_tc_block_cb_ingress - callback handler for ingress TC block
* @type: TC SETUP type
* @type_data: TC flower offload data that contains user input
* @cb_priv: netdev private data
+ *
+ * Return: 0 if the setup was successful, negative error code otherwise.
*/
static int
-ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+ice_setup_tc_block_cb_ingress(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
{
struct ice_netdev_priv *np = cb_priv;
switch (type) {
case TC_SETUP_CLSFLOWER:
return ice_setup_tc_cls_flower(np, np->vsi->netdev,
- type_data);
+ type_data, true);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+/**
+ * ice_setup_tc_block_cb_egress - callback handler for egress TC block
+ * @type: TC SETUP type
+ * @type_data: TC flower offload data that contains user input
+ * @cb_priv: netdev private data
+ *
+ * Return: 0 if the setup was successful, negative error code otherwise.
+ */
+static int
+ice_setup_tc_block_cb_egress(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct ice_netdev_priv *np = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return ice_setup_tc_cls_flower(np, np->vsi->netdev,
+ type_data, false);
default:
return -EOPNOTSUPP;
}
@@ -9310,16 +9341,32 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
void *type_data)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
+ enum flow_block_binder_type binder_type;
struct ice_pf *pf = np->vsi->back;
+ flow_setup_cb_t *flower_handler;
bool locked = false;
int err;
switch (type) {
case TC_SETUP_BLOCK:
+ binder_type =
+ ((struct flow_block_offload *)type_data)->binder_type;
+
+ switch (binder_type) {
+ case FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS:
+ flower_handler = ice_setup_tc_block_cb_ingress;
+ break;
+ case FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS:
+ flower_handler = ice_setup_tc_block_cb_egress;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
return flow_block_cb_setup_simple(type_data,
&ice_block_cb_list,
- ice_setup_tc_block_cb,
- np, np, true);
+ flower_handler,
+ np, np, false);
case TC_SETUP_QDISC_MQPRIO:
if (ice_is_eswitch_mode_switchdev(pf)) {
netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
@@ -9380,7 +9427,7 @@ ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data,
case TC_SETUP_CLSFLOWER:
return ice_setup_tc_cls_flower(np, priv->netdev,
(struct flow_cls_offload *)
- type_data);
+ type_data, false);
default:
return -EOPNOTSUPP;
}
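Registering direction-specific block callbacks fixes a rule's ingress/egress direction at bind time, rather than inferring it later from which netdevs the filter touches (the behavior the ice_tc_lib.c hunks below remove). A standalone model of the binder-type dispatch:

#include <errno.h>
#include <stdio.h>

enum binder { BINDER_INGRESS, BINDER_EGRESS, BINDER_OTHER };

static int setup_flower(int ingress)
{
	printf("flower rule installed, ingress=%d\n", ingress);
	return 0;
}

static int setup_block(enum binder type)
{
	switch (type) {
	case BINDER_INGRESS:
		return setup_flower(1);
	case BINDER_EGRESS:
		return setup_flower(0);
	default:
		return -EOPNOTSUPP;	/* unsupported binder type */
	}
}

int main(void)
{
	return setup_block(BINDER_EGRESS);
}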
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 1fd1ae03eb90..b79a148ed0f2 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -305,6 +305,9 @@ u64 ice_ptp_read_src_clk_reg(struct ice_pf *pf,
u32 hi, lo, lo2;
u8 tmr_idx;
+ if (!ice_is_primary(hw))
+ hw = ice_get_primary_hw(pf);
+
tmr_idx = ice_get_ptp_src_clock_index(hw);
guard(spinlock)(&pf->adapter->ptp_gltsyn_time_lock);
/* Read the system timestamp pre PHC read */
@@ -1624,14 +1627,6 @@ static int ice_ptp_cfg_extts(struct ice_pf *pf, struct ptp_extts_request *rq,
int pin_desc_idx;
u8 tmr_idx;
- /* Reject requests with unsupported flags */
-
- if (rq->flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
tmr_idx = hw->func_caps.ts_func_info.tmr_index_owned;
chan = rq->index;
@@ -1802,9 +1797,6 @@ static int ice_ptp_cfg_perout(struct ice_pf *pf, struct ptp_perout_request *rq,
struct ice_hw *hw = &pf->hw;
int pin_desc_idx;
- if (rq->flags & ~PTP_PEROUT_PHASE)
- return -EOPNOTSUPP;
-
pin_desc_idx = ice_ptp_find_pin_idx(pf, PTP_PF_PEROUT, rq->index);
if (pin_desc_idx < 0)
return -EIO;
@@ -2737,6 +2729,11 @@ static void ice_ptp_set_caps(struct ice_pf *pf)
info->enable = ice_ptp_gpio_enable;
info->verify = ice_verify_pin;
+ info->supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
+ info->supported_perout_flags = PTP_PEROUT_PHASE;
+
switch (pf->hw.mac_type) {
case ICE_MAC_E810:
ice_ptp_set_funcs_e810(pf);
@@ -2986,6 +2983,32 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
}
/**
+ * ice_ptp_prepare_rebuild_sec - Prepare second NAC for PTP reset or rebuild
+ * @pf: Board private structure
+ * @rebuild: rebuild if true, prepare if false
+ * @reset_type: the reset type being performed
+ */
+static void ice_ptp_prepare_rebuild_sec(struct ice_pf *pf, bool rebuild,
+ enum ice_reset_req reset_type)
+{
+ struct list_head *entry;
+
+ list_for_each(entry, &pf->adapter->ports.ports) {
+ struct ice_ptp_port *port = list_entry(entry,
+ struct ice_ptp_port,
+ list_node);
+ struct ice_pf *peer_pf = ptp_port_to_pf(port);
+
+ if (!ice_is_primary(&peer_pf->hw)) {
+ if (rebuild)
+ ice_ptp_rebuild(peer_pf, reset_type);
+ else
+ ice_ptp_prepare_for_reset(peer_pf, reset_type);
+ }
+ }
+}
+
+/**
* ice_ptp_prepare_for_reset - Prepare PTP for reset
* @pf: Board private structure
* @reset_type: the reset type being performed
@@ -2993,6 +3016,7 @@ static void ice_ptp_periodic_work(struct kthread_work *work)
void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
{
struct ice_ptp *ptp = &pf->ptp;
+ struct ice_hw *hw = &pf->hw;
u8 src_tmr;
if (ptp->state != ICE_PTP_READY)
@@ -3008,6 +3032,9 @@ void ice_ptp_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type)
if (reset_type == ICE_RESET_PFR)
return;
+ if (ice_pf_src_tmr_owned(pf) && hw->mac_type == ICE_MAC_GENERIC_3K_E825)
+ ice_ptp_prepare_rebuild_sec(pf, false, reset_type);
+
ice_ptp_release_tx_tracker(pf, &pf->ptp.port.tx);
/* Disable periodic outputs */
@@ -3129,13 +3156,6 @@ err:
dev_err(ice_pf_to_dev(pf), "PTP reset failed %d\n", err);
}
-static bool ice_is_primary(struct ice_hw *hw)
-{
- return hw->mac_type == ICE_MAC_GENERIC_3K_E825 && ice_is_dual(hw) ?
- !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_PRIMARY_M) :
- true;
-}
-
static int ice_ptp_setup_adapter(struct ice_pf *pf)
{
if (!ice_pf_src_tmr_owned(pf) || !ice_is_primary(&pf->hw))
@@ -3355,17 +3375,16 @@ void ice_ptp_init(struct ice_pf *pf)
{
struct ice_ptp *ptp = &pf->ptp;
struct ice_hw *hw = &pf->hw;
- int lane_num, err;
+ int err;
ptp->state = ICE_PTP_INITIALIZING;
- lane_num = ice_get_phy_lane_number(hw);
- if (lane_num < 0) {
- err = lane_num;
+ if (hw->lane_num < 0) {
+ err = hw->lane_num;
goto err_exit;
}
+ ptp->port.port_num = hw->lane_num;
- ptp->port.port_num = (u8)lane_num;
ice_ptp_init_hw(hw);
ice_ptp_init_tx_interrupt_mode(pf);
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
index 89bb8461284a..ccac84eb34c9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c
@@ -240,7 +240,7 @@ static int ice_read_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 *val)
{
struct ice_sbq_msg_input cgu_msg = {
.opcode = ice_sbq_msg_rd,
- .dest_dev = cgu,
+ .dest_dev = ice_sbq_dev_cgu,
.msg_addr_low = addr
};
int err;
@@ -272,7 +272,7 @@ static int ice_write_cgu_reg_e82x(struct ice_hw *hw, u32 addr, u32 val)
{
struct ice_sbq_msg_input cgu_msg = {
.opcode = ice_sbq_msg_wr,
- .dest_dev = cgu,
+ .dest_dev = ice_sbq_dev_cgu,
.msg_addr_low = addr,
.data = val
};
@@ -874,8 +874,12 @@ static u32 ice_ptp_tmr_cmd_to_port_reg(struct ice_hw *hw,
*/
void ice_ptp_src_cmd(struct ice_hw *hw, enum ice_ptp_tmr_cmd cmd)
{
+ struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
u32 cmd_val = ice_ptp_tmr_cmd_to_src_reg(hw, cmd);
+ if (!ice_is_primary(hw))
+ hw = ice_get_primary_hw(pf);
+
wr32(hw, GLTSYN_CMD, cmd_val);
}
@@ -891,6 +895,9 @@ static void ice_ptp_exec_tmr_cmd(struct ice_hw *hw)
{
struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
+ if (!ice_is_primary(hw))
+ hw = ice_get_primary_hw(pf);
+
guard(spinlock)(&pf->adapter->ptp_gltsyn_time_lock);
wr32(hw, GLTSYN_CMD_SYNC, SYNC_EXEC_CMD);
ice_flush(hw);
@@ -919,16 +926,24 @@ static void ice_ptp_cfg_sync_delay(const struct ice_hw *hw, u32 delay)
*
* Return: destination sideband queue PHY device.
*/
-static enum ice_sbq_msg_dev ice_ptp_get_dest_dev_e825(struct ice_hw *hw,
- u8 port)
+static enum ice_sbq_dev_id ice_ptp_get_dest_dev_e825(struct ice_hw *hw,
+ u8 port)
{
- /* On a single complex E825, PHY 0 is always destination device phy_0
+ u8 curr_phy, tgt_phy;
+
+ tgt_phy = port >= hw->ptp.ports_per_phy;
+ curr_phy = hw->lane_num >= hw->ptp.ports_per_phy;
+ /* In the driver, lanes 4..7 are in fact 0..3 on a second PHY.
+ * On a single complex E825C, PHY 0 is always destination device phy_0
* and PHY 1 is phy_0_peer.
+ * On dual complex E825C, device phy_0 points to PHY on the current
+ * complex and phy_0_peer to PHY on a different complex.
*/
- if (port >= hw->ptp.ports_per_phy)
- return eth56g_phy_1;
+ if ((!ice_is_dual(hw) && tgt_phy == 1) ||
+ (ice_is_dual(hw) && tgt_phy != curr_phy))
+ return ice_sbq_dev_phy_0_peer;
else
- return eth56g_phy_0;
+ return ice_sbq_dev_phy_0;
}
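The comment above encodes a small truth table for the sideband destination. A standalone model, where a "phy" index of 0 or 1 stands in for port >= hw->ptp.ports_per_phy:

#include <stdbool.h>
#include <stdio.h>

static const char *dest_dev(bool dual, int curr_phy, int tgt_phy)
{
	/* Peer when crossing PHYs on a single complex, or when the
	 * target lane lives on the other complex in dual mode.
	 */
	if ((!dual && tgt_phy == 1) || (dual && tgt_phy != curr_phy))
		return "phy_0_peer";
	return "phy_0";
}

int main(void)
{
	printf("single, cross-PHY:   %s\n", dest_dev(false, 0, 1));
	printf("dual, same complex:  %s\n", dest_dev(true, 1, 1));
	printf("dual, other complex: %s\n", dest_dev(true, 0, 1));
	return 0;
}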
/**
@@ -2417,6 +2432,7 @@ int ice_phy_cfg_intr_eth56g(struct ice_hw *hw, u8 port, bool ena, u8 threshold)
static int ice_read_phy_and_phc_time_eth56g(struct ice_hw *hw, u8 port,
u64 *phy_time, u64 *phc_time)
{
+ struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
u64 tx_time, rx_time;
u32 zo, lo;
u8 tmr_idx;
@@ -2436,8 +2452,13 @@ static int ice_read_phy_and_phc_time_eth56g(struct ice_hw *hw, u8 port,
ice_ptp_exec_tmr_cmd(hw);
/* Read the captured PHC time from the shadow time registers */
- zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx));
- lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx));
+ if (ice_is_primary(hw)) {
+ zo = rd32(hw, GLTSYN_SHTIME_0(tmr_idx));
+ lo = rd32(hw, GLTSYN_SHTIME_L(tmr_idx));
+ } else {
+ zo = rd32(ice_get_primary_hw(pf), GLTSYN_SHTIME_0(tmr_idx));
+ lo = rd32(ice_get_primary_hw(pf), GLTSYN_SHTIME_L(tmr_idx));
+ }
*phc_time = (u64)lo << 32 | zo;
/* Read the captured PHY time from the PHY shadow registers */
@@ -2574,6 +2595,7 @@ int ice_stop_phy_timer_eth56g(struct ice_hw *hw, u8 port, bool soft_reset)
*/
int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port)
{
+ struct ice_pf *pf = container_of(hw, struct ice_pf, hw);
u32 lo, hi;
u64 incval;
u8 tmr_idx;
@@ -2599,8 +2621,13 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port)
if (err)
return err;
- lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx));
- hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx));
+ if (ice_is_primary(hw)) {
+ lo = rd32(hw, GLTSYN_INCVAL_L(tmr_idx));
+ hi = rd32(hw, GLTSYN_INCVAL_H(tmr_idx));
+ } else {
+ lo = rd32(ice_get_primary_hw(pf), GLTSYN_INCVAL_L(tmr_idx));
+ hi = rd32(ice_get_primary_hw(pf), GLTSYN_INCVAL_H(tmr_idx));
+ }
incval = (u64)hi << 32 | lo;
err = ice_write_40b_ptp_reg_eth56g(hw, port, PHY_REG_TIMETUS_L, incval);
@@ -2631,25 +2658,6 @@ int ice_start_phy_timer_eth56g(struct ice_hw *hw, u8 port)
}
/**
- * ice_sb_access_ena_eth56g - Enable SB devices (PHY and others) access
- * @hw: pointer to HW struct
- * @enable: Enable or disable access
- *
- * Enable sideband devices (PHY and others) access.
- */
-static void ice_sb_access_ena_eth56g(struct ice_hw *hw, bool enable)
-{
- u32 val = rd32(hw, PF_SB_REM_DEV_CTL);
-
- if (enable)
- val |= BIT(eth56g_phy_0) | BIT(cgu) | BIT(eth56g_phy_1);
- else
- val &= ~(BIT(eth56g_phy_0) | BIT(cgu) | BIT(eth56g_phy_1));
-
- wr32(hw, PF_SB_REM_DEV_CTL, val);
-}
-
-/**
* ice_ptp_init_phc_e825 - Perform E825 specific PHC initialization
* @hw: pointer to HW struct
*
@@ -2659,8 +2667,6 @@ static void ice_sb_access_ena_eth56g(struct ice_hw *hw, bool enable)
*/
static int ice_ptp_init_phc_e825(struct ice_hw *hw)
{
- ice_sb_access_ena_eth56g(hw, true);
-
/* Initialize the Clock Generation Unit */
return ice_init_cgu_e82x(hw);
}
@@ -2747,8 +2753,6 @@ static void ice_ptp_init_phy_e825(struct ice_hw *hw)
params->num_phys = 2;
ptp->ports_per_phy = 4;
ptp->num_lports = params->num_phys * ptp->ports_per_phy;
-
- ice_sb_access_ena_eth56g(hw, true);
}
/* E822 family functions
@@ -2781,7 +2785,7 @@ static void ice_fill_phy_msg_e82x(struct ice_hw *hw,
msg->msg_addr_high = P_Q1_H(P_4_BASE + offset, phy_port);
}
- msg->dest_dev = rmn_0;
+ msg->dest_dev = ice_sbq_dev_phy_0;
}
/**
@@ -3104,7 +3108,7 @@ static int ice_fill_quad_msg_e82x(struct ice_hw *hw,
if (quad >= ICE_GET_QUAD_NUM(hw->ptp.num_lports))
return -EINVAL;
- msg->dest_dev = rmn_0;
+ msg->dest_dev = ice_sbq_dev_phy_0;
if (!(quad % ICE_GET_QUAD_NUM(hw->ptp.ports_per_phy)))
addr = Q_0_BASE + offset;
@@ -4823,7 +4827,7 @@ static int ice_read_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 *val)
msg.msg_addr_low = lower_16_bits(addr);
msg.msg_addr_high = upper_16_bits(addr);
msg.opcode = ice_sbq_msg_rd;
- msg.dest_dev = rmn_0;
+ msg.dest_dev = ice_sbq_dev_phy_0;
err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
if (err) {
@@ -4853,7 +4857,7 @@ static int ice_write_phy_reg_e810(struct ice_hw *hw, u32 addr, u32 val)
msg.msg_addr_low = lower_16_bits(addr);
msg.msg_addr_high = upper_16_bits(addr);
msg.opcode = ice_sbq_msg_wr;
- msg.dest_dev = rmn_0;
+ msg.dest_dev = ice_sbq_dev_phy_0;
msg.data = val;
err = ice_sbq_rw_reg(hw, &msg, ICE_AQ_FLAG_RD);
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
index e5925ccc2613..83f20fa7ace7 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h
@@ -444,11 +444,6 @@ static inline u64 ice_get_base_incval(struct ice_hw *hw)
}
}
-static inline bool ice_is_dual(struct ice_hw *hw)
-{
- return !!(hw->dev_caps.nac_topo.mode & ICE_NAC_TOPO_DUAL_M);
-}
-
#define PFTSYN_SEM_BYTES 4
#define ICE_PTP_CLOCK_INDEX_0 0x00
diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c
index fb7a1b9a4313..cb08746556a6 100644
--- a/drivers/net/ethernet/intel/ice/ice_repr.c
+++ b/drivers/net/ethernet/intel/ice/ice_repr.c
@@ -219,7 +219,8 @@ ice_repr_setup_tc_cls_flower(struct ice_repr *repr,
{
switch (flower->command) {
case FLOW_CLS_REPLACE:
- return ice_add_cls_flower(repr->netdev, repr->src_vsi, flower);
+ return ice_add_cls_flower(repr->netdev, repr->src_vsi, flower,
+ true);
case FLOW_CLS_DESTROY:
return ice_del_cls_flower(repr->src_vsi, flower);
default:
@@ -336,6 +337,7 @@ void ice_repr_destroy(struct ice_repr *repr)
static void ice_repr_rem_vf(struct ice_repr *repr)
{
ice_eswitch_decfg_vsi(repr->src_vsi, repr->parent_mac);
+ ice_pass_vf_tx_lldp(repr->src_vsi, true);
unregister_netdev(repr->netdev);
ice_devlink_destroy_vf_port(repr->vf);
ice_virtchnl_set_dflt_ops(repr->vf);
@@ -417,6 +419,10 @@ static int ice_repr_add_vf(struct ice_repr *repr)
if (err)
goto err_netdev;
+ err = ice_drop_vf_tx_lldp(repr->src_vsi, true);
+ if (err)
+ goto err_drop_lldp;
+
err = ice_eswitch_cfg_vsi(repr->src_vsi, repr->parent_mac);
if (err)
goto err_cfg_vsi;
@@ -429,6 +435,8 @@ static int ice_repr_add_vf(struct ice_repr *repr)
return 0;
err_cfg_vsi:
+ ice_pass_vf_tx_lldp(repr->src_vsi, true);
+err_drop_lldp:
unregister_netdev(repr->netdev);
err_netdev:
ice_devlink_destroy_vf_port(vf);
diff --git a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
index 3b0054faf70c..183dd5457d6a 100644
--- a/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_sbq_cmd.h
@@ -46,13 +46,10 @@ struct ice_sbq_evt_desc {
u8 data[24];
};
-enum ice_sbq_msg_dev {
- eth56g_phy_0 = 0x02,
- rmn_0 = 0x02,
- rmn_1 = 0x03,
- rmn_2 = 0x04,
- cgu = 0x06,
- eth56g_phy_1 = 0x0D,
+enum ice_sbq_dev_id {
+ ice_sbq_dev_phy_0 = 0x02,
+ ice_sbq_dev_cgu = 0x06,
+ ice_sbq_dev_phy_0_peer = 0x0D,
};
enum ice_sbq_msg_opcode {
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index f1648cf103b7..0e4dc1a5cff0 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -63,6 +63,7 @@ static void ice_free_vf_res(struct ice_vf *vf)
if (vf->lan_vsi_idx != ICE_NO_VSI) {
ice_vf_vsi_release(vf);
vf->num_mac = 0;
+ vf->num_mac_lldp = 0;
}
last_vector_idx = vf->first_vector_idx + vf->num_msix - 1;
@@ -1402,6 +1403,9 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
mutex_lock(&vf->cfg_lock);
+ while (!trusted && vf->num_mac_lldp)
+ ice_vf_update_mac_lldp_num(vf, ice_get_vf_vsi(vf), false);
+
vf->trusted = trusted;
ice_reset_vf(vf, ICE_VF_RESET_NOTIFY);
dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 4a91e0aaf0a5..9d9a7edd3618 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -3146,7 +3146,7 @@ ice_add_update_vsi_list(struct ice_hw *hw,
u16 vsi_handle_arr[2];
/* A rule already exists with the new VSI being added */
- if (cur_fltr->fwd_id.hw_vsi_id == new_fltr->fwd_id.hw_vsi_id)
+ if (cur_fltr->vsi_handle == new_fltr->vsi_handle)
return -EEXIST;
vsi_handle_arr[0] = cur_fltr->vsi_handle;
@@ -5978,7 +5978,7 @@ ice_adv_add_update_vsi_list(struct ice_hw *hw,
/* A rule already exists with the new VSI being added */
if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
- return 0;
+ return -EEXIST;
/* Update the previously created VSI list set with
* the new VSI ID passed in
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
index ea39b999a0d0..fb9ea7f8ef44 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c
@@ -12,14 +12,11 @@
/**
* ice_tc_count_lkups - determine lookup count for switch filter
* @flags: TC-flower flags
- * @headers: Pointer to TC flower filter header structure
* @fltr: Pointer to outer TC filter structure
*
- * Determine lookup count based on TC flower input for switch filter.
+ * Return: lookup count based on TC flower input for a switch filter.
*/
-static int
-ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
- struct ice_tc_flower_fltr *fltr)
+static int ice_tc_count_lkups(u32 flags, struct ice_tc_flower_fltr *fltr)
{
int lkups_cnt = 1; /* 0th lookup is metadata */
@@ -684,26 +681,26 @@ static int ice_tc_setup_action(struct net_device *filter_dev,
fltr->action.fltr_act = action;
if (ice_is_port_repr_netdev(filter_dev) &&
- ice_is_port_repr_netdev(target_dev)) {
+ ice_is_port_repr_netdev(target_dev) &&
+ fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
repr = ice_netdev_to_repr(target_dev);
fltr->dest_vsi = repr->src_vsi;
- fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
} else if (ice_is_port_repr_netdev(filter_dev) &&
- ice_tc_is_dev_uplink(target_dev)) {
+ ice_tc_is_dev_uplink(target_dev) &&
+ fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
repr = ice_netdev_to_repr(filter_dev);
fltr->dest_vsi = repr->src_vsi->back->eswitch.uplink_vsi;
- fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
} else if (ice_tc_is_dev_uplink(filter_dev) &&
- ice_is_port_repr_netdev(target_dev)) {
+ ice_is_port_repr_netdev(target_dev) &&
+ fltr->direction == ICE_ESWITCH_FLTR_INGRESS) {
repr = ice_netdev_to_repr(target_dev);
fltr->dest_vsi = repr->src_vsi;
- fltr->direction = ICE_ESWITCH_FLTR_INGRESS;
} else {
NL_SET_ERR_MSG_MOD(fltr->extack,
- "Unsupported netdevice in switchdev mode");
+ "The action is not supported for this netdevice");
return -EINVAL;
}
@@ -716,13 +713,11 @@ ice_tc_setup_drop_action(struct net_device *filter_dev,
{
fltr->action.fltr_act = ICE_DROP_PACKET;
- if (ice_is_port_repr_netdev(filter_dev)) {
- fltr->direction = ICE_ESWITCH_FLTR_EGRESS;
- } else if (ice_tc_is_dev_uplink(filter_dev)) {
- fltr->direction = ICE_ESWITCH_FLTR_INGRESS;
- } else {
+ if (!ice_tc_is_dev_uplink(filter_dev) &&
+ !(ice_is_port_repr_netdev(filter_dev) &&
+ fltr->direction == ICE_ESWITCH_FLTR_INGRESS)) {
NL_SET_ERR_MSG_MOD(fltr->extack,
- "Unsupported netdevice in switchdev mode");
+ "The action is not supported for this netdevice");
return -EINVAL;
}
@@ -767,10 +762,157 @@ static int ice_eswitch_tc_parse_action(struct net_device *filter_dev,
return 0;
}
+static bool ice_is_fltr_lldp(struct ice_tc_flower_fltr *fltr)
+{
+ return fltr->outer_headers.l2_key.n_proto == htons(ETH_P_LLDP);
+}
+
+static bool ice_is_fltr_pf_tx_lldp(struct ice_tc_flower_fltr *fltr)
+{
+ struct ice_vsi *vsi = fltr->src_vsi, *uplink;
+
+ if (!ice_is_switchdev_running(vsi->back))
+ return false;
+
+ uplink = vsi->back->eswitch.uplink_vsi;
+ return vsi == uplink && fltr->action.fltr_act == ICE_DROP_PACKET &&
+ ice_is_fltr_lldp(fltr) &&
+ fltr->direction == ICE_ESWITCH_FLTR_EGRESS &&
+ fltr->flags == ICE_TC_FLWR_FIELD_ETH_TYPE_ID;
+}
+
+static bool ice_is_fltr_vf_tx_lldp(struct ice_tc_flower_fltr *fltr)
+{
+ struct ice_vsi *vsi = fltr->src_vsi, *uplink;
+
+ uplink = vsi->back->eswitch.uplink_vsi;
+ return fltr->src_vsi->type == ICE_VSI_VF && ice_is_fltr_lldp(fltr) &&
+ fltr->direction == ICE_ESWITCH_FLTR_EGRESS &&
+ fltr->dest_vsi == uplink;
+}
+
+static struct ice_tc_flower_fltr *
+ice_find_pf_tx_lldp_fltr(struct ice_pf *pf)
+{
+ struct ice_tc_flower_fltr *fltr;
+
+ hlist_for_each_entry(fltr, &pf->tc_flower_fltr_list, tc_flower_node)
+ if (ice_is_fltr_pf_tx_lldp(fltr))
+ return fltr;
+
+ return NULL;
+}
+
+static bool ice_any_vf_lldp_tx_ena(struct ice_pf *pf)
+{
+ struct ice_vf *vf;
+ unsigned int bkt;
+
+ ice_for_each_vf(pf, bkt, vf)
+ if (vf->lldp_tx_ena)
+ return true;
+
+ return false;
+}
+
+int ice_pass_vf_tx_lldp(struct ice_vsi *vsi, bool deinit)
+{
+ struct ice_rule_query_data remove_entry = {
+ .rid = vsi->vf->lldp_recipe_id,
+ .rule_id = vsi->vf->lldp_rule_id,
+ .vsi_handle = vsi->idx,
+ };
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ if (vsi->vf->lldp_tx_ena)
+ return 0;
+
+ if (!deinit && !ice_find_pf_tx_lldp_fltr(vsi->back))
+ return -EINVAL;
+
+ if (!deinit && ice_any_vf_lldp_tx_ena(pf))
+ return -EINVAL;
+
+ err = ice_rem_adv_rule_by_id(&pf->hw, &remove_entry);
+ if (!err)
+ vsi->vf->lldp_tx_ena = true;
+
+ return err;
+}
+
+int ice_drop_vf_tx_lldp(struct ice_vsi *vsi, bool init)
+{
+ struct ice_rule_query_data rule_added;
+ struct ice_adv_rule_info rinfo = {
+ .priority = 7,
+ .src_vsi = vsi->idx,
+ .sw_act = {
+ .src = vsi->idx,
+ .flag = ICE_FLTR_TX,
+ .fltr_act = ICE_DROP_PACKET,
+ .vsi_handle = vsi->idx,
+ },
+ .flags_info.act_valid = true,
+ };
+ struct ice_adv_lkup_elem list[3];
+ struct ice_pf *pf = vsi->back;
+ int err;
+
+ if (!init && !vsi->vf->lldp_tx_ena)
+ return 0;
+
+ memset(list, 0, sizeof(list));
+ ice_rule_add_direction_metadata(&list[0]);
+ ice_rule_add_src_vsi_metadata(&list[1]);
+ list[2].type = ICE_ETYPE_OL;
+ list[2].h_u.ethertype.ethtype_id = htons(ETH_P_LLDP);
+ list[2].m_u.ethertype.ethtype_id = htons(0xFFFF);
+
+ err = ice_add_adv_rule(&pf->hw, list, ARRAY_SIZE(list), &rinfo,
+ &rule_added);
+ if (err) {
+ dev_err(&pf->pdev->dev,
+ "Failed to add an LLDP rule to VSI 0x%X: %d\n",
+ vsi->idx, err);
+ } else {
+ vsi->vf->lldp_recipe_id = rule_added.rid;
+ vsi->vf->lldp_rule_id = rule_added.rule_id;
+ vsi->vf->lldp_tx_ena = false;
+ }
+
+ return err;
+}
+
+static void ice_handle_add_pf_lldp_drop_rule(struct ice_vsi *vsi)
+{
+ struct ice_tc_flower_fltr *fltr;
+ struct ice_pf *pf = vsi->back;
+
+ hlist_for_each_entry(fltr, &pf->tc_flower_fltr_list, tc_flower_node) {
+ if (!ice_is_fltr_vf_tx_lldp(fltr))
+ continue;
+ ice_pass_vf_tx_lldp(fltr->src_vsi, true);
+ break;
+ }
+}
+
+static void ice_handle_del_pf_lldp_drop_rule(struct ice_pf *pf)
+{
+ int i;
+
+ /* Make the VF LLDP fwd to uplink rule dormant */
+ ice_for_each_vsi(pf, i) {
+ struct ice_vsi *vf_vsi = pf->vsi[i];
+
+ if (vf_vsi && vf_vsi->type == ICE_VSI_VF)
+ ice_drop_vf_tx_lldp(vf_vsi, false);
+ }
+}
+
static int
ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
{
- struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
struct ice_adv_rule_info rule_info = { 0 };
struct ice_rule_query_data rule_added;
struct ice_hw *hw = &vsi->back->hw;
@@ -785,7 +927,10 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
return -EOPNOTSUPP;
}
- lkups_cnt = ice_tc_count_lkups(flags, headers, fltr);
+ if (ice_is_fltr_vf_tx_lldp(fltr))
+ return ice_pass_vf_tx_lldp(vsi, false);
+
+ lkups_cnt = ice_tc_count_lkups(flags, fltr);
list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
if (!list)
return -ENOMEM;
@@ -814,6 +959,11 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
rule_info.sw_act.src = hw->pf_id;
rule_info.flags_info.act = ICE_SINGLE_ACT_LB_ENABLE;
} else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS &&
+ !fltr->dest_vsi && vsi == vsi->back->eswitch.uplink_vsi) {
+ /* PF to Uplink */
+ rule_info.sw_act.flag |= ICE_FLTR_TX;
+ rule_info.sw_act.src = vsi->idx;
+ } else if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS &&
fltr->dest_vsi == vsi->back->eswitch.uplink_vsi) {
/* VF to Uplink */
rule_info.sw_act.flag |= ICE_FLTR_TX;
@@ -846,11 +996,17 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because it already exist");
ret = -EINVAL;
goto exit;
+ } else if (ret == -ENOSPC) {
+ NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter: insufficient space available.");
+ goto exit;
} else if (ret) {
NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter due to error");
goto exit;
}
+ if (ice_is_fltr_pf_tx_lldp(fltr))
+ ice_handle_add_pf_lldp_drop_rule(vsi);
+
/* store the output params, which are needed later for removing
* advanced switch filter
*/
@@ -985,7 +1141,6 @@ static int
ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
struct ice_tc_flower_fltr *tc_fltr)
{
- struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers;
struct ice_adv_rule_info rule_info = {0};
struct ice_rule_query_data rule_added;
struct ice_adv_lkup_elem *list;
@@ -1021,7 +1176,7 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
return PTR_ERR(dest_vsi);
}
- lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr);
+ lkups_cnt = ice_tc_count_lkups(flags, tc_fltr);
list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC);
if (!list)
return -ENOMEM;
@@ -1056,8 +1211,13 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
tc_fltr->action.fwd.q.hw_queue, lkups_cnt);
break;
case ICE_DROP_PACKET:
- rule_info.sw_act.flag |= ICE_FLTR_RX;
- rule_info.sw_act.src = hw->pf_id;
+ if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
+ rule_info.sw_act.flag |= ICE_FLTR_TX;
+ rule_info.sw_act.src = vsi->idx;
+ } else {
+ rule_info.sw_act.flag |= ICE_FLTR_RX;
+ rule_info.sw_act.src = hw->pf_id;
+ }
rule_info.priority = ICE_SWITCH_FLTR_PRIO_VSI;
break;
default:
@@ -1071,6 +1231,10 @@ ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi,
"Unable to add filter because it already exist");
ret = -EINVAL;
goto exit;
+ } else if (ret == -ENOSPC) {
+ NL_SET_ERR_MSG_MOD(tc_fltr->extack,
+ "Unable to add filter: insufficient space available.");
+ goto exit;
} else if (ret) {
NL_SET_ERR_MSG_MOD(tc_fltr->extack,
"Unable to add filter due to error");
@@ -1463,11 +1627,16 @@ ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule,
* @filter_dev: Pointer to device on which filter is being added
* @f: Pointer to struct flow_cls_offload
* @fltr: Pointer to filter structure
+ * @ingress: if the rule is added to an ingress block
+ *
+ * Return: 0 if the flower was parsed successfully, -EINVAL if the flower
+ * cannot be parsed, -EOPNOTSUPP if such a filter cannot be configured
+ * for the given VSI.
*/
static int
ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
struct flow_cls_offload *f,
- struct ice_tc_flower_fltr *fltr)
+ struct ice_tc_flower_fltr *fltr, bool ingress)
{
struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers;
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
@@ -1551,6 +1720,20 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID;
}
+ if (!ingress) {
+ bool switchdev =
+ ice_is_eswitch_mode_switchdev(vsi->back);
+
+ if (switchdev != (n_proto_key == ETH_P_LLDP)) {
+ NL_SET_ERR_MSG_FMT_MOD(fltr->extack,
+ "%sLLDP filtering is not supported on egress in %s mode",
+ switchdev ? "Non-" : "",
+ switchdev ? "switchdev" :
+ "legacy");
+ return -EOPNOTSUPP;
+ }
+ }
+
headers->l2_key.n_proto = cpu_to_be16(n_proto_key);
headers->l2_mask.n_proto = cpu_to_be16(n_proto_mask);
headers->l3_key.ip_proto = match.key->ip_proto;
@@ -1726,6 +1909,14 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
return -EINVAL;
}
}
+
+ /* Ingress filter on representor results in an egress filter in HW
+ * and vice versa
+ */
+ ingress = ice_is_port_repr_netdev(filter_dev) ? !ingress : ingress;
+ fltr->direction = ingress ? ICE_ESWITCH_FLTR_INGRESS :
+ ICE_ESWITCH_FLTR_EGRESS;
+
return 0;
}
@@ -1939,6 +2130,12 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
struct ice_pf *pf = vsi->back;
int err;
+ if (ice_is_fltr_pf_tx_lldp(fltr))
+ ice_handle_del_pf_lldp_drop_rule(pf);
+
+ if (ice_is_fltr_vf_tx_lldp(fltr))
+ return ice_drop_vf_tx_lldp(vsi, false);
+
rule_rem.rid = fltr->rid;
rule_rem.rule_id = fltr->rule_id;
rule_rem.vsi_handle = fltr->dest_vsi_handle;
@@ -1975,14 +2172,18 @@ static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
* @vsi: Pointer to VSI
* @f: Pointer to flower offload structure
* @__fltr: Pointer to struct ice_tc_flower_fltr
+ * @ingress: if the rule is added to an ingress block
*
* This function parses TC-flower input fields, parses action,
* and adds a filter.
+ *
+ * Return: 0 if the filter was successfully added,
+ * negative error code otherwise.
*/
static int
ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi,
struct flow_cls_offload *f,
- struct ice_tc_flower_fltr **__fltr)
+ struct ice_tc_flower_fltr **__fltr, bool ingress)
{
struct ice_tc_flower_fltr *fltr;
int err;
@@ -1999,7 +2200,7 @@ ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi,
fltr->src_vsi = vsi;
INIT_HLIST_NODE(&fltr->tc_flower_node);
- err = ice_parse_cls_flower(netdev, vsi, f, fltr);
+ err = ice_parse_cls_flower(netdev, vsi, f, fltr, ingress);
if (err < 0)
goto err;
@@ -2042,10 +2243,13 @@ ice_find_tc_flower_fltr(struct ice_pf *pf, unsigned long cookie)
* @netdev: Pointer to filter device
* @vsi: Pointer to VSI
* @cls_flower: Pointer to flower offload structure
+ * @ingress: if the rule is added to an ingress block
+ *
+ * Return: 0 if the flower was successfully added,
+ * negative error code otherwise.
*/
-int
-ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
- struct flow_cls_offload *cls_flower)
+int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
+ struct flow_cls_offload *cls_flower, bool ingress)
{
struct netlink_ext_ack *extack = cls_flower->common.extack;
struct net_device *vsi_netdev = vsi->netdev;
@@ -2080,7 +2284,7 @@ ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
}
/* prep and add TC-flower filter in HW */
- err = ice_add_tc_fltr(netdev, vsi, cls_flower, &fltr);
+ err = ice_add_tc_fltr(netdev, vsi, cls_flower, &fltr, ingress);
if (err)
return err;
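
The representor direction flip at the end of ice_parse_cls_flower() is the subtle part of this series: an ingress TC rule attached to a port representor matches traffic that, from the hardware's point of view, egresses the VF, and vice versa. A standalone sketch of that rule, using illustrative names rather than driver API:

	#include <assert.h>
	#include <stdbool.h>

	enum fltr_dir { FLTR_INGRESS, FLTR_EGRESS };

	/* On a representor the datapath is mirrored: what TC sees as
	 * ingress is egress for the VF in hardware, and vice versa.
	 */
	static enum fltr_dir hw_fltr_dir(bool on_port_repr, bool tc_ingress)
	{
		bool hw_ingress = on_port_repr ? !tc_ingress : tc_ingress;

		return hw_ingress ? FLTR_INGRESS : FLTR_EGRESS;
	}

	int main(void)
	{
		assert(hw_fltr_dir(true, true) == FLTR_EGRESS);   /* repr ingress -> HW egress */
		assert(hw_fltr_dir(false, true) == FLTR_INGRESS); /* uplink ingress stays ingress */
		return 0;
	}
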
diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
index d84f153517ec..8a3ab2f22af9 100644
--- a/drivers/net/ethernet/intel/ice/ice_tc_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h
@@ -211,13 +211,14 @@ static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf)
}
struct ice_vsi *ice_locate_vsi_using_queue(struct ice_vsi *vsi, int queue);
-int
-ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
- struct flow_cls_offload *cls_flower);
-int
-ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower);
+int ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi,
+ struct flow_cls_offload *cls_flower, bool ingress);
+int ice_del_cls_flower(struct ice_vsi *vsi,
+ struct flow_cls_offload *cls_flower);
void ice_replay_tc_fltrs(struct ice_pf *pf);
bool ice_is_tunnel_supported(struct net_device *dev);
+int ice_drop_vf_tx_lldp(struct ice_vsi *vsi, bool init);
+int ice_pass_vf_tx_lldp(struct ice_vsi *vsi, bool deinit);
static inline bool ice_is_forward_action(enum ice_sw_fwd_act_type fltr_act)
{
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 1e4f6f6ee449..0e5107fe62ad 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -2440,19 +2440,20 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring)
/* allow CONTROL frames egress from main VSI if FW LLDP disabled */
eth = (struct ethhdr *)skb_mac_header(skb);
- if (unlikely((skb->priority == TC_PRIO_CONTROL ||
- eth->h_proto == htons(ETH_P_LLDP)) &&
- vsi->type == ICE_VSI_PF &&
- vsi->port_info->qos_cfg.is_sw_lldp))
- offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
- ICE_TX_CTX_DESC_SWTCH_UPLINK <<
- ICE_TXD_CTX_QW1_CMD_S);
- ice_tstamp(tx_ring, skb, first, &offload);
if ((ice_is_switchdev_running(vsi->back) ||
ice_lag_is_switchdev_running(vsi->back)) &&
vsi->type != ICE_VSI_SF)
ice_eswitch_set_target_vsi(skb, &offload);
+ else if (unlikely((skb->priority == TC_PRIO_CONTROL ||
+ eth->h_proto == htons(ETH_P_LLDP)) &&
+ vsi->type == ICE_VSI_PF &&
+ vsi->port_info->qos_cfg.is_sw_lldp))
+ offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+ ICE_TX_CTX_DESC_SWTCH_UPLINK <<
+ ICE_TXD_CTX_QW1_CMD_S);
+
+ ice_tstamp(tx_ring, skb, first, &offload);
if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
struct ice_tx_ctx_desc *cdesc;
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 0aab21113cc4..ccf53cc6403e 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -970,6 +970,7 @@ struct ice_hw {
u8 intrl_gran;
struct ice_ptp_hw ptp;
+ s8 lane_num;
/* Active package version (currently active) */
struct ice_pkg_ver active_pkg_ver;
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index 815ad0bfe832..48cd533e93b7 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -226,6 +226,7 @@ static void ice_vf_clear_counters(struct ice_vf *vf)
vsi->num_vlan = 0;
vf->num_mac = 0;
+ vf->num_mac_lldp = 0;
memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
}
@@ -1401,3 +1402,28 @@ struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi)
rcu_read_unlock();
return ctrl_vsi;
}
+
+/**
+ * ice_vf_update_mac_lldp_num - update the VF's number of LLDP addresses
+ * @vf: a VF to add the address to
+ * @vsi: the corresponding VSI
+ * @incr: true if an LLDP address was added, false if it was removed
+ */
+void ice_vf_update_mac_lldp_num(struct ice_vf *vf, struct ice_vsi *vsi,
+ bool incr)
+{
+ bool lldp_by_fw, was_ena, is_ena;
+
+ if (WARN_ON(!vsi)) {
+ vf->num_mac_lldp = 0;
+ return;
+ }
+
+ lldp_by_fw = test_bit(ICE_FLAG_FW_LLDP_AGENT, vsi->back->flags);
+ was_ena = ice_vf_is_lldp_ena(vf) && !lldp_by_fw;
+ vf->num_mac_lldp += incr ? 1 : -1;
+ is_ena = ice_vf_is_lldp_ena(vf) && !lldp_by_fw;
+
+ if (was_ena != is_ena)
+ ice_vsi_cfg_sw_lldp(vsi, false, is_ena);
+}
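
ice_vsi_cfg_sw_lldp() is invoked only when the effective enable state flips, not on every counter update. A runnable sketch of that edge-triggered pattern, with the driver state reduced to two hypothetical variables:

	#include <stdbool.h>
	#include <stdio.h>

	static int num_mac_lldp;
	static bool trusted = true;

	static bool lldp_ena(void) { return num_mac_lldp && trusted; }

	static void update_mac_lldp_num(bool incr)
	{
		bool was_ena = lldp_ena();

		num_mac_lldp += incr ? 1 : -1;
		if (was_ena != lldp_ena())
			printf("reconfigure SW LLDP: %s\n", lldp_ena() ? "on" : "off");
	}

	int main(void)
	{
		update_mac_lldp_num(true);	/* 0 -> 1: prints "on" */
		update_mac_lldp_num(true);	/* 1 -> 2: no transition */
		update_mac_lldp_num(false);	/* 2 -> 1: no transition */
		update_mac_lldp_num(false);	/* 1 -> 0: prints "off" */
		return 0;
	}
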
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index 799b2c1f1184..482f4285fd35 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -124,6 +124,7 @@ struct ice_vf {
u8 spoofchk:1;
u8 link_forced:1;
u8 link_up:1; /* only valid if VF link is forced */
+ u8 lldp_tx_ena:1;
u32 ptp_caps;
@@ -134,6 +135,7 @@ struct ice_vf {
unsigned long vf_caps; /* VF's adv. capabilities */
u8 num_req_qs; /* num of queue pairs requested by VF */
u16 num_mac;
+ u16 num_mac_lldp;
u16 num_vf_qs; /* num of queue configured per VF */
u8 vlan_strip_ena; /* Outer and Inner VLAN strip enable */
#define ICE_INNER_VLAN_STRIP_ENA BIT(0)
@@ -149,6 +151,9 @@ struct ice_vf {
/* devlink port data */
struct devlink_port devlink_port;
+ u16 lldp_recipe_id;
+ u16 lldp_rule_id;
+
u16 num_msix; /* num of MSI-X configured on this VF */
struct ice_vf_qs_bw qs_bw[ICE_MAX_RSS_QS_PER_VF];
};
@@ -180,6 +185,11 @@ static inline u16 ice_vf_get_port_vlan_tpid(struct ice_vf *vf)
return vf->port_vlan_info.tpid;
}
+static inline bool ice_vf_is_lldp_ena(struct ice_vf *vf)
+{
+ return vf->num_mac_lldp && vf->trusted;
+}
+
/* VF Hash Table access functions
*
* These functions provide abstraction for interacting with the VF hash table.
@@ -245,6 +255,8 @@ ice_vf_clear_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m);
int ice_reset_vf(struct ice_vf *vf, u32 flags);
void ice_reset_all_vfs(struct ice_pf *pf);
struct ice_vsi *ice_get_vf_ctrl_vsi(struct ice_pf *pf, struct ice_vsi *vsi);
+void ice_vf_update_mac_lldp_num(struct ice_vf *vf, struct ice_vsi *vsi,
+ bool incr);
#else /* CONFIG_PCI_IOV */
static inline struct ice_vf *ice_get_vf_by_id(struct ice_pf *pf, u16 vf_id)
{
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index 7c3006eb68dd..b147f6cf2615 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -2266,6 +2266,51 @@ ice_vfhw_mac_add(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr)
}
/**
+ * ice_is_mc_lldp_eth_addr - check if the given MAC is a multicast LLDP address
+ * @mac: address to check
+ *
+ * Return: true if the address is one of the three possible LLDP multicast
+ * addresses, false otherwise.
+ */
+static bool ice_is_mc_lldp_eth_addr(const u8 *mac)
+{
+ const u8 lldp_mac_base[] = {0x01, 0x80, 0xc2, 0x00, 0x00};
+
+ if (memcmp(mac, lldp_mac_base, sizeof(lldp_mac_base)))
+ return false;
+
+ return (mac[5] == 0x0e || mac[5] == 0x03 || mac[5] == 0x00);
+}
+
+/**
+ * ice_vc_can_add_mac - check if the VF is allowed to add a given MAC
+ * @vf: a VF to add the address to
+ * @mac: address to check
+ *
+ * Return: true if the VF is allowed to add such MAC address, false otherwise.
+ */
+static bool ice_vc_can_add_mac(const struct ice_vf *vf, const u8 *mac)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+
+ if (is_unicast_ether_addr(mac) &&
+ !ice_can_vf_change_mac((struct ice_vf *)vf)) {
+ dev_err(dev,
+ "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
+ return false;
+ }
+
+ if (!vf->trusted && ice_is_mc_lldp_eth_addr(mac)) {
+ dev_warn(dev,
+ "An untrusted VF %u is attempting to configure an LLDP multicast address\n",
+ vf->vf_id);
+ return false;
+ }
+
+ return true;
+}
+
+/**
* ice_vc_add_mac_addr - attempt to add the MAC address passed in
* @vf: pointer to the VF info
* @vsi: pointer to the VF's VSI
@@ -2283,10 +2328,8 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
if (ether_addr_equal(mac_addr, vf->dev_lan_addr))
return 0;
- if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) {
- dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
+ if (!ice_vc_can_add_mac(vf, mac_addr))
return -EPERM;
- }
ret = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
if (ret == -EEXIST) {
@@ -2301,6 +2344,8 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
return ret;
} else {
vf->num_mac++;
+ if (ice_is_mc_lldp_eth_addr(mac_addr))
+ ice_vf_update_mac_lldp_num(vf, vsi, true);
}
ice_vfhw_mac_add(vf, vc_ether_addr);
@@ -2395,6 +2440,8 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi,
ice_vfhw_mac_del(vf, vc_ether_addr);
vf->num_mac--;
+ if (ice_is_mc_lldp_eth_addr(mac_addr))
+ ice_vf_update_mac_lldp_num(vf, vsi, false);
return 0;
}
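
For reference, the three groups matched by ice_is_mc_lldp_eth_addr() are the IEEE 802.1AB nearest-bridge (01:80:c2:00:00:0e), nearest-non-TPMR-bridge (01:80:c2:00:00:03) and nearest-customer-bridge (01:80:c2:00:00:00) destination addresses. A userspace restatement of the check, runnable as-is:

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	static bool is_mc_lldp_eth_addr(const uint8_t *mac)
	{
		static const uint8_t base[5] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };

		if (memcmp(mac, base, sizeof(base)))
			return false;

		return mac[5] == 0x0e || mac[5] == 0x03 || mac[5] == 0x00;
	}

	int main(void)
	{
		const uint8_t nearest_bridge[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x0e };

		printf("%d\n", is_mc_lldp_eth_addr(nearest_bridge)); /* prints 1 */
		return 0;
	}
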
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index f323e1c1989f..793c96016288 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -502,13 +502,6 @@ static int igb_ptp_feature_enable_82580(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
/* Both the rising and falling edge are timestamped */
if (rq->extts.flags & PTP_STRICT_FLAGS &&
(rq->extts.flags & PTP_ENABLE_FEATURE) &&
@@ -658,13 +651,6 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
/* Reject requests failing to enable both edges. */
if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
(rq->extts.flags & PTP_ENABLE_FEATURE) &&
@@ -1356,6 +1342,9 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.n_per_out = IGB_N_PEROUT;
adapter->ptp_caps.n_pins = IGB_N_SDP;
adapter->ptp_caps.pps = 0;
+ adapter->ptp_caps.supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
adapter->ptp_caps.pin_config = adapter->sdp_config;
adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
@@ -1378,6 +1367,9 @@ void igb_ptp_init(struct igb_adapter *adapter)
adapter->ptp_caps.n_ext_ts = IGB_N_EXTTS;
adapter->ptp_caps.n_per_out = IGB_N_PEROUT;
adapter->ptp_caps.n_pins = IGB_N_SDP;
+ adapter->ptp_caps.supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
adapter->ptp_caps.pps = 1;
adapter->ptp_caps.pin_config = adapter->sdp_config;
adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index b1669d7cf435..ddfa654db1e0 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -7125,6 +7125,9 @@ static int igc_probe(struct pci_dev *pdev,
netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ /* enable HW vlan tag insertion/stripping by default */
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+
/* MTU range: 68 - 9216 */
netdev->min_mtu = ETH_MIN_MTU;
netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 612ed26a29c5..cfb589f9da56 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -257,13 +257,6 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
/* Reject requests failing to enable both edges. */
if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
(rq->extts.flags & PTP_ENABLE_FEATURE) &&
@@ -300,10 +293,6 @@ static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
return 0;
case PTP_CLK_REQ_PEROUT:
- /* Reject requests with unsupported flags */
- if (rq->perout.flags)
- return -EOPNOTSUPP;
-
if (on) {
pin = ptp_find_pin(igc->ptp_clock, PTP_PF_PEROUT,
rq->perout.index);
@@ -1162,6 +1151,9 @@ void igc_ptp_init(struct igc_adapter *adapter)
adapter->ptp_caps.pin_config = adapter->sdp_config;
adapter->ptp_caps.n_ext_ts = IGC_N_EXTTS;
adapter->ptp_caps.n_per_out = IGC_N_PEROUT;
+ adapter->ptp_caps.supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
adapter->ptp_caps.n_pins = IGC_N_SDP;
adapter->ptp_caps.verify = igc_ptp_verify_pin;
diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile
index b456d102655a..ce447540d146 100644
--- a/drivers/net/ethernet/intel/ixgbe/Makefile
+++ b/drivers/net/ethernet/intel/ixgbe/Makefile
@@ -4,12 +4,13 @@
# Makefile for the Intel(R) 10GbE PCI Express ethernet driver
#
+subdir-ccflags-y += -I$(src)
obj-$(CONFIG_IXGBE) += ixgbe.o
ixgbe-y := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \
ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \
ixgbe_mbx.o ixgbe_x540.o ixgbe_x550.o ixgbe_lib.o ixgbe_ptp.o \
- ixgbe_xsk.o ixgbe_e610.o
+ ixgbe_xsk.o ixgbe_e610.o devlink/devlink.o ixgbe_fw_update.o
ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \
ixgbe_dcb_82599.o ixgbe_dcb_nl.o
diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c
new file mode 100644
index 000000000000..54f1b83dfe42
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.c
@@ -0,0 +1,557 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025, Intel Corporation. */
+
+#include "ixgbe.h"
+#include "devlink.h"
+#include "ixgbe_fw_update.h"
+
+struct ixgbe_info_ctx {
+ char buf[128];
+ struct ixgbe_orom_info pending_orom;
+ struct ixgbe_nvm_info pending_nvm;
+ struct ixgbe_netlist_info pending_netlist;
+ struct ixgbe_hw_dev_caps dev_caps;
+};
+
+enum ixgbe_devlink_version_type {
+ IXGBE_DL_VERSION_RUNNING,
+ IXGBE_DL_VERSION_STORED
+};
+
+static void ixgbe_info_get_dsn(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx)
+{
+ u8 dsn[8];
+
+ /* Copy the DSN into an array in Big Endian format */
+ put_unaligned_be64(pci_get_dsn(adapter->pdev), dsn);
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn);
+}
+
+static void ixgbe_info_orom_ver(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx,
+ enum ixgbe_devlink_version_type type)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_nvm_version nvm_ver;
+
+ ctx->buf[0] = '\0';
+
+ if (hw->mac.type == ixgbe_mac_e610) {
+ struct ixgbe_orom_info *orom = &adapter->hw.flash.orom;
+
+ if (type == IXGBE_DL_VERSION_STORED &&
+ ctx->dev_caps.common_cap.nvm_update_pending_orom)
+ orom = &ctx->pending_orom;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
+ orom->major, orom->build, orom->patch);
+ return;
+ }
+
+ ixgbe_get_oem_prod_version(hw, &nvm_ver);
+ if (nvm_ver.oem_valid) {
+ snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x",
+ nvm_ver.oem_major, nvm_ver.oem_minor,
+ nvm_ver.oem_release);
+
+ return;
+ }
+
+ ixgbe_get_orom_version(hw, &nvm_ver);
+ if (nvm_ver.or_valid)
+ snprintf(ctx->buf, sizeof(ctx->buf), "%d.%d.%d",
+ nvm_ver.or_major, nvm_ver.or_build, nvm_ver.or_patch);
+}
+
+static void ixgbe_info_eetrack(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx,
+ enum ixgbe_devlink_version_type type)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_nvm_version nvm_ver;
+
+ if (hw->mac.type == ixgbe_mac_e610) {
+ u32 eetrack = hw->flash.nvm.eetrack;
+
+ if (type == IXGBE_DL_VERSION_STORED &&
+ ctx->dev_caps.common_cap.nvm_update_pending_nvm)
+ eetrack = ctx->pending_nvm.eetrack;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", eetrack);
+ return;
+ }
+
+ ixgbe_get_oem_prod_version(hw, &nvm_ver);
+
+ /* No ETRACK version for OEM */
+ if (nvm_ver.oem_valid) {
+ ctx->buf[0] = '\0';
+ return;
+ }
+
+ ixgbe_get_etk_id(hw, &nvm_ver);
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm_ver.etk_id);
+}
+
+static void ixgbe_info_fw_api(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u",
+ hw->api_maj_ver, hw->api_min_ver, hw->api_patch);
+}
+
+static void ixgbe_info_fw_build(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build);
+}
+
+static void ixgbe_info_fw_srev(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx,
+ enum ixgbe_devlink_version_type type)
+{
+ struct ixgbe_nvm_info *nvm = &adapter->hw.flash.nvm;
+
+ if (type == IXGBE_DL_VERSION_STORED &&
+ ctx->dev_caps.common_cap.nvm_update_pending_nvm)
+ nvm = &ctx->pending_nvm;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u", nvm->srev);
+}
+
+static void ixgbe_info_orom_srev(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx,
+ enum ixgbe_devlink_version_type type)
+{
+ struct ixgbe_orom_info *orom = &adapter->hw.flash.orom;
+
+ if (type == IXGBE_DL_VERSION_STORED &&
+ ctx->dev_caps.common_cap.nvm_update_pending_orom)
+ orom = &ctx->pending_orom;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%u", orom->srev);
+}
+
+static void ixgbe_info_nvm_ver(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx,
+ enum ixgbe_devlink_version_type type)
+{
+ struct ixgbe_nvm_info *nvm = &adapter->hw.flash.nvm;
+
+ if (type == IXGBE_DL_VERSION_STORED &&
+ ctx->dev_caps.common_cap.nvm_update_pending_nvm)
+ nvm = &ctx->pending_nvm;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor);
+}
+
+static void ixgbe_info_netlist_ver(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx,
+ enum ixgbe_devlink_version_type type)
+{
+ struct ixgbe_netlist_info *netlist = &adapter->hw.flash.netlist;
+
+ if (type == IXGBE_DL_VERSION_STORED &&
+ ctx->dev_caps.common_cap.nvm_update_pending_netlist)
+ netlist = &ctx->pending_netlist;
+
+ /* The netlist version fields are BCD formatted */
+ snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x",
+ netlist->major, netlist->minor,
+ netlist->type >> 16, netlist->type & 0xFFFF,
+ netlist->rev, netlist->cust_ver);
+}
+
+static void ixgbe_info_netlist_build(struct ixgbe_adapter *adapter,
+ struct ixgbe_info_ctx *ctx,
+ enum ixgbe_devlink_version_type type)
+{
+ struct ixgbe_netlist_info *netlist = &adapter->hw.flash.netlist;
+
+ if (type == IXGBE_DL_VERSION_STORED &&
+ ctx->dev_caps.common_cap.nvm_update_pending_netlist)
+ netlist = &ctx->pending_netlist;
+
+ snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash);
+}
+
+static int ixgbe_set_ctx_dev_caps(struct ixgbe_hw *hw,
+ struct ixgbe_info_ctx *ctx,
+ struct netlink_ext_ack *extack)
+{
+ bool *pending_orom, *pending_nvm, *pending_netlist;
+ int err;
+
+ err = ixgbe_discover_dev_caps(hw, &ctx->dev_caps);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unable to discover device capabilities");
+ return err;
+ }
+
+ pending_orom = &ctx->dev_caps.common_cap.nvm_update_pending_orom;
+ pending_nvm = &ctx->dev_caps.common_cap.nvm_update_pending_nvm;
+ pending_netlist = &ctx->dev_caps.common_cap.nvm_update_pending_netlist;
+
+ if (*pending_orom) {
+ err = ixgbe_get_inactive_orom_ver(hw, &ctx->pending_orom);
+ if (err)
+ *pending_orom = false;
+ }
+
+ if (*pending_nvm) {
+ err = ixgbe_get_inactive_nvm_ver(hw, &ctx->pending_nvm);
+ if (err)
+ *pending_nvm = false;
+ }
+
+ if (*pending_netlist) {
+ err = ixgbe_get_inactive_netlist_ver(hw, &ctx->pending_netlist);
+ if (err)
+ *pending_netlist = false;
+ }
+
+ return 0;
+}
+
+static int ixgbe_devlink_info_get_e610(struct ixgbe_adapter *adapter,
+ struct devlink_info_req *req,
+ struct ixgbe_info_ctx *ctx)
+{
+ int err;
+
+ ixgbe_info_fw_api(adapter, ctx);
+ err = devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW_MGMT_API,
+ ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_fw_build(adapter, ctx);
+ err = devlink_info_version_running_put(req, "fw.mgmt.build", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_fw_srev(adapter, ctx, IXGBE_DL_VERSION_RUNNING);
+ err = devlink_info_version_running_put(req, "fw.mgmt.srev", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_orom_srev(adapter, ctx, IXGBE_DL_VERSION_RUNNING);
+ err = devlink_info_version_running_put(req, "fw.undi.srev", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_nvm_ver(adapter, ctx, IXGBE_DL_VERSION_RUNNING);
+ err = devlink_info_version_running_put(req, "fw.psid.api", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_netlist_ver(adapter, ctx, IXGBE_DL_VERSION_RUNNING);
+ err = devlink_info_version_running_put(req, "fw.netlist", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_netlist_build(adapter, ctx, IXGBE_DL_VERSION_RUNNING);
+ return devlink_info_version_running_put(req, "fw.netlist.build",
+ ctx->buf);
+}
+
+static int
+ixgbe_devlink_pending_info_get_e610(struct ixgbe_adapter *adapter,
+ struct devlink_info_req *req,
+ struct ixgbe_info_ctx *ctx)
+{
+ int err;
+
+ ixgbe_info_orom_ver(adapter, ctx, IXGBE_DL_VERSION_STORED);
+ err = devlink_info_version_stored_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW_UNDI,
+ ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_eetrack(adapter, ctx, IXGBE_DL_VERSION_STORED);
+ err = devlink_info_version_stored_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID,
+ ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_fw_srev(adapter, ctx, IXGBE_DL_VERSION_STORED);
+ err = devlink_info_version_stored_put(req, "fw.mgmt.srev", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_orom_srev(adapter, ctx, IXGBE_DL_VERSION_STORED);
+ err = devlink_info_version_stored_put(req, "fw.undi.srev", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_nvm_ver(adapter, ctx, IXGBE_DL_VERSION_STORED);
+ err = devlink_info_version_stored_put(req, "fw.psid.api", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_netlist_ver(adapter, ctx, IXGBE_DL_VERSION_STORED);
+ err = devlink_info_version_stored_put(req, "fw.netlist", ctx->buf);
+ if (err)
+ return err;
+
+ ixgbe_info_netlist_build(adapter, ctx, IXGBE_DL_VERSION_STORED);
+ return devlink_info_version_stored_put(req, "fw.netlist.build",
+ ctx->buf);
+}
+
+static int ixgbe_devlink_info_get(struct devlink *devlink,
+ struct devlink_info_req *req,
+ struct netlink_ext_ack *extack)
+{
+ struct ixgbe_adapter *adapter = devlink_priv(devlink);
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_info_ctx *ctx;
+ int err;
+
+ ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (hw->mac.type == ixgbe_mac_e610)
+ ixgbe_refresh_fw_version(adapter);
+
+ ixgbe_info_get_dsn(adapter, ctx);
+ err = devlink_info_serial_number_put(req, ctx->buf);
+ if (err)
+ goto free_ctx;
+
+ err = hw->eeprom.ops.read_pba_string(hw, ctx->buf, sizeof(ctx->buf));
+ if (err)
+ goto free_ctx;
+
+ err = devlink_info_version_fixed_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_BOARD_ID,
+ ctx->buf);
+ if (err)
+ goto free_ctx;
+
+ ixgbe_info_orom_ver(adapter, ctx, IXGBE_DL_VERSION_RUNNING);
+ err = devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW_UNDI,
+ ctx->buf);
+ if (err)
+ goto free_ctx;
+
+ ixgbe_info_eetrack(adapter, ctx, IXGBE_DL_VERSION_RUNNING);
+ err = devlink_info_version_running_put(req,
+ DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID,
+ ctx->buf);
+ if (err || hw->mac.type != ixgbe_mac_e610)
+ goto free_ctx;
+
+ err = ixgbe_set_ctx_dev_caps(hw, ctx, extack);
+ if (err)
+ goto free_ctx;
+
+ err = ixgbe_devlink_info_get_e610(adapter, req, ctx);
+ if (err)
+ goto free_ctx;
+
+ err = ixgbe_devlink_pending_info_get_e610(adapter, req, ctx);
+free_ctx:
+ kfree(ctx);
+ return err;
+}
+
+/**
+ * ixgbe_devlink_reload_empr_start - Start EMP reset to activate new firmware
+ * @devlink: pointer to the devlink instance to reload
+ * @netns_change: if true, the network namespace is changing
+ * @action: the action to perform. Must be DEVLINK_RELOAD_ACTION_FW_ACTIVATE
+ * @limit: limits on what reload should do, such as not resetting
+ * @extack: netlink extended ACK structure
+ *
+ * Allow the user to activate new Embedded Management Processor firmware by
+ * issuing a device-specific EMP reset. Called in response to
+ * a DEVLINK_CMD_RELOAD with the DEVLINK_RELOAD_ACTION_FW_ACTIVATE.
+ *
+ * Note that teardown and rebuild of the driver state happens automatically as
+ * part of an interrupt and watchdog task. This is because all physical
+ * functions on the device must be able to reset when an EMP reset occurs from
+ * any source.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_devlink_reload_empr_start(struct devlink *devlink,
+ bool netns_change,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ struct netlink_ext_ack *extack)
+{
+ struct ixgbe_adapter *adapter = devlink_priv(devlink);
+ struct ixgbe_hw *hw = &adapter->hw;
+ u8 pending;
+ int err;
+
+ if (hw->mac.type != ixgbe_mac_e610)
+ return -EOPNOTSUPP;
+
+ err = ixgbe_get_pending_updates(adapter, &pending, extack);
+ if (err)
+ return err;
+
+ /* Pending is a bitmask of which flash banks have a pending update,
+ * including the main NVM bank, the Option ROM bank, and the netlist
+ * bank. If any of these bits are set, then there is a pending update
+ * waiting to be activated.
+ */
+ if (!pending) {
+ NL_SET_ERR_MSG_MOD(extack, "No pending firmware update");
+ return -ECANCELED;
+ }
+
+ if (adapter->fw_emp_reset_disabled) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "EMP reset is not available. To activate firmware, a reboot or power cycle is needed");
+ return -ECANCELED;
+ }
+
+ err = ixgbe_aci_nvm_update_empr(hw);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to trigger EMP device reset to reload firmware");
+
+ return err;
+}
+
+/* Wait up to 10 sec in 0.5 sec ticks; an EMPR takes no less than half a sec */
+#define IXGBE_DEVLINK_RELOAD_TIMEOUT_SEC 20
+
+/**
+ * ixgbe_devlink_reload_empr_finish - finishes EMP reset
+ * @devlink: pointer to the devlink instance
+ * @action: the action to perform.
+ * @limit: limits on what reload should do
+ * @actions_performed: actions performed
+ * @extack: netlink extended ACK structure
+ *
+ * Wait for new NVM to be loaded during EMP reset.
+ *
+ * Return: -ETIME when the timeout is exceeded, 0 on success.
+ */
+static int ixgbe_devlink_reload_empr_finish(struct devlink *devlink,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed,
+ struct netlink_ext_ack *extack)
+{
+ struct ixgbe_adapter *adapter = devlink_priv(devlink);
+ struct ixgbe_hw *hw = &adapter->hw;
+ int i = 0;
+ u32 fwsm;
+
+ do {
+ /* Right after triggering the EMP reset, the FWSM register may
+ * not be cleared yet, so begin the loop with a delay to avoid
+ * reading a stale value.
+ */
+ mdelay(500);
+
+ fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+
+ if (i++ >= IXGBE_DEVLINK_RELOAD_TIMEOUT_SEC)
+ return -ETIME;
+
+ } while (!(fwsm & IXGBE_FWSM_FW_VAL_BIT));
+
+ *actions_performed = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE);
+
+ adapter->flags2 &= ~(IXGBE_FLAG2_API_MISMATCH |
+ IXGBE_FLAG2_FW_ROLLBACK);
+
+ return 0;
+}
+
+static const struct devlink_ops ixgbe_devlink_ops = {
+ .info_get = ixgbe_devlink_info_get,
+ .supported_flash_update_params =
+ DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
+ .flash_update = ixgbe_flash_pldm_image,
+ .reload_actions = BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE),
+ .reload_down = ixgbe_devlink_reload_empr_start,
+ .reload_up = ixgbe_devlink_reload_empr_finish,
+};
+
+/**
+ * ixgbe_allocate_devlink - Allocate devlink instance
+ * @dev: device to allocate devlink for
+ *
+ * Allocate a devlink instance for this physical function.
+ *
+ * Return: pointer to the device adapter structure on success,
+ * ERR_PTR(-ENOMEM) when allocation failed.
+ */
+struct ixgbe_adapter *ixgbe_allocate_devlink(struct device *dev)
+{
+ struct ixgbe_adapter *adapter;
+ struct devlink *devlink;
+
+ devlink = devlink_alloc(&ixgbe_devlink_ops, sizeof(*adapter), dev);
+ if (!devlink)
+ return ERR_PTR(-ENOMEM);
+
+ adapter = devlink_priv(devlink);
+ adapter->devlink = devlink;
+
+ return adapter;
+}
+
+/**
+ * ixgbe_devlink_set_switch_id - Set unique switch ID based on PCI DSN
+ * @adapter: pointer to the device adapter structure
+ * @ppid: struct with switch id information
+ */
+static void ixgbe_devlink_set_switch_id(struct ixgbe_adapter *adapter,
+ struct netdev_phys_item_id *ppid)
+{
+ u64 id = pci_get_dsn(adapter->pdev);
+
+ ppid->id_len = sizeof(id);
+ put_unaligned_be64(id, &ppid->id);
+}
+
+/**
+ * ixgbe_devlink_register_port - Register devlink port
+ * @adapter: pointer to the device adapter structure
+ *
+ * Create and register a devlink_port for this physical function.
+ *
+ * Return: 0 on success, error code on failure.
+ */
+int ixgbe_devlink_register_port(struct ixgbe_adapter *adapter)
+{
+ struct devlink_port *devlink_port = &adapter->devlink_port;
+ struct devlink *devlink = adapter->devlink;
+ struct device *dev = &adapter->pdev->dev;
+ struct devlink_port_attrs attrs = {};
+ int err;
+
+ attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+ attrs.phys.port_number = adapter->hw.bus.func;
+ ixgbe_devlink_set_switch_id(adapter, &attrs.switch_id);
+
+ devlink_port_attrs_set(devlink_port, &attrs);
+
+ err = devl_port_register(devlink, devlink_port, 0);
+ if (err) {
+ dev_err(dev,
+ "devlink port registration failed, err %d\n", err);
+ }
+
+ return err;
+}
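
ixgbe_info_get_dsn() serializes the 64-bit PCI DSN in big-endian byte order and prints it with the kernel's "%8phD" specifier (hex bytes separated by dashes). An equivalent userspace illustration, with a made-up serial number:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t dsn = 0x0123456789abcdefULL;	/* made-up DSN */
		uint8_t b[8];

		for (int i = 0; i < 8; i++)		/* big-endian byte order */
			b[i] = dsn >> (56 - 8 * i);

		/* Kernel "%8phD" output: hex bytes separated by dashes */
		for (int i = 0; i < 8; i++)
			printf("%02x%s", b[i], i < 7 ? "-" : "\n");

		return 0;
	}
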
diff --git a/drivers/net/ethernet/intel/ixgbe/devlink/devlink.h b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.h
new file mode 100644
index 000000000000..0b27653a3407
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/devlink/devlink.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025, Intel Corporation. */
+
+#ifndef _IXGBE_DEVLINK_H_
+#define _IXGBE_DEVLINK_H_
+
+struct ixgbe_adapter *ixgbe_allocate_devlink(struct device *dev);
+int ixgbe_devlink_register_port(struct ixgbe_adapter *adapter);
+
+#endif /* _IXGBE_DEVLINK_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index e6a380d4929b..23c2e2c2649c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -17,6 +17,8 @@
#include <linux/net_tstamp.h>
#include <linux/ptp_clock_kernel.h>
+#include <net/devlink.h>
+
#include "ixgbe_type.h"
#include "ixgbe_common.h"
#include "ixgbe_dcb.h"
@@ -612,6 +614,8 @@ struct ixgbe_adapter {
struct bpf_prog *xdp_prog;
struct pci_dev *pdev;
struct mii_bus *mii_bus;
+ struct devlink *devlink;
+ struct devlink_port devlink_port;
unsigned long state;
@@ -667,6 +671,8 @@ struct ixgbe_adapter {
#define IXGBE_FLAG2_PHY_FW_LOAD_FAILED BIT(20)
#define IXGBE_FLAG2_NO_MEDIA BIT(21)
#define IXGBE_FLAG2_MOD_POWER_UNSUPPORTED BIT(22)
+#define IXGBE_FLAG2_API_MISMATCH BIT(23)
+#define IXGBE_FLAG2_FW_ROLLBACK BIT(24)
/* Tx fast path data */
int num_tx_queues;
@@ -755,6 +761,8 @@ struct ixgbe_adapter {
u32 atr_sample_rate;
spinlock_t fdir_perfect_lock;
+ bool fw_emp_reset_disabled;
+
#ifdef IXGBE_FCOE
struct ixgbe_fcoe fcoe;
#endif /* IXGBE_FCOE */
@@ -830,6 +838,17 @@ struct ixgbe_adapter {
spinlock_t vfs_lock;
};
+struct ixgbe_netdevice_priv {
+ struct ixgbe_adapter *adapter;
+};
+
+static inline struct ixgbe_adapter *ixgbe_from_netdev(struct net_device *netdev)
+{
+ struct ixgbe_netdevice_priv *priv = netdev_priv(netdev);
+
+ return priv->adapter;
+}
+
static inline int ixgbe_determine_xdp_q_idx(int cpu)
{
if (static_key_enabled(&ixgbe_xdp_locking_key))
@@ -945,6 +964,8 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter);
int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter);
bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
u16 subdevice_id);
+void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter);
+void ixgbe_refresh_fw_version(struct ixgbe_adapter *adapter);
#ifdef CONFIG_PCI_IOV
void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
#endif
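
Since devlink_alloc() now owns the adapter, netdev_priv() can no longer be the adapter itself; it is reduced to a single back-pointer, and ixgbe_from_netdev() does the hop. A minimal illustration of that indirection (standalone C, with the kernel types reduced to the essentials):

	#include <assert.h>

	struct adapter { int id; };

	/* What netdev_priv() returns after this series: a thin wrapper */
	struct netdevice_priv { struct adapter *adapter; };

	static struct adapter *from_netdev(struct netdevice_priv *priv)
	{
		return priv->adapter;
	}

	int main(void)
	{
		struct adapter a = { .id = 42 };
		struct netdevice_priv p = { .adapter = &a };

		assert(from_netdev(&p)->id == 42);
		return 0;
	}
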
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index 4aaaea3b5f8f..444da982593f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -1169,6 +1169,7 @@ static const struct ixgbe_eeprom_operations eeprom_ops_82598 = {
.calc_checksum = &ixgbe_calc_eeprom_checksum_generic,
.validate_checksum = &ixgbe_validate_eeprom_checksum_generic,
.update_checksum = &ixgbe_update_eeprom_checksum_generic,
+ .read_pba_string = &ixgbe_read_pba_string_generic,
};
static const struct ixgbe_phy_operations phy_ops_82598 = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 964988b4d58b..d5b1b974b4a3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -2230,6 +2230,7 @@ static const struct ixgbe_eeprom_operations eeprom_ops_82599 = {
.calc_checksum = &ixgbe_calc_eeprom_checksum_generic,
.validate_checksum = &ixgbe_validate_eeprom_checksum_generic,
.update_checksum = &ixgbe_update_eeprom_checksum_generic,
+ .read_pba_string = &ixgbe_read_pba_string_generic,
};
static const struct ixgbe_phy_operations phy_ops_82599 = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 7beaf6ea57f9..5784d5d1896e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -332,6 +332,7 @@ int ixgbe_start_hw_generic(struct ixgbe_hw *hw)
* Devices in the second generation:
* 82599
* X540
+ * E610
**/
int ixgbe_start_hw_gen2(struct ixgbe_hw *hw)
{
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
index 19d6b6fa8fb3..3dd5a16a14df 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
@@ -118,14 +118,14 @@ static int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max)
static u8 ixgbe_dcbnl_get_state(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
return !!(adapter->flags & IXGBE_FLAG_DCB_ENABLED);
}
static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
/* Fail command if not in CEE mode */
if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE))
@@ -142,7 +142,7 @@ static u8 ixgbe_dcbnl_set_state(struct net_device *netdev, u8 state)
static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev,
u8 *perm_addr)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
int i, j;
memset(perm_addr, 0xff, MAX_ADDR_LEN);
@@ -167,7 +167,7 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
u8 prio, u8 bwg_id, u8 bw_pct,
u8 up_map)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (prio != DCB_ATTR_VALUE_UNDEFINED)
adapter->temp_dcb_cfg.tc_config[tc].path[0].prio_type = prio;
@@ -184,7 +184,7 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_tx(struct net_device *netdev, int tc,
static void ixgbe_dcbnl_set_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
u8 bw_pct)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
adapter->temp_dcb_cfg.bw_percentage[0][bwg_id] = bw_pct;
}
@@ -193,7 +193,7 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc,
u8 prio, u8 bwg_id, u8 bw_pct,
u8 up_map)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (prio != DCB_ATTR_VALUE_UNDEFINED)
adapter->temp_dcb_cfg.tc_config[tc].path[1].prio_type = prio;
@@ -210,7 +210,7 @@ static void ixgbe_dcbnl_set_pg_tc_cfg_rx(struct net_device *netdev, int tc,
static void ixgbe_dcbnl_set_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
u8 bw_pct)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
adapter->temp_dcb_cfg.bw_percentage[1][bwg_id] = bw_pct;
}
@@ -219,7 +219,7 @@ static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc,
u8 *prio, u8 *bwg_id, u8 *bw_pct,
u8 *up_map)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
*prio = adapter->dcb_cfg.tc_config[tc].path[0].prio_type;
*bwg_id = adapter->dcb_cfg.tc_config[tc].path[0].bwg_id;
@@ -230,7 +230,7 @@ static void ixgbe_dcbnl_get_pg_tc_cfg_tx(struct net_device *netdev, int tc,
static void ixgbe_dcbnl_get_pg_bwg_cfg_tx(struct net_device *netdev, int bwg_id,
u8 *bw_pct)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
*bw_pct = adapter->dcb_cfg.bw_percentage[0][bwg_id];
}
@@ -239,7 +239,7 @@ static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc,
u8 *prio, u8 *bwg_id, u8 *bw_pct,
u8 *up_map)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
*prio = adapter->dcb_cfg.tc_config[tc].path[1].prio_type;
*bwg_id = adapter->dcb_cfg.tc_config[tc].path[1].bwg_id;
@@ -250,7 +250,7 @@ static void ixgbe_dcbnl_get_pg_tc_cfg_rx(struct net_device *netdev, int tc,
static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
u8 *bw_pct)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
*bw_pct = adapter->dcb_cfg.bw_percentage[1][bwg_id];
}
@@ -258,7 +258,7 @@ static void ixgbe_dcbnl_get_pg_bwg_cfg_rx(struct net_device *netdev, int bwg_id,
static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
u8 setting)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc = setting;
if (adapter->temp_dcb_cfg.tc_config[priority].dcb_pfc !=
@@ -269,14 +269,14 @@ static void ixgbe_dcbnl_set_pfc_cfg(struct net_device *netdev, int priority,
static void ixgbe_dcbnl_get_pfc_cfg(struct net_device *netdev, int priority,
u8 *setting)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
*setting = adapter->dcb_cfg.tc_config[priority].dcb_pfc;
}
static void ixgbe_dcbnl_devreset(struct net_device *dev)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
usleep_range(1000, 2000);
@@ -295,7 +295,7 @@ static void ixgbe_dcbnl_devreset(struct net_device *dev)
static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_dcb_config *dcb_cfg = &adapter->dcb_cfg;
struct ixgbe_hw *hw = &adapter->hw;
int ret = DCB_NO_HW_CHG;
@@ -383,7 +383,7 @@ static u8 ixgbe_dcbnl_set_all(struct net_device *netdev)
static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
switch (capid) {
case DCB_CAP_ATTR_PG:
@@ -420,7 +420,7 @@ static u8 ixgbe_dcbnl_getcap(struct net_device *netdev, int capid, u8 *cap)
static int ixgbe_dcbnl_getnumtcs(struct net_device *netdev, int tcid, u8 *num)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
switch (tcid) {
@@ -447,14 +447,14 @@ static int ixgbe_dcbnl_setnumtcs(struct net_device *netdev, int tcid, u8 num)
static u8 ixgbe_dcbnl_getpfcstate(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
return adapter->dcb_cfg.pfc_mode_enable;
}
static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
adapter->temp_dcb_cfg.pfc_mode_enable = state;
}
@@ -471,7 +471,7 @@ static void ixgbe_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
*/
static int ixgbe_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct dcb_app app = {
.selector = idtype,
.protocol = id,
@@ -486,7 +486,7 @@ static int ixgbe_dcbnl_getapp(struct net_device *netdev, u8 idtype, u16 id)
static int ixgbe_dcbnl_ieee_getets(struct net_device *dev,
struct ieee_ets *ets)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ieee_ets *my_ets = adapter->ixgbe_ieee_ets;
ets->ets_cap = adapter->dcb_cfg.num_tcs.pg_tcs;
@@ -506,7 +506,7 @@ static int ixgbe_dcbnl_ieee_getets(struct net_device *dev,
static int ixgbe_dcbnl_ieee_setets(struct net_device *dev,
struct ieee_ets *ets)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN;
int i, err;
__u8 max_tc = 0;
@@ -559,7 +559,7 @@ static int ixgbe_dcbnl_ieee_setets(struct net_device *dev,
static int ixgbe_dcbnl_ieee_getpfc(struct net_device *dev,
struct ieee_pfc *pfc)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ieee_pfc *my_pfc = adapter->ixgbe_ieee_pfc;
int i;
@@ -584,7 +584,7 @@ static int ixgbe_dcbnl_ieee_getpfc(struct net_device *dev,
static int ixgbe_dcbnl_ieee_setpfc(struct net_device *dev,
struct ieee_pfc *pfc)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_hw *hw = &adapter->hw;
u8 *prio_tc;
int err;
@@ -616,7 +616,7 @@ static int ixgbe_dcbnl_ieee_setpfc(struct net_device *dev,
static int ixgbe_dcbnl_ieee_setapp(struct net_device *dev,
struct dcb_app *app)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
int err;
if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
@@ -661,7 +661,7 @@ static int ixgbe_dcbnl_ieee_setapp(struct net_device *dev,
static int ixgbe_dcbnl_ieee_delapp(struct net_device *dev,
struct dcb_app *app)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
int err;
if (!(adapter->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
@@ -705,13 +705,13 @@ static int ixgbe_dcbnl_ieee_delapp(struct net_device *dev,
static u8 ixgbe_dcbnl_getdcbx(struct net_device *dev)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
return adapter->dcbx_cap;
}
static u8 ixgbe_dcbnl_setdcbx(struct net_device *dev, u8 mode)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ieee_ets ets = {0};
struct ieee_pfc pfc = {0};
int err = 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c
index 00935747c8c5..9ada35f7d8f7 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c
@@ -344,6 +344,40 @@ void ixgbe_fill_dflt_direct_cmd_desc(struct ixgbe_aci_desc *desc, u16 opcode)
}
/**
+ * ixgbe_aci_get_fw_ver - Get the firmware version
+ * @hw: pointer to the HW struct
+ *
+ * Get the firmware version using ACI command (0x0001).
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_aci_get_fw_ver(struct ixgbe_hw *hw)
+{
+ struct ixgbe_aci_cmd_get_ver *resp;
+ struct ixgbe_aci_desc desc;
+ int err;
+
+ resp = &desc.params.get_ver;
+
+ ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_ver);
+
+ err = ixgbe_aci_send_cmd(hw, &desc, NULL, 0);
+ if (!err) {
+ hw->fw_branch = resp->fw_branch;
+ hw->fw_maj_ver = resp->fw_major;
+ hw->fw_min_ver = resp->fw_minor;
+ hw->fw_patch = resp->fw_patch;
+ hw->fw_build = le32_to_cpu(resp->fw_build);
+ hw->api_branch = resp->api_branch;
+ hw->api_maj_ver = resp->api_major;
+ hw->api_min_ver = resp->api_minor;
+ hw->api_patch = resp->api_patch;
+ }
+
+ return err;
+}
+
+/**
* ixgbe_aci_req_res - request a common resource
* @hw: pointer to the HW struct
* @res: resource ID
@@ -554,6 +588,20 @@ static bool ixgbe_parse_e610_caps(struct ixgbe_hw *hw,
break;
case IXGBE_ACI_CAPS_NVM_VER:
break;
+ case IXGBE_ACI_CAPS_PENDING_NVM_VER:
+ caps->nvm_update_pending_nvm = true;
+ break;
+ case IXGBE_ACI_CAPS_PENDING_OROM_VER:
+ caps->nvm_update_pending_orom = true;
+ break;
+ case IXGBE_ACI_CAPS_PENDING_NET_VER:
+ caps->nvm_update_pending_netlist = true;
+ break;
+ case IXGBE_ACI_CAPS_NVM_MGMT:
+ caps->nvm_unified_update =
+ !!(number & IXGBE_NVM_MGMT_UNIFIED_UPD_SUPPORT);
+ break;
case IXGBE_ACI_CAPS_MAX_MTU:
caps->max_mtu = number;
break;
@@ -1411,6 +1459,32 @@ int ixgbe_configure_lse(struct ixgbe_hw *hw, bool activate, u16 mask)
}
/**
+ * ixgbe_start_hw_e610 - Prepare hardware for Tx/Rx
+ * @hw: pointer to hardware structure
+ *
+ * Get firmware version and start the hardware using the generic
+ * start_hw() and ixgbe_start_hw_gen2() functions.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_start_hw_e610(struct ixgbe_hw *hw)
+{
+ int err;
+
+ err = ixgbe_aci_get_fw_ver(hw);
+ if (err)
+ return err;
+
+ err = ixgbe_start_hw_generic(hw);
+ if (err)
+ return err;
+
+ ixgbe_start_hw_gen2(hw);
+
+ return 0;
+}
+
+/**
* ixgbe_get_media_type_e610 - Gets media type
* @hw: pointer to the HW struct
*
@@ -1743,6 +1817,38 @@ void ixgbe_disable_rx_e610(struct ixgbe_hw *hw)
}
/**
+ * ixgbe_fw_recovery_mode_e610 - Check FW NVM recovery mode
+ * @hw: pointer to hardware structure
+ *
+ * Check FW NVM recovery mode by reading the value of
+ * the dedicated register.
+ *
+ * Return: true if FW is in recovery mode, otherwise false.
+ */
+static bool ixgbe_fw_recovery_mode_e610(struct ixgbe_hw *hw)
+{
+ u32 fwsm = IXGBE_READ_REG(hw, IXGBE_GL_MNG_FWSM);
+
+ return !!(fwsm & IXGBE_GL_MNG_FWSM_RECOVERY_M);
+}
+
+/**
+ * ixgbe_fw_rollback_mode_e610 - Check FW NVM rollback mode
+ * @hw: pointer to hardware structure
+ *
+ * Check FW NVM rollback mode by reading the value of
+ * the dedicated register.
+ *
+ * Return: true if FW is in rollback mode, otherwise false.
+ */
+static bool ixgbe_fw_rollback_mode_e610(struct ixgbe_hw *hw)
+{
+ u32 fwsm = IXGBE_READ_REG(hw, IXGBE_GL_MNG_FWSM);
+
+ return !!(fwsm & IXGBE_GL_MNG_FWSM_ROLLBACK_M);
+}
+
+/**
* ixgbe_init_phy_ops_e610 - PHY specific init
* @hw: pointer to hardware structure
*
@@ -2226,6 +2332,131 @@ int ixgbe_aci_read_nvm(struct ixgbe_hw *hw, u16 module_typeid, u32 offset,
}
/**
+ * ixgbe_aci_erase_nvm - erase NVM sector
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ *
+ * Erase the NVM sector using the ACI command (0x0702).
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_aci_erase_nvm(struct ixgbe_hw *hw, u16 module_typeid)
+{
+ struct ixgbe_aci_cmd_nvm *cmd;
+ struct ixgbe_aci_desc desc;
+ __le16 len;
+ int err;
+
+ /* Read the module length from the Shadow RAM: module_typeid 0
+ * selects the SR, the offset of the module size word is converted
+ * from bytes to words, and both last_command and read_shadow_ram
+ * are set to true.
+ */
+ err = ixgbe_aci_read_nvm(hw, 0, 2 * module_typeid + 2, 2, &len, true,
+ true);
+ if (err)
+ return err;
+
+ cmd = &desc.params.nvm;
+
+ ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_erase);
+
+ cmd->module_typeid = cpu_to_le16(module_typeid);
+ cmd->length = len;
+ cmd->offset_low = 0;
+ cmd->offset_high = 0;
+
+ return ixgbe_aci_send_cmd(hw, &desc, NULL, 0);
+}
+
+/**
+ * ixgbe_aci_update_nvm - update NVM
+ * @hw: pointer to the HW struct
+ * @module_typeid: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning
+ * @length: length of the section to be written (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @command_flags: command parameters
+ *
+ * Update the NVM using the ACI command (0x0703).
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_aci_update_nvm(struct ixgbe_hw *hw, u16 module_typeid,
+ u32 offset, u16 length, void *data,
+ bool last_command, u8 command_flags)
+{
+ struct ixgbe_aci_cmd_nvm *cmd;
+ struct ixgbe_aci_desc desc;
+
+ cmd = &desc.params.nvm;
+
+ /* The highest byte of the offset must be zero. */
+ if (offset & 0xFF000000)
+ return -EINVAL;
+
+ ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_write);
+
+ cmd->cmd_flags |= command_flags;
+
+ /* If this is the last command in a series, set the proper flag. */
+ if (last_command)
+ cmd->cmd_flags |= IXGBE_ACI_NVM_LAST_CMD;
+ cmd->module_typeid = cpu_to_le16(module_typeid);
+ cmd->offset_low = cpu_to_le16(offset & 0xFFFF);
+ cmd->offset_high = FIELD_GET(IXGBE_ACI_NVM_OFFSET_HI_U_MASK, offset);
+ cmd->length = cpu_to_le16(length);
+
+ desc.flags |= cpu_to_le16(IXGBE_ACI_FLAG_RD);
+
+ return ixgbe_aci_send_cmd(hw, &desc, data, length);
+}
+
+/**
+ * ixgbe_nvm_write_activate - NVM activate write
+ * @hw: pointer to the HW struct
+ * @cmd_flags: flags for write activate command
+ * @response_flags: response indicators from firmware
+ *
+ * Update the control word with the required banks' validity bits
+ * and dump the Shadow RAM to flash using ACI command (0x0707).
+ *
+ * cmd_flags controls which banks to activate, the preservation level to use
+ * when activating the NVM bank, and whether an EMP reset is required for
+ * activation.
+ *
+ * Note that the 16-bit cmd_flags value is split between two separate 1-byte
+ * flag values in the descriptor.
+ *
+ * On successful return of the firmware command, the response_flags variable
+ * is updated with the flags reported by firmware indicating certain status,
+ * such as whether EMP reset is enabled.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_nvm_write_activate(struct ixgbe_hw *hw, u16 cmd_flags,
+ u8 *response_flags)
+{
+ struct ixgbe_aci_cmd_nvm *cmd;
+ struct ixgbe_aci_desc desc;
+ int err;
+
+ cmd = &desc.params.nvm;
+ ixgbe_fill_dflt_direct_cmd_desc(&desc,
+ ixgbe_aci_opc_nvm_write_activate);
+
+ cmd->cmd_flags = (u8)(cmd_flags & 0xFF);
+ cmd->offset_high = (u8)FIELD_GET(IXGBE_ACI_NVM_OFFSET_HI_A_MASK,
+ cmd_flags);
+
+ err = ixgbe_aci_send_cmd(hw, &desc, NULL, 0);
+ if (!err && response_flags)
+ *response_flags = cmd->cmd_flags;
+
+ return err;
+}
+
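As the comment above notes, the 16-bit cmd_flags value has no single 16-bit field in the descriptor. A small standalone C model of the split; the mask value is assumed for illustration, not taken from the driver headers:

#include <stdint.h>
#include <stdio.h>

#define OFFSET_HI_A_MASK 0xFF00u	/* assumed: high byte of cmd_flags */

int main(void)
{
	uint16_t cmd_flags = 0x0181;	/* hypothetical activate flags */

	/* The descriptor stores the value as two separate bytes:
	 * cmd_flags carries the low byte, offset_high the high byte.
	 */
	uint8_t low  = cmd_flags & 0xFF;
	uint8_t high = (cmd_flags & OFFSET_HI_A_MASK) >> 8;

	printf("0x%04x -> cmd_flags=0x%02x offset_high=0x%02x\n",
	       cmd_flags, low, high);
	return 0;
}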
+/**
* ixgbe_nvm_validate_checksum - validate checksum
* @hw: pointer to the HW struct
*
@@ -2267,6 +2498,955 @@ int ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw)
}
/**
+ * ixgbe_discover_flash_size - Discover the available flash size
+ * @hw: pointer to the HW struct
+ *
+ * The device flash could be up to 16MB in size. However, it is possible that
+ * the actual size is smaller. Use bisection to determine the accessible size
+ * of flash memory.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_discover_flash_size(struct ixgbe_hw *hw)
+{
+ u32 min_size = 0, max_size = IXGBE_ACI_NVM_MAX_OFFSET + 1;
+ int err;
+
+ err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ);
+ if (err)
+ return err;
+
+ while ((max_size - min_size) > 1) {
+ u32 offset = (max_size + min_size) / 2;
+ u32 len = 1;
+ u8 data;
+
+ err = ixgbe_read_flat_nvm(hw, offset, &len, &data, false);
+ if (err == -EIO &&
+ hw->aci.last_status == IXGBE_ACI_RC_EINVAL) {
+ err = 0;
+ max_size = offset;
+ } else if (!err) {
+ min_size = offset;
+ } else {
+ /* an unexpected error occurred */
+ goto err_read_flat_nvm;
+ }
+ }
+
+ hw->flash.flash_size = max_size;
+
+err_read_flat_nvm:
+ ixgbe_release_nvm(hw);
+
+ return err;
+}
+
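The loop above is a textbook bisection. A self-contained userspace model (hypothetical sizes, plain C) makes the invariant easier to see: reads below the real size succeed, reads at or beyond it fail, and the first failing offset is the size.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define REAL_FLASH_SIZE (10u * 1024 * 1024)	/* hidden ground truth */
#define MAX_OFFSET	(16u * 1024 * 1024)	/* 16MB upper bound */

/* Stand-in for ixgbe_read_flat_nvm(): succeeds only below the real size. */
static bool probe_read(uint32_t offset)
{
	return offset < REAL_FLASH_SIZE;
}

int main(void)
{
	uint32_t min_size = 0, max_size = MAX_OFFSET;

	/* Invariant: reads at min_size succeed, reads at max_size fail. */
	while (max_size - min_size > 1) {
		uint32_t offset = (max_size + min_size) / 2;

		if (probe_read(offset))
			min_size = offset;
		else
			max_size = offset;
	}

	printf("discovered flash size: %u bytes\n", (unsigned int)max_size);
	return 0;
}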
+/**
+ * ixgbe_read_sr_base_address - Read the value of a Shadow RAM pointer word
+ * @hw: pointer to the HW structure
+ * @offset: the word offset of the Shadow RAM word to read
+ * @pointer: pointer value read from Shadow RAM
+ *
+ * Read the given Shadow RAM word, and convert it to a pointer value specified
+ * in bytes. This function assumes the specified offset is a valid pointer
+ * word.
+ *
+ * Each pointer word specifies whether it is stored in word size or 4KB
+ * sector size by using the highest bit. The reported pointer value will be in
+ * bytes, intended for flat NVM reads.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_read_sr_base_address(struct ixgbe_hw *hw, u16 offset,
+ u32 *pointer)
+{
+ u16 value;
+ int err;
+
+ err = ixgbe_read_ee_aci_e610(hw, offset, &value);
+ if (err)
+ return err;
+
+ /* Determine if the pointer is in 4KB or word units */
+ if (value & IXGBE_SR_NVM_PTR_4KB_UNITS)
+ *pointer = (value & ~IXGBE_SR_NVM_PTR_4KB_UNITS) * SZ_4K;
+ else
+ *pointer = value * sizeof(u16);
+
+ return 0;
+}
+
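A minimal model of the pointer decoding, with the 4KB-units flag assumed to be the top bit of the word, as the code above implies:

#include <stdint.h>
#include <stdio.h>

#define SR_NVM_PTR_4KB_UNITS 0x8000u	/* assumed: highest bit of the word */

/* Convert a Shadow RAM pointer word to a byte offset, as the helper does. */
static uint32_t sr_pointer_to_bytes(uint16_t value)
{
	if (value & SR_NVM_PTR_4KB_UNITS)
		return (uint32_t)(value & ~SR_NVM_PTR_4KB_UNITS) * 4096;
	return (uint32_t)value * sizeof(uint16_t);
}

int main(void)
{
	/* 0x0010 in word units -> 32 bytes; 0x8010 in 4KB units -> 64KB */
	printf("0x0010 -> %u bytes\n", (unsigned int)sr_pointer_to_bytes(0x0010));
	printf("0x8010 -> %u bytes\n", (unsigned int)sr_pointer_to_bytes(0x8010));
	return 0;
}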
+/**
+ * ixgbe_read_sr_area_size - Read an area size from a Shadow RAM word
+ * @hw: pointer to the HW structure
+ * @offset: the word offset of the Shadow RAM to read
+ * @size: size value read from the Shadow RAM
+ *
+ * Read the given Shadow RAM word, and convert it to an area size value
+ * specified in bytes. This function assumes the specified offset is a valid
+ * area size word.
+ *
+ * Each area size word is specified in 4KB sector units. This function reports
+ * the size in bytes, intended for flat NVM reads.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_read_sr_area_size(struct ixgbe_hw *hw, u16 offset, u32 *size)
+{
+ u16 value;
+ int err;
+
+ err = ixgbe_read_ee_aci_e610(hw, offset, &value);
+ if (err)
+ return err;
+
+ /* Area sizes are always specified in 4KB units */
+ *size = value * SZ_4K;
+
+ return 0;
+}
+
+/**
+ * ixgbe_determine_active_flash_banks - Discover active bank for each module
+ * @hw: pointer to the HW struct
+ *
+ * Read the Shadow RAM control word and determine which banks are active for
+ * the NVM, OROM, and Netlist modules. Also read and calculate the associated
+ * pointer and size. These values are then cached into the ixgbe_flash_info
+ * structure for later use in order to calculate the correct offset to read
+ * from the active module.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_determine_active_flash_banks(struct ixgbe_hw *hw)
+{
+ struct ixgbe_bank_info *banks = &hw->flash.banks;
+ u16 ctrl_word;
+ int err;
+
+ err = ixgbe_read_ee_aci_e610(hw, IXGBE_E610_SR_NVM_CTRL_WORD,
+ &ctrl_word);
+ if (err)
+ return err;
+
+ if (FIELD_GET(IXGBE_SR_CTRL_WORD_1_M, ctrl_word) !=
+ IXGBE_SR_CTRL_WORD_VALID)
+ return -ENODATA;
+
+ if (!(ctrl_word & IXGBE_SR_CTRL_WORD_NVM_BANK))
+ banks->nvm_bank = IXGBE_1ST_FLASH_BANK;
+ else
+ banks->nvm_bank = IXGBE_2ND_FLASH_BANK;
+
+ if (!(ctrl_word & IXGBE_SR_CTRL_WORD_OROM_BANK))
+ banks->orom_bank = IXGBE_1ST_FLASH_BANK;
+ else
+ banks->orom_bank = IXGBE_2ND_FLASH_BANK;
+
+ if (!(ctrl_word & IXGBE_SR_CTRL_WORD_NETLIST_BANK))
+ banks->netlist_bank = IXGBE_1ST_FLASH_BANK;
+ else
+ banks->netlist_bank = IXGBE_2ND_FLASH_BANK;
+
+ err = ixgbe_read_sr_base_address(hw, IXGBE_E610_SR_1ST_NVM_BANK_PTR,
+ &banks->nvm_ptr);
+ if (err)
+ return err;
+
+ err = ixgbe_read_sr_area_size(hw, IXGBE_E610_SR_NVM_BANK_SIZE,
+ &banks->nvm_size);
+ if (err)
+ return err;
+
+ err = ixgbe_read_sr_base_address(hw, IXGBE_E610_SR_1ST_OROM_BANK_PTR,
+ &banks->orom_ptr);
+ if (err)
+ return err;
+
+ err = ixgbe_read_sr_area_size(hw, IXGBE_E610_SR_OROM_BANK_SIZE,
+ &banks->orom_size);
+ if (err)
+ return err;
+
+ err = ixgbe_read_sr_base_address(hw, IXGBE_E610_SR_NETLIST_BANK_PTR,
+ &banks->netlist_ptr);
+ if (err)
+ return err;
+
+ err = ixgbe_read_sr_area_size(hw, IXGBE_E610_SR_NETLIST_BANK_SIZE,
+ &banks->netlist_size);
+
+ return err;
+}
+
+/**
+ * ixgbe_get_flash_bank_offset - Get offset into requested flash bank
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive flash bank
+ * @module: the module to read from
+ *
+ * Based on the module, lookup the module offset from the beginning of the
+ * flash.
+ *
+ * Return: the flash offset. Note that a value of zero is invalid and must be
+ * treated as an error.
+ */
+static int ixgbe_get_flash_bank_offset(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ u16 module)
+{
+ struct ixgbe_bank_info *banks = &hw->flash.banks;
+ enum ixgbe_flash_bank active_bank;
+ bool second_bank_active;
+ u32 offset, size;
+
+ switch (module) {
+ case IXGBE_E610_SR_1ST_NVM_BANK_PTR:
+ offset = banks->nvm_ptr;
+ size = banks->nvm_size;
+ active_bank = banks->nvm_bank;
+ break;
+ case IXGBE_E610_SR_1ST_OROM_BANK_PTR:
+ offset = banks->orom_ptr;
+ size = banks->orom_size;
+ active_bank = banks->orom_bank;
+ break;
+ case IXGBE_E610_SR_NETLIST_BANK_PTR:
+ offset = banks->netlist_ptr;
+ size = banks->netlist_size;
+ active_bank = banks->netlist_bank;
+ break;
+ default:
+ return 0;
+ }
+
+ switch (active_bank) {
+ case IXGBE_1ST_FLASH_BANK:
+ second_bank_active = false;
+ break;
+ case IXGBE_2ND_FLASH_BANK:
+ second_bank_active = true;
+ break;
+ default:
+ return 0;
+ }
+
+ /* The second flash bank is stored immediately following the first
+ * bank. Based on whether the 1st or 2nd bank is active, and whether
+ * we want the active or inactive bank, calculate the desired offset.
+ */
+ switch (bank) {
+ case IXGBE_ACTIVE_FLASH_BANK:
+ return offset + (second_bank_active ? size : 0);
+ case IXGBE_INACTIVE_FLASH_BANK:
+ return offset + (second_bank_active ? 0 : size);
+ }
+
+ return 0;
+}
+
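The active/inactive arithmetic is easiest to check in isolation. A hedged model of the two-bank layout, with hypothetical offsets: two equal-size banks sit back to back, and the requested bank is the second one exactly when "second bank is active" matches "caller wants the active bank".

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t bank_offset(uint32_t offset, uint32_t size,
			    bool second_bank_active, bool want_active)
{
	/* [offset, offset+size) is the first bank,
	 * [offset+size, offset+2*size) the second.
	 */
	bool use_second = second_bank_active == want_active;

	return offset + (use_second ? size : 0);
}

int main(void)
{
	/* hypothetical module at 0x10000 with 64KB banks, 2nd bank active */
	printf("active:   0x%x\n", bank_offset(0x10000, 0x10000, true, true));
	printf("inactive: 0x%x\n", bank_offset(0x10000, 0x10000, true, false));
	return 0;
}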
+/**
+ * ixgbe_read_flash_module - Read a word from one of the main NVM modules
+ * @hw: pointer to the HW structure
+ * @bank: which bank of the module to read
+ * @module: the module to read
+ * @offset: the offset into the module in bytes
+ * @data: storage for the word read from the flash
+ * @length: bytes of data to read
+ *
+ * Read data from the specified flash module. The bank parameter indicates
+ * whether to read from the active or the inactive bank of that module.
+ *
+ * The data is read using flat NVM access, and relies on the
+ * hw->flash.banks data being set up by ixgbe_determine_active_flash_banks()
+ * during initialization.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_read_flash_module(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ u16 module, u32 offset, u8 *data, u32 length)
+{
+ u32 start;
+ int err;
+
+ start = ixgbe_get_flash_bank_offset(hw, bank, module);
+ if (!start)
+ return -EINVAL;
+
+ err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ);
+ if (err)
+ return err;
+
+ err = ixgbe_read_flat_nvm(hw, start + offset, &length, data, false);
+
+ ixgbe_release_nvm(hw);
+
+ return err;
+}
+
+/**
+ * ixgbe_read_nvm_module - Read from the active main NVM module
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from active or inactive NVM module
+ * @offset: offset into the NVM module to read, in words
+ * @data: storage for returned word value
+ *
+ * Read the specified word from the requested NVM module bank. This includes
+ * the CSS header at the start of the NVM module.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_read_nvm_module(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ u32 offset, u16 *data)
+{
+ __le16 data_local;
+ int err;
+
+ err = ixgbe_read_flash_module(hw, bank, IXGBE_E610_SR_1ST_NVM_BANK_PTR,
+ offset * sizeof(data_local),
+ (u8 *)&data_local,
+ sizeof(data_local));
+ if (!err)
+ *data = le16_to_cpu(data_local);
+
+ return err;
+}
+
+/**
+ * ixgbe_read_netlist_module - Read data from the netlist module area
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive module
+ * @offset: offset into the netlist to read from
+ * @data: storage for returned word value
+ *
+ * Read a word from the specified netlist bank.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_read_netlist_module(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ u32 offset, u16 *data)
+{
+ __le16 data_local;
+ int err;
+
+ err = ixgbe_read_flash_module(hw, bank, IXGBE_E610_SR_NETLIST_BANK_PTR,
+ offset * sizeof(data_local),
+ (u8 *)&data_local, sizeof(data_local));
+ if (!err)
+ *data = le16_to_cpu(data_local);
+
+ return err;
+}
+
+/**
+ * ixgbe_read_orom_module - Read from the active Option ROM module
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from active or inactive OROM module
+ * @offset: offset into the OROM module to read, in words
+ * @data: storage for returned word value
+ *
+ * Read the specified word from the requested Option ROM module bank of the
+ * flash. Note that unlike the NVM module, the CSS data is stored at the end
+ * of the module instead of at the beginning.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_read_orom_module(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ u32 offset, u16 *data)
+{
+ __le16 data_local;
+ int err;
+
+ err = ixgbe_read_flash_module(hw, bank, IXGBE_E610_SR_1ST_OROM_BANK_PTR,
+ offset * sizeof(data_local),
+ (u8 *)&data_local, sizeof(data_local));
+ if (!err)
+ *data = le16_to_cpu(data_local);
+
+ return err;
+}
+
+/**
+ * ixgbe_get_nvm_css_hdr_len - Read the CSS header length
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @hdr_len: storage for header length in words
+ *
+ * Read the CSS header length from the NVM CSS header and add the
+ * Authentication header size, and then convert to words.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_nvm_css_hdr_len(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ u32 *hdr_len)
+{
+ u16 hdr_len_l, hdr_len_h;
+ u32 hdr_len_dword;
+ int err;
+
+ err = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_HDR_LEN_L,
+ &hdr_len_l);
+ if (err)
+ return err;
+
+ err = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_HDR_LEN_H,
+ &hdr_len_h);
+ if (err)
+ return err;
+
+ /* CSS header length is in DWORD, so convert to words and add
+ * authentication header size.
+ */
+ hdr_len_dword = (hdr_len_h << 16) | hdr_len_l;
+ *hdr_len = hdr_len_dword * 2 + IXGBE_NVM_AUTH_HEADER_LEN;
+
+ return 0;
+}
+
+/**
+ * ixgbe_read_nvm_sr_copy - Read a word from the Shadow RAM copy
+ * @hw: pointer to the HW structure
+ * @bank: whether to read from the active or inactive NVM module
+ * @offset: offset into the Shadow RAM copy to read, in words
+ * @data: storage for returned word value
+ *
+ * Read the specified word from the copy of the Shadow RAM found in the
+ * specified NVM module.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_read_nvm_sr_copy(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ u32 offset, u16 *data)
+{
+ u32 hdr_len;
+ int err;
+
+ err = ixgbe_get_nvm_css_hdr_len(hw, bank, &hdr_len);
+ if (err)
+ return err;
+
+ hdr_len = round_up(hdr_len, IXGBE_HDR_LEN_ROUNDUP);
+
+ return ixgbe_read_nvm_module(hw, bank, hdr_len + offset, data);
+}
+
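A standalone sketch of the header arithmetic used by the two helpers above. The authentication-header length and the rounding constant are assumed values for illustration, not taken from the real headers:

#include <stdint.h>
#include <stdio.h>

#define AUTH_HEADER_LEN_WORDS 100u	/* assumed per-device constant */
#define HDR_LEN_ROUNDUP       32u	/* assumed alignment, in words */

/* Round x up to the next multiple of a power-of-two alignment. */
static uint32_t round_up_u32(uint32_t x, uint32_t align)
{
	return (x + align - 1) & ~(align - 1);
}

int main(void)
{
	uint16_t hdr_len_l = 0x0084, hdr_len_h = 0x0000;	/* sample words */
	uint32_t hdr_len_dwords = ((uint32_t)hdr_len_h << 16) | hdr_len_l;

	/* CSS length is stored in dwords; the driver wants words, plus
	 * the authentication header that follows the CSS header.
	 */
	uint32_t hdr_words = hdr_len_dwords * 2 + AUTH_HEADER_LEN_WORDS;

	/* The Shadow RAM copy starts at the rounded-up header length. */
	printf("SR copy starts at word %u\n",
	       (unsigned int)round_up_u32(hdr_words, HDR_LEN_ROUNDUP));
	return 0;
}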
+/**
+ * ixgbe_get_nvm_srev - Read the security revision from the NVM CSS header
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @srev: storage for security revision
+ *
+ * Read the security revision out of the CSS header of the active NVM module
+ * bank.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_nvm_srev(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank, u32 *srev)
+{
+ u16 srev_l, srev_h;
+ int err;
+
+ err = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_SREV_L, &srev_l);
+ if (err)
+ return err;
+
+ err = ixgbe_read_nvm_module(hw, bank, IXGBE_NVM_CSS_SREV_H, &srev_h);
+ if (err)
+ return err;
+
+ *srev = (srev_h << 16) | srev_l;
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_orom_civd_data - Get the combo version information from Option ROM
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash module
+ * @civd: storage for the Option ROM CIVD data.
+ *
+ * Searches through the Option ROM flash contents to locate the CIVD data for
+ * the image.
+ *
+ * Return: the exit code of the operation.
+ */
+static int
+ixgbe_get_orom_civd_data(struct ixgbe_hw *hw, enum ixgbe_bank_select bank,
+ struct ixgbe_orom_civd_info *civd)
+{
+ struct ixgbe_orom_civd_info tmp;
+ u32 offset;
+ int err;
+
+ /* The CIVD section is located in the Option ROM aligned to 512 bytes.
+ * The first 4 bytes must contain the ASCII characters "$CIV".
+ * A simple modulo 256 sum of all of the bytes of the structure must
+ * equal 0.
+ */
+ for (offset = 0; (offset + SZ_512) <= hw->flash.banks.orom_size;
+ offset += SZ_512) {
+ u8 sum = 0;
+ u32 i;
+
+ err = ixgbe_read_flash_module(hw, bank,
+ IXGBE_E610_SR_1ST_OROM_BANK_PTR,
+ offset,
+ (u8 *)&tmp, sizeof(tmp));
+ if (err)
+ return err;
+
+ /* Skip forward until we find a matching signature */
+ if (memcmp(IXGBE_OROM_CIV_SIGNATURE, tmp.signature,
+ sizeof(tmp.signature)))
+ continue;
+
+ /* Verify that the simple checksum is zero */
+ for (i = 0; i < sizeof(tmp); i++)
+ sum += ((u8 *)&tmp)[i];
+
+ if (sum)
+ return -EDOM;
+
+ *civd = tmp;
+ return 0;
+ }
+
+ return -ENODATA;
+}
+
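The signature-plus-checksum scan lends itself to a compact model. Below is a hypothetical, simplified CIVD record (the real field layout is struct ixgbe_orom_civd_info in the driver headers) with the modulo-256 checksum the loop above verifies:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct civd {
	uint8_t signature[4];	/* must be "$CIV" */
	uint8_t combo_ver[4];
	uint8_t checksum;	/* makes the byte sum 0 modulo 256 */
};

static int civd_valid(const struct civd *c)
{
	const uint8_t *p = (const uint8_t *)c;
	uint8_t sum = 0;

	if (memcmp(c->signature, "$CIV", sizeof(c->signature)))
		return 0;
	for (size_t i = 0; i < sizeof(*c); i++)
		sum += p[i];
	return sum == 0;	/* simple modulo-256 checksum */
}

int main(void)
{
	struct civd c = { .signature = { '$', 'C', 'I', 'V' } };
	const uint8_t *p = (const uint8_t *)&c;
	uint8_t sum = 0;

	for (size_t i = 0; i < sizeof(c); i++)
		sum += p[i];
	c.checksum = (uint8_t)(0x100 - sum);	/* fix up so it validates */

	printf("CIVD valid: %d\n", civd_valid(&c));
	return 0;
}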
+/**
+ * ixgbe_get_orom_srev - Read the security revision from the OROM CSS header
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from active or inactive flash module
+ * @srev: storage for security revision
+ *
+ * Read the security revision out of the CSS header of the active OROM module
+ * bank.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_orom_srev(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ u32 *srev)
+{
+ u32 orom_size_word = hw->flash.banks.orom_size / 2;
+ u32 css_start, hdr_len;
+ u16 srev_l, srev_h;
+ int err;
+
+ err = ixgbe_get_nvm_css_hdr_len(hw, bank, &hdr_len);
+ if (err)
+ return err;
+
+ if (orom_size_word < hdr_len)
+ return -EINVAL;
+
+ /* Calculate how far into the Option ROM the CSS header starts. Note
+ * that ixgbe_read_orom_module takes a word offset.
+ */
+ css_start = orom_size_word - hdr_len;
+ err = ixgbe_read_orom_module(hw, bank,
+ css_start + IXGBE_NVM_CSS_SREV_L,
+ &srev_l);
+ if (err)
+ return err;
+
+ err = ixgbe_read_orom_module(hw, bank,
+ css_start + IXGBE_NVM_CSS_SREV_H,
+ &srev_h);
+ if (err)
+ return err;
+
+ *srev = srev_h << 16 | srev_l;
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_orom_ver_info - Read Option ROM version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash module
+ * @orom: pointer to Option ROM info structure
+ *
+ * Read Option ROM version and security revision from the Option ROM flash
+ * section.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_orom_ver_info(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ struct ixgbe_orom_info *orom)
+{
+ struct ixgbe_orom_civd_info civd;
+ u32 combo_ver;
+ int err;
+
+ err = ixgbe_get_orom_civd_data(hw, bank, &civd);
+ if (err)
+ return err;
+
+ combo_ver = le32_to_cpu(civd.combo_ver);
+
+ orom->major = (u8)FIELD_GET(IXGBE_OROM_VER_MASK, combo_ver);
+ orom->patch = (u8)FIELD_GET(IXGBE_OROM_VER_PATCH_MASK, combo_ver);
+ orom->build = (u16)FIELD_GET(IXGBE_OROM_VER_BUILD_MASK, combo_ver);
+
+ return ixgbe_get_orom_srev(hw, bank, &orom->srev);
+}
+
+/**
+ * ixgbe_get_inactive_orom_ver - Read Option ROM version from the inactive bank
+ * @hw: pointer to the HW structure
+ * @orom: storage for Option ROM version information
+ *
+ * Read the Option ROM version and security revision data for the inactive
+ * section of flash. Used to access version data for a pending update that has
+ * not yet been activated.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_get_inactive_orom_ver(struct ixgbe_hw *hw,
+ struct ixgbe_orom_info *orom)
+{
+ return ixgbe_get_orom_ver_info(hw, IXGBE_INACTIVE_FLASH_BANK, orom);
+}
+
+/**
+ * ixgbe_get_nvm_ver_info - Read NVM version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @nvm: pointer to NVM info structure
+ *
+ * Read the NVM EETRACK ID and map version of the main NVM image bank, filling
+ * in the nvm info structure.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_nvm_ver_info(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ struct ixgbe_nvm_info *nvm)
+{
+ u16 eetrack_lo, eetrack_hi, ver;
+ int err;
+
+ err = ixgbe_read_nvm_sr_copy(hw, bank,
+ IXGBE_E610_SR_NVM_DEV_STARTER_VER, &ver);
+ if (err)
+ return err;
+
+ nvm->major = FIELD_GET(IXGBE_E610_NVM_VER_HI_MASK, ver);
+ nvm->minor = FIELD_GET(IXGBE_E610_NVM_VER_LO_MASK, ver);
+
+ err = ixgbe_read_nvm_sr_copy(hw, bank, IXGBE_E610_SR_NVM_EETRACK_LO,
+ &eetrack_lo);
+ if (err)
+ return err;
+
+ err = ixgbe_read_nvm_sr_copy(hw, bank, IXGBE_E610_SR_NVM_EETRACK_HI,
+ &eetrack_hi);
+ if (err)
+ return err;
+
+ nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
+
+ ixgbe_get_nvm_srev(hw, bank, &nvm->srev);
+
+ return 0;
+}
+
+/**
+ * ixgbe_get_inactive_nvm_ver - Read NVM version from the inactive bank
+ * @hw: pointer to the HW structure
+ * @nvm: storage for NVM version information
+ *
+ * Read the NVM EETRACK ID, Map version, and security revision of the
+ * inactive NVM bank. Used to access version data for a pending update that
+ * has not yet been activated.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_get_inactive_nvm_ver(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm)
+{
+ return ixgbe_get_nvm_ver_info(hw, IXGBE_INACTIVE_FLASH_BANK, nvm);
+}
+
+/**
+ * ixgbe_get_active_nvm_ver - Read NVM version from the active bank
+ * @hw: pointer to the HW structure
+ * @nvm: storage for NVM version information
+ *
+ * Read the NVM EETRACK ID, Map version, and security revision of the
+ * active NVM bank.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_active_nvm_ver(struct ixgbe_hw *hw,
+ struct ixgbe_nvm_info *nvm)
+{
+ return ixgbe_get_nvm_ver_info(hw, IXGBE_ACTIVE_FLASH_BANK, nvm);
+}
+
+/**
+ * ixgbe_get_netlist_info - Read the netlist version information
+ * @hw: pointer to the HW struct
+ * @bank: whether to read from the active or inactive flash bank
+ * @netlist: pointer to netlist version info structure
+ *
+ * Get the netlist version information from the requested bank. Reads the Link
+ * Topology section to find the Netlist ID block and extract the relevant
+ * information into the netlist version structure.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_netlist_info(struct ixgbe_hw *hw,
+ enum ixgbe_bank_select bank,
+ struct ixgbe_netlist_info *netlist)
+{
+ u16 module_id, length, node_count, i;
+ u16 *id_blk;
+ int err;
+
+ err = ixgbe_read_netlist_module(hw, bank, IXGBE_NETLIST_TYPE_OFFSET,
+ &module_id);
+ if (err)
+ return err;
+
+ if (module_id != IXGBE_NETLIST_LINK_TOPO_MOD_ID)
+ return -EIO;
+
+ err = ixgbe_read_netlist_module(hw, bank, IXGBE_LINK_TOPO_MODULE_LEN,
+ &length);
+ if (err)
+ return err;
+
+ /* Sanity check that we have at least enough words to store the
+ * netlist ID block.
+ */
+ if (length < IXGBE_NETLIST_ID_BLK_SIZE)
+ return -EIO;
+
+ err = ixgbe_read_netlist_module(hw, bank, IXGBE_LINK_TOPO_NODE_COUNT,
+ &node_count);
+ if (err)
+ return err;
+
+ node_count &= IXGBE_LINK_TOPO_NODE_COUNT_M;
+
+ id_blk = kcalloc(IXGBE_NETLIST_ID_BLK_SIZE, sizeof(*id_blk), GFP_KERNEL);
+ if (!id_blk)
+ return -ENOMEM;
+
+ /* Read out the entire Netlist ID Block at once. */
+ err = ixgbe_read_flash_module(hw, bank, IXGBE_E610_SR_NETLIST_BANK_PTR,
+ IXGBE_NETLIST_ID_BLK_OFFSET(node_count) *
+ sizeof(*id_blk), (u8 *)id_blk,
+ IXGBE_NETLIST_ID_BLK_SIZE *
+ sizeof(*id_blk));
+ if (err)
+ goto free_id_blk;
+
+ for (i = 0; i < IXGBE_NETLIST_ID_BLK_SIZE; i++)
+ id_blk[i] = le16_to_cpu(((__le16 *)id_blk)[i]);
+
+ netlist->major = id_blk[IXGBE_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16 |
+ id_blk[IXGBE_NETLIST_ID_BLK_MAJOR_VER_LOW];
+ netlist->minor = id_blk[IXGBE_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16 |
+ id_blk[IXGBE_NETLIST_ID_BLK_MINOR_VER_LOW];
+ netlist->type = id_blk[IXGBE_NETLIST_ID_BLK_TYPE_HIGH] << 16 |
+ id_blk[IXGBE_NETLIST_ID_BLK_TYPE_LOW];
+ netlist->rev = id_blk[IXGBE_NETLIST_ID_BLK_REV_HIGH] << 16 |
+ id_blk[IXGBE_NETLIST_ID_BLK_REV_LOW];
+ netlist->cust_ver = id_blk[IXGBE_NETLIST_ID_BLK_CUST_VER];
+ /* Read the left most 4 bytes of SHA */
+ netlist->hash = id_blk[IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(15)] << 16 |
+ id_blk[IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(14)];
+
+free_id_blk:
+ kfree(id_blk);
+ return err;
+}
+
+/**
+ * ixgbe_get_inactive_netlist_ver - Read netlist version from the inactive bank
+ * @hw: pointer to the HW struct
+ * @netlist: pointer to netlist version info structure
+ *
+ * Read the netlist version data from the inactive netlist bank. Used to
+ * extract version data of a pending flash update in order to display the
+ * version data.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_get_inactive_netlist_ver(struct ixgbe_hw *hw,
+ struct ixgbe_netlist_info *netlist)
+{
+ return ixgbe_get_netlist_info(hw, IXGBE_INACTIVE_FLASH_BANK, netlist);
+}
+
+/**
+ * ixgbe_get_flash_data - get flash data
+ * @hw: pointer to the HW struct
+ *
+ * Read and populate flash data such as Shadow RAM size,
+ * max_timeout and blank_nvm_mode
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_get_flash_data(struct ixgbe_hw *hw)
+{
+ struct ixgbe_flash_info *flash = &hw->flash;
+ u32 fla, gens_stat;
+ u8 sr_size;
+ int err;
+
+ /* The SR size is stored regardless of the NVM programming mode
+ * as the blank mode may be used in the factory line.
+ */
+ gens_stat = IXGBE_READ_REG(hw, GLNVM_GENS);
+ sr_size = FIELD_GET(GLNVM_GENS_SR_SIZE_M, gens_stat);
+
+ /* Switching to words (sr_size contains power of 2) */
+ flash->sr_words = BIT(sr_size) * (SZ_1K / sizeof(u16));
+
+ /* Check if we are in the normal or blank NVM programming mode */
+ fla = IXGBE_READ_REG(hw, IXGBE_GLNVM_FLA);
+ if (fla & IXGBE_GLNVM_FLA_LOCKED_M) {
+ flash->blank_nvm_mode = false;
+ } else {
+ flash->blank_nvm_mode = true;
+ return -EIO;
+ }
+
+ err = ixgbe_discover_flash_size(hw);
+ if (err)
+ return err;
+
+ err = ixgbe_determine_active_flash_banks(hw);
+ if (err)
+ return err;
+
+ err = ixgbe_get_nvm_ver_info(hw, IXGBE_ACTIVE_FLASH_BANK,
+ &flash->nvm);
+ if (err)
+ return err;
+
+ err = ixgbe_get_orom_ver_info(hw, IXGBE_ACTIVE_FLASH_BANK,
+ &flash->orom);
+ if (err)
+ return err;
+
+ err = ixgbe_get_netlist_info(hw, IXGBE_ACTIVE_FLASH_BANK,
+ &flash->netlist);
+ return err;
+}
+
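A quick model of the Shadow RAM size decoding above: the register field stores a power of two in KB, which the driver converts to 16-bit words.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* GLNVM_GENS reports the Shadow RAM size as a power of two in KB. */
	uint8_t sr_size = 4;	/* hypothetical register field value */

	/* BIT(sr_size) KB, converted to 16-bit words */
	uint32_t sr_words = (1u << sr_size) * (1024 / sizeof(uint16_t));

	printf("Shadow RAM: %u words (%u KB)\n",
	       (unsigned int)sr_words, 1u << sr_size);
	return 0;
}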
+/**
+ * ixgbe_aci_nvm_update_empr - update NVM using EMPR
+ * @hw: pointer to the HW struct
+ *
+ * Force EMP reset using ACI command (0x0709). This command allows SW to
+ * request an EMPR to activate new FW.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_aci_nvm_update_empr(struct ixgbe_hw *hw)
+{
+ struct ixgbe_aci_desc desc;
+
+ ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_update_empr);
+
+ return ixgbe_aci_send_cmd(hw, &desc, NULL, 0);
+}
+
+/**
+ * ixgbe_nvm_set_pkg_data - NVM set package data
+ * @hw: pointer to the HW struct
+ * @del_pkg_data_flag: if set, the package data currently stored by FW
+ * is deleted; in that case the buffer must have length 0
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ *
+ * Set package data using ACI command (0x070A).
+ * This command is equivalent to the reception of
+ * a PLDM FW Update GetPackageData cmd. This command should be sent
+ * as part of the NVM update as the first cmd in the flow.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_nvm_set_pkg_data(struct ixgbe_hw *hw, bool del_pkg_data_flag,
+ u8 *data, u16 length)
+{
+ struct ixgbe_aci_cmd_nvm_pkg_data *cmd;
+ struct ixgbe_aci_desc desc;
+
+ if (length != 0 && !data)
+ return -EINVAL;
+
+ cmd = &desc.params.pkg_data;
+
+ ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_pkg_data);
+ desc.flags |= cpu_to_le16(IXGBE_ACI_FLAG_RD);
+
+ if (del_pkg_data_flag)
+ cmd->cmd_flags |= IXGBE_ACI_NVM_PKG_DELETE;
+
+ return ixgbe_aci_send_cmd(hw, &desc, data, length);
+}
+
+/**
+ * ixgbe_nvm_pass_component_tbl - NVM pass component table
+ * @hw: pointer to the HW struct
+ * @data: pointer to buffer
+ * @length: length of the buffer
+ * @transfer_flag: parameter determining the stage of the update
+ * @comp_response: pointer to the response from the 0x070B ACI command
+ * @comp_response_code: pointer to the response code from the 0x070B ACI command
+ *
+ * Pass component table using ACI command (0x070B). This command is equivalent
+ * to the reception of a PLDM FW Update PassComponentTable cmd.
+ * This command should be sent once per component. It can be only sent after
+ * Set Package Data cmd and before actual update. FW will assume these
+ * commands are going to be sent until the TransferFlag is set to End or
+ * StartAndEnd.
+ *
+ * Return: the exit code of the operation.
+ */
+int ixgbe_nvm_pass_component_tbl(struct ixgbe_hw *hw, u8 *data, u16 length,
+ u8 transfer_flag, u8 *comp_response,
+ u8 *comp_response_code)
+{
+ struct ixgbe_aci_cmd_nvm_pass_comp_tbl *cmd;
+ struct ixgbe_aci_desc desc;
+ int err;
+
+ if (!data || !comp_response || !comp_response_code)
+ return -EINVAL;
+
+ cmd = &desc.params.pass_comp_tbl;
+
+ ixgbe_fill_dflt_direct_cmd_desc(&desc,
+ ixgbe_aci_opc_nvm_pass_component_tbl);
+ desc.flags |= cpu_to_le16(IXGBE_ACI_FLAG_RD);
+
+ cmd->transfer_flag = transfer_flag;
+ err = ixgbe_aci_send_cmd(hw, &desc, data, length);
+ if (!err) {
+ *comp_response = cmd->component_response;
+ *comp_response_code = cmd->component_response_code;
+ }
+
+ return err;
+}
+
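Taken together, the ACI wrappers above form the NVM update pipeline that ixgbe_fw_update.c (added later in this patch) drives. A condensed, hypothetical caller using only functions declared in ixgbe_e610.h; the real flow also acquires the NVM resource, splits the image into chunks, and reports progress through devlink:

/* Hypothetical flow sketch; error reporting, chunking, and the NVM
 * resource acquire/release are omitted for brevity.
 */
static int example_nvm_update(struct ixgbe_hw *hw, u16 module,
			      u8 *pkg, u16 pkg_len,
			      u8 *comp_tbl, u16 comp_tbl_len,
			      u8 *image, u16 image_len, u8 transfer_flag)
{
	u8 comp_resp, comp_resp_code, resp_flags;
	int err;

	/* 0x070A: must be the first command in the flow */
	err = ixgbe_nvm_set_pkg_data(hw, false, pkg, pkg_len);
	if (err)
		return err;

	/* 0x070B: once per component, before the actual update */
	err = ixgbe_nvm_pass_component_tbl(hw, comp_tbl, comp_tbl_len,
					   transfer_flag, &comp_resp,
					   &comp_resp_code);
	if (err)
		return err;

	/* 0x0702 + 0x0703: erase the module, then write the new image */
	err = ixgbe_aci_erase_nvm(hw, module);
	if (err)
		return err;

	err = ixgbe_aci_update_nvm(hw, module, 0, image_len, image,
				   true, 0);
	if (err)
		return err;

	/* 0x0707: switch bank validity bits to activate the update */
	return ixgbe_nvm_write_activate(hw, 0, &resp_flags);
}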
+/**
* ixgbe_read_sr_word_aci - Reads Shadow RAM via ACI
* @hw: pointer to the HW structure
* @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
@@ -2485,7 +3665,7 @@ int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val)
if (err)
return err;
- err = ixgbe_read_sr_word_aci(hw, E610_SR_SW_CHECKSUM_WORD,
+ err = ixgbe_read_sr_word_aci(hw, IXGBE_E610_SR_SW_CHECKSUM_WORD,
&tmp_checksum);
ixgbe_release_nvm(hw);
@@ -2580,9 +3760,129 @@ reset_hw_out:
return err;
}
+/**
+ * ixgbe_get_pfa_module_tlv - Read sub module TLV from NVM PFA
+ * @hw: pointer to hardware structure
+ * @module_tlv: pointer to module TLV to return
+ * @module_tlv_len: pointer to module TLV length to return
+ * @module_type: module type requested
+ *
+ * Find the requested sub module TLV type from the Preserved Field
+ * Area (PFA) and returns the TLV pointer and length. The caller can
+ * use these to read the variable length TLV value.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_get_pfa_module_tlv(struct ixgbe_hw *hw, u16 *module_tlv,
+ u16 *module_tlv_len, u16 module_type)
+{
+ u16 pfa_len, pfa_ptr, pfa_end_ptr;
+ u16 next_tlv;
+ int err;
+
+ err = ixgbe_read_ee_aci_e610(hw, IXGBE_E610_SR_PFA_PTR, &pfa_ptr);
+ if (err)
+ return err;
+
+ err = ixgbe_read_ee_aci_e610(hw, pfa_ptr, &pfa_len);
+ if (err)
+ return err;
+
+ /* Starting with first TLV after PFA length, iterate through the list
+ * of TLVs to find the requested one.
+ */
+ next_tlv = pfa_ptr + 1;
+ pfa_end_ptr = pfa_ptr + pfa_len;
+ while (next_tlv < pfa_end_ptr) {
+ u16 tlv_sub_module_type, tlv_len;
+
+ /* Read TLV type */
+ err = ixgbe_read_ee_aci_e610(hw, next_tlv,
+ &tlv_sub_module_type);
+ if (err)
+ break;
+
+ /* Read TLV length */
+ err = ixgbe_read_ee_aci_e610(hw, next_tlv + 1, &tlv_len);
+ if (err)
+ break;
+
+ if (tlv_sub_module_type == module_type) {
+ if (tlv_len) {
+ *module_tlv = next_tlv;
+ *module_tlv_len = tlv_len;
+ return 0;
+ }
+ return -EIO;
+ }
+ /* Check next TLV, i.e. current TLV pointer + length + 2 words
+ * (for current TLV's type and length).
+ */
+ next_tlv = next_tlv + tlv_len + 2;
+ }
+ /* Module does not exist */
+ return -ENODATA;
+}
+
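A toy model of the TLV walk, mirroring the stride arithmetic above (type word + length word + data words) over a made-up PFA image:

#include <stdint.h>
#include <stdio.h>

/* Word 0 holds the area length, followed by (type, length, data...)
 * records, all in 16-bit words -- the layout the walker iterates over.
 */
static const uint16_t pfa[] = {
	7,				/* PFA length in words */
	0x0010, 2, 0xAAAA, 0xBBBB,	/* TLV type 0x0010, 2 data words */
	0x0016, 1, 0xCCCC,		/* TLV type 0x0016, 1 data word */
};

static int find_tlv(uint16_t type, uint16_t *idx, uint16_t *len)
{
	uint16_t next = 1;		/* first TLV follows the length word */
	uint16_t end = pfa[0];		/* pfa_ptr (0 here) + pfa_len */

	while (next < end) {
		if (pfa[next] == type) {
			*idx = next;
			*len = pfa[next + 1];
			return 0;
		}
		/* stride: type word + length word + data words */
		next += 2 + pfa[next + 1];
	}
	return -1;			/* module does not exist */
}

int main(void)
{
	uint16_t idx, len;

	if (!find_tlv(0x0016, &idx, &len))
		printf("TLV 0x0016 at word %u, %u data words\n", idx, len);
	return 0;
}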
+/**
+ * ixgbe_read_pba_string_e610 - Read PBA string from NVM
+ * @hw: pointer to hardware structure
+ * @pba_num: stores the part number string from the NVM
+ * @pba_num_size: part number string buffer length
+ *
+ * Read the part number string from the NVM.
+ *
+ * Return: the exit code of the operation.
+ */
+static int ixgbe_read_pba_string_e610(struct ixgbe_hw *hw, u8 *pba_num,
+ u32 pba_num_size)
+{
+ u16 pba_tlv, pba_tlv_len;
+ u16 pba_word, pba_size;
+ int err;
+
+ *pba_num = '\0';
+
+ err = ixgbe_get_pfa_module_tlv(hw, &pba_tlv, &pba_tlv_len,
+ IXGBE_E610_SR_PBA_BLOCK_PTR);
+ if (err)
+ return err;
+
+ /* pba_size is the next word */
+ err = ixgbe_read_ee_aci_e610(hw, (pba_tlv + 2), &pba_size);
+ if (err)
+ return err;
+
+ if (pba_tlv_len < pba_size)
+ return -EINVAL;
+
+ /* Subtract one to get PBA word count (PBA Size word is included in
+ * total size).
+ */
+ pba_size--;
+
+ if (pba_num_size < (((u32)pba_size * 2) + 1))
+ return -EINVAL;
+
+ for (u16 i = 0; i < pba_size; i++) {
+ err = ixgbe_read_ee_aci_e610(hw, (pba_tlv + 2 + 1) + i,
+ &pba_word);
+ if (err)
+ return err;
+
+ pba_num[(i * 2)] = FIELD_GET(IXGBE_E610_SR_PBA_BLOCK_MASK,
+ pba_word);
+ pba_num[(i * 2) + 1] = pba_word & 0xFF;
+ }
+
+ pba_num[(pba_size * 2)] = '\0';
+
+ return err;
+}
+
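The unpacking at the end of the PBA read is plain byte splitting; a standalone model with a hypothetical three-word PBA block, high byte first as the loop above assumes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical PBA block: each word packs two ASCII characters */
	const uint16_t words[3] = { 0x4B31, 0x3233, 0x3435 };	/* "K12345" */
	char pba[2 * 3 + 1];

	for (unsigned int i = 0; i < 3; i++) {
		pba[i * 2]     = (char)(words[i] >> 8);
		pba[i * 2 + 1] = (char)(words[i] & 0xFF);
	}
	pba[6] = '\0';

	printf("PBA: %s\n", pba);	/* prints "K12345" */
	return 0;
}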
static const struct ixgbe_mac_operations mac_ops_e610 = {
.init_hw = ixgbe_init_hw_generic,
- .start_hw = ixgbe_start_hw_X540,
+ .start_hw = ixgbe_start_hw_e610,
.clear_hw_cntrs = ixgbe_clear_hw_cntrs_generic,
.enable_rx_dma = ixgbe_enable_rx_dma_generic,
.get_mac_addr = ixgbe_get_mac_addr_generic,
@@ -2621,8 +3921,12 @@ static const struct ixgbe_mac_operations mac_ops_e610 = {
.led_off = ixgbe_led_off_generic,
.init_led_link_act = ixgbe_init_led_link_act_generic,
.reset_hw = ixgbe_reset_hw_e610,
+ .get_fw_ver = ixgbe_aci_get_fw_ver,
.get_media_type = ixgbe_get_media_type_e610,
.setup_link = ixgbe_setup_link_e610,
+ .fw_recovery_mode = ixgbe_fw_recovery_mode_e610,
+ .fw_rollback_mode = ixgbe_fw_rollback_mode_e610,
+ .get_nvm_ver = ixgbe_get_active_nvm_ver,
.get_link_capabilities = ixgbe_get_link_capabilities_e610,
.get_bus_info = ixgbe_get_bus_info_generic,
.acquire_swfw_sync = ixgbe_acquire_swfw_sync_X540,
@@ -2647,6 +3951,8 @@ static const struct ixgbe_eeprom_operations eeprom_ops_e610 = {
.read = ixgbe_read_ee_aci_e610,
.read_buffer = ixgbe_read_ee_aci_buffer_e610,
.validate_checksum = ixgbe_validate_eeprom_checksum_e610,
+ .read_pba_string = ixgbe_read_pba_string_e610,
+ .init_params = ixgbe_init_eeprom_params_e610,
};
const struct ixgbe_info ixgbe_e610_info = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h
index ba8c06b73810..30bc1f1b2549 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h
@@ -67,6 +67,11 @@ int ixgbe_aci_read_nvm(struct ixgbe_hw *hw, u16 module_typeid, u32 offset,
u16 length, void *data, bool last_command,
bool read_shadow_ram);
int ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw);
+int ixgbe_get_inactive_orom_ver(struct ixgbe_hw *hw,
+ struct ixgbe_orom_info *orom);
+int ixgbe_get_inactive_nvm_ver(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm);
+int ixgbe_get_inactive_netlist_ver(struct ixgbe_hw *hw,
+ struct ixgbe_netlist_info *netlist);
int ixgbe_read_sr_word_aci(struct ixgbe_hw *hw, u16 offset, u16 *data);
int ixgbe_read_flat_nvm(struct ixgbe_hw *hw, u32 offset, u32 *length,
u8 *data, bool read_shadow_ram);
@@ -77,5 +82,18 @@ int ixgbe_read_ee_aci_buffer_e610(struct ixgbe_hw *hw, u16 offset,
u16 words, u16 *data);
int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val);
int ixgbe_reset_hw_e610(struct ixgbe_hw *hw);
+int ixgbe_get_flash_data(struct ixgbe_hw *hw);
+int ixgbe_aci_nvm_update_empr(struct ixgbe_hw *hw);
+int ixgbe_nvm_set_pkg_data(struct ixgbe_hw *hw, bool del_pkg_data_flag,
+ u8 *data, u16 length);
+int ixgbe_nvm_pass_component_tbl(struct ixgbe_hw *hw, u8 *data, u16 length,
+ u8 transfer_flag, u8 *comp_response,
+ u8 *comp_response_code);
+int ixgbe_aci_erase_nvm(struct ixgbe_hw *hw, u16 module_typeid);
+int ixgbe_aci_update_nvm(struct ixgbe_hw *hw, u16 module_typeid,
+ u32 offset, u16 length, void *data,
+ bool last_command, u8 command_flags);
+int ixgbe_nvm_write_activate(struct ixgbe_hw *hw, u16 cmd_flags,
+ u8 *response_flags);
#endif /* _IXGBE_E610_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index f03925c1f521..c86103eccc8a 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -213,7 +213,7 @@ static void ixgbe_set_advertising_10gtypes(struct ixgbe_hw *hw,
static int ixgbe_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
ixgbe_link_speed supported_link;
bool autoneg = false;
@@ -458,7 +458,7 @@ static int ixgbe_get_link_ksettings(struct net_device *netdev,
static int ixgbe_set_link_ksettings(struct net_device *netdev,
const struct ethtool_link_ksettings *cmd)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u32 advertised, old;
int err = 0;
@@ -535,7 +535,7 @@ static int ixgbe_set_link_ksettings(struct net_device *netdev,
static void ixgbe_get_pause_stats(struct net_device *netdev,
struct ethtool_pause_stats *stats)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw_stats *hwstats = &adapter->stats;
stats->tx_pause_frames = hwstats->lxontxc + hwstats->lxofftxc;
@@ -545,7 +545,7 @@ static void ixgbe_get_pause_stats(struct net_device *netdev,
static void ixgbe_get_pauseparam(struct net_device *netdev,
struct ethtool_pauseparam *pause)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
if (ixgbe_device_supports_autoneg_fc(hw) &&
@@ -567,7 +567,7 @@ static void ixgbe_get_pauseparam(struct net_device *netdev,
static int ixgbe_set_pauseparam(struct net_device *netdev,
struct ethtool_pauseparam *pause)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
struct ixgbe_fc_info fc = hw->fc;
@@ -606,13 +606,13 @@ static int ixgbe_set_pauseparam(struct net_device *netdev,
static u32 ixgbe_get_msglevel(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
return adapter->msg_enable;
}
static void ixgbe_set_msglevel(struct net_device *netdev, u32 data)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
adapter->msg_enable = data;
}
@@ -627,7 +627,7 @@ static int ixgbe_get_regs_len(struct net_device *netdev)
static void ixgbe_get_regs(struct net_device *netdev,
struct ethtool_regs *regs, void *p)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u32 *regs_buff = p;
u8 i;
@@ -994,14 +994,14 @@ static void ixgbe_get_regs(struct net_device *netdev,
static int ixgbe_get_eeprom_len(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
return adapter->hw.eeprom.word_size * 2;
}
static int ixgbe_get_eeprom(struct net_device *netdev,
struct ethtool_eeprom *eeprom, u8 *bytes)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u16 *eeprom_buff;
int first_word, last_word, eeprom_len;
@@ -1037,7 +1037,7 @@ static int ixgbe_get_eeprom(struct net_device *netdev,
static int ixgbe_set_eeprom(struct net_device *netdev,
struct ethtool_eeprom *eeprom, u8 *bytes)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u16 *eeprom_buff;
void *ptr;
@@ -1104,10 +1104,22 @@ err:
return ret_val;
}
+void ixgbe_refresh_fw_version(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ ixgbe_get_flash_data(hw);
+ ixgbe_set_fw_version_e610(adapter);
+}
+
static void ixgbe_get_drvinfo(struct net_device *netdev,
struct ethtool_drvinfo *drvinfo)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
+
+ /* Need to refresh info for E610 in case FW reloads at runtime */
+ if (adapter->hw.mac.type == ixgbe_mac_e610)
+ ixgbe_refresh_fw_version(adapter);
strscpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver));
@@ -1161,7 +1173,7 @@ static void ixgbe_get_ringparam(struct net_device *netdev,
struct kernel_ethtool_ringparam *kernel_ring,
struct netlink_ext_ack *extack)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_ring *tx_ring = adapter->tx_ring[0];
struct ixgbe_ring *rx_ring = adapter->rx_ring[0];
@@ -1176,7 +1188,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
struct kernel_ethtool_ringparam *kernel_ring,
struct netlink_ext_ack *extack)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_ring *temp_ring;
int i, j, err = 0;
u32 new_rx_count, new_tx_count;
@@ -1336,7 +1348,7 @@ static int ixgbe_get_sset_count(struct net_device *netdev, int sset)
static void ixgbe_get_ethtool_stats(struct net_device *netdev,
struct ethtool_stats *stats, u64 *data)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct rtnl_link_stats64 temp;
const struct rtnl_link_stats64 *net_stats;
unsigned int start;
@@ -1710,7 +1722,7 @@ static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data)
static irqreturn_t ixgbe_test_intr(int irq, void *data)
{
struct net_device *netdev = (struct net_device *) data;
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR);
@@ -2183,7 +2195,7 @@ out:
static void ixgbe_diag_test(struct net_device *netdev,
struct ethtool_test *eth_test, u64 *data)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
bool if_running = netif_running(netdev);
if (ixgbe_removed(adapter->hw.hw_addr)) {
@@ -2306,7 +2318,7 @@ static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter,
static void ixgbe_get_wol(struct net_device *netdev,
struct ethtool_wolinfo *wol)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
wol->supported = WAKE_UCAST | WAKE_MCAST |
WAKE_BCAST | WAKE_MAGIC;
@@ -2328,7 +2340,7 @@ static void ixgbe_get_wol(struct net_device *netdev,
static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE |
WAKE_FILTER))
@@ -2355,7 +2367,7 @@ static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
static int ixgbe_nway_reset(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (netif_running(netdev))
ixgbe_reinit_locked(adapter);
@@ -2366,7 +2378,7 @@ static int ixgbe_nway_reset(struct net_device *netdev)
static int ixgbe_set_phys_id(struct net_device *netdev,
enum ethtool_phys_id_state state)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
if (!hw->mac.ops.led_on || !hw->mac.ops.led_off)
@@ -2399,7 +2411,7 @@ static int ixgbe_get_coalesce(struct net_device *netdev,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
/* only valid if in constant ITR mode */
if (adapter->rx_itr_setting <= 1)
@@ -2455,7 +2467,7 @@ static int ixgbe_set_coalesce(struct net_device *netdev,
struct kernel_ethtool_coalesce *kernel_coal,
struct netlink_ext_ack *extack)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_q_vector *q_vector;
int i;
u16 tx_itr_param, rx_itr_param, tx_itr_prev;
@@ -2681,7 +2693,7 @@ static int ixgbe_rss_indir_tbl_max(struct ixgbe_adapter *adapter)
static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
u32 *rule_locs)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
int ret = -EOPNOTSUPP;
switch (cmd->cmd) {
@@ -3069,7 +3081,7 @@ static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter,
static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
int ret = -EOPNOTSUPP;
switch (cmd->cmd) {
@@ -3096,7 +3108,7 @@ static u32 ixgbe_get_rxfh_key_size(struct net_device *netdev)
static u32 ixgbe_rss_indir_size(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
return ixgbe_rss_indir_tbl_entries(adapter);
}
@@ -3116,7 +3128,7 @@ static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir)
static int ixgbe_get_rxfh(struct net_device *netdev,
struct ethtool_rxfh_param *rxfh)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
rxfh->hfunc = ETH_RSS_HASH_TOP;
@@ -3134,7 +3146,7 @@ static int ixgbe_set_rxfh(struct net_device *netdev,
struct ethtool_rxfh_param *rxfh,
struct netlink_ext_ack *extack)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
int i;
u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter);
@@ -3176,7 +3188,7 @@ static int ixgbe_set_rxfh(struct net_device *netdev,
static int ixgbe_get_ts_info(struct net_device *dev,
struct kernel_ethtool_ts_info *info)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
/* we always support timestamping disabled */
info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
@@ -3252,7 +3264,7 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter)
static void ixgbe_get_channels(struct net_device *dev,
struct ethtool_channels *ch)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
/* report maximum channels */
ch->max_combined = ixgbe_max_channels(adapter);
@@ -3289,7 +3301,7 @@ static void ixgbe_get_channels(struct net_device *dev,
static int ixgbe_set_channels(struct net_device *dev,
struct ethtool_channels *ch)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
unsigned int count = ch->combined_count;
u8 max_rss_indices = ixgbe_max_rss_indices(adapter);
@@ -3327,7 +3339,7 @@ static int ixgbe_set_channels(struct net_device *dev,
static int ixgbe_get_module_info(struct net_device *dev,
struct ethtool_modinfo *modinfo)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_hw *hw = &adapter->hw;
u8 sff8472_rev, addr_mode;
bool page_swap = false;
@@ -3373,7 +3385,7 @@ static int ixgbe_get_module_eeprom(struct net_device *dev,
struct ethtool_eeprom *ee,
u8 *data)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_hw *hw = &adapter->hw;
int status = -EFAULT;
u8 databyte = 0xFF;
@@ -3469,7 +3481,7 @@ ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_keee *edata)
static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
if (!(adapter->flags2 & IXGBE_FLAG2_EEE_CAPABLE))
@@ -3483,7 +3495,7 @@ static int ixgbe_get_eee(struct net_device *netdev, struct ethtool_keee *edata)
static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_keee *edata)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
struct ethtool_keee eee_data;
int ret_val;
@@ -3538,7 +3550,7 @@ static int ixgbe_set_eee(struct net_device *netdev, struct ethtool_keee *edata)
static u32 ixgbe_get_priv_flags(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
u32 priv_flags = 0;
if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY)
@@ -3555,7 +3567,7 @@ static u32 ixgbe_get_priv_flags(struct net_device *netdev)
static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
unsigned int flags2 = adapter->flags2;
unsigned int i;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index 955dced844a9..7dcf6ecd157b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -56,7 +56,7 @@ int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid)
if (xid >= netdev->fcoe_ddp_xid)
return 0;
- adapter = netdev_priv(netdev);
+ adapter = ixgbe_from_netdev(netdev);
fcoe = &adapter->fcoe;
ddp = &fcoe->ddp[xid];
if (!ddp->udl)
@@ -153,7 +153,7 @@ static int ixgbe_fcoe_ddp_setup(struct net_device *netdev, u16 xid,
if (!netdev || !sgl)
return 0;
- adapter = netdev_priv(netdev);
+ adapter = ixgbe_from_netdev(netdev);
if (xid >= netdev->fcoe_ddp_xid) {
e_warn(drv, "xid=0x%x out-of-range\n", xid);
return 0;
@@ -834,7 +834,7 @@ static void ixgbe_fcoe_ddp_disable(struct ixgbe_adapter *adapter)
*/
int ixgbe_fcoe_enable(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_fcoe *fcoe = &adapter->fcoe;
atomic_inc(&fcoe->refcnt);
@@ -881,7 +881,7 @@ int ixgbe_fcoe_enable(struct net_device *netdev)
*/
int ixgbe_fcoe_disable(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (!atomic_dec_and_test(&adapter->fcoe.refcnt))
return -EINVAL;
@@ -927,7 +927,7 @@ int ixgbe_fcoe_disable(struct net_device *netdev)
int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type)
{
u16 prefix = 0xffff;
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_mac_info *mac = &adapter->hw.mac;
switch (type) {
@@ -967,7 +967,7 @@ int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type)
int ixgbe_fcoe_get_hbainfo(struct net_device *netdev,
struct netdev_fcoe_hbainfo *info)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u64 dsn;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.c
new file mode 100644
index 000000000000..49d3b66add7e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.c
@@ -0,0 +1,707 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2025 Intel Corporation. */
+
+#include <linux/crc32.h>
+#include <linux/pldmfw.h>
+#include <linux/uuid.h>
+
+#include "ixgbe.h"
+#include "ixgbe_fw_update.h"
+
+struct ixgbe_fwu_priv {
+ struct pldmfw context;
+
+ struct ixgbe_adapter *adapter;
+ struct netlink_ext_ack *extack;
+
+ /* Track which NVM banks to activate at the end of the update */
+ u8 activate_flags;
+ bool emp_reset_available;
+};
+
+/**
+ * ixgbe_send_package_data - Send record package data to firmware
+ * @context: PLDM fw update structure
+ * @data: pointer to the package data
+ * @length: length of the package data
+ *
+ * Send a copy of the package data associated with the PLDM record matching
+ * this device to the firmware.
+ *
+ * Note that this function sends an ACI command that will fail unless the
+ * NVM resource has been acquired.
+ *
+ * Return: zero on success, or a negative error code on failure.
+ */
+static int ixgbe_send_package_data(struct pldmfw *context,
+ const u8 *data, u16 length)
+{
+ struct ixgbe_fwu_priv *priv = container_of(context,
+ struct ixgbe_fwu_priv,
+ context);
+ struct ixgbe_adapter *adapter = priv->adapter;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u8 *package_data;
+ int err;
+
+ package_data = kmemdup(data, length, GFP_KERNEL);
+ if (!package_data)
+ return -ENOMEM;
+
+ err = ixgbe_nvm_set_pkg_data(hw, false, package_data, length);
+
+ kfree(package_data);
+
+ return err;
+}
+
+/**
+ * ixgbe_check_component_response - Report firmware response to a component
+ * @adapter: device private data structure
+ * @response: indicates whether this component can be updated
+ * @code: code indicating reason for response
+ * @extack: netlink extended ACK structure
+ *
+ * Check whether the firmware indicates that this component can be updated. Report
+ * a suitable error message over the netlink extended ACK if the component
+ * cannot be updated.
+ *
+ * Return: 0 if the component can be updated, or -ECANCELED if the
+ * firmware indicates the component cannot be updated.
+ */
+static int ixgbe_check_component_response(struct ixgbe_adapter *adapter,
+ u8 response, u8 code,
+ struct netlink_ext_ack *extack)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ switch (response) {
+ case IXGBE_ACI_NVM_PASS_COMP_CAN_BE_UPDATED:
+ /* Firmware indicated this update is good to proceed. */
+ return 0;
+ case IXGBE_ACI_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Firmware recommends not updating, as it may result in a downgrade. Continuing anyways");
+ return 0;
+ case IXGBE_ACI_NVM_PASS_COMP_CAN_NOT_BE_UPDATED:
+ NL_SET_ERR_MSG_MOD(extack, "Firmware has rejected updating.");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_PARTIAL_CHECK:
+ if (hw->mac.ops.fw_recovery_mode &&
+ hw->mac.ops.fw_recovery_mode(hw))
+ return 0;
+ break;
+ }
+
+ switch (code) {
+ case IXGBE_ACI_NVM_PASS_COMP_STAMP_IDENTICAL_CODE:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Component comparison stamp is identical to running image");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_STAMP_LOWER:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Component comparison stamp is lower than running image");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_INVALID_STAMP_CODE:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Component comparison stamp is invalid");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_CONFLICT_CODE:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Component table conflict occurred");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE:
+ NL_SET_ERR_MSG_MOD(extack, "Component pre-requisites not met");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_NOT_SUPPORTED_CODE:
+ NL_SET_ERR_MSG_MOD(extack, "Component not supported");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE:
+ NL_SET_ERR_MSG_MOD(extack, "Component cannot be downgraded");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE:
+ NL_SET_ERR_MSG_MOD(extack, "Incomplete component image");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Component version is identical to running image");
+ break;
+ case IXGBE_ACI_NVM_PASS_COMP_VER_STR_LOWER_CODE:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Component version is lower than the running image");
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Received unexpected response code from firmware");
+ break;
+ }
+
+ return -ECANCELED;
+}
+
+/**
+ * ixgbe_send_component_table - Send PLDM component table to firmware
+ * @context: PLDM fw update structure
+ * @component: the component to process
+ * @transfer_flag: relative transfer order of this component
+ *
+ * Read relevant data from the component and forward it to the device
+ * firmware. Check the response to determine if the firmware indicates that
+ * the update can proceed.
+ *
+ * This function sends ACI commands related to the NVM, and assumes that
+ * the NVM resource has been acquired.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+static int ixgbe_send_component_table(struct pldmfw *context,
+ struct pldmfw_component *component,
+ u8 transfer_flag)
+{
+ struct ixgbe_fwu_priv *priv = container_of(context,
+ struct ixgbe_fwu_priv,
+ context);
+ struct ixgbe_adapter *adapter = priv->adapter;
+ struct netlink_ext_ack *extack = priv->extack;
+ struct ixgbe_aci_cmd_nvm_comp_tbl *comp_tbl;
+ u8 comp_response, comp_response_code;
+ struct ixgbe_hw *hw = &adapter->hw;
+ size_t length;
+ int err;
+
+ switch (component->identifier) {
+ case NVM_COMP_ID_OROM:
+ case NVM_COMP_ID_NVM:
+ case NVM_COMP_ID_NETLIST:
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unable to update due to unknown firmware component");
+ return -EOPNOTSUPP;
+ }
+
+ length = struct_size(comp_tbl, cvs, component->version_len);
+ comp_tbl = kzalloc(length, GFP_KERNEL);
+ if (!comp_tbl)
+ return -ENOMEM;
+
+ comp_tbl->comp_class = cpu_to_le16(component->classification);
+ comp_tbl->comp_id = cpu_to_le16(component->identifier);
+ comp_tbl->comp_class_idx = FWU_COMP_CLASS_IDX_NOT_USE;
+ comp_tbl->comp_cmp_stamp = cpu_to_le32(component->comparison_stamp);
+ comp_tbl->cvs_type = component->version_type;
+ comp_tbl->cvs_len = component->version_len;
+
+ memcpy(comp_tbl->cvs, component->version_string,
+ component->version_len);
+
+ err = ixgbe_nvm_pass_component_tbl(hw, (u8 *)comp_tbl, length,
+ transfer_flag, &comp_response,
+ &comp_response_code);
+
+ kfree(comp_tbl);
+
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to transfer component table to firmware");
+ return -EIO;
+ }
+
+ return ixgbe_check_component_response(adapter,
+ comp_response,
+ comp_response_code, extack);
+}
+
+/**
+ * ixgbe_write_one_nvm_block - Write an NVM block and await completion response
+ * @adapter: the PF data structure
+ * @module: the module to write to
+ * @offset: offset in bytes
+ * @block_size: size of the block to write, up to 4 KB
+ * @block: pointer to block of data to write
+ * @last_cmd: whether this is the last command
+ * @extack: netlink extended ACK structure
+ *
+ * Write a block of data to a flash module, and wait for the completion
+ * response message from firmware.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * On success, the firmware response indicates the device reset required to
+ * complete the update:
+ *
+ * 0 - IXGBE_ACI_NVM_POR_FLAG - A full power on is required
+ * 1 - IXGBE_ACI_NVM_PERST_FLAG - A cold PCIe reset is required
+ * 2 - IXGBE_ACI_NVM_EMPR_FLAG - An EMP reset is required
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+static int ixgbe_write_one_nvm_block(struct ixgbe_adapter *adapter,
+ u16 module, u32 offset,
+ u16 block_size, u8 *block, bool last_cmd,
+ struct netlink_ext_ack *extack)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ return ixgbe_aci_update_nvm(hw, module, offset, block_size, block,
+ last_cmd, 0);
+}
+
+/**
+ * ixgbe_write_nvm_module - Write data to an NVM module
+ * @adapter: the PF driver structure
+ * @module: the module id to program
+ * @component: the name of the component being updated
+ * @image: buffer of image data to write to the NVM
+ * @length: length of the buffer
+ * @extack: netlink extended ACK structure
+ *
+ * Loop over the data for a given NVM module and program it in 4 KB
+ * blocks. Notify the devlink core of progress after each block is programmed.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+static int ixgbe_write_nvm_module(struct ixgbe_adapter *adapter, u16 module,
+ const char *component, const u8 *image,
+ u32 length,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = adapter->devlink;
+ u32 offset = 0;
+ bool last_cmd;
+ u8 *block;
+ int err;
+
+ devlink_flash_update_status_notify(devlink, "Flashing",
+ component, 0, length);
+
+ block = kzalloc(IXGBE_ACI_MAX_BUFFER_SIZE, GFP_KERNEL);
+ if (!block)
+ return -ENOMEM;
+
+ do {
+ u32 block_size;
+
+ block_size = min_t(u32, IXGBE_ACI_MAX_BUFFER_SIZE,
+ length - offset);
+ last_cmd = !(offset + block_size < length);
+
+ memcpy(block, image + offset, block_size);
+
+ err = ixgbe_write_one_nvm_block(adapter, module, offset,
+ block_size, block, last_cmd,
+ extack);
+ if (err)
+ break;
+
+ offset += block_size;
+
+ devlink_flash_update_status_notify(devlink, "Flashing",
+ component, offset, length);
+ } while (!last_cmd);
+
+ if (err)
+ devlink_flash_update_status_notify(devlink, "Flashing failed",
+ component, length, length);
+ else
+ devlink_flash_update_status_notify(devlink, "Flashing done",
+ component, length, length);
+
+ kfree(block);
+
+ return err;
+}
+
+/* Length in seconds to wait before timing out when erasing a flash module.
+ * Yes, erasing really can take minutes to complete.
+ */
+#define IXGBE_FW_ERASE_TIMEOUT 300
+
+/**
+ * ixgbe_erase_nvm_module - Erase an NVM module and await firmware completion
+ * @adapter: the PF data structure
+ * @module: the module to erase
+ * @component: name of the component being updated
+ * @extack: netlink extended ACK structure
+ *
+ * Erase the inactive NVM bank associated with this module, and wait for
+ * a completion response message from firmware.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+static int ixgbe_erase_nvm_module(struct ixgbe_adapter *adapter, u16 module,
+ const char *component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = adapter->devlink;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int err;
+
+ devlink_flash_update_timeout_notify(devlink, "Erasing", component,
+ IXGBE_FW_ERASE_TIMEOUT);
+
+ err = ixgbe_aci_erase_nvm(hw, module);
+ if (err)
+ devlink_flash_update_status_notify(devlink, "Erasing failed",
+ component, 0, 0);
+ else
+ devlink_flash_update_status_notify(devlink, "Erasing done",
+ component, 0, 0);
+
+ return err;
+}
+
+/**
+ * ixgbe_switch_flash_banks - Tell firmware to switch NVM banks
+ * @adapter: Pointer to the PF data structure
+ * @activate_flags: flags used for the activation command
+ * @emp_reset_available: on return, indicates if EMP reset is available
+ * @extack: netlink extended ACK structure
+ *
+ * Notify firmware to activate the newly written flash banks, and wait for the
+ * firmware response.
+ *
+ * Return: 0 on success or an error code on failure.
+ */
+static int ixgbe_switch_flash_banks(struct ixgbe_adapter *adapter,
+ u8 activate_flags,
+ bool *emp_reset_available,
+ struct netlink_ext_ack *extack)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+ u8 response_flags;
+ int err;
+
+ err = ixgbe_nvm_write_activate(hw, activate_flags, &response_flags);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to switch active flash banks");
+ return err;
+ }
+
+ if (emp_reset_available) {
+ if (hw->dev_caps.common_cap.reset_restrict_support)
+ *emp_reset_available =
+ response_flags & IXGBE_ACI_NVM_EMPR_ENA;
+ else
+ *emp_reset_available = true;
+ }
+
+ return 0;
+}
+
+/**
+ * ixgbe_flash_component - Flash a component of the NVM
+ * @context: PLDM fw update structure
+ * @component: the component table to program
+ *
+ * Program the flash contents for a given component. First, determine the
+ * module id. Then, erase the secondary bank for this module. Finally, write
+ * the contents of the component to the NVM.
+ *
+ * Note this function assumes the caller has acquired the NVM resource.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+static int ixgbe_flash_component(struct pldmfw *context,
+ struct pldmfw_component *component)
+{
+ struct ixgbe_fwu_priv *priv = container_of(context,
+ struct ixgbe_fwu_priv,
+ context);
+ struct netlink_ext_ack *extack = priv->extack;
+ struct ixgbe_adapter *adapter = priv->adapter;
+ const char *name;
+ u16 module;
+ int err;
+ u8 flag;
+
+ switch (component->identifier) {
+ case NVM_COMP_ID_OROM:
+ module = IXGBE_E610_SR_1ST_OROM_BANK_PTR;
+ flag = IXGBE_ACI_NVM_ACTIV_SEL_OROM;
+ name = "fw.undi";
+ break;
+ case NVM_COMP_ID_NVM:
+ module = IXGBE_E610_SR_1ST_NVM_BANK_PTR;
+ flag = IXGBE_ACI_NVM_ACTIV_SEL_NVM;
+ name = "fw.mgmt";
+ break;
+ case NVM_COMP_ID_NETLIST:
+ module = IXGBE_E610_SR_NETLIST_BANK_PTR;
+ flag = IXGBE_ACI_NVM_ACTIV_SEL_NETLIST;
+ name = "fw.netlist";
+ break;
+
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ /* Mark this component for activating at the end. */
+ priv->activate_flags |= flag;
+
+ err = ixgbe_erase_nvm_module(adapter, module, name, extack);
+ if (err)
+ return err;
+
+ return ixgbe_write_nvm_module(adapter, module, name,
+ component->component_data,
+ component->component_size, extack);
+}
+
+/**
+ * ixgbe_finalize_update - Perform last steps to complete device update
+ * @context: PLDM fw update structure
+ *
+ * Called as the last step of the update process. Complete the update by
+ * telling the firmware to switch active banks, and perform a reset if
+ * configured.
+ *
+ * Return: 0 on success, or an error code on failure.
+ */
+static int ixgbe_finalize_update(struct pldmfw *context)
+{
+ struct ixgbe_fwu_priv *priv = container_of(context,
+ struct ixgbe_fwu_priv,
+ context);
+ struct ixgbe_adapter *adapter = priv->adapter;
+ struct netlink_ext_ack *extack = priv->extack;
+ struct devlink *devlink = adapter->devlink;
+ int err;
+
+ /* Finally, notify firmware to activate the written NVM banks */
+ err = ixgbe_switch_flash_banks(adapter, priv->activate_flags,
+ &priv->emp_reset_available, extack);
+ if (err)
+ return err;
+
+ adapter->fw_emp_reset_disabled = !priv->emp_reset_available;
+
+ if (!adapter->fw_emp_reset_disabled)
+ devlink_flash_update_status_notify(devlink,
+ "Suggested is to activate new firmware by devlink reload, if it doesn't work then a power cycle is required",
+ NULL, 0, 0);
+
+ return 0;
+}
+
+static const struct pldmfw_ops ixgbe_fwu_ops_e610 = {
+ .match_record = &pldmfw_op_pci_match_record,
+ .send_package_data = &ixgbe_send_package_data,
+ .send_component_table = &ixgbe_send_component_table,
+ .flash_component = &ixgbe_flash_component,
+ .finalize_update = &ixgbe_finalize_update,
+};
+
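For orientation: lib/pldmfw drives these callbacks in a fixed order once pldmfw_flash_image() is called below — match_record to locate the record matching this PCI device, send_package_data once per update, send_component_table for every component in the matched record, flash_component for each component to program, and finalize_update last. Each callback recovers the driver's ixgbe_fwu_priv from the embedded struct pldmfw via container_of(), as seen above.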
+/**
+ * ixgbe_get_pending_updates - Check if the device has pending flash updates
+ * @adapter: the PF driver structure
+ * @pending: on return, bitmap of updates pending
+ * @extack: Netlink extended ACK
+ *
+ * Check if the device has any pending updates on any flash components.
+ *
+ * Return: 0 on success, or a negative error code on failure. On success,
+ * @pending is set to the bitmap of pending updates.
+ */
+int ixgbe_get_pending_updates(struct ixgbe_adapter *adapter, u8 *pending,
+ struct netlink_ext_ack *extack)
+{
+ struct ixgbe_hw_dev_caps *dev_caps;
+ struct ixgbe_hw *hw = &adapter->hw;
+ int err;
+
+ dev_caps = kzalloc(sizeof(*dev_caps), GFP_KERNEL);
+ if (!dev_caps)
+ return -ENOMEM;
+
+ err = ixgbe_discover_dev_caps(hw, dev_caps);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unable to read device capabilities");
+ kfree(dev_caps);
+ return -EIO;
+ }
+
+ *pending = 0;
+
+ if (dev_caps->common_cap.nvm_update_pending_nvm)
+ *pending |= IXGBE_ACI_NVM_ACTIV_SEL_NVM;
+
+ if (dev_caps->common_cap.nvm_update_pending_orom)
+ *pending |= IXGBE_ACI_NVM_ACTIV_SEL_OROM;
+
+ if (dev_caps->common_cap.nvm_update_pending_netlist)
+ *pending |= IXGBE_ACI_NVM_ACTIV_SEL_NETLIST;
+
+ kfree(dev_caps);
+
+ return 0;
+}
+
+/**
+ * ixgbe_cancel_pending_update - Cancel any pending update for a component
+ * @adapter: the PF driver structure
+ * @component: if not NULL, the name of the component being updated
+ * @extack: Netlink extended ACK structure
+ *
+ * Cancel any pending update for the specified component. If component is
+ * NULL, all device updates will be canceled.
+ *
+ * Return: 0 on success, or a negative error code on failure.
+ */
+static int ixgbe_cancel_pending_update(struct ixgbe_adapter *adapter,
+ const char *component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink *devlink = adapter->devlink;
+ struct ixgbe_hw *hw = &adapter->hw;
+ u8 pending;
+ int err;
+
+ err = ixgbe_get_pending_updates(adapter, &pending, extack);
+ if (err)
+ return err;
+
+ /* If the flash_update request is for a specific component, ignore all
+ * of the other components.
+ */
+ if (component) {
+ if (strcmp(component, "fw.mgmt") == 0)
+ pending &= IXGBE_ACI_NVM_ACTIV_SEL_NVM;
+ else if (strcmp(component, "fw.undi") == 0)
+ pending &= IXGBE_ACI_NVM_ACTIV_SEL_OROM;
+ else if (strcmp(component, "fw.netlist") == 0)
+ pending &= IXGBE_ACI_NVM_ACTIV_SEL_NETLIST;
+ else
+ return -EINVAL;
+ }
+
+ /* There is no previous pending update, so this request may continue */
+ if (!pending)
+ return 0;
+
+ /* In order to allow overwriting a previous pending update, notify
+ * firmware to cancel that update by issuing the appropriate command.
+ */
+ devlink_flash_update_status_notify(devlink,
+ "Canceling previous pending update",
+ component, 0, 0);
+
+ err = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to acquire device flash lock");
+ return -EIO;
+ }
+
+ pending |= IXGBE_ACI_NVM_REVERT_LAST_ACTIV;
+ err = ixgbe_switch_flash_banks(adapter, pending, NULL, extack);
+
+ ixgbe_release_nvm(hw);
+
+ return err;
+}
+
+/**
+ * ixgbe_flash_pldm_image - Write a PLDM-formatted firmware image to the device
+ * @devlink: pointer to devlink associated with the device to update
+ * @params: devlink flash update parameters
+ * @extack: netlink extended ACK structure
+ *
+ * Parse the data for a given firmware file, verifying that it is a valid
+ * PLDM-formatted image that matches this device.
+ *
+ * Extract the device record Package Data and Component Tables and send them
+ * to the firmware. Extract and write the flash data for each of the three
+ * main flash components, "fw.mgmt", "fw.undi", and "fw.netlist". Notify
+ * firmware once the data is written to the inactive banks.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int ixgbe_flash_pldm_image(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct ixgbe_adapter *adapter = devlink_priv(devlink);
+ struct device *dev = &adapter->pdev->dev;
+ struct ixgbe_hw *hw = &adapter->hw;
+ struct ixgbe_fwu_priv priv;
+ u8 preservation;
+ int err;
+
+ if (hw->mac.type != ixgbe_mac_e610)
+ return -EOPNOTSUPP;
+
+ switch (params->overwrite_mask) {
+ case 0:
+ /* preserve all settings and identifiers */
+ preservation = IXGBE_ACI_NVM_PRESERVE_ALL;
+ break;
+ case DEVLINK_FLASH_OVERWRITE_SETTINGS:
+ /* Overwrite settings, but preserve vital information such as
+ * device identifiers.
+ */
+ preservation = IXGBE_ACI_NVM_PRESERVE_SELECTED;
+ break;
+ case (DEVLINK_FLASH_OVERWRITE_SETTINGS |
+ DEVLINK_FLASH_OVERWRITE_IDENTIFIERS):
+ /* overwrite both settings and identifiers, preserve nothing */
+ preservation = IXGBE_ACI_NVM_NO_PRESERVATION;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Requested overwrite mask is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ /* Capabilities cannot be read in recovery mode, so a missing
+ * nvm_unified_update bit must not be treated as an error there.
+ */
+ if (!hw->dev_caps.common_cap.nvm_unified_update &&
+ (hw->mac.ops.fw_recovery_mode &&
+ !hw->mac.ops.fw_recovery_mode(hw))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Current firmware does not support unified update");
+ return -EOPNOTSUPP;
+ }
+
+ memset(&priv, 0, sizeof(priv));
+
+ priv.context.ops = &ixgbe_fwu_ops_e610;
+ priv.context.dev = dev;
+ priv.extack = extack;
+ priv.adapter = adapter;
+ priv.activate_flags = preservation;
+
+ devlink_flash_update_status_notify(devlink,
+ "Preparing to flash", NULL, 0, 0);
+
+ err = ixgbe_cancel_pending_update(adapter, NULL, extack);
+ if (err)
+ return err;
+
+ err = ixgbe_acquire_nvm(hw, IXGBE_RES_WRITE);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to acquire device flash lock");
+ return -EIO;
+ }
+
+ err = pldmfw_flash_image(&priv.context, params->fw);
+ if (err == -ENOENT) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Firmware image has no record matching this device");
+ } else if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to flash PLDM image");
+ }
+
+ ixgbe_release_nvm(hw);
+
+ return err;
+}
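ixgbe_flash_pldm_image() is meant to be called from the driver's devlink flash handler, which lives in the new devlink/ directory and is not part of this file. A sketch of the expected hookup, with ixgbe_devlink_flash_update as an assumed wrapper name:

	static int ixgbe_devlink_flash_update(struct devlink *devlink,
					      struct devlink_flash_update_params *params,
					      struct netlink_ext_ack *extack)
	{
		/* Assumed glue: forward the request to the PLDM handler above */
		return ixgbe_flash_pldm_image(devlink, params, extack);
	}

	static const struct devlink_ops ixgbe_devlink_ops = {
		/* Advertise the overwrite masks handled in the switch above */
		.supported_flash_update_params =
			DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK,
		.flash_update = ixgbe_devlink_flash_update,
	};

With that wired up, an update is driven from userspace via the standard devlink command, e.g. "devlink dev flash pci/0000:03:00.0 file E610.bin", optionally adding "overwrite settings" or "overwrite settings identifiers" to select the preservation behavior mapped in ixgbe_flash_pldm_image().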
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.h
new file mode 100644
index 000000000000..abdd708c93df
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fw_update.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2025 Intel Corporation. */
+
+#ifndef _IXGBE_FW_UPDATE_H_
+#define _IXGBE_FW_UPDATE_H_
+
+int ixgbe_flash_pldm_image(struct devlink *devlink,
+ struct devlink_flash_update_params *params,
+ struct netlink_ext_ack *extack);
+int ixgbe_get_pending_updates(struct ixgbe_adapter *adapter, u8 *pending,
+ struct netlink_ext_ack *extack);
+#endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
index 07ea1954a276..648a7c618cd1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
@@ -478,7 +478,7 @@ static int ixgbe_ipsec_parse_proto_keys(struct xfrm_state *xs,
static int ixgbe_ipsec_check_mgmt_ip(struct xfrm_state *xs)
{
struct net_device *dev = xs->xso.real_dev;
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_hw *hw = &adapter->hw;
u32 mfval, manc, reg;
int num_filters = 4;
@@ -563,7 +563,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs,
struct netlink_ext_ack *extack)
{
struct net_device *dev = xs->xso.real_dev;
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_ipsec *ipsec = adapter->ipsec;
struct ixgbe_hw *hw = &adapter->hw;
int checked, match, first;
@@ -757,7 +757,7 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs,
static void ixgbe_ipsec_del_sa(struct xfrm_state *xs)
{
struct net_device *dev = xs->xso.real_dev;
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_ipsec *ipsec = adapter->ipsec;
struct ixgbe_hw *hw = &adapter->hw;
u32 zerobuf[4] = {0, 0, 0, 0};
@@ -1052,7 +1052,7 @@ int ixgbe_ipsec_tx(struct ixgbe_ring *tx_ring,
struct ixgbe_tx_buffer *first,
struct ixgbe_ipsec_tx_data *itd)
{
- struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(tx_ring->netdev);
struct ixgbe_ipsec *ipsec = adapter->ipsec;
struct xfrm_state *xs;
struct sec_path *sp;
@@ -1142,7 +1142,7 @@ void ixgbe_ipsec_rx(struct ixgbe_ring *rx_ring,
union ixgbe_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- struct ixgbe_adapter *adapter = netdev_priv(rx_ring->netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(rx_ring->netdev);
__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
__le16 ipsec_pkt_types = cpu_to_le16(IXGBE_RXDADV_PKTTYPE_IPSEC_AH |
IXGBE_RXDADV_PKTTYPE_IPSEC_ESP);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a2718218963e..cdfafc477ee0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -49,6 +49,7 @@
#include "ixgbe_sriov.h"
#include "ixgbe_model.h"
#include "ixgbe_txrx_common.h"
+#include "devlink/devlink.h"
char ixgbe_driver_name[] = "ixgbe";
static const char ixgbe_driver_string[] =
@@ -1095,7 +1096,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter)
static int ixgbe_tx_maxrate(struct net_device *netdev,
int queue_index, u32 maxrate)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u32 bcnrc_val = ixgbe_link_mbps(adapter);
@@ -4678,7 +4679,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
static int ixgbe_vlan_rx_add_vid(struct net_device *netdev,
__be16 proto, u16 vid)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
/* add VID to filter table */
@@ -4737,7 +4738,7 @@ void ixgbe_update_pf_promisc_vlvf(struct ixgbe_adapter *adapter, u32 vid)
static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev,
__be16 proto, u16 vid)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
/* remove VID from filter table */
@@ -4962,7 +4963,7 @@ static void ixgbe_restore_vlan(struct ixgbe_adapter *adapter)
**/
static int ixgbe_write_mc_addr_list(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
if (!netif_running(netdev))
@@ -5138,7 +5139,7 @@ int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter,
static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
int ret;
ret = ixgbe_add_mac_filter(adapter, addr, VMDQ_P(0));
@@ -5148,7 +5149,7 @@ static int ixgbe_uc_sync(struct net_device *netdev, const unsigned char *addr)
static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
ixgbe_del_mac_filter(adapter, addr, VMDQ_P(0));
@@ -5166,7 +5167,7 @@ static int ixgbe_uc_unsync(struct net_device *netdev, const unsigned char *addr)
**/
void ixgbe_set_rx_mode(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
netdev_features_t features = netdev->features;
@@ -5268,7 +5269,7 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
static int ixgbe_udp_tunnel_sync(struct net_device *dev, unsigned int table)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_hw *hw = &adapter->hw;
struct udp_tunnel_info ti;
@@ -6600,7 +6601,7 @@ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter)
**/
static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
/* Do the reset outside of interrupt context */
ixgbe_tx_timeout_reset(adapter);
@@ -6849,7 +6850,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
/* initialize eeprom parameters */
- if (ixgbe_init_eeprom_params_generic(hw)) {
+ if (hw->eeprom.ops.init_params(hw)) {
e_dev_err("EEPROM initialization failed\n");
return -EIO;
}
@@ -7165,7 +7166,7 @@ static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter)
**/
static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (ixgbe_enabled_xdp_adapter(adapter)) {
int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD;
@@ -7212,7 +7213,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
**/
int ixgbe_open(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
int err, queues;
@@ -7316,7 +7317,7 @@ static void ixgbe_close_suspend(struct ixgbe_adapter *adapter)
**/
int ixgbe_close(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
ixgbe_ptp_stop(adapter);
@@ -8364,6 +8365,34 @@ static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter)
rtnl_unlock();
}
+static int ixgbe_check_fw_api_mismatch(struct ixgbe_adapter *adapter)
+{
+ struct ixgbe_hw *hw = &adapter->hw;
+
+ if (hw->mac.type != ixgbe_mac_e610)
+ return 0;
+
+ if (hw->mac.ops.get_fw_ver && hw->mac.ops.get_fw_ver(hw))
+ return 0;
+
+ if (hw->api_maj_ver > IXGBE_FW_API_VER_MAJOR) {
+ e_dev_err("The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
+
+ adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH;
+ return -EOPNOTSUPP;
+ } else if (hw->api_maj_ver == IXGBE_FW_API_VER_MAJOR &&
+ hw->api_min_ver > IXGBE_FW_API_VER_MINOR + IXGBE_FW_API_VER_DIFF_ALLOWED) {
+ e_dev_info("The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
+ adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH;
+ } else if (hw->api_maj_ver < IXGBE_FW_API_VER_MAJOR ||
+ hw->api_min_ver < IXGBE_FW_API_VER_MINOR - IXGBE_FW_API_VER_DIFF_ALLOWED) {
+ e_dev_info("The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
+ adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH;
+ }
+
+ return 0;
+}
+
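Concretely, with IXGBE_FW_API_VER_MAJOR/MINOR of 1.7 and IXGBE_FW_API_VER_DIFF_ALLOWED of 2 (defined in ixgbe_type_e610.h below): a firmware API major version above 1 stops the driver, minor versions 10 and up log the "newer than expected" notice, minor versions 4 and below (or a lower major) log the "older than expected" notice, and 1.5 through 1.9 pass without complaint.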
/**
* ixgbe_check_fw_error - Check firmware for errors
* @adapter: the adapter private structure
@@ -8374,12 +8403,14 @@ static bool ixgbe_check_fw_error(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
u32 fwsm;
+ int err;
/* read fwsm.ext_err_ind register and log errors */
fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
+ /* Skip if E610's FW is reloading; a warning in that case may be misleading */
if (fwsm & IXGBE_FWSM_EXT_ERR_IND_MASK ||
- !(fwsm & IXGBE_FWSM_FW_VAL_BIT))
+ (!(fwsm & IXGBE_FWSM_FW_VAL_BIT) && !(hw->mac.type == ixgbe_mac_e610)))
e_dev_warn("Warning firmware error detected FWSM: 0x%08X\n",
fwsm);
@@ -8387,10 +8418,53 @@ static bool ixgbe_check_fw_error(struct ixgbe_adapter *adapter)
e_dev_err("Firmware recovery mode detected. Limiting functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
return true;
}
+ if (!(adapter->flags2 & IXGBE_FLAG2_API_MISMATCH)) {
+ err = ixgbe_check_fw_api_mismatch(adapter);
+ if (err)
+ return true;
+ }
+
+ /* return here if FW rollback mode has been already detected */
+ if (adapter->flags2 & IXGBE_FLAG2_FW_ROLLBACK)
+ return false;
+
+ if (hw->mac.ops.fw_rollback_mode && hw->mac.ops.fw_rollback_mode(hw)) {
+ struct ixgbe_nvm_info *nvm_info = &adapter->hw.flash.nvm;
+ char ver_buff[64] = "";
+
+ if (hw->mac.ops.get_fw_ver && hw->mac.ops.get_fw_ver(hw))
+ goto no_version;
+
+ if (hw->mac.ops.get_nvm_ver &&
+ hw->mac.ops.get_nvm_ver(hw, nvm_info))
+ goto no_version;
+
+ snprintf(ver_buff, sizeof(ver_buff),
+ "Current version is NVM:%x.%x.%x, FW:%d.%d. ",
+ nvm_info->major, nvm_info->minor, nvm_info->eetrack,
+ hw->fw_maj_ver, hw->fw_min_ver);
+no_version:
+ e_dev_warn("Firmware rollback mode detected. %sDevice may exhibit limited functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware rollback mode.",
+ ver_buff);
+
+ adapter->flags2 |= IXGBE_FLAG2_FW_ROLLBACK;
+ }
return false;
}
+static void ixgbe_recovery_service_task(struct work_struct *work)
+{
+ struct ixgbe_adapter *adapter = container_of(work,
+ struct ixgbe_adapter,
+ service_task);
+
+ ixgbe_handle_fw_event(adapter);
+ ixgbe_service_event_complete(adapter);
+
+ mod_timer(&adapter->service_timer, jiffies + msecs_to_jiffies(100));
+}
+
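In recovery mode this slimmed-down task replaces the regular service task: it only drains firmware events over the ACI and re-arms the service timer every 100 ms, so pending firmware events keep getting handled even though no netdev is registered.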
/**
* ixgbe_service_task - manages and runs subtasks
* @work: pointer to work_struct containing our data
@@ -8410,8 +8484,13 @@ static void ixgbe_service_task(struct work_struct *work)
return;
}
if (ixgbe_check_fw_error(adapter)) {
- if (!test_bit(__IXGBE_DOWN, &adapter->state))
+ if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
+ if (adapter->mii_bus) {
+ mdiobus_unregister(adapter->mii_bus);
+ adapter->mii_bus = NULL;
+ }
unregister_netdev(adapter->netdev);
+ }
ixgbe_service_event_complete(adapter);
return;
}
@@ -9001,7 +9080,7 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
switch (vlan_get_protocol(skb)) {
case htons(ETH_P_FCOE):
case htons(ETH_P_FIP):
- adapter = netdev_priv(dev);
+ adapter = ixgbe_from_netdev(dev);
if (!sb_dev && (adapter->flags & IXGBE_FLAG_FCOE_ENABLED))
break;
@@ -9260,7 +9339,7 @@ static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
struct net_device *netdev,
struct ixgbe_ring *ring)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_ring *tx_ring;
/*
@@ -9292,7 +9371,7 @@ static netdev_tx_t ixgbe_xmit_frame(struct sk_buff *skb,
**/
static int ixgbe_set_mac(struct net_device *netdev, void *p)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
struct sockaddr *addr = p;
@@ -9310,7 +9389,7 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p)
static int
ixgbe_mdio_read(struct net_device *netdev, int prtad, int devad, u16 addr)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
u16 value;
int rc;
@@ -9336,7 +9415,7 @@ ixgbe_mdio_read(struct net_device *netdev, int prtad, int devad, u16 addr)
static int ixgbe_mdio_write(struct net_device *netdev, int prtad, int devad,
u16 addr, u16 value)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
if (adapter->mii_bus) {
@@ -9356,7 +9435,7 @@ static int ixgbe_mdio_write(struct net_device *netdev, int prtad, int devad,
static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
switch (cmd) {
case SIOCSHWTSTAMP:
@@ -9382,7 +9461,7 @@ static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
static int ixgbe_add_sanmac_netdev(struct net_device *dev)
{
int err = 0;
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_hw *hw = &adapter->hw;
if (is_valid_ether_addr(hw->mac.san_addr)) {
@@ -9406,7 +9485,7 @@ static int ixgbe_add_sanmac_netdev(struct net_device *dev)
static int ixgbe_del_sanmac_netdev(struct net_device *dev)
{
int err = 0;
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_mac_info *mac = &adapter->hw.mac;
if (is_valid_ether_addr(mac->san_addr)) {
@@ -9437,7 +9516,7 @@ static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats,
static void ixgbe_get_stats64(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
int i;
rcu_read_lock();
@@ -9480,7 +9559,7 @@ static void ixgbe_get_stats64(struct net_device *netdev,
static int ixgbe_ndo_get_vf_stats(struct net_device *netdev, int vf,
struct ifla_vf_stats *vf_stats)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (vf < 0 || vf >= adapter->num_vfs)
return -EINVAL;
@@ -9597,7 +9676,7 @@ static int ixgbe_reassign_macvlan_pool(struct net_device *vdev,
static void ixgbe_defrag_macvlan_pools(struct net_device *dev)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct netdev_nested_priv priv = {
.data = (void *)adapter,
};
@@ -9618,7 +9697,7 @@ static void ixgbe_defrag_macvlan_pools(struct net_device *dev)
*/
int ixgbe_setup_tc(struct net_device *dev, u8 tc)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_hw *hw = &adapter->hw;
/* Hardware supports up to 8 traffic classes */
@@ -10176,7 +10255,7 @@ static LIST_HEAD(ixgbe_block_cb_list);
static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
switch (type) {
case TC_SETUP_BLOCK:
@@ -10204,7 +10283,7 @@ void ixgbe_sriov_reinit(struct ixgbe_adapter *adapter)
#endif
void ixgbe_do_reset(struct net_device *netdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (netif_running(netdev))
ixgbe_reinit_locked(adapter);
@@ -10215,7 +10294,7 @@ void ixgbe_do_reset(struct net_device *netdev)
static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
netdev_features_t features)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
/* If Rx checksum is disabled, then RSC/LRO should also be disabled */
if (!(features & NETIF_F_RXCSUM))
@@ -10252,7 +10331,7 @@ static void ixgbe_reset_l2fw_offload(struct ixgbe_adapter *adapter)
static int ixgbe_set_features(struct net_device *netdev,
netdev_features_t features)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
netdev_features_t changed = netdev->features ^ features;
bool need_reset = false;
@@ -10328,7 +10407,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
{
/* guarantee we can provide a unique filter for the unicast address */
if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) {
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
u16 pool = VMDQ_P(0);
if (netdev_uc_count(dev) >= ixgbe_available_rars(adapter, pool))
@@ -10416,7 +10495,7 @@ static int ixgbe_ndo_bridge_setlink(struct net_device *dev,
struct nlmsghdr *nlh, u16 flags,
struct netlink_ext_ack *extack)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct nlattr *attr, *br_spec;
int rem;
@@ -10444,7 +10523,7 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev,
u32 filter_mask, int nlflags)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
if (!(adapter->flags & IXGBE_FLAG_SRIOV_ENABLED))
return 0;
@@ -10456,7 +10535,7 @@ static int ixgbe_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
{
- struct ixgbe_adapter *adapter = netdev_priv(pdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(pdev);
struct ixgbe_fwd_adapter *accel;
int tcs = adapter->hw_tcs ? : 1;
int pool, err;
@@ -10553,7 +10632,7 @@ static void *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
{
struct ixgbe_fwd_adapter *accel = priv;
- struct ixgbe_adapter *adapter = netdev_priv(pdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(pdev);
unsigned int rxbase = accel->rx_base_queue;
unsigned int i;
@@ -10631,7 +10710,7 @@ ixgbe_features_check(struct sk_buff *skb, struct net_device *dev,
static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
{
int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct bpf_prog *old_prog;
bool need_reset;
int num_queues;
@@ -10703,7 +10782,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
static int ixgbe_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
switch (xdp->command) {
case XDP_SETUP_PROG:
@@ -10738,7 +10817,7 @@ void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring)
static int ixgbe_xdp_xmit(struct net_device *dev, int n,
struct xdp_frame **frames, u32 flags)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_ring *ring;
int nxmit = 0;
int i;
@@ -11146,7 +11225,7 @@ bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
* format to display. The FW version is taken from the EEPROM/NVM.
*
*/
-static void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter)
+void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter)
{
struct ixgbe_orom_info *orom = &adapter->hw.flash.orom;
struct ixgbe_nvm_info *nvm = &adapter->hw.flash.nvm;
@@ -11197,6 +11276,65 @@ static void ixgbe_set_fw_version(struct ixgbe_adapter *adapter)
}
/**
+ * ixgbe_recovery_probe - Handle FW recovery mode during probe
+ * @adapter: the adapter private structure
+ *
+ * Perform limited driver initialization when FW error is detected.
+ *
+ * Return: 0 on successful probe for E610, -EIO if recovery mode is detected
+ * on a non-E610 adapter, or another negative error code otherwise.
+ */
+static int ixgbe_recovery_probe(struct ixgbe_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct ixgbe_hw *hw = &adapter->hw;
+ bool disable_dev;
+ int err = -EIO;
+
+ if (hw->mac.type != ixgbe_mac_e610)
+ goto clean_up_probe;
+
+ ixgbe_get_hw_control(adapter);
+ mutex_init(&hw->aci.lock);
+ err = ixgbe_get_flash_data(&adapter->hw);
+ if (err)
+ goto shutdown_aci;
+
+ timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
+ INIT_WORK(&adapter->service_task, ixgbe_recovery_service_task);
+ set_bit(__IXGBE_SERVICE_INITED, &adapter->state);
+ clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state);
+
+ if (hw->mac.ops.get_bus_info)
+ hw->mac.ops.get_bus_info(hw);
+
+ pci_set_drvdata(pdev, adapter);
+ /* Create the devlink interface so the NIC can still be managed,
+ * e.g. so a new NVM image can be loaded.
+ */
+ devl_lock(adapter->devlink);
+ ixgbe_devlink_register_port(adapter);
+ SET_NETDEV_DEVLINK_PORT(adapter->netdev,
+ &adapter->devlink_port);
+ devl_register(adapter->devlink);
+ devl_unlock(adapter->devlink);
+
+ return 0;
+shutdown_aci:
+ mutex_destroy(&adapter->hw.aci.lock);
+ ixgbe_release_hw_control(adapter);
+ devlink_free(adapter->devlink);
+clean_up_probe:
+ disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
+ free_netdev(netdev);
+ pci_release_mem_regions(pdev);
+ if (disable_dev)
+ pci_disable_device(pdev);
+ return err;
+}
+
+/**
* ixgbe_probe - Device Initialization Routine
* @pdev: PCI device information struct
* @ent: entry in ixgbe_pci_tbl
@@ -11210,6 +11348,7 @@ static void ixgbe_set_fw_version(struct ixgbe_adapter *adapter)
static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct net_device *netdev;
+ struct ixgbe_netdevice_priv *netdev_priv_wrapper;
struct ixgbe_adapter *adapter = NULL;
struct ixgbe_hw *hw;
const struct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data];
@@ -11263,7 +11402,13 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
indices = IXGBE_MAX_RSS_INDICES_X550;
}
- netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
+ adapter = ixgbe_allocate_devlink(&pdev->dev);
+ if (IS_ERR(adapter)) {
+ err = PTR_ERR(adapter);
+ goto err_devlink;
+ }
+
+ netdev = alloc_etherdev_mq(sizeof(*netdev_priv_wrapper), indices);
if (!netdev) {
err = -ENOMEM;
goto err_alloc_etherdev;
@@ -11271,7 +11416,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
SET_NETDEV_DEV(netdev, &pdev->dev);
- adapter = netdev_priv(netdev);
+ netdev_priv_wrapper = netdev_priv(netdev);
+ netdev_priv_wrapper->adapter = adapter;
adapter->netdev = netdev;
adapter->pdev = pdev;
@@ -11326,10 +11472,21 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
goto err_sw_init;
+ /* Make sure the SWFW semaphore is in a valid state */
+ if (hw->mac.ops.init_swfw_sync)
+ hw->mac.ops.init_swfw_sync(hw);
+
+ if (ixgbe_check_fw_error(adapter))
+ return ixgbe_recovery_probe(adapter);
+
if (adapter->hw.mac.type == ixgbe_mac_e610) {
err = ixgbe_get_caps(&adapter->hw);
if (err)
dev_err(&pdev->dev, "ixgbe_get_caps failed %d\n", err);
+
+ err = ixgbe_get_flash_data(&adapter->hw);
+ if (err)
+ goto err_sw_init;
}
if (adapter->hw.mac.type == ixgbe_mac_82599EB)
@@ -11348,10 +11505,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
break;
}
- /* Make sure the SWFW semaphore is in a valid state */
- if (hw->mac.ops.init_swfw_sync)
- hw->mac.ops.init_swfw_sync(hw);
-
/* Make it possible the adapter to be woken up via WOL */
switch (adapter->hw.mac.type) {
case ixgbe_mac_82599EB:
@@ -11504,11 +11657,6 @@ skip_sriov:
if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
netdev->features |= NETIF_F_LRO;
- if (ixgbe_check_fw_error(adapter)) {
- err = -EIO;
- goto err_sw_init;
- }
-
/* make sure the EEPROM is good */
if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
e_dev_err("The EEPROM Checksum Is Not Valid\n");
@@ -11591,7 +11739,7 @@ skip_sriov:
if (expected_gts > 0)
ixgbe_check_minimum_link(adapter, expected_gts);
- err = ixgbe_read_pba_string_generic(hw, part_str, sizeof(part_str));
+ err = hw->eeprom.ops.read_pba_string(hw, part_str, sizeof(part_str));
if (err)
strscpy(part_str, "Unknown", sizeof(part_str));
if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present)
@@ -11617,6 +11765,11 @@ skip_sriov:
}
strcpy(netdev->name, "eth%d");
pci_set_drvdata(pdev, adapter);
+
+ devl_lock(adapter->devlink);
+ ixgbe_devlink_register_port(adapter);
+ SET_NETDEV_DEVLINK_PORT(adapter->netdev, &adapter->devlink_port);
+
err = register_netdev(netdev);
if (err)
goto err_register;
@@ -11671,11 +11824,15 @@ skip_sriov:
if (err)
goto err_netdev;
+ devl_register(adapter->devlink);
+ devl_unlock(adapter->devlink);
return 0;
err_netdev:
unregister_netdev(netdev);
err_register:
+ devl_port_unregister(&adapter->devlink_port);
+ devl_unlock(adapter->devlink);
ixgbe_release_hw_control(adapter);
ixgbe_clear_interrupt_scheme(adapter);
if (hw->mac.type == ixgbe_mac_e610)
@@ -11692,7 +11849,9 @@ err_ioremap:
disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state);
free_netdev(netdev);
err_alloc_etherdev:
+ devlink_free(adapter->devlink);
pci_release_mem_regions(pdev);
+err_devlink:
err_pci_reg:
err_dma:
if (!adapter || disable_dev)
@@ -11721,6 +11880,8 @@ static void ixgbe_remove(struct pci_dev *pdev)
return;
netdev = adapter->netdev;
+ devl_lock(adapter->devlink);
+ devl_unregister(adapter->devlink);
ixgbe_dbg_adapter_exit(adapter);
set_bit(__IXGBE_REMOVING, &adapter->state);
@@ -11756,6 +11917,10 @@ static void ixgbe_remove(struct pci_dev *pdev)
if (netdev->reg_state == NETREG_REGISTERED)
unregister_netdev(netdev);
+ devl_port_unregister(&adapter->devlink_port);
+ devl_unlock(adapter->devlink);
+ devlink_free(adapter->devlink);
+
ixgbe_stop_ipsec_offload(adapter);
ixgbe_clear_interrupt_scheme(adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 0a03a8bb5f88..2d54828bdfbb 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -167,7 +167,7 @@ int ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
u16 reg, u16 val, bool lock)
{
u32 swfw_mask = hw->phy.phy_semaphore_mask;
- int max_retry = 1;
+ int max_retry = 3;
int retry = 0;
u8 reg_high;
u8 csum;
@@ -2285,7 +2285,7 @@ static int ixgbe_write_i2c_byte_generic_int(struct ixgbe_hw *hw, u8 byte_offset,
u8 dev_addr, u8 data, bool lock)
{
u32 swfw_mask = hw->phy.phy_semaphore_mask;
- u32 max_retry = 1;
+ u32 max_retry = 3;
u32 retry = 0;
int status;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index ccdce80edd14..0dbbd2befd4d 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -1418,7 +1418,7 @@ void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter)
int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
int retval;
if (vf >= adapter->num_vfs)
@@ -1526,7 +1526,7 @@ int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
u8 qos, __be16 vlan_proto)
{
int err = 0;
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7))
return -EINVAL;
@@ -1644,7 +1644,7 @@ void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter)
int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
int max_tx_rate)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
int link_speed;
/* verify VF is active */
@@ -1679,7 +1679,7 @@ int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate,
int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
struct ixgbe_hw *hw = &adapter->hw;
if (vf >= adapter->num_vfs)
@@ -1757,7 +1757,7 @@ void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state)
**/
int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
int ret = 0;
if (vf < 0 || vf >= adapter->num_vfs) {
@@ -1794,7 +1794,7 @@ int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state)
int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf,
bool setting)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
/* This operation is currently supported only for 82599 and x540
* devices.
@@ -1813,7 +1813,7 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf,
int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (vf >= adapter->num_vfs)
return -EINVAL;
@@ -1836,7 +1836,7 @@ int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
int ixgbe_ndo_get_vf_config(struct net_device *netdev,
int vf, struct ifla_vf_info *ivi)
{
- struct ixgbe_adapter *adapter = netdev_priv(netdev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
if (vf >= adapter->num_vfs)
return -EINVAL;
ivi->vf = vf;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 5fdf32d79d82..892fa6c1f879 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -3446,6 +3446,8 @@ struct ixgbe_eeprom_operations {
int (*validate_checksum)(struct ixgbe_hw *, u16 *);
int (*update_checksum)(struct ixgbe_hw *);
int (*calc_checksum)(struct ixgbe_hw *);
+ int (*read_pba_string)(struct ixgbe_hw *hw, u8 *pba_num,
+ u32 pba_num_size);
};
struct ixgbe_mac_operations {
@@ -3454,6 +3456,7 @@ struct ixgbe_mac_operations {
int (*start_hw)(struct ixgbe_hw *);
int (*clear_hw_cntrs)(struct ixgbe_hw *);
enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *);
+ int (*get_fw_ver)(struct ixgbe_hw *hw);
int (*get_mac_addr)(struct ixgbe_hw *, u8 *);
int (*get_san_mac_addr)(struct ixgbe_hw *, u8 *);
int (*get_device_caps)(struct ixgbe_hw *, u16 *);
@@ -3522,6 +3525,8 @@ struct ixgbe_mac_operations {
int (*get_thermal_sensor_data)(struct ixgbe_hw *);
int (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw);
bool (*fw_recovery_mode)(struct ixgbe_hw *hw);
+ bool (*fw_rollback_mode)(struct ixgbe_hw *hw);
+ int (*get_nvm_ver)(struct ixgbe_hw *hw, struct ixgbe_nvm_info *nvm);
void (*disable_rx)(struct ixgbe_hw *hw);
void (*enable_rx)(struct ixgbe_hw *hw);
void (*set_source_address_pruning)(struct ixgbe_hw *, bool,
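These new ops (read_pba_string here, plus get_fw_ver/fw_rollback_mode/get_nvm_ver in the MAC ops) let the call sites changed earlier — hw->eeprom.ops.init_params() in ixgbe_sw_init() and hw->eeprom.ops.read_pba_string() in ixgbe_probe() — dispatch per MAC family instead of hard-coding the generic helpers. A sketch of how an E610 ops table might fill the new members; the E610 function names are assumptions, not taken from this patch:

	static const struct ixgbe_eeprom_operations eeprom_ops_e610 = {
		.init_params		= ixgbe_init_eeprom_params_e610, /* assumed name */
		.read_pba_string	= ixgbe_read_pba_string_e610,    /* assumed name */
		/* remaining ops as for the other MAC families */
	};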
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h
index 617e07878e4f..bea94e5ccb73 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h
@@ -10,11 +10,75 @@
#define IXGBE_MAX_VSI 768
/* Checksum and Shadow RAM pointers */
-#define E610_SR_SW_CHECKSUM_WORD 0x3F
+#define IXGBE_E610_SR_NVM_CTRL_WORD 0x00
+#define IXGBE_E610_SR_PBA_BLOCK_PTR 0x16
+#define IXGBE_E610_SR_PBA_BLOCK_MASK GENMASK(15, 8)
+#define IXGBE_E610_SR_NVM_DEV_STARTER_VER 0x18
+#define IXGBE_E610_SR_NVM_EETRACK_LO 0x2D
+#define IXGBE_E610_SR_NVM_EETRACK_HI 0x2E
+#define IXGBE_E610_NVM_VER_LO_MASK GENMASK(7, 0)
+#define IXGBE_E610_NVM_VER_HI_MASK GENMASK(15, 12)
+#define IXGBE_E610_SR_SW_CHECKSUM_WORD 0x3F
+#define IXGBE_E610_SR_PFA_PTR 0x40
+#define IXGBE_E610_SR_1ST_NVM_BANK_PTR 0x42
+#define IXGBE_E610_SR_NVM_BANK_SIZE 0x43
+#define IXGBE_E610_SR_1ST_OROM_BANK_PTR 0x44
+#define IXGBE_E610_SR_OROM_BANK_SIZE 0x45
+#define IXGBE_E610_SR_NETLIST_BANK_PTR 0x46
+#define IXGBE_E610_SR_NETLIST_BANK_SIZE 0x47
+
+/* The OROM version topology */
+#define IXGBE_OROM_VER_PATCH_MASK GENMASK_ULL(7, 0)
+#define IXGBE_OROM_VER_BUILD_MASK GENMASK_ULL(23, 8)
+#define IXGBE_OROM_VER_MASK GENMASK_ULL(31, 24)
+
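The OROM version masks pack major/build/patch into one dword. A decode sketch (combo_ver and its source are illustrative, assumed to come from the OROM CSS header):

	u32 combo_ver = le32_to_cpu(css_orom_ver_dword); /* assumed source */
	u8  major = FIELD_GET(IXGBE_OROM_VER_MASK, combo_ver);
	u16 build = FIELD_GET(IXGBE_OROM_VER_BUILD_MASK, combo_ver);
	u8  patch = FIELD_GET(IXGBE_OROM_VER_PATCH_MASK, combo_ver);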
+/* CSS Header words */
+#define IXGBE_NVM_CSS_HDR_LEN_L 0x02
+#define IXGBE_NVM_CSS_HDR_LEN_H 0x03
+#define IXGBE_NVM_CSS_SREV_L 0x14
+#define IXGBE_NVM_CSS_SREV_H 0x15
+
+#define IXGBE_HDR_LEN_ROUNDUP 32
+
+/* Length of Authentication header section in words */
+#define IXGBE_NVM_AUTH_HEADER_LEN 0x08
/* Shadow RAM related */
#define IXGBE_SR_WORDS_IN_1KB 512
+/* The Netlist ID Block is located after all of the Link Topology nodes. */
+#define IXGBE_NETLIST_ID_BLK_SIZE 0x30
+#define IXGBE_NETLIST_ID_BLK_OFFSET(n) IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0004 + 2 * (n))
+
+/* netlist ID block field offsets (word offsets) */
+#define IXGBE_NETLIST_ID_BLK_MAJOR_VER_LOW 0x02
+#define IXGBE_NETLIST_ID_BLK_MAJOR_VER_HIGH 0x03
+#define IXGBE_NETLIST_ID_BLK_MINOR_VER_LOW 0x04
+#define IXGBE_NETLIST_ID_BLK_MINOR_VER_HIGH 0x05
+#define IXGBE_NETLIST_ID_BLK_TYPE_LOW 0x06
+#define IXGBE_NETLIST_ID_BLK_TYPE_HIGH 0x07
+#define IXGBE_NETLIST_ID_BLK_REV_LOW 0x08
+#define IXGBE_NETLIST_ID_BLK_REV_HIGH 0x09
+#define IXGBE_NETLIST_ID_BLK_SHA_HASH_WORD(n) (0x0A + (n))
+#define IXGBE_NETLIST_ID_BLK_CUST_VER 0x2F
+
+/* The Link Topology Netlist section is stored as a series of words. It is
+ * stored in the NVM as a TLV, with the first two words containing the type
+ * and length.
+ */
+#define IXGBE_NETLIST_LINK_TOPO_MOD_ID 0x011B
+#define IXGBE_NETLIST_TYPE_OFFSET 0x0000
+#define IXGBE_NETLIST_LEN_OFFSET 0x0001
+
+/* The Link Topology section follows the TLV header. When reading the netlist
+ * using ixgbe_read_netlist_module, we need to account for the 2-word TLV
+ * header.
+ */
+#define IXGBE_NETLIST_LINK_TOPO_OFFSET(n) ((n) + 2)
+#define IXGBE_LINK_TOPO_MODULE_LEN IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0000)
+#define IXGBE_LINK_TOPO_NODE_COUNT IXGBE_NETLIST_LINK_TOPO_OFFSET(0x0001)
+#define IXGBE_LINK_TOPO_NODE_COUNT_M GENMASK_ULL(9, 0)
+
/* Firmware Status Register (GL_FWSTS) */
#define GL_FWSTS 0x00083048 /* Reset Source: POR */
#define GL_FWSTS_EP_PF0 BIT(24)
@@ -24,11 +88,23 @@
#define GLNVM_GENS 0x000B6100 /* Reset Source: POR */
#define GLNVM_GENS_SR_SIZE_M GENMASK(7, 5)
+#define IXGBE_GL_MNG_FWSM 0x000B6134 /* Reset Source: POR */
+#define IXGBE_GL_MNG_FWSM_RECOVERY_M BIT(1)
+#define IXGBE_GL_MNG_FWSM_ROLLBACK_M BIT(2)
+
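As a usage sketch, recovery or rollback mode would be probed by testing these bits in a GL_MNG_FWSM read. The helper below is hypothetical, assuming the standard IXGBE_READ_REG accessor:

	/* Hypothetical helper, not part of this patch. */
	static bool ixgbe_fw_in_recovery_sketch(struct ixgbe_hw *hw)
	{
		u32 fwsm = IXGBE_READ_REG(hw, IXGBE_GL_MNG_FWSM);

		return !!(fwsm & IXGBE_GL_MNG_FWSM_RECOVERY_M);
	}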
/* Flash Access Register */
#define IXGBE_GLNVM_FLA 0x000B6108 /* Reset Source: POR */
#define IXGBE_GLNVM_FLA_LOCKED_S 6
#define IXGBE_GLNVM_FLA_LOCKED_M BIT(6)
+/* Auxiliary field, mask, and shift definitions for Shadow RAM and NVM Flash */
+#define IXGBE_SR_CTRL_WORD_1_M GENMASK(7, 6)
+#define IXGBE_SR_CTRL_WORD_VALID BIT(0)
+#define IXGBE_SR_CTRL_WORD_OROM_BANK BIT(3)
+#define IXGBE_SR_CTRL_WORD_NETLIST_BANK BIT(4)
+#define IXGBE_SR_CTRL_WORD_NVM_BANK BIT(5)
+#define IXGBE_SR_NVM_PTR_4KB_UNITS BIT(15)
+
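The 4KB-units bit follows the pointer convention used elsewhere in Intel NVM maps (an assumption here, not stated by this patch): bit 15 set means the pointer counts 4KB blocks, clear means 2-byte words. A sketch of the conversion:

	static u32 ixgbe_nvm_ptr_to_bytes_sketch(u16 ptr)
	{
		/* Assumed convention: bit 15 selects 4KB units vs. words. */
		if (ptr & IXGBE_SR_NVM_PTR_4KB_UNITS)
			return (u32)(ptr & ~IXGBE_SR_NVM_PTR_4KB_UNITS) * 4096;
		return (u32)ptr * 2;
	}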
/* Admin Command Interface (ACI) registers */
#define IXGBE_PF_HIDA(_i) (0x00085000 + ((_i) * 4))
#define IXGBE_PF_HIDA_2(_i) (0x00085020 + ((_i) * 4))
@@ -40,6 +116,10 @@
#define IXGBE_PF_HICR_SV BIT(2)
#define IXGBE_PF_HICR_EV BIT(3)
+#define IXGBE_FW_API_VER_MAJOR 0x01
+#define IXGBE_FW_API_VER_MINOR 0x07
+#define IXGBE_FW_API_VER_DIFF_ALLOWED 0x02
+
#define IXGBE_ACI_DESC_SIZE 32
#define IXGBE_ACI_DESC_SIZE_IN_DWORDS (IXGBE_ACI_DESC_SIZE / BYTES_PER_DWORD)
@@ -761,6 +841,8 @@ struct ixgbe_aci_cmd_nvm {
#define IXGBE_ACI_NVM_MAX_OFFSET 0xFFFFFF
__le16 offset_low;
u8 offset_high; /* For Write Activate offset_high is used as flags2 */
+#define IXGBE_ACI_NVM_OFFSET_HI_A_MASK GENMASK(15, 8)
+#define IXGBE_ACI_NVM_OFFSET_HI_U_MASK GENMASK(23, 16)
u8 cmd_flags;
#define IXGBE_ACI_NVM_LAST_CMD BIT(0)
#define IXGBE_ACI_NVM_PCIR_REQ BIT(0) /* Used by NVM Write reply */
@@ -776,6 +858,9 @@ struct ixgbe_aci_cmd_nvm {
#define IXGBE_ACI_NVM_PERST_FLAG 1
#define IXGBE_ACI_NVM_EMPR_FLAG 2
#define IXGBE_ACI_NVM_EMPR_ENA BIT(0) /* Write Activate reply only */
+#define IXGBE_ACI_NVM_NO_PRESERVATION 0x0
+#define IXGBE_ACI_NVM_PRESERVE_SELECTED 0x6
+
/* For Write Activate, several flags are sent as part of a separate
* flags2 field using a separate byte. For simplicity of the software
* interface, we pass the flags as a 16 bit value so these flags are
@@ -805,6 +890,63 @@ struct ixgbe_aci_cmd_nvm_checksum {
u8 rsvd2[12];
};
+/* Used for NVM Set Package Data command - 0x070A */
+struct ixgbe_aci_cmd_nvm_pkg_data {
+ u8 reserved[3];
+ u8 cmd_flags;
+#define IXGBE_ACI_NVM_PKG_DELETE	BIT(0) /* used for the command call */
+
+ u32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+/* Used for Pass Component Table command - 0x070B */
+struct ixgbe_aci_cmd_nvm_pass_comp_tbl {
+ u8 component_response; /* Response only */
+#define IXGBE_ACI_NVM_PASS_COMP_CAN_BE_UPDATED 0x0
+#define IXGBE_ACI_NVM_PASS_COMP_CAN_MAY_BE_UPDATEABLE 0x1
+#define IXGBE_ACI_NVM_PASS_COMP_CAN_NOT_BE_UPDATED 0x2
+#define IXGBE_ACI_NVM_PASS_COMP_PARTIAL_CHECK 0x3
+ u8 component_response_code; /* Response only */
+#define IXGBE_ACI_NVM_PASS_COMP_CAN_BE_UPDATED_CODE 0x0
+#define IXGBE_ACI_NVM_PASS_COMP_STAMP_IDENTICAL_CODE 0x1
+#define IXGBE_ACI_NVM_PASS_COMP_STAMP_LOWER 0x2
+#define IXGBE_ACI_NVM_PASS_COMP_INVALID_STAMP_CODE 0x3
+#define IXGBE_ACI_NVM_PASS_COMP_CONFLICT_CODE 0x4
+#define IXGBE_ACI_NVM_PASS_COMP_PRE_REQ_NOT_MET_CODE 0x5
+#define IXGBE_ACI_NVM_PASS_COMP_NOT_SUPPORTED_CODE 0x6
+#define IXGBE_ACI_NVM_PASS_COMP_CANNOT_DOWNGRADE_CODE 0x7
+#define IXGBE_ACI_NVM_PASS_COMP_INCOMPLETE_IMAGE_CODE 0x8
+#define IXGBE_ACI_NVM_PASS_COMP_VER_STR_IDENTICAL_CODE 0xA
+#define IXGBE_ACI_NVM_PASS_COMP_VER_STR_LOWER_CODE 0xB
+ u8 reserved;
+ u8 transfer_flag;
+ __le32 reserved1;
+ __le32 addr_high;
+ __le32 addr_low;
+};
+
+struct ixgbe_aci_cmd_nvm_comp_tbl {
+ __le16 comp_class;
+#define NVM_COMP_CLASS_ALL_FW 0x000A
+
+ __le16 comp_id;
+#define NVM_COMP_ID_OROM 0x5
+#define NVM_COMP_ID_NVM 0x6
+#define NVM_COMP_ID_NETLIST 0x8
+
+ u8 comp_class_idx;
+#define FWU_COMP_CLASS_IDX_NOT_USE 0x0
+
+ __le32 comp_cmp_stamp;
+ u8 cvs_type;
+#define NVM_CVS_TYPE_ASCII 0x1
+
+ u8 cvs_len;
+ u8 cvs[]; /* Component Version String */
+} __packed;
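Since cvs is a flexible array of cvs_len bytes, a consumer would print it roughly as below. This is a sketch with a hypothetical function name; only the ASCII type is handled:

	static void
	ixgbe_print_comp_ver_sketch(const struct ixgbe_aci_cmd_nvm_comp_tbl *tbl)
	{
		if (tbl->cvs_type == NVM_CVS_TYPE_ASCII)
			pr_info("component 0x%x version: %.*s\n",
				le16_to_cpu(tbl->comp_id),
				tbl->cvs_len, (const char *)tbl->cvs);
	}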
+
/**
* struct ixgbe_aci_desc - Admin Command (AC) descriptor
* @flags: IXGBE_ACI_FLAG_* flags
@@ -848,6 +990,8 @@ struct ixgbe_aci_desc {
struct ixgbe_aci_cmd_sff_eeprom read_write_sff_param;
struct ixgbe_aci_cmd_nvm nvm;
struct ixgbe_aci_cmd_nvm_checksum nvm_checksum;
+ struct ixgbe_aci_cmd_nvm_pkg_data pkg_data;
+ struct ixgbe_aci_cmd_nvm_pass_comp_tbl pass_comp_tbl;
} params;
};
@@ -984,6 +1128,16 @@ struct ixgbe_hw_caps {
#define IXGBE_EXT_TOPO_DEV_IMG_PROG_EN BIT(1)
} __packed;
+#define IXGBE_OROM_CIV_SIGNATURE "$CIV"
+
+struct ixgbe_orom_civd_info {
+ u8 signature[4]; /* Must match ASCII '$CIV' characters */
+ u8 checksum; /* Simple modulo 256 sum of all structure bytes must equal 0 */
+ __le32 combo_ver; /* Combo Image Version number */
+ u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */
+ __le16 combo_name[32]; /* Unicode string representing the Combo Image version */
+};
+
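Per the field comments above, a validity check for a candidate $CIV blob reduces to a signature compare plus a byte sum that must be zero modulo 256. A minimal sketch; the driver's actual validation routine may differ:

	static bool ixgbe_civd_valid_sketch(const struct ixgbe_orom_civd_info *civd)
	{
		const u8 *p = (const u8 *)civd;
		u8 sum = 0;
		size_t i;

		if (memcmp(civd->signature, IXGBE_OROM_CIV_SIGNATURE, 4))
			return false;

		/* Sum of every byte, including the checksum byte, must be 0. */
		for (i = 0; i < sizeof(*civd); i++)
			sum += p[i];

		return sum == 0;
	}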
/* Function specific capabilities */
struct ixgbe_hw_func_caps {
u32 num_allocd_vfs; /* Number of allocated VFs */
@@ -1015,6 +1169,11 @@ struct ixgbe_aci_info {
enum ixgbe_aci_err last_status; /* last status of sent admin command */
};
+enum ixgbe_bank_select {
+ IXGBE_ACTIVE_FLASH_BANK,
+ IXGBE_INACTIVE_FLASH_BANK,
+};
+
/* Option ROM version information */
struct ixgbe_orom_info {
u8 major; /* Major version of OROM */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index 1fc821fb351a..f1ab95aa8c83 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -894,6 +894,7 @@ static const struct ixgbe_eeprom_operations eeprom_ops_X540 = {
.calc_checksum = &ixgbe_calc_eeprom_checksum_X540,
.validate_checksum = &ixgbe_validate_eeprom_checksum_X540,
.update_checksum = &ixgbe_update_eeprom_checksum_X540,
+ .read_pba_string = &ixgbe_read_pba_string_generic,
};
static const struct ixgbe_phy_operations phy_ops_X540 = {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 277ceaf8a793..1d2acdb64f45 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -3959,6 +3959,7 @@ static const struct ixgbe_mac_operations mac_ops_x550em_a_fw = {
.validate_checksum = &ixgbe_validate_eeprom_checksum_X550, \
.update_checksum = &ixgbe_update_eeprom_checksum_X550, \
.calc_checksum = &ixgbe_calc_eeprom_checksum_X550, \
+ .read_pba_string = &ixgbe_read_pba_string_generic, \
static const struct ixgbe_eeprom_operations eeprom_ops_X550 = {
X550_COMMON_EEP
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 3e3b471e53f0..ac58964b2f08 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -508,7 +508,7 @@ bool ixgbe_clean_xdp_tx_irq(struct ixgbe_q_vector *q_vector,
int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
+ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev);
struct ixgbe_ring *ring;
if (test_bit(__IXGBE_DOWN, &adapter->state))
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
index 18c922dd5fc6..5841e30dff2a 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c
@@ -18,8 +18,6 @@
#include "octep_vf_config.h"
#include "octep_vf_main.h"
-struct workqueue_struct *octep_vf_wq;
-
/* Supported Devices */
static const struct pci_device_id octep_vf_pci_id_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_VF)},
diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h
index 1a352f41f823..b9f13506f462 100644
--- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h
+++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h
@@ -320,8 +320,6 @@ static inline u16 OCTEP_VF_MINOR_REV(struct octep_vf_device *oct)
#define octep_vf_read_csr64(octep_vf_dev, reg_off) \
readq((octep_vf_dev)->mmio.hw_addr + (reg_off))
-extern struct workqueue_struct *octep_vf_wq;
-
int octep_vf_device_setup(struct octep_vf_device *oct);
int octep_vf_setup_iqs(struct octep_vf_device *oct);
void octep_vf_free_iqs(struct octep_vf_device *oct);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index c3b6e0f60a79..7f6a435ac680 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -357,9 +357,12 @@ int cn10k_free_matchall_ipolicer(struct otx2_nic *pfvf)
mutex_lock(&pfvf->mbox.lock);
/* Remove RQ's policer mapping */
- for (qidx = 0; qidx < hw->rx_queues; qidx++)
- cn10k_map_unmap_rq_policer(pfvf, qidx,
- hw->matchall_ipolicer, false);
+ for (qidx = 0; qidx < hw->rx_queues; qidx++) {
+ rc = cn10k_map_unmap_rq_policer(pfvf, qidx, hw->matchall_ipolicer, false);
+ if (rc)
+			dev_warn(pfvf->dev, "Failed to unmap RQ %d's policer (error %d)\n",
+				 qidx, rc);
+ }
rc = cn10k_free_leaf_profile(pfvf, hw->matchall_ipolicer);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 568bbe5f83f5..d292e6a9e22c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -154,7 +154,8 @@ mlx5_core-$(CONFIG_MLX5_HW_STEERING) += steering/hws/cmd.o \
steering/hws/vport.o \
steering/hws/bwc_complex.o \
steering/hws/fs_hws_pools.o \
- steering/hws/fs_hws.o
+ steering/hws/fs_hws.o \
+ steering/hws/action_ste_pool.o
#
# SF device
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index f803e1c93590..5ce1b463b7a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -707,8 +707,8 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
xdpi = mlx5e_xdpi_fifo_pop(xdpi_fifo);
page = xdpi.page.page;
- /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
- * as we know this is a page_pool page.
+ /* No need to check page_pool_page_is_pp() as we
+ * know this is a page_pool page.
*/
page_pool_recycle_direct(page->pp, page);
} while (++n < num);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 65a94e46edcf..cec18efadc73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -643,13 +643,6 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp,
int pin = -1;
int err = 0;
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
/* Reject requests to enable time stamping on both edges. */
if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
(rq->extts.flags & PTP_ENABLE_FEATURE) &&
@@ -820,12 +813,6 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo
return 0;
}
-static bool mlx5_perout_verify_flags(struct mlx5_core_dev *mdev, unsigned int flags)
-{
- return ((!mlx5_npps_real_time_supported(mdev) && flags) ||
- (mlx5_npps_real_time_supported(mdev) && flags & ~PTP_PEROUT_DUTY_CYCLE));
-}
-
static int mlx5_perout_configure(struct ptp_clock_info *ptp,
struct ptp_clock_request *rq,
int on)
@@ -861,12 +848,6 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
goto unlock;
}
- /* Reject requests with unsupported flags */
- if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) {
- err = -EOPNOTSUPP;
- goto unlock;
- }
-
if (on) {
pin_mode = MLX5_PIN_MODE_OUT;
pattern = MLX5_OUT_PATTERN_PERIODIC;
@@ -1034,6 +1015,13 @@ static void mlx5_init_pin_config(struct mlx5_core_dev *mdev)
clock->ptp_info.verify = mlx5_ptp_verify;
clock->ptp_info.pps = 1;
+ clock->ptp_info.supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
+
+ if (mlx5_npps_real_time_supported(mdev))
+ clock->ptp_info.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE;
+
for (i = 0; i < clock->ptp_info.n_pins; i++) {
snprintf(clock->ptp_info.pin_config[i].name,
sizeof(clock->ptp_info.pin_config[i].name),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
index b5332c54d4fb..bef4d25c1a2a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
@@ -238,6 +238,7 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx,
enum mlx5hws_table_type table_type,
bool is_mirror)
{
+ struct mlx5hws_pool *pool;
bool use_fixup = false;
u32 fw_tbl_type;
u32 base_id;
@@ -253,13 +254,11 @@ hws_action_fixup_stc_attr(struct mlx5hws_context *ctx,
use_fixup = true;
break;
}
+ pool = stc_attr->ste_table.ste_pool;
if (!is_mirror)
- base_id = mlx5hws_pool_chunk_get_base_id(stc_attr->ste_table.ste_pool,
- &stc_attr->ste_table.ste);
+ base_id = mlx5hws_pool_get_base_id(pool);
else
- base_id =
- mlx5hws_pool_chunk_get_base_mirror_id(stc_attr->ste_table.ste_pool,
- &stc_attr->ste_table.ste);
+ base_id = mlx5hws_pool_get_base_mirror_id(pool);
*fixup_stc_attr = *stc_attr;
fixup_stc_attr->ste_table.ste_obj_id = base_id;
@@ -337,7 +336,7 @@ __must_hold(&ctx->ctrl_lock)
if (!mlx5hws_context_cap_dynamic_reparse(ctx))
stc_attr->reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
- obj_0_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc);
+ obj_0_id = mlx5hws_pool_get_base_id(stc_pool);
/* According to table/action limitation change the stc_attr */
use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr, &fixup_stc_attr, table_type, false);
@@ -353,7 +352,7 @@ __must_hold(&ctx->ctrl_lock)
if (table_type == MLX5HWS_TABLE_TYPE_FDB) {
u32 obj_1_id;
- obj_1_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc);
+ obj_1_id = mlx5hws_pool_get_base_mirror_id(stc_pool);
use_fixup = hws_action_fixup_stc_attr(ctx, stc_attr,
&fixup_stc_attr,
@@ -393,11 +392,11 @@ __must_hold(&ctx->ctrl_lock)
stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_DROP;
stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
stc_attr.stc_offset = stc->offset;
- obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, stc);
+ obj_id = mlx5hws_pool_get_base_id(stc_pool);
mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr);
if (table_type == MLX5HWS_TABLE_TYPE_FDB) {
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, stc);
+ obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool);
mlx5hws_cmd_stc_modify(ctx->mdev, obj_id, &stc_attr);
}
@@ -1575,17 +1574,15 @@ hws_action_create_dest_match_range_definer(struct mlx5hws_context *ctx)
return definer;
}
-static struct mlx5hws_matcher_action_ste *
+static struct mlx5hws_range_action_table *
hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
struct mlx5hws_definer *definer,
u32 miss_ft_id)
{
struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0};
- struct mlx5hws_action_default_stc *default_stc;
- struct mlx5hws_matcher_action_ste *table_ste;
+ struct mlx5hws_range_action_table *table_ste;
struct mlx5hws_pool_attr pool_attr = {0};
struct mlx5hws_pool *ste_pool, *stc_pool;
- struct mlx5hws_pool_chunk *ste;
u32 *rtc_0_id, *rtc_1_id;
u32 obj_id;
int ret;
@@ -1604,7 +1601,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB;
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
- pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL;
pool_attr.alloc_log_sz = 1;
table_ste->pool = mlx5hws_pool_create(ctx, &pool_attr);
if (!table_ste->pool) {
@@ -1616,8 +1612,6 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
rtc_0_id = &table_ste->rtc_0_id;
rtc_1_id = &table_ste->rtc_1_id;
ste_pool = table_ste->pool;
- ste = &table_ste->ste;
- ste->order = 1;
rtc_attr.log_size = 0;
rtc_attr.log_depth = 0;
@@ -1629,18 +1623,16 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
rtc_attr.fw_gen_wqe = true;
rtc_attr.is_scnd_range = true;
- obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
+ obj_id = mlx5hws_pool_get_base_id(ste_pool);
rtc_attr.pd = ctx->pd_num;
rtc_attr.ste_base = obj_id;
- rtc_attr.ste_offset = ste->offset;
rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx);
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, false);
/* STC is a single resource (obj_id), use any STC for the ID */
stc_pool = ctx->stc_pool;
- default_stc = ctx->common_res.default_stc;
- obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit);
+ obj_id = mlx5hws_pool_get_base_id(stc_pool);
rtc_attr.stc_base = obj_id;
ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id);
@@ -1650,11 +1642,11 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
}
/* Create mirror RTC */
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste);
+ obj_id = mlx5hws_pool_get_base_mirror_id(ste_pool);
rtc_attr.ste_base = obj_id;
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, true);
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit);
+ obj_id = mlx5hws_pool_get_base_mirror_id(stc_pool);
rtc_attr.stc_base = obj_id;
ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id);
@@ -1677,9 +1669,9 @@ free_ste:
return NULL;
}
-static void
-hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx,
- struct mlx5hws_matcher_action_ste *table_ste)
+static void hws_action_destroy_dest_match_range_table(
+ struct mlx5hws_context *ctx,
+ struct mlx5hws_range_action_table *table_ste)
{
mutex_lock(&ctx->ctrl_lock);
@@ -1691,12 +1683,11 @@ hws_action_destroy_dest_match_range_table(struct mlx5hws_context *ctx,
mutex_unlock(&ctx->ctrl_lock);
}
-static int
-hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx,
- struct mlx5hws_matcher_action_ste *table_ste,
- struct mlx5hws_action *hit_ft_action,
- struct mlx5hws_definer *range_definer,
- u32 min, u32 max)
+static int hws_action_create_dest_match_range_fill_table(
+ struct mlx5hws_context *ctx,
+ struct mlx5hws_range_action_table *table_ste,
+ struct mlx5hws_action *hit_ft_action,
+ struct mlx5hws_definer *range_definer, u32 min, u32 max)
{
struct mlx5hws_wqe_gta_data_seg_ste match_wqe_data = {0};
struct mlx5hws_wqe_gta_data_seg_ste range_wqe_data = {0};
@@ -1792,7 +1783,7 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx,
u32 min, u32 max, u32 flags)
{
struct mlx5hws_cmd_stc_modify_attr stc_attr = {0};
- struct mlx5hws_matcher_action_ste *table_ste;
+ struct mlx5hws_range_action_table *table_ste;
struct mlx5hws_action *hit_ft_action;
struct mlx5hws_definer *definer;
struct mlx5hws_action *action;
@@ -1837,7 +1828,6 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx,
stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE;
stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
- stc_attr.ste_table.ste = table_ste->ste;
stc_attr.ste_table.ste_pool = table_ste->pool;
stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
index 64b76075f7f8..25fa0d4c9221 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
@@ -118,6 +118,12 @@ struct mlx5hws_action_template {
u8 only_term;
};
+struct mlx5hws_range_action_table {
+ struct mlx5hws_pool *pool;
+ u32 rtc_0_id;
+ u32 rtc_1_id;
+};
+
struct mlx5hws_action {
u8 type;
u8 flags;
@@ -186,7 +192,7 @@ struct mlx5hws_action {
size_t size;
} remove_header;
struct {
- struct mlx5hws_matcher_action_ste *table_ste;
+ struct mlx5hws_range_action_table *table_ste;
struct mlx5hws_action *hit_ft_action;
struct mlx5hws_definer *definer;
} range;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c
new file mode 100644
index 000000000000..5766a9c82f96
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.c
@@ -0,0 +1,467 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */
+
+#include "internal.h"
+
+static const char *
+hws_pool_opt_to_str(enum mlx5hws_pool_optimize opt)
+{
+ switch (opt) {
+ case MLX5HWS_POOL_OPTIMIZE_NONE:
+ return "rx-and-tx";
+ case MLX5HWS_POOL_OPTIMIZE_ORIG:
+ return "rx-only";
+ case MLX5HWS_POOL_OPTIMIZE_MIRROR:
+ return "tx-only";
+ default:
+ return "unknown";
+ }
+}
+
+static int
+hws_action_ste_table_create_pool(struct mlx5hws_context *ctx,
+ struct mlx5hws_action_ste_table *action_tbl,
+ enum mlx5hws_pool_optimize opt, size_t log_sz)
+{
+ struct mlx5hws_pool_attr pool_attr = { 0 };
+
+ pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
+ pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB;
+ pool_attr.flags = MLX5HWS_POOL_FLAG_BUDDY;
+ pool_attr.opt_type = opt;
+ pool_attr.alloc_log_sz = log_sz;
+
+ action_tbl->pool = mlx5hws_pool_create(ctx, &pool_attr);
+ if (!action_tbl->pool) {
+ mlx5hws_err(ctx, "Failed to allocate STE pool\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int hws_action_ste_table_create_single_rtc(
+ struct mlx5hws_context *ctx,
+ struct mlx5hws_action_ste_table *action_tbl,
+ enum mlx5hws_pool_optimize opt, size_t log_sz, bool tx)
+{
+ struct mlx5hws_cmd_rtc_create_attr rtc_attr = { 0 };
+ u32 *rtc_id;
+
+ rtc_attr.log_depth = 0;
+ rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
+	/* Action STEs use the default always-hit definer. */
+ rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer;
+ rtc_attr.is_frst_jumbo = false;
+ rtc_attr.miss_ft_id = 0;
+ rtc_attr.pd = ctx->pd_num;
+ rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx);
+
+ if (tx) {
+ rtc_attr.table_type = FS_FT_FDB_TX;
+ rtc_attr.ste_base =
+ mlx5hws_pool_get_base_mirror_id(action_tbl->pool);
+ rtc_attr.stc_base =
+ mlx5hws_pool_get_base_mirror_id(ctx->stc_pool);
+ rtc_attr.log_size =
+ opt == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_sz;
+ rtc_id = &action_tbl->rtc_1_id;
+ } else {
+ rtc_attr.table_type = FS_FT_FDB_RX;
+ rtc_attr.ste_base = mlx5hws_pool_get_base_id(action_tbl->pool);
+ rtc_attr.stc_base = mlx5hws_pool_get_base_id(ctx->stc_pool);
+ rtc_attr.log_size =
+ opt == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_sz;
+ rtc_id = &action_tbl->rtc_0_id;
+ }
+
+ return mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_id);
+}
+
+static int
+hws_action_ste_table_create_rtcs(struct mlx5hws_context *ctx,
+ struct mlx5hws_action_ste_table *action_tbl,
+ enum mlx5hws_pool_optimize opt, size_t log_sz)
+{
+ int err;
+
+ err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt,
+ log_sz, false);
+ if (err)
+ return err;
+
+ err = hws_action_ste_table_create_single_rtc(ctx, action_tbl, opt,
+ log_sz, true);
+ if (err) {
+ mlx5hws_cmd_rtc_destroy(ctx->mdev, action_tbl->rtc_0_id);
+ return err;
+ }
+
+ return 0;
+}
+
+static void
+hws_action_ste_table_destroy_rtcs(struct mlx5hws_action_ste_table *action_tbl)
+{
+ mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev,
+ action_tbl->rtc_1_id);
+ mlx5hws_cmd_rtc_destroy(action_tbl->pool->ctx->mdev,
+ action_tbl->rtc_0_id);
+}
+
+static int
+hws_action_ste_table_create_stc(struct mlx5hws_context *ctx,
+ struct mlx5hws_action_ste_table *action_tbl)
+{
+ struct mlx5hws_cmd_stc_modify_attr stc_attr = { 0 };
+
+ stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
+ stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE;
+ stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
+ stc_attr.ste_table.ste_pool = action_tbl->pool;
+ stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer;
+
+ return mlx5hws_action_alloc_single_stc(ctx, &stc_attr,
+ MLX5HWS_TABLE_TYPE_FDB,
+ &action_tbl->stc);
+}
+
+static struct mlx5hws_action_ste_table *
+hws_action_ste_table_alloc(struct mlx5hws_action_ste_pool_element *parent_elem)
+{
+ enum mlx5hws_pool_optimize opt = parent_elem->opt;
+ struct mlx5hws_context *ctx = parent_elem->ctx;
+ struct mlx5hws_action_ste_table *action_tbl;
+ size_t log_sz;
+ int err;
+
+ log_sz = min(parent_elem->log_sz ?
+ parent_elem->log_sz +
+ MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ :
+ MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ,
+ MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ);
+
+ action_tbl = kzalloc(sizeof(*action_tbl), GFP_KERNEL);
+ if (!action_tbl)
+ return ERR_PTR(-ENOMEM);
+
+ err = hws_action_ste_table_create_pool(ctx, action_tbl, opt, log_sz);
+ if (err)
+ goto free_tbl;
+
+ err = hws_action_ste_table_create_rtcs(ctx, action_tbl, opt, log_sz);
+ if (err)
+ goto destroy_pool;
+
+ err = hws_action_ste_table_create_stc(ctx, action_tbl);
+ if (err)
+ goto destroy_rtcs;
+
+ action_tbl->parent_elem = parent_elem;
+ INIT_LIST_HEAD(&action_tbl->list_node);
+ action_tbl->last_used = jiffies;
+ list_add(&action_tbl->list_node, &parent_elem->available);
+ parent_elem->log_sz = log_sz;
+
+ mlx5hws_dbg(ctx,
+ "Allocated %s action STE table log_sz %zu; STEs (%d, %d); RTCs (%d, %d); STC %d\n",
+ hws_pool_opt_to_str(opt), log_sz,
+ mlx5hws_pool_get_base_id(action_tbl->pool),
+ mlx5hws_pool_get_base_mirror_id(action_tbl->pool),
+ action_tbl->rtc_0_id, action_tbl->rtc_1_id,
+ action_tbl->stc.offset);
+
+ return action_tbl;
+
+destroy_rtcs:
+ hws_action_ste_table_destroy_rtcs(action_tbl);
+destroy_pool:
+ mlx5hws_pool_destroy(action_tbl->pool);
+free_tbl:
+ kfree(action_tbl);
+
+ return ERR_PTR(err);
+}
+
+static void
+hws_action_ste_table_destroy(struct mlx5hws_action_ste_table *action_tbl)
+{
+ struct mlx5hws_context *ctx = action_tbl->parent_elem->ctx;
+
+ mlx5hws_dbg(ctx,
+ "Destroying %s action STE table: STEs (%d, %d); RTCs (%d, %d); STC %d\n",
+ hws_pool_opt_to_str(action_tbl->parent_elem->opt),
+ mlx5hws_pool_get_base_id(action_tbl->pool),
+ mlx5hws_pool_get_base_mirror_id(action_tbl->pool),
+ action_tbl->rtc_0_id, action_tbl->rtc_1_id,
+ action_tbl->stc.offset);
+
+ mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB,
+ &action_tbl->stc);
+ hws_action_ste_table_destroy_rtcs(action_tbl);
+ mlx5hws_pool_destroy(action_tbl->pool);
+
+ list_del(&action_tbl->list_node);
+ kfree(action_tbl);
+}
+
+static int
+hws_action_ste_pool_element_init(struct mlx5hws_context *ctx,
+ struct mlx5hws_action_ste_pool_element *elem,
+ enum mlx5hws_pool_optimize opt)
+{
+ elem->ctx = ctx;
+ elem->opt = opt;
+ INIT_LIST_HEAD(&elem->available);
+ INIT_LIST_HEAD(&elem->full);
+
+ return 0;
+}
+
+static void hws_action_ste_pool_element_destroy(
+ struct mlx5hws_action_ste_pool_element *elem)
+{
+ struct mlx5hws_action_ste_table *action_tbl, *p;
+
+ /* This should be empty, but attempt to free its elements anyway. */
+ list_for_each_entry_safe(action_tbl, p, &elem->full, list_node)
+ hws_action_ste_table_destroy(action_tbl);
+
+ list_for_each_entry_safe(action_tbl, p, &elem->available, list_node)
+ hws_action_ste_table_destroy(action_tbl);
+}
+
+static int hws_action_ste_pool_init(struct mlx5hws_context *ctx,
+ struct mlx5hws_action_ste_pool *pool)
+{
+ enum mlx5hws_pool_optimize opt;
+ int err;
+
+ mutex_init(&pool->lock);
+
+ /* Rules which are added for both RX and TX must use the same action STE
+ * indices for both. If we were to use a single table, then RX-only and
+ * TX-only rules would waste the unused entries. Thus, we use separate
+ * table sets for the three cases.
+ */
+ for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX;
+ opt++) {
+ err = hws_action_ste_pool_element_init(ctx, &pool->elems[opt],
+ opt);
+ if (err)
+ goto destroy_elems;
+ pool->elems[opt].parent_pool = pool;
+ }
+
+ return 0;
+
+destroy_elems:
+ while (opt-- > MLX5HWS_POOL_OPTIMIZE_NONE)
+ hws_action_ste_pool_element_destroy(&pool->elems[opt]);
+
+ return err;
+}
+
+static void hws_action_ste_pool_destroy(struct mlx5hws_action_ste_pool *pool)
+{
+ int opt;
+
+ for (opt = MLX5HWS_POOL_OPTIMIZE_MAX - 1;
+ opt >= MLX5HWS_POOL_OPTIMIZE_NONE; opt--)
+ hws_action_ste_pool_element_destroy(&pool->elems[opt]);
+}
+
+static void hws_action_ste_pool_element_collect_stale(
+ struct mlx5hws_action_ste_pool_element *elem, struct list_head *cleanup)
+{
+ struct mlx5hws_action_ste_table *action_tbl, *p;
+ unsigned long expire_time, now;
+
+ expire_time = secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS);
+ now = jiffies;
+
+ list_for_each_entry_safe(action_tbl, p, &elem->available, list_node) {
+ if (mlx5hws_pool_full(action_tbl->pool) &&
+ time_before(action_tbl->last_used + expire_time, now))
+ list_move(&action_tbl->list_node, cleanup);
+ }
+}
+
+static void hws_action_ste_table_cleanup_list(struct list_head *cleanup)
+{
+ struct mlx5hws_action_ste_table *action_tbl, *p;
+
+ list_for_each_entry_safe(action_tbl, p, cleanup, list_node)
+ hws_action_ste_table_destroy(action_tbl);
+}
+
+static void hws_action_ste_pool_cleanup(struct work_struct *work)
+{
+ enum mlx5hws_pool_optimize opt;
+ struct mlx5hws_context *ctx;
+ LIST_HEAD(cleanup);
+ int i;
+
+ ctx = container_of(work, struct mlx5hws_context,
+ action_ste_cleanup.work);
+
+ for (i = 0; i < ctx->queues; i++) {
+ struct mlx5hws_action_ste_pool *p = &ctx->action_ste_pool[i];
+
+ mutex_lock(&p->lock);
+ for (opt = MLX5HWS_POOL_OPTIMIZE_NONE;
+ opt < MLX5HWS_POOL_OPTIMIZE_MAX; opt++)
+ hws_action_ste_pool_element_collect_stale(
+ &p->elems[opt], &cleanup);
+ mutex_unlock(&p->lock);
+ }
+
+ hws_action_ste_table_cleanup_list(&cleanup);
+
+ schedule_delayed_work(&ctx->action_ste_cleanup,
+ secs_to_jiffies(
+ MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS));
+}
+
+int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx)
+{
+ struct mlx5hws_action_ste_pool *pool;
+ size_t queues = ctx->queues;
+ int i, err;
+
+ pool = kcalloc(queues, sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return -ENOMEM;
+
+ for (i = 0; i < queues; i++) {
+ err = hws_action_ste_pool_init(ctx, &pool[i]);
+ if (err)
+ goto free_pool;
+ }
+
+ ctx->action_ste_pool = pool;
+
+ INIT_DELAYED_WORK(&ctx->action_ste_cleanup,
+ hws_action_ste_pool_cleanup);
+ schedule_delayed_work(
+ &ctx->action_ste_cleanup,
+ secs_to_jiffies(MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS));
+
+ return 0;
+
+free_pool:
+ while (i--)
+ hws_action_ste_pool_destroy(&pool[i]);
+ kfree(pool);
+
+ return err;
+}
+
+void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx)
+{
+ size_t queues = ctx->queues;
+ int i;
+
+ cancel_delayed_work_sync(&ctx->action_ste_cleanup);
+
+ for (i = 0; i < queues; i++)
+ hws_action_ste_pool_destroy(&ctx->action_ste_pool[i]);
+
+ kfree(ctx->action_ste_pool);
+}
+
+static struct mlx5hws_action_ste_pool_element *
+hws_action_ste_choose_elem(struct mlx5hws_action_ste_pool *pool,
+ bool skip_rx, bool skip_tx)
+{
+ if (skip_rx)
+ return &pool->elems[MLX5HWS_POOL_OPTIMIZE_MIRROR];
+
+ if (skip_tx)
+ return &pool->elems[MLX5HWS_POOL_OPTIMIZE_ORIG];
+
+ return &pool->elems[MLX5HWS_POOL_OPTIMIZE_NONE];
+}
+
+static int
+hws_action_ste_table_chunk_alloc(struct mlx5hws_action_ste_table *action_tbl,
+ struct mlx5hws_action_ste_chunk *chunk)
+{
+ int err;
+
+ err = mlx5hws_pool_chunk_alloc(action_tbl->pool, &chunk->ste);
+ if (err)
+ return err;
+
+ chunk->action_tbl = action_tbl;
+ action_tbl->last_used = jiffies;
+
+ return 0;
+}
+
+int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool,
+ bool skip_rx, bool skip_tx,
+ struct mlx5hws_action_ste_chunk *chunk)
+{
+ struct mlx5hws_action_ste_pool_element *elem;
+ struct mlx5hws_action_ste_table *action_tbl;
+ bool found;
+ int err;
+
+ if (skip_rx && skip_tx)
+ return -EINVAL;
+
+ mutex_lock(&pool->lock);
+
+ elem = hws_action_ste_choose_elem(pool, skip_rx, skip_tx);
+
+ mlx5hws_dbg(elem->ctx,
+ "Allocating action STEs skip_rx %d skip_tx %d order %d\n",
+ skip_rx, skip_tx, chunk->ste.order);
+
+ found = false;
+ list_for_each_entry(action_tbl, &elem->available, list_node) {
+ if (!hws_action_ste_table_chunk_alloc(action_tbl, chunk)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ action_tbl = hws_action_ste_table_alloc(elem);
+ if (IS_ERR(action_tbl)) {
+ err = PTR_ERR(action_tbl);
+ goto out;
+ }
+
+ err = hws_action_ste_table_chunk_alloc(action_tbl, chunk);
+ if (err)
+ goto out;
+ }
+
+ if (mlx5hws_pool_empty(action_tbl->pool))
+ list_move(&action_tbl->list_node, &elem->full);
+
+ err = 0;
+
+out:
+ mutex_unlock(&pool->lock);
+
+ return err;
+}
+
+void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk)
+{
+ struct mutex *lock = &chunk->action_tbl->parent_elem->parent_pool->lock;
+
+ mlx5hws_dbg(chunk->action_tbl->pool->ctx,
+ "Freeing action STEs offset %d order %d\n",
+ chunk->ste.offset, chunk->ste.order);
+
+ mutex_lock(lock);
+ mlx5hws_pool_chunk_free(chunk->action_tbl->pool, &chunk->ste);
+ chunk->action_tbl->last_used = jiffies;
+ list_move(&chunk->action_tbl->list_node,
+ &chunk->action_tbl->parent_elem->available);
+ mutex_unlock(lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h
new file mode 100644
index 000000000000..a8ba97359e31
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action_ste_pool.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */
+
+#ifndef ACTION_STE_POOL_H_
+#define ACTION_STE_POOL_H_
+
+#define MLX5HWS_ACTION_STE_TABLE_INIT_LOG_SZ 10
+#define MLX5HWS_ACTION_STE_TABLE_STEP_LOG_SZ 1
+#define MLX5HWS_ACTION_STE_TABLE_MAX_LOG_SZ 20
+
+#define MLX5HWS_ACTION_STE_POOL_CLEANUP_SECONDS 300
+#define MLX5HWS_ACTION_STE_POOL_EXPIRE_SECONDS 300
+
+struct mlx5hws_action_ste_pool_element;
+
+struct mlx5hws_action_ste_table {
+ struct mlx5hws_action_ste_pool_element *parent_elem;
+	/* Wraps the RTC and STE range for this action table. */
+ struct mlx5hws_pool *pool;
+ /* Match STEs use this STC to jump to this pool's RTC. */
+ struct mlx5hws_pool_chunk stc;
+ u32 rtc_0_id;
+ u32 rtc_1_id;
+ struct list_head list_node;
+ unsigned long last_used;
+};
+
+struct mlx5hws_action_ste_pool_element {
+ struct mlx5hws_context *ctx;
+ struct mlx5hws_action_ste_pool *parent_pool;
+ size_t log_sz; /* Size of the largest table so far. */
+ enum mlx5hws_pool_optimize opt;
+ struct list_head available;
+ struct list_head full;
+};
+
+/* Central repository of action STEs. The context contains one of these pools
+ * per queue.
+ */
+struct mlx5hws_action_ste_pool {
+ /* Protects the entire pool. We have one pool per queue and only one
+ * operation can be active per rule at a given time. Thus this lock
+ * protects solely against concurrent garbage collection and we expect
+ * very little contention.
+ */
+ struct mutex lock;
+ struct mlx5hws_action_ste_pool_element elems[MLX5HWS_POOL_OPTIMIZE_MAX];
+};
+
+/* A chunk of STEs and the table it was allocated from. Used by rules. */
+struct mlx5hws_action_ste_chunk {
+ struct mlx5hws_action_ste_table *action_tbl;
+ struct mlx5hws_pool_chunk ste;
+};
+
+int mlx5hws_action_ste_pool_init(struct mlx5hws_context *ctx);
+
+void mlx5hws_action_ste_pool_uninit(struct mlx5hws_context *ctx);
+
+/* Callers are expected to fill chunk->ste.order. On success, this function
+ * populates chunk->action_tbl and chunk->ste.offset.
+ */
+int mlx5hws_action_ste_chunk_alloc(struct mlx5hws_action_ste_pool *pool,
+ bool skip_rx, bool skip_tx,
+ struct mlx5hws_action_ste_chunk *chunk);
+
+void mlx5hws_action_ste_chunk_free(struct mlx5hws_action_ste_chunk *chunk);
+
+#endif /* ACTION_STE_POOL_H_ */
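To tie the API together, a hedged usage sketch: a caller that needs four action STEs (order 2) visible on both RX and TX fills chunk.ste.order, allocates from its queue's pool, and later frees via the chunk. Here ctx and queue_id are assumed inputs and the function name is illustrative:

	static int hws_rule_action_ste_example(struct mlx5hws_context *ctx,
					       int queue_id)
	{
		struct mlx5hws_action_ste_chunk chunk = {};
		int err;

		chunk.ste.order = 2; /* log2 of the number of STEs needed */
		err = mlx5hws_action_ste_chunk_alloc(&ctx->action_ste_pool[queue_id],
						     false, false, &chunk);
		if (err)
			return err;

		/* ... write action STEs at chunk.ste.offset via chunk.action_tbl ... */

		mlx5hws_action_ste_chunk_free(&chunk);
		return 0;
	}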
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
index 19dce1ba512d..510bfbbe5991 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
@@ -90,13 +90,19 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher,
bwc_matcher->priority = priority;
bwc_matcher->size_log = MLX5HWS_BWC_MATCHER_INIT_SIZE_LOG;
+ bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM;
+ bwc_matcher->at = kcalloc(bwc_matcher->size_of_at_array,
+ sizeof(*bwc_matcher->at), GFP_KERNEL);
+ if (!bwc_matcher->at)
+ goto free_bwc_matcher_rules;
+
/* create dummy action template */
bwc_matcher->at[0] =
mlx5hws_action_template_create(action_types ?
action_types : init_action_types);
if (!bwc_matcher->at[0]) {
mlx5hws_err(table->ctx, "BWC matcher: failed creating action template\n");
- goto free_bwc_matcher_rules;
+ goto free_bwc_matcher_at_array;
}
bwc_matcher->num_of_at = 1;
@@ -126,6 +132,8 @@ free_mt:
mlx5hws_match_template_destroy(bwc_matcher->mt);
free_at:
mlx5hws_action_template_destroy(bwc_matcher->at[0]);
+free_bwc_matcher_at_array:
+ kfree(bwc_matcher->at);
free_bwc_matcher_rules:
kfree(bwc_matcher->rules);
err:
@@ -192,6 +200,7 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher)
for (i = 0; i < bwc_matcher->num_of_at; i++)
mlx5hws_action_template_destroy(bwc_matcher->at[i]);
+ kfree(bwc_matcher->at);
mlx5hws_match_template_destroy(bwc_matcher->mt);
kfree(bwc_matcher->rules);
@@ -469,21 +478,9 @@ hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher)
struct mlx5hws_cmd_query_caps *caps = bwc_matcher->matcher->tbl->ctx->caps;
/* check the match RTC size */
- if ((bwc_matcher->size_log +
- MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH +
- MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) >
- (caps->ste_alloc_log_max - 1))
- return true;
-
- /* check the action RTC size */
- if ((bwc_matcher->size_log +
- MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP +
- ilog2(roundup_pow_of_two(bwc_matcher->matcher->action_ste.max_stes)) +
- MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT) >
- (caps->ste_alloc_log_max - 1))
- return true;
-
- return false;
+ return (bwc_matcher->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH +
+ MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) >
+ (caps->ste_alloc_log_max - 1);
}
static bool
@@ -520,6 +517,23 @@ hws_bwc_matcher_extend_at(struct mlx5hws_bwc_matcher *bwc_matcher,
struct mlx5hws_rule_action rule_actions[])
{
enum mlx5hws_action_type action_types[MLX5HWS_BWC_MAX_ACTS];
+ void *p;
+
+ if (unlikely(bwc_matcher->num_of_at >= bwc_matcher->size_of_at_array)) {
+ if (bwc_matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT)
+ return -ENOMEM;
+ bwc_matcher->size_of_at_array *= 2;
+ p = krealloc(bwc_matcher->at,
+ bwc_matcher->size_of_at_array *
+ sizeof(*bwc_matcher->at),
+ __GFP_ZERO | GFP_KERNEL);
+ if (!p) {
+ bwc_matcher->size_of_at_array /= 2;
+ return -ENOMEM;
+ }
+
+ bwc_matcher->at = p;
+ }
hws_bwc_rule_actions_to_action_types(rule_actions, action_types);
@@ -753,19 +767,6 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher)
return hws_bwc_matcher_move(bwc_matcher);
}
-static int
-hws_bwc_matcher_rehash_at(struct mlx5hws_bwc_matcher *bwc_matcher)
-{
- /* Rehash by action template doesn't require any additional checking.
- * The bwc_matcher already contains the new action template.
- * Just do the usual rehash:
- * - create new matcher
- * - move all the rules to the new matcher
- * - destroy the old matcher
- */
- return hws_bwc_matcher_move(bwc_matcher);
-}
-
int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
u32 *match_param,
struct mlx5hws_rule_action rule_actions[],
@@ -805,23 +806,8 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
bwc_matcher->at[at_idx]);
if (unlikely(ret)) {
- /* Action template attach failed, possibly due to
- * requiring more action STEs.
- * Need to attempt creating new matcher with all
- * the action templates, including the new one.
- */
- ret = hws_bwc_matcher_rehash_at(bwc_matcher);
- if (unlikely(ret)) {
- mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]);
- bwc_matcher->at[at_idx] = NULL;
- bwc_matcher->num_of_at--;
-
- hws_bwc_unlock_all_queues(ctx);
-
- mlx5hws_err(ctx,
- "BWC rule insertion: rehash AT failed (%d)\n", ret);
- return ret;
- }
+ hws_bwc_unlock_all_queues(ctx);
+ return ret;
}
hws_bwc_unlock_all_queues(ctx);
@@ -975,24 +961,8 @@ hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule,
ret = mlx5hws_matcher_attach_at(bwc_matcher->matcher,
bwc_matcher->at[at_idx]);
if (unlikely(ret)) {
- /* Action template attach failed, possibly due to
- * requiring more action STEs.
- * Need to attempt creating new matcher with all
- * the action templates, including the new one.
- */
- ret = hws_bwc_matcher_rehash_at(bwc_matcher);
- if (unlikely(ret)) {
- mlx5hws_action_template_destroy(bwc_matcher->at[at_idx]);
- bwc_matcher->at[at_idx] = NULL;
- bwc_matcher->num_of_at--;
-
- hws_bwc_unlock_all_queues(ctx);
-
- mlx5hws_err(ctx,
- "BWC rule update: rehash AT failed (%d)\n",
- ret);
- return ret;
- }
+ hws_bwc_unlock_all_queues(ctx);
+ return ret;
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
index 47f7ed141553..bb0cf4b922ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
@@ -10,9 +10,7 @@
#define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32
/* Max number of AT attach operations for the same matcher.
- * When the limit is reached, next attempt to attach new AT
- * will result in creation of a new matcher and moving all
- * the rules to this matcher.
+ * When the limit is reached, a larger buffer is allocated for the ATs.
*/
#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8
@@ -23,10 +21,11 @@
struct mlx5hws_bwc_matcher {
struct mlx5hws_matcher *matcher;
struct mlx5hws_match_template *mt;
- struct mlx5hws_action_template *at[MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM];
- u32 priority;
+ struct mlx5hws_action_template **at;
u8 num_of_at;
+ u8 size_of_at_array;
u8 size_log;
+ u32 priority;
atomic_t num_of_rules;
struct list_head *rules;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
index e8f98c109b99..9c83753e4592 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
@@ -406,7 +406,6 @@ int mlx5hws_cmd_rtc_create(struct mlx5_core_dev *mdev,
MLX5_SET(rtc, attr, match_definer_1, rtc_attr->match_definer_1);
MLX5_SET(rtc, attr, stc_id, rtc_attr->stc_base);
MLX5_SET(rtc, attr, ste_table_base_id, rtc_attr->ste_base);
- MLX5_SET(rtc, attr, ste_table_offset, rtc_attr->ste_offset);
MLX5_SET(rtc, attr, miss_flow_table_id, rtc_attr->miss_ft_id);
MLX5_SET(rtc, attr, reparse_mode, rtc_attr->reparse_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
index 51d9e0291ac1..fa6bff210266 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
@@ -70,7 +70,6 @@ struct mlx5hws_cmd_rtc_create_attr {
u32 pd;
u32 stc_base;
u32 ste_base;
- u32 ste_offset;
u32 miss_ft_id;
bool fw_gen_wqe;
u8 update_index_mode;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
index 9cda2774fd64..428dae869706 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
@@ -34,7 +34,6 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx)
/* Create an STC pool per FT type */
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STC;
- pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STC_POOL;
max_log_sz = min(MLX5HWS_POOL_STC_LOG_SZ, ctx->caps->stc_alloc_log_max);
pool_attr.alloc_log_sz = max(max_log_sz, ctx->caps->stc_alloc_log_gran);
@@ -159,10 +158,16 @@ static int hws_context_init_hws(struct mlx5hws_context *ctx,
if (ret)
goto pools_uninit;
+ ret = mlx5hws_action_ste_pool_init(ctx);
+ if (ret)
+ goto close_queues;
+
INIT_LIST_HEAD(&ctx->tbl_list);
return 0;
+close_queues:
+ mlx5hws_send_queues_close(ctx);
pools_uninit:
hws_context_pools_uninit(ctx);
uninit_pd:
@@ -175,6 +180,7 @@ static void hws_context_uninit_hws(struct mlx5hws_context *ctx)
if (!(ctx->flags & MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT))
return;
+ mlx5hws_action_ste_pool_uninit(ctx);
mlx5hws_send_queues_close(ctx);
hws_context_pools_uninit(ctx);
hws_context_uninit_pd(ctx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
index 38c3647444ad..3f8938c73dc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
@@ -39,6 +39,8 @@ struct mlx5hws_context {
struct mlx5hws_cmd_query_caps *caps;
u32 pd_num;
struct mlx5hws_pool *stc_pool;
+ struct mlx5hws_action_ste_pool *action_ste_pool; /* One per queue */
+ struct delayed_work action_ste_cleanup;
struct mlx5hws_context_common_res common_res;
struct mlx5hws_pattern_cache *pattern_cache;
struct mlx5hws_definer_cache *definer_cache;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
index 696275fd0ce2..91568d6c1dac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
@@ -118,7 +118,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
{
enum mlx5hws_table_type tbl_type = matcher->tbl->type;
struct mlx5hws_cmd_ft_query_attr ft_attr = {0};
- struct mlx5hws_pool_chunk *ste;
struct mlx5hws_pool *ste_pool;
u64 icm_addr_0 = 0;
u64 icm_addr_1 = 0;
@@ -134,12 +133,11 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
matcher->end_ft_id,
matcher->col_matcher ? HWS_PTR_TO_ID(matcher->col_matcher) : 0);
- ste = &matcher->match_ste.ste;
ste_pool = matcher->match_ste.pool;
if (ste_pool) {
- ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
+ ste_0_id = mlx5hws_pool_get_base_id(ste_pool);
if (tbl_type == MLX5HWS_TABLE_TYPE_FDB)
- ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste);
+ ste_1_id = mlx5hws_pool_get_base_mirror_id(ste_pool);
}
seq_printf(f, ",%d,%d,%d,%d",
@@ -148,19 +146,6 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
matcher->match_ste.rtc_1_id,
(int)ste_1_id);
- ste = &matcher->action_ste.ste;
- ste_pool = matcher->action_ste.pool;
- if (ste_pool) {
- ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
- if (tbl_type == MLX5HWS_TABLE_TYPE_FDB)
- ste_1_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste);
- else
- ste_1_id = -1;
- } else {
- ste_0_id = -1;
- ste_1_id = -1;
- }
-
ft_attr.type = matcher->tbl->fw_ft_type;
ret = mlx5hws_cmd_flow_table_query(matcher->tbl->ctx->mdev,
matcher->end_ft_id,
@@ -170,10 +155,7 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
if (ret)
return ret;
- seq_printf(f, ",%d,%d,%d,%d,%d,0x%llx,0x%llx\n",
- matcher->action_ste.rtc_0_id, (int)ste_0_id,
- matcher->action_ste.rtc_1_id, (int)ste_1_id,
- 0,
+ seq_printf(f, ",-1,-1,-1,-1,0,0x%llx,0x%llx\n",
mlx5hws_debug_icm_to_idx(icm_addr_0),
mlx5hws_debug_icm_to_idx(icm_addr_1));
@@ -387,14 +369,17 @@ static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context
if (!stc_pool)
return 0;
- if (stc_pool->resource[0]) {
- ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->resource[0]);
+ if (stc_pool->resource) {
+ ret = hws_debug_dump_context_stc_resource(f, ctx,
+ stc_pool->resource);
if (ret)
return ret;
}
- if (stc_pool->mirror_resource[0]) {
- ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->mirror_resource[0]);
+ if (stc_pool->mirror_resource) {
+ struct mlx5hws_pool_resource *res = stc_pool->mirror_resource;
+
+ ret = hws_debug_dump_context_stc_resource(f, ctx, res);
if (ret)
return ret;
}
@@ -402,10 +387,41 @@ static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context
return 0;
}
+static void
+hws_debug_dump_action_ste_table(struct seq_file *f,
+ struct mlx5hws_action_ste_table *action_tbl)
+{
+ int ste_0_id = mlx5hws_pool_get_base_id(action_tbl->pool);
+ int ste_1_id = mlx5hws_pool_get_base_mirror_id(action_tbl->pool);
+
+ seq_printf(f, "%d,0x%llx,%d,%d,%d,%d\n",
+ MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE,
+ HWS_PTR_TO_ID(action_tbl),
+ action_tbl->rtc_0_id, ste_0_id,
+ action_tbl->rtc_1_id, ste_1_id);
+}
+
+static void hws_debug_dump_action_ste_pool(struct seq_file *f,
+ struct mlx5hws_action_ste_pool *pool)
+{
+ struct mlx5hws_action_ste_table *action_tbl;
+ enum mlx5hws_pool_optimize opt;
+
+ mutex_lock(&pool->lock);
+ for (opt = MLX5HWS_POOL_OPTIMIZE_NONE; opt < MLX5HWS_POOL_OPTIMIZE_MAX;
+ opt++) {
+ list_for_each_entry(action_tbl, &pool->elems[opt].available,
+ list_node) {
+ hws_debug_dump_action_ste_table(f, action_tbl);
+ }
+ }
+ mutex_unlock(&pool->lock);
+}
+
static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ctx)
{
struct mlx5hws_table *tbl;
- int ret;
+ int ret, i;
ret = hws_debug_dump_context_info(f, ctx);
if (ret)
@@ -425,6 +441,9 @@ static int hws_debug_dump_context(struct seq_file *f, struct mlx5hws_context *ct
return ret;
}
+ for (i = 0; i < ctx->queues; i++)
+ hws_debug_dump_action_ste_pool(f, &ctx->action_ste_pool[i]);
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h
index e44e7ae28f93..89c396f9f266 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.h
@@ -26,6 +26,8 @@ enum mlx5hws_debug_res_type {
MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_HASH_DEFINER = 4205,
MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_RANGE_DEFINER = 4206,
MLX5HWS_DEBUG_RES_TYPE_MATCHER_TEMPLATE_COMPARE_MATCH_DEFINER = 4207,
+
+ MLX5HWS_DEBUG_RES_TYPE_ACTION_STE_TABLE = 4300,
};
static inline u64
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
index 30ccd635b505..21279d503117 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
@@ -17,6 +17,7 @@
#include "context.h"
#include "table.h"
#include "send.h"
+#include "action_ste_pool.h"
#include "rule.h"
#include "cmd.h"
#include "action.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
index b61864b32053..716502732d3d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
@@ -3,25 +3,6 @@
#include "internal.h"
-enum mlx5hws_matcher_rtc_type {
- HWS_MATCHER_RTC_TYPE_MATCH,
- HWS_MATCHER_RTC_TYPE_STE_ARRAY,
- HWS_MATCHER_RTC_TYPE_MAX,
-};
-
-static const char * const mlx5hws_matcher_rtc_type_str[] = {
- [HWS_MATCHER_RTC_TYPE_MATCH] = "MATCH",
- [HWS_MATCHER_RTC_TYPE_STE_ARRAY] = "STE_ARRAY",
- [HWS_MATCHER_RTC_TYPE_MAX] = "UNKNOWN",
-};
-
-static const char *hws_matcher_rtc_type_to_str(enum mlx5hws_matcher_rtc_type rtc_type)
-{
- if (rtc_type > HWS_MATCHER_RTC_TYPE_MAX)
- rtc_type = HWS_MATCHER_RTC_TYPE_MAX;
- return mlx5hws_matcher_rtc_type_str[rtc_type];
-}
-
static bool hws_matcher_requires_col_tbl(u8 log_num_of_rules)
{
/* Collision table concatenation is done only for large rule tables */
@@ -197,149 +178,96 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher)
static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher,
struct mlx5hws_cmd_rtc_create_attr *rtc_attr,
- enum mlx5hws_matcher_rtc_type rtc_type,
bool is_mirror)
{
- struct mlx5hws_pool_chunk *ste = &matcher->action_ste.ste;
enum mlx5hws_matcher_flow_src flow_src = matcher->attr.optimize_flow_src;
- bool is_match_rtc = rtc_type == HWS_MATCHER_RTC_TYPE_MATCH;
if ((flow_src == MLX5HWS_MATCHER_FLOW_SRC_VPORT && !is_mirror) ||
(flow_src == MLX5HWS_MATCHER_FLOW_SRC_WIRE && is_mirror)) {
/* Optimize FDB RTC */
rtc_attr->log_size = 0;
rtc_attr->log_depth = 0;
- } else {
- /* Keep original values */
- rtc_attr->log_size = is_match_rtc ? matcher->attr.table.sz_row_log : ste->order;
- rtc_attr->log_depth = is_match_rtc ? matcher->attr.table.sz_col_log : 0;
}
}
-static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
- enum mlx5hws_matcher_rtc_type rtc_type)
+static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher)
{
struct mlx5hws_matcher_attr *attr = &matcher->attr;
struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0};
struct mlx5hws_match_template *mt = matcher->mt;
struct mlx5hws_context *ctx = matcher->tbl->ctx;
- struct mlx5hws_action_default_stc *default_stc;
- struct mlx5hws_matcher_action_ste *action_ste;
struct mlx5hws_table *tbl = matcher->tbl;
- struct mlx5hws_pool *ste_pool, *stc_pool;
- struct mlx5hws_pool_chunk *ste;
- u32 *rtc_0_id, *rtc_1_id;
u32 obj_id;
int ret;
- switch (rtc_type) {
- case HWS_MATCHER_RTC_TYPE_MATCH:
- rtc_0_id = &matcher->match_ste.rtc_0_id;
- rtc_1_id = &matcher->match_ste.rtc_1_id;
- ste_pool = matcher->match_ste.pool;
- ste = &matcher->match_ste.ste;
- ste->order = attr->table.sz_col_log + attr->table.sz_row_log;
-
- rtc_attr.log_size = attr->table.sz_row_log;
- rtc_attr.log_depth = attr->table.sz_col_log;
- rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt);
- rtc_attr.is_scnd_range = 0;
- rtc_attr.miss_ft_id = matcher->end_ft_id;
-
- if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) {
- /* The usual Hash Table */
- rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH;
-
- /* The first mt is used since all share the same definer */
- rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer);
- } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) {
- rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
- rtc_attr.num_hash_definer = 1;
-
- if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) {
- /* Hash Split Table */
- rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH;
- rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer);
- } else if (attr->distribute_mode == MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) {
- /* Linear Lookup Table */
- rtc_attr.access_index_mode = MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR;
- rtc_attr.match_definer_0 = ctx->caps->linear_match_definer;
- }
- }
-
- /* Match pool requires implicit allocation */
- ret = mlx5hws_pool_chunk_alloc(ste_pool, ste);
- if (ret) {
- mlx5hws_err(ctx, "Failed to allocate STE for %s RTC",
- hws_matcher_rtc_type_to_str(rtc_type));
- return ret;
+ rtc_attr.log_size = attr->table.sz_row_log;
+ rtc_attr.log_depth = attr->table.sz_col_log;
+ rtc_attr.is_frst_jumbo = mlx5hws_matcher_mt_is_jumbo(mt);
+ rtc_attr.is_scnd_range = 0;
+ rtc_attr.miss_ft_id = matcher->end_ft_id;
+
+ if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_HASH) {
+ /* The usual Hash Table */
+ rtc_attr.update_index_mode =
+ MLX5_IFC_RTC_STE_UPDATE_MODE_BY_HASH;
+
+ /* The first mt is used since all share the same definer */
+ rtc_attr.match_definer_0 = mlx5hws_definer_get_id(mt->definer);
+ } else if (attr->insert_mode == MLX5HWS_MATCHER_INSERT_BY_INDEX) {
+ rtc_attr.update_index_mode =
+ MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
+ rtc_attr.num_hash_definer = 1;
+
+ if (attr->distribute_mode ==
+ MLX5HWS_MATCHER_DISTRIBUTE_BY_HASH) {
+ /* Hash Split Table */
+ rtc_attr.access_index_mode =
+ MLX5_IFC_RTC_STE_ACCESS_MODE_BY_HASH;
+ rtc_attr.match_definer_0 =
+ mlx5hws_definer_get_id(mt->definer);
+ } else if (attr->distribute_mode ==
+ MLX5HWS_MATCHER_DISTRIBUTE_BY_LINEAR) {
+ /* Linear Lookup Table */
+ rtc_attr.access_index_mode =
+ MLX5_IFC_RTC_STE_ACCESS_MODE_LINEAR;
+ rtc_attr.match_definer_0 =
+ ctx->caps->linear_match_definer;
}
- break;
-
- case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
- action_ste = &matcher->action_ste;
-
- rtc_0_id = &action_ste->rtc_0_id;
- rtc_1_id = &action_ste->rtc_1_id;
- ste_pool = action_ste->pool;
- ste = &action_ste->ste;
- /* Action RTC size calculation:
- * log((max number of rules in matcher) *
- * (max number of action STEs per rule) *
- * (2 to support writing new STEs for update rule))
- */
- ste->order = ilog2(roundup_pow_of_two(action_ste->max_stes)) +
- attr->table.sz_row_log +
- MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT;
- rtc_attr.log_size = ste->order;
- rtc_attr.log_depth = 0;
- rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
- /* The action STEs use the default always hit definer */
- rtc_attr.match_definer_0 = ctx->caps->trivial_match_definer;
- rtc_attr.is_frst_jumbo = false;
- rtc_attr.miss_ft_id = 0;
- break;
-
- default:
- mlx5hws_err(ctx, "HWS Invalid RTC type\n");
- return -EINVAL;
}
- obj_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
+ obj_id = mlx5hws_pool_get_base_id(matcher->match_ste.pool);
rtc_attr.pd = ctx->pd_num;
rtc_attr.ste_base = obj_id;
- rtc_attr.ste_offset = ste->offset;
rtc_attr.reparse_mode = mlx5hws_context_get_reparse_mode(ctx);
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, false);
- hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, false);
+ hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, false);
/* STC is a single resource (obj_id), use any STC for the ID */
- stc_pool = ctx->stc_pool;
- default_stc = ctx->common_res.default_stc;
- obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit);
+ obj_id = mlx5hws_pool_get_base_id(ctx->stc_pool);
rtc_attr.stc_base = obj_id;
- ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_0_id);
+ ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr,
+ &matcher->match_ste.rtc_0_id);
if (ret) {
- mlx5hws_err(ctx, "Failed to create matcher RTC of type %s",
- hws_matcher_rtc_type_to_str(rtc_type));
- goto free_ste;
+ mlx5hws_err(ctx, "Failed to create matcher RTC\n");
+ return ret;
}
if (tbl->type == MLX5HWS_TABLE_TYPE_FDB) {
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(ste_pool, ste);
+ obj_id = mlx5hws_pool_get_base_mirror_id(
+ matcher->match_ste.pool);
rtc_attr.ste_base = obj_id;
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(tbl->type, true);
- obj_id = mlx5hws_pool_chunk_get_base_mirror_id(stc_pool, &default_stc->default_hit);
+ obj_id = mlx5hws_pool_get_base_mirror_id(ctx->stc_pool);
rtc_attr.stc_base = obj_id;
- hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, true);
+ hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, true);
- ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr, rtc_1_id);
+ ret = mlx5hws_cmd_rtc_create(ctx->mdev, &rtc_attr,
+ &matcher->match_ste.rtc_1_id);
if (ret) {
- mlx5hws_err(ctx, "Failed to create peer matcher RTC of type %s",
- hws_matcher_rtc_type_to_str(rtc_type));
+ mlx5hws_err(ctx, "Failed to create mirror matcher RTC\n");
goto destroy_rtc_0;
}
}
@@ -347,46 +275,18 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
return 0;
destroy_rtc_0:
- mlx5hws_cmd_rtc_destroy(ctx->mdev, *rtc_0_id);
-free_ste:
- if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH)
- mlx5hws_pool_chunk_free(ste_pool, ste);
+ mlx5hws_cmd_rtc_destroy(ctx->mdev, matcher->match_ste.rtc_0_id);
return ret;
}
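
For orientation, the insert/distribute branches above reduce to a small decision table. A hedged standalone sketch follows; the enums and the pick_rtc_modes() helper are illustrative stand-ins, not driver API:

/* Illustrative only: maps matcher insert/distribute modes to the RTC
 * update/access modes the way the code above does.
 */
enum insert_mode { INSERT_BY_HASH, INSERT_BY_INDEX };
enum distribute_mode { DISTRIBUTE_BY_HASH, DISTRIBUTE_BY_LINEAR };

struct rtc_modes {
	int update_by_offset;	/* else: update by hash */
	int access_linear;	/* else: access by hash */
};

static struct rtc_modes pick_rtc_modes(enum insert_mode ins,
				       enum distribute_mode dist)
{
	struct rtc_modes m = { 0, 0 };

	if (ins == INSERT_BY_INDEX) {
		m.update_by_offset = 1;	/* rules land at explicit offsets */
		if (dist == DISTRIBUTE_BY_LINEAR)
			m.access_linear = 1;	/* linear lookup table */
	}
	return m;
}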
-static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher,
- enum mlx5hws_matcher_rtc_type rtc_type)
+static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher)
{
- struct mlx5hws_matcher_action_ste *action_ste;
- struct mlx5hws_table *tbl = matcher->tbl;
- struct mlx5hws_pool_chunk *ste;
- struct mlx5hws_pool *ste_pool;
- u32 rtc_0_id, rtc_1_id;
-
- switch (rtc_type) {
- case HWS_MATCHER_RTC_TYPE_MATCH:
- rtc_0_id = matcher->match_ste.rtc_0_id;
- rtc_1_id = matcher->match_ste.rtc_1_id;
- ste_pool = matcher->match_ste.pool;
- ste = &matcher->match_ste.ste;
- break;
- case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
- action_ste = &matcher->action_ste;
- rtc_0_id = action_ste->rtc_0_id;
- rtc_1_id = action_ste->rtc_1_id;
- ste_pool = action_ste->pool;
- ste = &action_ste->ste;
- break;
- default:
- return;
- }
+ struct mlx5_core_dev *mdev = matcher->tbl->ctx->mdev;
- if (tbl->type == MLX5HWS_TABLE_TYPE_FDB)
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_1_id);
+ if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB)
+ mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_1_id);
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev, rtc_0_id);
- if (rtc_type == HWS_MATCHER_RTC_TYPE_MATCH)
- mlx5hws_pool_chunk_free(ste_pool, ste);
+ mlx5hws_cmd_rtc_destroy(mdev, matcher->match_ste.rtc_0_id);
}
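
The FDB case keeps a mirrored RTC, and teardown runs in reverse creation order. A minimal sketch of that idiom; struct ctx and destroy_obj() are hypothetical names, with kernel types assumed:

struct ctx;					/* hypothetical */
void destroy_obj(struct ctx *c, u32 id);	/* hypothetical */

static void destroy_rtc_pair(struct ctx *c, u32 rtc_0_id, u32 rtc_1_id,
			     bool is_fdb)
{
	if (is_fdb)
		destroy_obj(c, rtc_1_id);	/* mirror first... */
	destroy_obj(c, rtc_0_id);		/* ...then the original */
}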
static int
@@ -454,85 +354,17 @@ static int hws_matcher_check_and_process_at(struct mlx5hws_matcher *matcher,
return 0;
}
-static int hws_matcher_resize_init(struct mlx5hws_matcher *src_matcher)
-{
- struct mlx5hws_matcher_resize_data *resize_data;
-
- resize_data = kzalloc(sizeof(*resize_data), GFP_KERNEL);
- if (!resize_data)
- return -ENOMEM;
-
- resize_data->max_stes = src_matcher->action_ste.max_stes;
-
- resize_data->stc = src_matcher->action_ste.stc;
- resize_data->rtc_0_id = src_matcher->action_ste.rtc_0_id;
- resize_data->rtc_1_id = src_matcher->action_ste.rtc_1_id;
- resize_data->pool = src_matcher->action_ste.max_stes ?
- src_matcher->action_ste.pool : NULL;
-
- /* Place the new resized matcher on the dst matcher's list */
- list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data);
-
- /* Move all the previous resized matchers to the dst matcher's list */
- while (!list_empty(&src_matcher->resize_data)) {
- resize_data = list_first_entry(&src_matcher->resize_data,
- struct mlx5hws_matcher_resize_data,
- list_node);
- list_del_init(&resize_data->list_node);
- list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data);
- }
-
- return 0;
-}
-
-static void hws_matcher_resize_uninit(struct mlx5hws_matcher *matcher)
-{
- struct mlx5hws_matcher_resize_data *resize_data;
-
- if (!mlx5hws_matcher_is_resizable(matcher))
- return;
-
- while (!list_empty(&matcher->resize_data)) {
- resize_data = list_first_entry(&matcher->resize_data,
- struct mlx5hws_matcher_resize_data,
- list_node);
- list_del_init(&resize_data->list_node);
-
- if (resize_data->max_stes) {
- mlx5hws_action_free_single_stc(matcher->tbl->ctx,
- matcher->tbl->type,
- &resize_data->stc);
-
- if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB)
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev,
- resize_data->rtc_1_id);
-
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev,
- resize_data->rtc_0_id);
-
- if (resize_data->pool)
- mlx5hws_pool_destroy(resize_data->pool);
- }
-
- kfree(resize_data);
- }
-}
-
static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
{
bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt);
- struct mlx5hws_cmd_stc_modify_attr stc_attr = {0};
- struct mlx5hws_matcher_action_ste *action_ste;
- struct mlx5hws_table *tbl = matcher->tbl;
- struct mlx5hws_pool_attr pool_attr = {0};
- struct mlx5hws_context *ctx = tbl->ctx;
- u32 required_stes;
- u8 max_stes = 0;
+ struct mlx5hws_context *ctx = matcher->tbl->ctx;
+ u8 required_stes, max_stes;
int i, ret;
if (matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)
return 0;
+ max_stes = 0;
for (i = 0; i < matcher->num_of_at; i++) {
struct mlx5hws_action_template *at = &matcher->at[i];
@@ -548,75 +380,9 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
/* Future: Optimize reparse */
}
- /* There are no additional STEs required for matcher */
- if (!max_stes)
- return 0;
-
- matcher->action_ste.max_stes = max_stes;
-
- action_ste = &matcher->action_ste;
-
- /* Allocate action STE mempool */
- pool_attr.table_type = tbl->type;
- pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
- pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL;
- /* Pool size is similar to action RTC size */
- pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) +
- matcher->attr.table.sz_row_log +
- MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT;
- hws_matcher_set_pool_attr(&pool_attr, matcher);
- action_ste->pool = mlx5hws_pool_create(ctx, &pool_attr);
- if (!action_ste->pool) {
- mlx5hws_err(ctx, "Failed to create action ste pool\n");
- return -EINVAL;
- }
-
- /* Allocate action RTC */
- ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
- if (ret) {
- mlx5hws_err(ctx, "Failed to create action RTC\n");
- goto free_ste_pool;
- }
-
- /* Allocate STC for jumps to STE */
- stc_attr.action_offset = MLX5HWS_ACTION_OFFSET_HIT;
- stc_attr.action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_STE_TABLE;
- stc_attr.reparse_mode = MLX5_IFC_STC_REPARSE_IGNORE;
- stc_attr.ste_table.ste = action_ste->ste;
- stc_attr.ste_table.ste_pool = action_ste->pool;
- stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer;
-
- ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl->type,
- &action_ste->stc);
- if (ret) {
- mlx5hws_err(ctx, "Failed to create action jump to table STC\n");
- goto free_rtc;
- }
+ matcher->num_of_action_stes = max_stes;
return 0;
-
-free_rtc:
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
-free_ste_pool:
- mlx5hws_pool_destroy(action_ste->pool);
- return ret;
-}
-
-static void hws_matcher_unbind_at(struct mlx5hws_matcher *matcher)
-{
- struct mlx5hws_matcher_action_ste *action_ste;
- struct mlx5hws_table *tbl = matcher->tbl;
-
- action_ste = &matcher->action_ste;
-
- if (!action_ste->max_stes ||
- matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION ||
- mlx5hws_matcher_is_in_resize(matcher))
- return;
-
- mlx5hws_action_free_single_stc(tbl->ctx, tbl->type, &action_ste->stc);
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
- mlx5hws_pool_destroy(action_ste->pool);
}
static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher)
@@ -638,7 +404,6 @@ static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher)
 	/* Create an STE pool per matcher */
pool_attr.table_type = matcher->tbl->type;
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
- pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL;
pool_attr.alloc_log_sz = matcher->attr.table.sz_col_log +
matcher->attr.table.sz_row_log;
hws_matcher_set_pool_attr(&pool_attr, matcher);
@@ -761,10 +526,10 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher)
/* Create matcher end flow table anchor */
ret = hws_matcher_create_end_ft(matcher);
if (ret)
- goto unbind_at;
+ goto unbind_mt;
/* Allocate the RTC for the new matcher */
- ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
+ ret = hws_matcher_create_rtc(matcher);
if (ret)
goto destroy_end_ft;
@@ -776,11 +541,9 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher)
return 0;
destroy_rtc:
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
+ hws_matcher_destroy_rtc(matcher);
destroy_end_ft:
hws_matcher_destroy_end_ft(matcher);
-unbind_at:
- hws_matcher_unbind_at(matcher);
unbind_mt:
hws_matcher_unbind_mt(matcher);
return ret;
@@ -788,11 +551,9 @@ unbind_mt:
static void hws_matcher_destroy_and_disconnect(struct mlx5hws_matcher *matcher)
{
- hws_matcher_resize_uninit(matcher);
hws_matcher_disconnect(matcher);
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
+ hws_matcher_destroy_rtc(matcher);
hws_matcher_destroy_end_ft(matcher);
- hws_matcher_unbind_at(matcher);
hws_matcher_unbind_mt(matcher);
}
@@ -814,8 +575,6 @@ hws_matcher_create_col_matcher(struct mlx5hws_matcher *matcher)
if (!col_matcher)
return -ENOMEM;
- INIT_LIST_HEAD(&col_matcher->resize_data);
-
col_matcher->tbl = matcher->tbl;
col_matcher->mt = matcher->mt;
col_matcher->at = matcher->at;
@@ -869,8 +628,6 @@ static int hws_matcher_init(struct mlx5hws_matcher *matcher)
struct mlx5hws_context *ctx = matcher->tbl->ctx;
int ret;
- INIT_LIST_HEAD(&matcher->resize_data);
-
mutex_lock(&ctx->ctrl_lock);
/* Allocate matcher resource and connect to the packet pipe */
@@ -905,18 +662,44 @@ static int hws_matcher_uninit(struct mlx5hws_matcher *matcher)
return 0;
}
+static int hws_matcher_grow_at_array(struct mlx5hws_matcher *matcher)
+{
+ void *p;
+
+ if (matcher->size_of_at_array >= MLX5HWS_MATCHER_MAX_AT)
+ return -ENOMEM;
+
+ matcher->size_of_at_array *= 2;
+ p = krealloc(matcher->at,
+ matcher->size_of_at_array * sizeof(*matcher->at),
+ __GFP_ZERO | GFP_KERNEL);
+ if (!p) {
+ matcher->size_of_at_array /= 2;
+ return -ENOMEM;
+ }
+
+ matcher->at = p;
+
+ return 0;
+}
+
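
hws_matcher_grow_at_array() above is the classic capacity-doubling pattern with a hard cap. A minimal user-space sketch under those assumptions, where realloc() plus explicit zeroing stands in for krealloc() with __GFP_ZERO, and struct grow_array is a hypothetical container:

#include <stdlib.h>
#include <string.h>

struct grow_array {
	void *data;
	size_t elem_size;
	size_t capacity;	/* current number of elements */
};

static int grow_array_double(struct grow_array *a, size_t max_capacity)
{
	size_t new_cap = a->capacity * 2;
	void *p;

	if (a->capacity >= max_capacity)
		return -1;	/* mirrors the MLX5HWS_MATCHER_MAX_AT cap */

	p = realloc(a->data, new_cap * a->elem_size);
	if (!p)
		return -1;	/* old array stays valid on failure */

	/* Zero the new tail, as __GFP_ZERO does for krealloc. */
	memset((char *)p + a->capacity * a->elem_size, 0,
	       (new_cap - a->capacity) * a->elem_size);

	a->data = p;
	a->capacity = new_cap;
	return 0;
}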
int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
struct mlx5hws_action_template *at)
{
bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt);
- struct mlx5hws_context *ctx = matcher->tbl->ctx;
u32 required_stes;
int ret;
- if (!matcher->attr.max_num_of_at_attach) {
- mlx5hws_dbg(ctx, "Num of current at (%d) exceed allowed value\n",
- matcher->num_of_at);
- return -EOPNOTSUPP;
+ if (unlikely(matcher->num_of_at >= matcher->size_of_at_array)) {
+ ret = hws_matcher_grow_at_array(matcher);
+ if (ret)
+ return ret;
+
+ if (matcher->col_matcher) {
+ ret = hws_matcher_grow_at_array(matcher->col_matcher);
+ if (ret)
+ return ret;
+ }
}
ret = hws_matcher_check_and_process_at(matcher, at);
@@ -924,15 +707,11 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
return ret;
required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term);
- if (matcher->action_ste.max_stes < required_stes) {
- mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n",
- required_stes, matcher->action_ste.max_stes);
- return -ENOMEM;
- }
+ if (matcher->num_of_action_stes < required_stes)
+ matcher->num_of_action_stes = required_stes;
matcher->at[matcher->num_of_at] = *at;
matcher->num_of_at += 1;
- matcher->attr.max_num_of_at_attach -= 1;
if (matcher->col_matcher)
matcher->col_matcher->num_of_at = matcher->num_of_at;
@@ -960,8 +739,9 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher,
if (!matcher->mt)
return -ENOMEM;
- matcher->at = kvcalloc(num_of_at + matcher->attr.max_num_of_at_attach,
- sizeof(*matcher->at),
+ matcher->size_of_at_array =
+ num_of_at + matcher->attr.max_num_of_at_attach;
+ matcher->at = kvcalloc(matcher->size_of_at_array, sizeof(*matcher->at),
GFP_KERNEL);
if (!matcher->at) {
mlx5hws_err(ctx, "Failed to allocate action template array\n");
@@ -1110,7 +890,7 @@ static int hws_matcher_resize_precheck(struct mlx5hws_matcher *src_matcher,
return -EINVAL;
}
- if (src_matcher->action_ste.max_stes > dst_matcher->action_ste.max_stes) {
+ if (src_matcher->num_of_action_stes > dst_matcher->num_of_action_stes) {
mlx5hws_err(ctx, "Src/dst matcher max STEs mismatch\n");
return -EINVAL;
}
@@ -1139,10 +919,6 @@ int mlx5hws_matcher_resize_set_target(struct mlx5hws_matcher *src_matcher,
src_matcher->resize_dst = dst_matcher;
- ret = hws_matcher_resize_init(src_matcher);
- if (ret)
- src_matcher->resize_dst = NULL;
-
out:
mutex_unlock(&src_matcher->tbl->ctx->ctrl_lock);
return ret;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
index 020de70270c5..bad1fa8f77fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
@@ -23,6 +23,9 @@
*/
#define MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT 1
+/* Maximum number of action templates that can be attached to a matcher. */
+#define MLX5HWS_MATCHER_MAX_AT 128
+
enum mlx5hws_matcher_offset {
MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12,
MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13,
@@ -42,28 +45,9 @@ struct mlx5hws_match_template {
};
struct mlx5hws_matcher_match_ste {
- struct mlx5hws_pool_chunk ste;
- u32 rtc_0_id;
- u32 rtc_1_id;
- struct mlx5hws_pool *pool;
-};
-
-struct mlx5hws_matcher_action_ste {
- struct mlx5hws_pool_chunk ste;
- struct mlx5hws_pool_chunk stc;
u32 rtc_0_id;
u32 rtc_1_id;
struct mlx5hws_pool *pool;
- u8 max_stes;
-};
-
-struct mlx5hws_matcher_resize_data {
- struct mlx5hws_pool_chunk stc;
- u32 rtc_0_id;
- u32 rtc_1_id;
- struct mlx5hws_pool *pool;
- u8 max_stes;
- struct list_head list_node;
};
struct mlx5hws_matcher {
@@ -72,16 +56,16 @@ struct mlx5hws_matcher {
struct mlx5hws_match_template *mt;
struct mlx5hws_action_template *at;
u8 num_of_at;
+ u8 size_of_at_array;
u8 num_of_mt;
+ u8 num_of_action_stes;
/* enum mlx5hws_matcher_flags */
u8 flags;
u32 end_ft_id;
struct mlx5hws_matcher *col_matcher;
struct mlx5hws_matcher *resize_dst;
struct mlx5hws_matcher_match_ste match_ste;
- struct mlx5hws_matcher_action_ste action_ste;
struct list_head list_node;
- struct list_head resize_data;
};
static inline bool
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
index 50a81d360bb2..7e37d6e9eb83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
@@ -20,15 +20,14 @@ static void hws_pool_free_one_resource(struct mlx5hws_pool_resource *resource)
kfree(resource);
}
-static void hws_pool_resource_free(struct mlx5hws_pool *pool,
- int resource_idx)
+static void hws_pool_resource_free(struct mlx5hws_pool *pool)
{
- hws_pool_free_one_resource(pool->resource[resource_idx]);
- pool->resource[resource_idx] = NULL;
+ hws_pool_free_one_resource(pool->resource);
+ pool->resource = NULL;
if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) {
- hws_pool_free_one_resource(pool->mirror_resource[resource_idx]);
- pool->mirror_resource[resource_idx] = NULL;
+ hws_pool_free_one_resource(pool->mirror_resource);
+ pool->mirror_resource = NULL;
}
}
@@ -61,10 +60,8 @@ hws_pool_create_one_resource(struct mlx5hws_pool *pool, u32 log_range,
ret = -EINVAL;
}
- if (ret) {
- mlx5hws_err(pool->ctx, "Failed to allocate resource objects\n");
+ if (ret)
goto free_resource;
- }
resource->pool = pool;
resource->range = 1 << log_range;
@@ -77,199 +74,114 @@ free_resource:
return NULL;
}
-static int
-hws_pool_resource_alloc(struct mlx5hws_pool *pool, u32 log_range, int idx)
+static int hws_pool_resource_alloc(struct mlx5hws_pool *pool)
{
struct mlx5hws_pool_resource *resource;
u32 fw_ft_type, opt_log_range;
fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, false);
- opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ? 0 : log_range;
+ opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ?
+ 0 : pool->alloc_log_sz;
resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type);
if (!resource) {
- mlx5hws_err(pool->ctx, "Failed allocating resource\n");
+ mlx5hws_err(pool->ctx, "Failed to allocate resource\n");
return -EINVAL;
}
- pool->resource[idx] = resource;
+ pool->resource = resource;
if (pool->tbl_type == MLX5HWS_TABLE_TYPE_FDB) {
struct mlx5hws_pool_resource *mirror_resource;
fw_ft_type = mlx5hws_table_get_res_fw_ft_type(pool->tbl_type, true);
- opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_MIRROR ? 0 : log_range;
+ opt_log_range = pool->opt_type == MLX5HWS_POOL_OPTIMIZE_ORIG ?
+ 0 : pool->alloc_log_sz;
mirror_resource = hws_pool_create_one_resource(pool, opt_log_range, fw_ft_type);
if (!mirror_resource) {
- mlx5hws_err(pool->ctx, "Failed allocating mirrored resource\n");
+ mlx5hws_err(pool->ctx, "Failed to allocate mirrored resource\n");
hws_pool_free_one_resource(resource);
- pool->resource[idx] = NULL;
+ pool->resource = NULL;
return -EINVAL;
}
- pool->mirror_resource[idx] = mirror_resource;
+ pool->mirror_resource = mirror_resource;
}
return 0;
}
-static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range)
-{
- unsigned long *cur_bmp;
-
- cur_bmp = bitmap_zalloc(1 << log_range, GFP_KERNEL);
- if (!cur_bmp)
- return NULL;
-
- bitmap_fill(cur_bmp, 1 << log_range);
-
- return cur_bmp;
-}
-
-static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_chunk *chunk)
+static int hws_pool_buddy_init(struct mlx5hws_pool *pool)
{
struct mlx5hws_buddy_mem *buddy;
- buddy = pool->db.buddy_manager->buddies[chunk->resource_idx];
+ buddy = mlx5hws_buddy_create(pool->alloc_log_sz);
if (!buddy) {
- mlx5hws_err(pool->ctx, "No such buddy (%d)\n", chunk->resource_idx);
- return;
- }
-
- mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order);
-}
-
-static struct mlx5hws_buddy_mem *
-hws_pool_buddy_get_next_buddy(struct mlx5hws_pool *pool, int idx,
- u32 order, bool *is_new_buddy)
-{
- static struct mlx5hws_buddy_mem *buddy;
- u32 new_buddy_size;
-
- buddy = pool->db.buddy_manager->buddies[idx];
- if (buddy)
- return buddy;
-
- new_buddy_size = max(pool->alloc_log_sz, order);
- *is_new_buddy = true;
- buddy = mlx5hws_buddy_create(new_buddy_size);
- if (!buddy) {
- mlx5hws_err(pool->ctx, "Failed to create buddy order: %d index: %d\n",
- new_buddy_size, idx);
- return NULL;
+ mlx5hws_err(pool->ctx, "Failed to create buddy order: %zu\n",
+ pool->alloc_log_sz);
+ return -ENOMEM;
}
- if (hws_pool_resource_alloc(pool, new_buddy_size, idx) != 0) {
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
- pool->type, new_buddy_size, idx);
+ if (hws_pool_resource_alloc(pool) != 0) {
+ mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n",
+ pool->type, pool->alloc_log_sz);
mlx5hws_buddy_cleanup(buddy);
- return NULL;
+ return -ENOMEM;
}
- pool->db.buddy_manager->buddies[idx] = buddy;
+ pool->db.buddy = buddy;
- return buddy;
+ return 0;
}
-static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool,
- int order,
- u32 *buddy_idx,
- int *seg)
+static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool,
+ struct mlx5hws_pool_chunk *chunk)
{
- struct mlx5hws_buddy_mem *buddy;
- bool new_mem = false;
- int ret = 0;
- int i;
-
- *seg = -1;
-
- /* Find the next free place from the buddy array */
- while (*seg < 0) {
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
- buddy = hws_pool_buddy_get_next_buddy(pool, i,
- order,
- &new_mem);
- if (!buddy) {
- ret = -ENOMEM;
- goto out;
- }
-
- *seg = mlx5hws_buddy_alloc_mem(buddy, order);
- if (*seg >= 0)
- goto found;
-
- if (pool->flags & MLX5HWS_POOL_FLAGS_ONE_RESOURCE) {
- mlx5hws_err(pool->ctx,
- "Fail to allocate seg for one resource pool\n");
- ret = -ENOMEM;
- goto out;
- }
-
- if (new_mem) {
- /* We have new memory pool, should be place for us */
- mlx5hws_err(pool->ctx,
- "No memory for order: %d with buddy no: %d\n",
- order, i);
- ret = -ENOMEM;
- goto out;
- }
- }
+ struct mlx5hws_buddy_mem *buddy = pool->db.buddy;
+
+ if (!buddy) {
+ mlx5hws_err(pool->ctx, "Bad buddy state\n");
+ return -EINVAL;
}
-found:
- *buddy_idx = i;
-out:
- return ret;
+ chunk->offset = mlx5hws_buddy_alloc_mem(buddy, chunk->order);
+ if (chunk->offset >= 0)
+ return 0;
+
+ return -ENOMEM;
}
-static int hws_pool_buddy_db_get_chunk(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_chunk *chunk)
+static void hws_pool_buddy_db_put_chunk(struct mlx5hws_pool *pool,
+ struct mlx5hws_pool_chunk *chunk)
{
- int ret = 0;
+ struct mlx5hws_buddy_mem *buddy;
- /* Go over the buddies and find next free slot */
- ret = hws_pool_buddy_get_mem_chunk(pool, chunk->order,
- &chunk->resource_idx,
- &chunk->offset);
- if (ret)
- mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
- chunk->order);
+ buddy = pool->db.buddy;
+ if (!buddy) {
+ mlx5hws_err(pool->ctx, "Bad buddy state\n");
+ return;
+ }
- return ret;
+ mlx5hws_buddy_free_mem(buddy, chunk->offset, chunk->order);
}
static void hws_pool_buddy_db_uninit(struct mlx5hws_pool *pool)
{
struct mlx5hws_buddy_mem *buddy;
- int i;
-
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
- buddy = pool->db.buddy_manager->buddies[i];
- if (buddy) {
- mlx5hws_buddy_cleanup(buddy);
- kfree(buddy);
- pool->db.buddy_manager->buddies[i] = NULL;
- }
- }
- kfree(pool->db.buddy_manager);
+ buddy = pool->db.buddy;
+ if (buddy) {
+ mlx5hws_buddy_cleanup(buddy);
+ kfree(buddy);
+ pool->db.buddy = NULL;
+ }
}
-static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range)
+static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool)
{
- pool->db.buddy_manager = kzalloc(sizeof(*pool->db.buddy_manager), GFP_KERNEL);
- if (!pool->db.buddy_manager)
- return -ENOMEM;
-
- if (pool->flags & MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE) {
- bool new_buddy;
+ int ret;
- if (!hws_pool_buddy_get_next_buddy(pool, 0, log_range, &new_buddy)) {
- mlx5hws_err(pool->ctx,
- "Failed allocating memory on create log_sz: %d\n", log_range);
- kfree(pool->db.buddy_manager);
- return -ENOMEM;
- }
- }
+ ret = hws_pool_buddy_init(pool);
+ if (ret)
+ return ret;
pool->p_db_uninit = &hws_pool_buddy_db_uninit;
pool->p_get_chunk = &hws_pool_buddy_db_get_chunk;
@@ -278,261 +190,105 @@ static int hws_pool_buddy_db_init(struct mlx5hws_pool *pool, u32 log_range)
return 0;
}
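
The buddy DB serves chunks in power-of-two granules, so a caller sizes a request by order. A hedged helper showing the order computation (illustrative, not part of the driver):

#include <stdint.h>

/* Smallest order such that 2^order >= n_entries; a buddy allocator
 * serves requests in these power-of-two granules.
 */
static inline uint32_t chunk_order_for(uint32_t n_entries)
{
	uint32_t order = 0;

	while ((1u << order) < n_entries)
		order++;
	return order;	/* e.g. 5 entries -> order 3 -> 8-entry chunk */
}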
-static int hws_pool_create_resource_on_index(struct mlx5hws_pool *pool,
- u32 alloc_size, int idx)
-{
- int ret = hws_pool_resource_alloc(pool, alloc_size, idx);
-
- if (ret) {
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
- pool->type, alloc_size, idx);
- return ret;
- }
-
- return 0;
-}
-
-static struct mlx5hws_pool_elements *
-hws_pool_element_create_new_elem(struct mlx5hws_pool *pool, u32 order, int idx)
+static unsigned long *hws_pool_create_and_init_bitmap(u32 log_range)
{
- struct mlx5hws_pool_elements *elem;
- u32 alloc_size;
-
- alloc_size = pool->alloc_log_sz;
+ unsigned long *bitmap;
- elem = kzalloc(sizeof(*elem), GFP_KERNEL);
- if (!elem)
+ bitmap = bitmap_zalloc(1 << log_range, GFP_KERNEL);
+ if (!bitmap)
return NULL;
- /* Sharing the same resource, also means that all the elements are with size 1 */
- if ((pool->flags & MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS) &&
- !(pool->flags & MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK)) {
- /* Currently all chunks in size 1 */
- elem->bitmap = hws_pool_create_and_init_bitmap(alloc_size - order);
- if (!elem->bitmap) {
- mlx5hws_err(pool->ctx,
- "Failed to create bitmap type: %d: size %d index: %d\n",
- pool->type, alloc_size, idx);
- goto free_elem;
- }
-
- elem->log_size = alloc_size - order;
- }
-
- if (hws_pool_create_resource_on_index(pool, alloc_size, idx)) {
- mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %d index: %d\n",
- pool->type, alloc_size, idx);
- goto free_db;
- }
-
- pool->db.element_manager->elements[idx] = elem;
-
- return elem;
+ bitmap_fill(bitmap, 1 << log_range);
-free_db:
- bitmap_free(elem->bitmap);
-free_elem:
- kfree(elem);
- return NULL;
+ return bitmap;
}
-static int hws_pool_element_find_seg(struct mlx5hws_pool_elements *elem, int *seg)
+static int hws_pool_bitmap_init(struct mlx5hws_pool *pool)
{
- unsigned int segment, size;
-
- size = 1 << elem->log_size;
+ unsigned long *bitmap;
- segment = find_first_bit(elem->bitmap, size);
- if (segment >= size) {
- elem->is_full = true;
+ bitmap = hws_pool_create_and_init_bitmap(pool->alloc_log_sz);
+ if (!bitmap) {
+ mlx5hws_err(pool->ctx, "Failed to create bitmap order: %zu\n",
+ pool->alloc_log_sz);
return -ENOMEM;
}
- bitmap_clear(elem->bitmap, segment, 1);
- *seg = segment;
- return 0;
-}
-
-static int
-hws_pool_onesize_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
- u32 *idx, int *seg)
-{
- struct mlx5hws_pool_elements *elem;
-
- elem = pool->db.element_manager->elements[0];
- if (!elem)
- elem = hws_pool_element_create_new_elem(pool, order, 0);
- if (!elem)
- goto err_no_elem;
-
- if (hws_pool_element_find_seg(elem, seg) != 0) {
- mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order);
+ if (hws_pool_resource_alloc(pool) != 0) {
+ mlx5hws_err(pool->ctx, "Failed to create resource type: %d: size %zu\n",
+ pool->type, pool->alloc_log_sz);
+ bitmap_free(bitmap);
return -ENOMEM;
}
- *idx = 0;
- elem->num_of_elements++;
- return 0;
+ pool->db.bitmap = bitmap;
-err_no_elem:
- mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order);
- return -ENOMEM;
-}
-
-static int
-hws_pool_general_element_get_mem_chunk(struct mlx5hws_pool *pool, u32 order,
- u32 *idx, int *seg)
-{
- int ret, i;
-
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
- if (!pool->resource[i]) {
- ret = hws_pool_create_resource_on_index(pool, order, i);
- if (ret)
- goto err_no_res;
- *idx = i;
- *seg = 0; /* One memory slot in that element */
- return 0;
- }
- }
-
- mlx5hws_err(pool->ctx, "No more resources (last request order: %d)\n", order);
- return -ENOMEM;
-
-err_no_res:
- mlx5hws_err(pool->ctx, "Failed to allocate element for order: %d\n", order);
- return -ENOMEM;
+ return 0;
}
-static int hws_pool_general_element_db_get_chunk(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_chunk *chunk)
+static int hws_pool_bitmap_db_get_chunk(struct mlx5hws_pool *pool,
+ struct mlx5hws_pool_chunk *chunk)
{
- int ret;
-
- /* Go over all memory elements and find/allocate free slot */
- ret = hws_pool_general_element_get_mem_chunk(pool, chunk->order,
- &chunk->resource_idx,
- &chunk->offset);
- if (ret)
- mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
- chunk->order);
+ unsigned long *bitmap, size;
- return ret;
-}
+ if (chunk->order != 0) {
+ mlx5hws_err(pool->ctx, "Pool only supports order 0 allocs\n");
+ return -EINVAL;
+ }
-static void hws_pool_general_element_db_put_chunk(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_chunk *chunk)
-{
- if (unlikely(!pool->resource[chunk->resource_idx]))
- pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
+ bitmap = pool->db.bitmap;
+ if (!bitmap) {
+ mlx5hws_err(pool->ctx, "Bad bitmap state\n");
+ return -EINVAL;
+ }
- if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE)
- hws_pool_resource_free(pool, chunk->resource_idx);
-}
+ size = 1 << pool->alloc_log_sz;
-static void hws_pool_general_element_db_uninit(struct mlx5hws_pool *pool)
-{
- (void)pool;
-}
+ chunk->offset = find_first_bit(bitmap, size);
+ if (chunk->offset >= size)
+ return -ENOMEM;
-/* This memory management works as the following:
- * - At start doesn't allocate no mem at all.
- * - When new request for chunk arrived:
- * allocate resource and give it.
- * - When free that chunk:
- * the resource is freed.
- */
-static int hws_pool_general_element_db_init(struct mlx5hws_pool *pool)
-{
- pool->p_db_uninit = &hws_pool_general_element_db_uninit;
- pool->p_get_chunk = &hws_pool_general_element_db_get_chunk;
- pool->p_put_chunk = &hws_pool_general_element_db_put_chunk;
+ bitmap_clear(bitmap, chunk->offset, 1);
return 0;
}
-static void hws_onesize_element_db_destroy_element(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_elements *elem,
- struct mlx5hws_pool_chunk *chunk)
-{
- if (unlikely(!pool->resource[chunk->resource_idx]))
- pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
-
- hws_pool_resource_free(pool, chunk->resource_idx);
- kfree(elem);
- pool->db.element_manager->elements[chunk->resource_idx] = NULL;
-}
-
-static void hws_onesize_element_db_put_chunk(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_chunk *chunk)
+static void hws_pool_bitmap_db_put_chunk(struct mlx5hws_pool *pool,
+ struct mlx5hws_pool_chunk *chunk)
{
- struct mlx5hws_pool_elements *elem;
+ unsigned long *bitmap;
- if (unlikely(chunk->resource_idx))
- pr_warn("HWS: invalid resource with index %d\n", chunk->resource_idx);
-
- elem = pool->db.element_manager->elements[chunk->resource_idx];
- if (!elem) {
- mlx5hws_err(pool->ctx, "No such element (%d)\n", chunk->resource_idx);
+ bitmap = pool->db.bitmap;
+ if (!bitmap) {
+ mlx5hws_err(pool->ctx, "Bad bitmap state\n");
return;
}
- bitmap_set(elem->bitmap, chunk->offset, 1);
- elem->is_full = false;
- elem->num_of_elements--;
-
- if (pool->flags & MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE &&
- !elem->num_of_elements)
- hws_onesize_element_db_destroy_element(pool, elem, chunk);
+ bitmap_set(bitmap, chunk->offset, 1);
}
-static int hws_onesize_element_db_get_chunk(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_chunk *chunk)
+static void hws_pool_bitmap_db_uninit(struct mlx5hws_pool *pool)
{
- int ret = 0;
+ unsigned long *bitmap;
- /* Go over all memory elements and find/allocate free slot */
- ret = hws_pool_onesize_element_get_mem_chunk(pool, chunk->order,
- &chunk->resource_idx,
- &chunk->offset);
- if (ret)
- mlx5hws_err(pool->ctx, "Failed to get free slot for chunk with order: %d\n",
- chunk->order);
-
- return ret;
-}
-
-static void hws_onesize_element_db_uninit(struct mlx5hws_pool *pool)
-{
- struct mlx5hws_pool_elements *elem;
- int i;
-
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
- elem = pool->db.element_manager->elements[i];
- if (elem) {
- bitmap_free(elem->bitmap);
- kfree(elem);
- pool->db.element_manager->elements[i] = NULL;
- }
+ bitmap = pool->db.bitmap;
+ if (bitmap) {
+ bitmap_free(bitmap);
+ pool->db.bitmap = NULL;
}
- kfree(pool->db.element_manager);
}
-/* This memory management works as the following:
- * - At start doesn't allocate no mem at all.
- * - When new request for chunk arrived:
- * aloocate the first and only slot of memory/resource
- * when it ended return error.
- */
-static int hws_pool_onesize_element_db_init(struct mlx5hws_pool *pool)
+static int hws_pool_bitmap_db_init(struct mlx5hws_pool *pool)
{
- pool->db.element_manager = kzalloc(sizeof(*pool->db.element_manager), GFP_KERNEL);
- if (!pool->db.element_manager)
- return -ENOMEM;
+ int ret;
- pool->p_db_uninit = &hws_onesize_element_db_uninit;
- pool->p_get_chunk = &hws_onesize_element_db_get_chunk;
- pool->p_put_chunk = &hws_onesize_element_db_put_chunk;
+ ret = hws_pool_bitmap_init(pool);
+ if (ret)
+ return ret;
+
+ pool->p_db_uninit = &hws_pool_bitmap_db_uninit;
+ pool->p_get_chunk = &hws_pool_bitmap_db_get_chunk;
+ pool->p_put_chunk = &hws_pool_bitmap_db_put_chunk;
return 0;
}
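
The bitmap DB models free entries as set bits: allocation is find-first-bit plus clear, free is set. A self-contained user-space analogue for a 64-entry pool (GCC/Clang builtins assumed):

#include <stdint.h>

/* Free entries are 1 bits; allocation clears the first set bit. */
static int bitmap64_alloc_one(uint64_t *map)
{
	int idx;

	if (*map == 0)
		return -1;		/* pool exhausted */

	idx = __builtin_ctzll(*map);	/* index of first free entry */
	*map &= ~(1ull << idx);		/* mark it allocated */
	return idx;
}

static void bitmap64_free_one(uint64_t *map, int idx)
{
	*map |= 1ull << idx;
}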
@@ -542,15 +298,14 @@ static int hws_pool_db_init(struct mlx5hws_pool *pool,
{
int ret;
- if (db_type == MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE)
- ret = hws_pool_general_element_db_init(pool);
- else if (db_type == MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE)
- ret = hws_pool_onesize_element_db_init(pool);
+ if (db_type == MLX5HWS_POOL_DB_TYPE_BITMAP)
+ ret = hws_pool_bitmap_db_init(pool);
else
- ret = hws_pool_buddy_db_init(pool, pool->alloc_log_sz);
+ ret = hws_pool_buddy_db_init(pool);
if (ret) {
- mlx5hws_err(pool->ctx, "Failed to init general db : %d (ret: %d)\n", db_type, ret);
+ mlx5hws_err(pool->ctx, "Failed to init pool type: %d (ret: %d)\n",
+ db_type, ret);
return ret;
}
@@ -569,6 +324,8 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool,
mutex_lock(&pool->lock);
ret = pool->p_get_chunk(pool, chunk);
+ if (ret == 0)
+ pool->available_elems -= 1 << chunk->order;
mutex_unlock(&pool->lock);
return ret;
@@ -579,6 +336,7 @@ void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool,
{
mutex_lock(&pool->lock);
pool->p_put_chunk(pool, chunk);
+ pool->available_elems += 1 << chunk->order;
mutex_unlock(&pool->lock);
}
@@ -599,17 +357,13 @@ mlx5hws_pool_create(struct mlx5hws_context *ctx, struct mlx5hws_pool_attr *pool_
pool->tbl_type = pool_attr->table_type;
pool->opt_type = pool_attr->opt_type;
- /* Support general db */
- if (pool->flags == (MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE |
- MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK))
- res_db_type = MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE;
- else if (pool->flags == (MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
- MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS))
- res_db_type = MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE;
- else
+ if (pool->flags & MLX5HWS_POOL_FLAG_BUDDY)
res_db_type = MLX5HWS_POOL_DB_TYPE_BUDDY;
+ else
+ res_db_type = MLX5HWS_POOL_DB_TYPE_BITMAP;
pool->alloc_log_sz = pool_attr->alloc_log_sz;
+ pool->available_elems = 1 << pool_attr->alloc_log_sz;
if (hws_pool_db_init(pool, res_db_type))
goto free_pool;
@@ -623,18 +377,17 @@ free_pool:
return NULL;
}
-int mlx5hws_pool_destroy(struct mlx5hws_pool *pool)
+void mlx5hws_pool_destroy(struct mlx5hws_pool *pool)
{
- int i;
-
mutex_destroy(&pool->lock);
- for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++)
- if (pool->resource[i])
- hws_pool_resource_free(pool, i);
+ if (pool->available_elems != 1 << pool->alloc_log_sz)
+ mlx5hws_err(pool->ctx, "Attempting to destroy non-empty pool\n");
+
+ if (pool->resource)
+ hws_pool_resource_free(pool);
hws_pool_db_unint(pool);
kfree(pool);
- return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
index 621298b352b2..33e33d5f1fb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.h
@@ -6,16 +6,12 @@
#define MLX5HWS_POOL_STC_LOG_SZ 15
-#define MLX5HWS_POOL_RESOURCE_ARR_SZ 100
-
enum mlx5hws_pool_type {
MLX5HWS_POOL_TYPE_STE,
MLX5HWS_POOL_TYPE_STC,
};
struct mlx5hws_pool_chunk {
- u32 resource_idx;
- /* Internal offset, relative to base index */
int offset;
int order;
};
@@ -27,35 +23,17 @@ struct mlx5hws_pool_resource {
};
enum mlx5hws_pool_flags {
- /* Only a one resource in that pool */
- MLX5HWS_POOL_FLAGS_ONE_RESOURCE = 1 << 0,
- MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE = 1 << 1,
- /* No sharing resources between chunks */
- MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK = 1 << 2,
- /* All objects are in the same size */
- MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS = 1 << 3,
- /* Managed by buddy allocator */
- MLX5HWS_POOL_FLAGS_BUDDY_MANAGED = 1 << 4,
- /* Allocate pool_type memory on pool creation */
- MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE = 1 << 5,
-
- /* These values should be used by the caller */
- MLX5HWS_POOL_FLAGS_FOR_STC_POOL =
- MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
- MLX5HWS_POOL_FLAGS_FIXED_SIZE_OBJECTS,
- MLX5HWS_POOL_FLAGS_FOR_MATCHER_STE_POOL =
- MLX5HWS_POOL_FLAGS_RELEASE_FREE_RESOURCE |
- MLX5HWS_POOL_FLAGS_RESOURCE_PER_CHUNK,
- MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL =
- MLX5HWS_POOL_FLAGS_ONE_RESOURCE |
- MLX5HWS_POOL_FLAGS_BUDDY_MANAGED |
- MLX5HWS_POOL_FLAGS_ALLOC_MEM_ON_CREATE,
+	/* Managed by a buddy allocator. If this is not set, only
+	 * allocations of order 0 are supported.
+	 */
+ MLX5HWS_POOL_FLAG_BUDDY = BIT(0),
};
enum mlx5hws_pool_optimize {
MLX5HWS_POOL_OPTIMIZE_NONE = 0x0,
MLX5HWS_POOL_OPTIMIZE_ORIG = 0x1,
MLX5HWS_POOL_OPTIMIZE_MIRROR = 0x2,
+ MLX5HWS_POOL_OPTIMIZE_MAX = 0x3,
};
struct mlx5hws_pool_attr {
@@ -68,34 +46,17 @@ struct mlx5hws_pool_attr {
};
enum mlx5hws_db_type {
- /* Uses for allocating chunk of big memory, each element has its own resource in the FW*/
- MLX5HWS_POOL_DB_TYPE_GENERAL_SIZE,
- /* One resource only, all the elements are with same one size */
- MLX5HWS_POOL_DB_TYPE_ONE_SIZE_RESOURCE,
- /* Many resources, the memory allocated with buddy mechanism */
+	/* Uses a bitmap; only allocations of order 0 are supported. */
+ MLX5HWS_POOL_DB_TYPE_BITMAP,
+ /* Entries are managed using a buddy mechanism. */
MLX5HWS_POOL_DB_TYPE_BUDDY,
};
-struct mlx5hws_buddy_manager {
- struct mlx5hws_buddy_mem *buddies[MLX5HWS_POOL_RESOURCE_ARR_SZ];
-};
-
-struct mlx5hws_pool_elements {
- u32 num_of_elements;
- unsigned long *bitmap;
- u32 log_size;
- bool is_full;
-};
-
-struct mlx5hws_element_manager {
- struct mlx5hws_pool_elements *elements[MLX5HWS_POOL_RESOURCE_ARR_SZ];
-};
-
struct mlx5hws_pool_db {
enum mlx5hws_db_type type;
union {
- struct mlx5hws_element_manager *element_manager;
- struct mlx5hws_buddy_manager *buddy_manager;
+ unsigned long *bitmap;
+ struct mlx5hws_buddy_mem *buddy;
};
};
@@ -111,11 +72,11 @@ struct mlx5hws_pool {
enum mlx5hws_pool_flags flags;
struct mutex lock; /* protect the pool */
size_t alloc_log_sz;
+ size_t available_elems;
enum mlx5hws_table_type tbl_type;
enum mlx5hws_pool_optimize opt_type;
- struct mlx5hws_pool_resource *resource[MLX5HWS_POOL_RESOURCE_ARR_SZ];
- struct mlx5hws_pool_resource *mirror_resource[MLX5HWS_POOL_RESOURCE_ARR_SZ];
- /* DB */
+ struct mlx5hws_pool_resource *resource;
+ struct mlx5hws_pool_resource *mirror_resource;
struct mlx5hws_pool_db db;
/* Functions */
mlx5hws_pool_unint_db p_db_uninit;
@@ -127,7 +88,7 @@ struct mlx5hws_pool *
mlx5hws_pool_create(struct mlx5hws_context *ctx,
struct mlx5hws_pool_attr *pool_attr);
-int mlx5hws_pool_destroy(struct mlx5hws_pool *pool);
+void mlx5hws_pool_destroy(struct mlx5hws_pool *pool);
int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool,
struct mlx5hws_pool_chunk *chunk);
@@ -135,17 +96,37 @@ int mlx5hws_pool_chunk_alloc(struct mlx5hws_pool *pool,
void mlx5hws_pool_chunk_free(struct mlx5hws_pool *pool,
struct mlx5hws_pool_chunk *chunk);
-static inline u32
-mlx5hws_pool_chunk_get_base_id(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_chunk *chunk)
+static inline u32 mlx5hws_pool_get_base_id(struct mlx5hws_pool *pool)
{
- return pool->resource[chunk->resource_idx]->base_id;
+ return pool->resource->base_id;
}
-static inline u32
-mlx5hws_pool_chunk_get_base_mirror_id(struct mlx5hws_pool *pool,
- struct mlx5hws_pool_chunk *chunk)
+static inline u32 mlx5hws_pool_get_base_mirror_id(struct mlx5hws_pool *pool)
{
- return pool->mirror_resource[chunk->resource_idx]->base_id;
+ return pool->mirror_resource->base_id;
+}
+
+static inline bool
+mlx5hws_pool_empty(struct mlx5hws_pool *pool)
+{
+ bool ret;
+
+ mutex_lock(&pool->lock);
+ ret = pool->available_elems == 0;
+ mutex_unlock(&pool->lock);
+
+ return ret;
+}
+
+static inline bool
+mlx5hws_pool_full(struct mlx5hws_pool *pool)
+{
+ bool ret;
+
+ mutex_lock(&pool->lock);
+ ret = pool->available_elems == (1 << pool->alloc_log_sz);
+ mutex_unlock(&pool->lock);
+
+ return ret;
}
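
A hedged usage sketch of the new element accounting, using the real helpers from this patch and assuming a pool created with alloc_log_sz = 4 (16 elements). An order-2 chunk consumes four of them, and mlx5hws_pool_full() reports full again only after the free:

static void pool_accounting_demo(struct mlx5hws_pool *pool)
{
	struct mlx5hws_pool_chunk chunk = { .order = 2 };

	if (mlx5hws_pool_chunk_alloc(pool, &chunk))
		return;

	/* available_elems dropped from 16 to 12 */
	WARN_ON(mlx5hws_pool_full(pool));

	mlx5hws_pool_chunk_free(pool, &chunk);
	/* back to 16: nothing outstanding, the pool reads as full */
	WARN_ON(!mlx5hws_pool_full(pool));
}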
#endif /* MLX5HWS_POOL_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
index a27a2d5ffc7b..9e6f35d68445 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
@@ -195,44 +195,30 @@ hws_rule_load_delete_info(struct mlx5hws_rule *rule,
}
}
-static int hws_rule_alloc_action_ste(struct mlx5hws_rule *rule)
+static int mlx5hws_rule_alloc_action_ste(struct mlx5hws_rule *rule,
+ u16 queue_id, bool skip_rx,
+ bool skip_tx)
{
struct mlx5hws_matcher *matcher = rule->matcher;
- struct mlx5hws_matcher_action_ste *action_ste;
- struct mlx5hws_pool_chunk ste = {0};
- int ret;
-
- action_ste = &matcher->action_ste;
- ste.order = ilog2(roundup_pow_of_two(action_ste->max_stes));
- ret = mlx5hws_pool_chunk_alloc(action_ste->pool, &ste);
- if (unlikely(ret)) {
- mlx5hws_err(matcher->tbl->ctx,
- "Failed to allocate STE for rule actions");
- return ret;
- }
-
- rule->action_ste.pool = matcher->action_ste.pool;
- rule->action_ste.num_stes = matcher->action_ste.max_stes;
- rule->action_ste.index = ste.offset;
+ struct mlx5hws_context *ctx = matcher->tbl->ctx;
- return 0;
+ rule->action_ste.ste.order =
+ ilog2(roundup_pow_of_two(matcher->num_of_action_stes));
+ return mlx5hws_action_ste_chunk_alloc(&ctx->action_ste_pool[queue_id],
+ skip_rx, skip_tx,
+ &rule->action_ste);
}
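
Worked example of the order computation above (the max_stes value is hypothetical): roundup_pow_of_two(5) == 8 and ilog2(8) == 3, so each such rule reserves an 8-entry action STE chunk:

#include <linux/log2.h>
#include <linux/types.h>

/* Illustrative arithmetic only; the max_stes value is hypothetical. */
static int example_action_ste_order(void)
{
	u8 max_stes = 5;

	/* roundup_pow_of_two(5) == 8, ilog2(8) == 3: an 8-entry chunk */
	return ilog2(roundup_pow_of_two(max_stes));
}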
-void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste)
+void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste)
{
- struct mlx5hws_pool_chunk ste = {0};
-
- if (!action_ste->num_stes)
+ if (!action_ste->action_tbl)
return;
- ste.order = ilog2(roundup_pow_of_two(action_ste->num_stes));
- ste.offset = action_ste->index;
-
/* This release is safe only when the rule match STE was deleted
* (when the rule is being deleted) or replaced with the new STE that
* isn't pointing to old action STEs (when the rule is being updated).
*/
- mlx5hws_pool_chunk_free(action_ste->pool, &ste);
+ mlx5hws_action_ste_chunk_free(action_ste);
}
static void hws_rule_create_init(struct mlx5hws_rule *rule,
@@ -250,22 +236,15 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule,
rule->rtc_0 = 0;
rule->rtc_1 = 0;
- rule->action_ste.pool = NULL;
- rule->action_ste.num_stes = 0;
- rule->action_ste.index = -1;
-
rule->status = MLX5HWS_RULE_STATUS_CREATING;
} else {
rule->status = MLX5HWS_RULE_STATUS_UPDATING;
+ /* Save the old action STE info so we can free it after writing
+ * new action STEs and a corresponding match STE.
+ */
+ rule->old_action_ste = rule->action_ste;
}
- /* Initialize the old action STE info - shallow-copy action_ste.
- * In create flow this will set old_action_ste fields to initial values.
- * In update flow this will save the existing action STE info,
- * so that we will later use it to free old STEs.
- */
- rule->old_action_ste = rule->action_ste;
-
rule->pending_wqes = 0;
/* Init default send STE attributes */
@@ -277,7 +256,6 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule,
/* Init default action apply */
apply->tbl_type = tbl->type;
apply->common_res = &ctx->common_res;
- apply->jump_to_action_stc = matcher->action_ste.stc.offset;
apply->require_dep = 0;
}
@@ -353,17 +331,24 @@ static int hws_rule_create_hws(struct mlx5hws_rule *rule,
if (action_stes) {
/* Allocate action STEs for rules that need more than match STE */
- ret = hws_rule_alloc_action_ste(rule);
+ ret = mlx5hws_rule_alloc_action_ste(rule, attr->queue_id,
+ !!ste_attr.rtc_0,
+ !!ste_attr.rtc_1);
if (ret) {
mlx5hws_err(ctx, "Failed to allocate action memory %d", ret);
mlx5hws_send_abort_new_dep_wqe(queue);
return ret;
}
+ apply.jump_to_action_stc =
+ rule->action_ste.action_tbl->stc.offset;
/* Skip RX/TX based on the dep_wqe init */
- ste_attr.rtc_0 = dep_wqe->rtc_0 ? matcher->action_ste.rtc_0_id : 0;
- ste_attr.rtc_1 = dep_wqe->rtc_1 ? matcher->action_ste.rtc_1_id : 0;
+ ste_attr.rtc_0 = dep_wqe->rtc_0 ?
+ rule->action_ste.action_tbl->rtc_0_id : 0;
+ ste_attr.rtc_1 = dep_wqe->rtc_1 ?
+ rule->action_ste.action_tbl->rtc_1_id : 0;
/* Action STEs are written to a specific index last to first */
- ste_attr.direct_index = rule->action_ste.index + action_stes;
+ ste_attr.direct_index =
+ rule->action_ste.ste.offset + action_stes;
apply.next_direct_idx = ste_attr.direct_index;
} else {
apply.next_direct_idx = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
index b5ee94ac449b..1c47a9c11572 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
@@ -43,12 +43,6 @@ struct mlx5hws_rule_match_tag {
};
};
-struct mlx5hws_rule_action_ste_info {
- struct mlx5hws_pool *pool;
- int index; /* STE array index */
- u8 num_stes;
-};
-
struct mlx5hws_rule_resize_info {
u32 rtc_0;
u32 rtc_1;
@@ -64,8 +58,8 @@ struct mlx5hws_rule {
struct mlx5hws_rule_match_tag tag;
struct mlx5hws_rule_resize_info *resize_info;
};
- struct mlx5hws_rule_action_ste_info action_ste;
- struct mlx5hws_rule_action_ste_info old_action_ste;
+ struct mlx5hws_action_ste_chunk action_ste;
+ struct mlx5hws_action_ste_chunk old_action_ste;
u32 rtc_0; /* The RTC into which the STE was inserted */
u32 rtc_1; /* The RTC into which the STE was inserted */
u8 status; /* enum mlx5hws_rule_status */
@@ -75,7 +69,7 @@ struct mlx5hws_rule {
*/
};
-void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste);
+void mlx5hws_rule_free_action_ste(struct mlx5hws_action_ste_chunk *action_ste);
int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule,
void *queue, void *user_data);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h
index 4ca7b99ef131..80d54edaac55 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic.h
@@ -81,6 +81,9 @@ struct fbnic_dev {
/* Local copy of hardware statistics */
struct fbnic_hw_stats hw_stats;
+
+ /* Lock protecting access to hw_stats */
+ spinlock_t hw_stats_lock;
};
/* Reserve entry 0 in the MSI-X "others" array until we have filled all
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
index 3b12a0ab5906..0c217c195c6a 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h
@@ -397,6 +397,15 @@ enum {
#define FBNIC_TCE_DROP_CTRL_TTI_FRM_DROP_EN CSR_BIT(1)
#define FBNIC_TCE_DROP_CTRL_TTI_TBI_DROP_EN CSR_BIT(2)
+#define FBNIC_TCE_TTI_CM_DROP_PKTS 0x0403e /* 0x100f8 */
+#define FBNIC_TCE_TTI_CM_DROP_BYTE_L 0x0403f /* 0x100fc */
+#define FBNIC_TCE_TTI_CM_DROP_BYTE_H 0x04040 /* 0x10100 */
+#define FBNIC_TCE_TTI_FRAME_DROP_PKTS 0x04041 /* 0x10104 */
+#define FBNIC_TCE_TTI_FRAME_DROP_BYTE_L 0x04042 /* 0x10108 */
+#define FBNIC_TCE_TTI_FRAME_DROP_BYTE_H 0x04043 /* 0x1010c */
+#define FBNIC_TCE_TBI_DROP_PKTS 0x04044 /* 0x10110 */
+#define FBNIC_TCE_TBI_DROP_BYTE_L 0x04045 /* 0x10114 */
+
#define FBNIC_TCE_TCAM_IDX2DEST_MAP 0x0404A /* 0x10128 */
#define FBNIC_TCE_TCAM_IDX2DEST_MAP_DEST_ID_0 CSR_GENMASK(3, 0)
enum {
@@ -432,6 +441,11 @@ enum {
#define FBNIC_TMI_SOP_PROT_CTRL 0x04400 /* 0x11000 */
#define FBNIC_TMI_DROP_CTRL 0x04401 /* 0x11004 */
#define FBNIC_TMI_DROP_CTRL_EN CSR_BIT(0)
+#define FBNIC_TMI_DROP_PKTS 0x04402 /* 0x11008 */
+#define FBNIC_TMI_DROP_BYTE_L 0x04403 /* 0x1100c */
+#define FBNIC_TMI_ILLEGAL_PTP_REQS 0x04409 /* 0x11024 */
+#define FBNIC_TMI_GOOD_PTP_TS 0x0440a /* 0x11028 */
+#define FBNIC_TMI_BAD_PTP_TS 0x0440b /* 0x1102c */
#define FBNIC_CSR_END_TMI 0x0443f /* CSR section delimiter */
/* Precision Time Protocol Registers */
@@ -485,6 +499,14 @@ enum {
FBNIC_RXB_FIFO_INDICES = 8
};
+enum {
+ FBNIC_RXB_INTF_NET = 0,
+ FBNIC_RXB_INTF_RBT = 1,
+ /* Unused */
+ /* Unused */
+ FBNIC_RXB_INTF_INDICES = 4
+};
+
#define FBNIC_RXB_CT_SIZE(n) (0x08000 + (n)) /* 0x20000 + 4*n */
#define FBNIC_RXB_CT_SIZE_CNT 8
#define FBNIC_RXB_CT_SIZE_HEADER CSR_GENMASK(5, 0)
@@ -864,6 +886,12 @@ enum {
#define FBNIC_QUEUE_TWQ1_BAL 0x022 /* 0x088 */
#define FBNIC_QUEUE_TWQ1_BAH 0x023 /* 0x08c */
+/* Tx Work Queue Statistics Registers */
+#define FBNIC_QUEUE_TWQ0_PKT_CNT 0x062 /* 0x188 */
+#define FBNIC_QUEUE_TWQ0_ERR_CNT 0x063 /* 0x18c */
+#define FBNIC_QUEUE_TWQ1_PKT_CNT 0x072 /* 0x1c8 */
+#define FBNIC_QUEUE_TWQ1_ERR_CNT 0x073 /* 0x1cc */
+
/* Tx Completion Queue Registers */
#define FBNIC_QUEUE_TCQ_CTL 0x080 /* 0x200 */
#define FBNIC_QUEUE_TCQ_CTL_RESET CSR_BIT(0)
@@ -953,6 +981,12 @@ enum {
FBNIC_QUEUE_RDE_CTL1_PAYLD_PACK_RSS = 2,
};
+/* Rx Per CQ Statistics Counters */
+#define FBNIC_QUEUE_RDE_PKT_CNT 0x2a2 /* 0xa88 */
+#define FBNIC_QUEUE_RDE_PKT_ERR_CNT 0x2a3 /* 0xa8c */
+#define FBNIC_QUEUE_RDE_CQ_DROP_CNT 0x2a4 /* 0xa90 */
+#define FBNIC_QUEUE_RDE_BDQ_DROP_CNT 0x2a5 /* 0xa94 */
+
/* Rx Interrupt Manager Registers */
#define FBNIC_QUEUE_RIM_CTL 0x2c0 /* 0xb00 */
#define FBNIC_QUEUE_RIM_CTL_MSIX_MASK CSR_GENMASK(7, 0)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
index 0a751a2aaf73..5c7556c8c4c5 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c
@@ -27,6 +27,19 @@ struct fbnic_stat {
FBNIC_STAT_FIELDS(fbnic_hw_stats, name, stat)
static const struct fbnic_stat fbnic_gstrings_hw_stats[] = {
+ /* TTI */
+ FBNIC_HW_STAT("tti_cm_drop_frames", tti.cm_drop.frames),
+ FBNIC_HW_STAT("tti_cm_drop_bytes", tti.cm_drop.bytes),
+ FBNIC_HW_STAT("tti_frame_drop_frames", tti.frame_drop.frames),
+ FBNIC_HW_STAT("tti_frame_drop_bytes", tti.frame_drop.bytes),
+ FBNIC_HW_STAT("tti_tbi_drop_frames", tti.tbi_drop.frames),
+ FBNIC_HW_STAT("tti_tbi_drop_bytes", tti.tbi_drop.bytes),
+
+ /* TMI */
+ FBNIC_HW_STAT("ptp_illegal_req", tmi.ptp_illegal_req),
+ FBNIC_HW_STAT("ptp_good_ts", tmi.ptp_good_ts),
+ FBNIC_HW_STAT("ptp_bad_ts", tmi.ptp_bad_ts),
+
/* RPC */
FBNIC_HW_STAT("rpc_unkn_etype", rpc.unkn_etype),
FBNIC_HW_STAT("rpc_unkn_ext_hdr", rpc.unkn_ext_hdr),
@@ -39,7 +52,64 @@ static const struct fbnic_stat fbnic_gstrings_hw_stats[] = {
};
#define FBNIC_HW_FIXED_STATS_LEN ARRAY_SIZE(fbnic_gstrings_hw_stats)
-#define FBNIC_HW_STATS_LEN FBNIC_HW_FIXED_STATS_LEN
+
+#define FBNIC_RXB_ENQUEUE_STAT(name, stat) \
+ FBNIC_STAT_FIELDS(fbnic_rxb_enqueue_stats, name, stat)
+
+static const struct fbnic_stat fbnic_gstrings_rxb_enqueue_stats[] = {
+ FBNIC_RXB_ENQUEUE_STAT("rxb_integrity_err%u", integrity_err),
+ FBNIC_RXB_ENQUEUE_STAT("rxb_mac_err%u", mac_err),
+ FBNIC_RXB_ENQUEUE_STAT("rxb_parser_err%u", parser_err),
+ FBNIC_RXB_ENQUEUE_STAT("rxb_frm_err%u", frm_err),
+
+ FBNIC_RXB_ENQUEUE_STAT("rxb_drbo%u_frames", drbo.frames),
+ FBNIC_RXB_ENQUEUE_STAT("rxb_drbo%u_bytes", drbo.bytes),
+};
+
+#define FBNIC_HW_RXB_ENQUEUE_STATS_LEN \
+ ARRAY_SIZE(fbnic_gstrings_rxb_enqueue_stats)
+
+#define FBNIC_RXB_FIFO_STAT(name, stat) \
+ FBNIC_STAT_FIELDS(fbnic_rxb_fifo_stats, name, stat)
+
+static const struct fbnic_stat fbnic_gstrings_rxb_fifo_stats[] = {
+ FBNIC_RXB_FIFO_STAT("rxb_fifo%u_drop", trans_drop),
+ FBNIC_RXB_FIFO_STAT("rxb_fifo%u_dropped_frames", drop.frames),
+ FBNIC_RXB_FIFO_STAT("rxb_fifo%u_ecn", trans_ecn),
+ FBNIC_RXB_FIFO_STAT("rxb_fifo%u_level", level),
+};
+
+#define FBNIC_HW_RXB_FIFO_STATS_LEN ARRAY_SIZE(fbnic_gstrings_rxb_fifo_stats)
+
+#define FBNIC_RXB_DEQUEUE_STAT(name, stat) \
+ FBNIC_STAT_FIELDS(fbnic_rxb_dequeue_stats, name, stat)
+
+static const struct fbnic_stat fbnic_gstrings_rxb_dequeue_stats[] = {
+ FBNIC_RXB_DEQUEUE_STAT("rxb_intf%u_frames", intf.frames),
+ FBNIC_RXB_DEQUEUE_STAT("rxb_intf%u_bytes", intf.bytes),
+ FBNIC_RXB_DEQUEUE_STAT("rxb_pbuf%u_frames", pbuf.frames),
+ FBNIC_RXB_DEQUEUE_STAT("rxb_pbuf%u_bytes", pbuf.bytes),
+};
+
+#define FBNIC_HW_RXB_DEQUEUE_STATS_LEN \
+ ARRAY_SIZE(fbnic_gstrings_rxb_dequeue_stats)
+
+#define FBNIC_HW_Q_STAT(name, stat) \
+ FBNIC_STAT_FIELDS(fbnic_hw_q_stats, name, stat.value)
+
+static const struct fbnic_stat fbnic_gstrings_hw_q_stats[] = {
+ FBNIC_HW_Q_STAT("rde_%u_pkt_err", rde_pkt_err),
+ FBNIC_HW_Q_STAT("rde_%u_pkt_cq_drop", rde_pkt_cq_drop),
+ FBNIC_HW_Q_STAT("rde_%u_pkt_bdq_drop", rde_pkt_bdq_drop),
+};
+
+#define FBNIC_HW_Q_STATS_LEN ARRAY_SIZE(fbnic_gstrings_hw_q_stats)
+#define FBNIC_HW_STATS_LEN \
+ (FBNIC_HW_FIXED_STATS_LEN + \
+ FBNIC_HW_RXB_ENQUEUE_STATS_LEN * FBNIC_RXB_ENQUEUE_INDICES + \
+ FBNIC_HW_RXB_FIFO_STATS_LEN * FBNIC_RXB_FIFO_INDICES + \
+ FBNIC_HW_RXB_DEQUEUE_STATS_LEN * FBNIC_RXB_DEQUEUE_INDICES + \
+ FBNIC_HW_Q_STATS_LEN * FBNIC_MAX_QUEUES)
static void
fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
@@ -298,31 +368,125 @@ err_free_clone:
return err;
}
-static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data)
+static void fbnic_get_rxb_enqueue_strings(u8 **data, unsigned int idx)
+{
+ const struct fbnic_stat *stat;
+ int i;
+
+ stat = fbnic_gstrings_rxb_enqueue_stats;
+ for (i = 0; i < FBNIC_HW_RXB_ENQUEUE_STATS_LEN; i++, stat++)
+ ethtool_sprintf(data, stat->string, idx);
+}
+
+static void fbnic_get_rxb_fifo_strings(u8 **data, unsigned int idx)
+{
+ const struct fbnic_stat *stat;
+ int i;
+
+ stat = fbnic_gstrings_rxb_fifo_stats;
+ for (i = 0; i < FBNIC_HW_RXB_FIFO_STATS_LEN; i++, stat++)
+ ethtool_sprintf(data, stat->string, idx);
+}
+
+static void fbnic_get_rxb_dequeue_strings(u8 **data, unsigned int idx)
{
+ const struct fbnic_stat *stat;
int i;
+ stat = fbnic_gstrings_rxb_dequeue_stats;
+ for (i = 0; i < FBNIC_HW_RXB_DEQUEUE_STATS_LEN; i++, stat++)
+ ethtool_sprintf(data, stat->string, idx);
+}
+
+static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data)
+{
+ const struct fbnic_stat *stat;
+ int i, idx;
+
switch (sset) {
case ETH_SS_STATS:
- for (i = 0; i < FBNIC_HW_STATS_LEN; i++)
+ for (i = 0; i < FBNIC_HW_FIXED_STATS_LEN; i++)
ethtool_puts(&data, fbnic_gstrings_hw_stats[i].string);
+
+ for (i = 0; i < FBNIC_RXB_ENQUEUE_INDICES; i++)
+ fbnic_get_rxb_enqueue_strings(&data, i);
+
+ for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++)
+ fbnic_get_rxb_fifo_strings(&data, i);
+
+ for (i = 0; i < FBNIC_RXB_DEQUEUE_INDICES; i++)
+ fbnic_get_rxb_dequeue_strings(&data, i);
+
+ for (idx = 0; idx < FBNIC_MAX_QUEUES; idx++) {
+ stat = fbnic_gstrings_hw_q_stats;
+
+ for (i = 0; i < FBNIC_HW_Q_STATS_LEN; i++, stat++)
+ ethtool_sprintf(&data, stat->string, idx);
+ }
break;
}
}
+static void fbnic_report_hw_stats(const struct fbnic_stat *stat,
+ const void *base, int len, u64 **data)
+{
+ while (len--) {
+ u8 *curr = (u8 *)base + stat->offset;
+
+ **data = *(u64 *)curr;
+
+ stat++;
+ (*data)++;
+ }
+}
+
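
fbnic_report_hw_stats() above walks an offsetof-based descriptor table, the same pattern the FBNIC_STAT_FIELDS macros build. A self-contained sketch of that pattern with hypothetical demo_* names:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct demo_stats {
	uint64_t frames;
	uint64_t bytes;
};

struct demo_stat_desc {
	const char *name;
	size_t offset;
};

#define DEMO_STAT(n, member) \
	{ .name = (n), .offset = offsetof(struct demo_stats, member) }

static const struct demo_stat_desc demo_descs[] = {
	DEMO_STAT("frames", frames),
	DEMO_STAT("bytes", bytes),
};

/* Generic walker: one loop dumps any struct described by a table. */
static void demo_report(const struct demo_stats *base)
{
	size_t i;

	for (i = 0; i < sizeof(demo_descs) / sizeof(demo_descs[0]); i++) {
		const uint64_t *v = (const uint64_t *)
			((const char *)base + demo_descs[i].offset);

		printf("%s: %llu\n", demo_descs[i].name,
		       (unsigned long long)*v);
	}
}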
static void fbnic_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct fbnic_net *fbn = netdev_priv(dev);
- const struct fbnic_stat *stat;
+ struct fbnic_dev *fbd = fbn->fbd;
int i;
fbnic_get_hw_stats(fbn->fbd);
- for (i = 0; i < FBNIC_HW_STATS_LEN; i++) {
- stat = &fbnic_gstrings_hw_stats[i];
- data[i] = *(u64 *)((u8 *)&fbn->fbd->hw_stats + stat->offset);
+ spin_lock(&fbd->hw_stats_lock);
+ fbnic_report_hw_stats(fbnic_gstrings_hw_stats, &fbd->hw_stats,
+ FBNIC_HW_FIXED_STATS_LEN, &data);
+
+ for (i = 0; i < FBNIC_RXB_ENQUEUE_INDICES; i++) {
+ const struct fbnic_rxb_enqueue_stats *enq;
+
+ enq = &fbd->hw_stats.rxb.enq[i];
+ fbnic_report_hw_stats(fbnic_gstrings_rxb_enqueue_stats,
+ enq, FBNIC_HW_RXB_ENQUEUE_STATS_LEN,
+ &data);
+ }
+
+ for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++) {
+ const struct fbnic_rxb_fifo_stats *fifo;
+
+ fifo = &fbd->hw_stats.rxb.fifo[i];
+ fbnic_report_hw_stats(fbnic_gstrings_rxb_fifo_stats,
+ fifo, FBNIC_HW_RXB_FIFO_STATS_LEN,
+ &data);
+ }
+
+ for (i = 0; i < FBNIC_RXB_DEQUEUE_INDICES; i++) {
+ const struct fbnic_rxb_dequeue_stats *deq;
+
+ deq = &fbd->hw_stats.rxb.deq[i];
+ fbnic_report_hw_stats(fbnic_gstrings_rxb_dequeue_stats,
+ deq, FBNIC_HW_RXB_DEQUEUE_STATS_LEN,
+ &data);
+ }
+
+ for (i = 0; i < FBNIC_MAX_QUEUES; i++) {
+ const struct fbnic_hw_q_stats *hw_q = &fbd->hw_stats.hw_q[i];
+
+ fbnic_report_hw_stats(fbnic_gstrings_hw_q_stats, hw_q,
+ FBNIC_HW_Q_STATS_LEN, &data);
}
+ spin_unlock(&fbd->hw_stats_lock);
}
static int fbnic_get_sset_count(struct net_device *dev, int sset)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c
index 89ac6bc8c7fc..4223d8100e64 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c
@@ -70,6 +70,100 @@ static void fbnic_hw_stat_rd64(struct fbnic_dev *fbd, u32 reg, s32 offset,
stat->u.old_reg_value_64 = new_reg_value;
}
+static void fbnic_reset_tmi_stats(struct fbnic_dev *fbd,
+ struct fbnic_tmi_stats *tmi)
+{
+ fbnic_hw_stat_rst32(fbd, FBNIC_TMI_DROP_PKTS, &tmi->drop.frames);
+ fbnic_hw_stat_rst64(fbd, FBNIC_TMI_DROP_BYTE_L, 1, &tmi->drop.bytes);
+
+ fbnic_hw_stat_rst32(fbd,
+ FBNIC_TMI_ILLEGAL_PTP_REQS,
+ &tmi->ptp_illegal_req);
+ fbnic_hw_stat_rst32(fbd, FBNIC_TMI_GOOD_PTP_TS, &tmi->ptp_good_ts);
+ fbnic_hw_stat_rst32(fbd, FBNIC_TMI_BAD_PTP_TS, &tmi->ptp_bad_ts);
+}
+
+static void fbnic_get_tmi_stats32(struct fbnic_dev *fbd,
+ struct fbnic_tmi_stats *tmi)
+{
+ fbnic_hw_stat_rd32(fbd, FBNIC_TMI_DROP_PKTS, &tmi->drop.frames);
+
+ fbnic_hw_stat_rd32(fbd,
+ FBNIC_TMI_ILLEGAL_PTP_REQS,
+ &tmi->ptp_illegal_req);
+ fbnic_hw_stat_rd32(fbd, FBNIC_TMI_GOOD_PTP_TS, &tmi->ptp_good_ts);
+ fbnic_hw_stat_rd32(fbd, FBNIC_TMI_BAD_PTP_TS, &tmi->ptp_bad_ts);
+}
+
+static void fbnic_get_tmi_stats(struct fbnic_dev *fbd,
+ struct fbnic_tmi_stats *tmi)
+{
+ fbnic_hw_stat_rd64(fbd, FBNIC_TMI_DROP_BYTE_L, 1, &tmi->drop.bytes);
+}
+
+static void fbnic_reset_tti_stats(struct fbnic_dev *fbd,
+ struct fbnic_tti_stats *tti)
+{
+ fbnic_hw_stat_rst32(fbd,
+ FBNIC_TCE_TTI_CM_DROP_PKTS,
+ &tti->cm_drop.frames);
+ fbnic_hw_stat_rst64(fbd,
+ FBNIC_TCE_TTI_CM_DROP_BYTE_L,
+ 1,
+ &tti->cm_drop.bytes);
+
+ fbnic_hw_stat_rst32(fbd,
+ FBNIC_TCE_TTI_FRAME_DROP_PKTS,
+ &tti->frame_drop.frames);
+ fbnic_hw_stat_rst64(fbd,
+ FBNIC_TCE_TTI_FRAME_DROP_BYTE_L,
+ 1,
+ &tti->frame_drop.bytes);
+
+ fbnic_hw_stat_rst32(fbd,
+ FBNIC_TCE_TBI_DROP_PKTS,
+ &tti->tbi_drop.frames);
+ fbnic_hw_stat_rst64(fbd,
+ FBNIC_TCE_TBI_DROP_BYTE_L,
+ 1,
+ &tti->tbi_drop.bytes);
+}
+
+static void fbnic_get_tti_stats32(struct fbnic_dev *fbd,
+ struct fbnic_tti_stats *tti)
+{
+ fbnic_hw_stat_rd32(fbd,
+ FBNIC_TCE_TTI_CM_DROP_PKTS,
+ &tti->cm_drop.frames);
+
+ fbnic_hw_stat_rd32(fbd,
+ FBNIC_TCE_TTI_FRAME_DROP_PKTS,
+ &tti->frame_drop.frames);
+
+ fbnic_hw_stat_rd32(fbd,
+ FBNIC_TCE_TBI_DROP_PKTS,
+ &tti->tbi_drop.frames);
+}
+
+static void fbnic_get_tti_stats(struct fbnic_dev *fbd,
+ struct fbnic_tti_stats *tti)
+{
+ fbnic_hw_stat_rd64(fbd,
+ FBNIC_TCE_TTI_CM_DROP_BYTE_L,
+ 1,
+ &tti->cm_drop.bytes);
+
+ fbnic_hw_stat_rd64(fbd,
+ FBNIC_TCE_TTI_FRAME_DROP_BYTE_L,
+ 1,
+ &tti->frame_drop.bytes);
+
+ fbnic_hw_stat_rd64(fbd,
+ FBNIC_TCE_TBI_DROP_BYTE_L,
+ 1,
+ &tti->tbi_drop.bytes);
+}
+
static void fbnic_reset_rpc_stats(struct fbnic_dev *fbd,
struct fbnic_rpc_stats *rpc)
{
@@ -117,6 +211,221 @@ static void fbnic_get_rpc_stats32(struct fbnic_dev *fbd,
&rpc->ovr_size_err);
}
+static void fbnic_reset_rxb_fifo_stats(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_fifo_stats *fifo)
+{
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_DROP_FRMS_STS(i),
+ &fifo->drop.frames);
+ fbnic_hw_stat_rst64(fbd, FBNIC_RXB_DROP_BYTES_STS_L(i), 1,
+ &fifo->drop.bytes);
+
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_TRUN_FRMS_STS(i),
+ &fifo->trunc.frames);
+ fbnic_hw_stat_rst64(fbd, FBNIC_RXB_TRUN_BYTES_STS_L(i), 1,
+ &fifo->trunc.bytes);
+
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_TRANS_DROP_STS(i),
+ &fifo->trans_drop);
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_TRANS_ECN_STS(i),
+ &fifo->trans_ecn);
+
+ fifo->level.u.old_reg_value_32 = 0;
+}
+
+static void fbnic_reset_rxb_enq_stats(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_enqueue_stats *enq)
+{
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_DRBO_FRM_CNT_SRC(i),
+ &enq->drbo.frames);
+ fbnic_hw_stat_rst64(fbd, FBNIC_RXB_DRBO_BYTE_CNT_SRC_L(i), 4,
+ &enq->drbo.bytes);
+
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_INTEGRITY_ERR(i),
+ &enq->integrity_err);
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_MAC_ERR(i),
+ &enq->mac_err);
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_PARSER_ERR(i),
+ &enq->parser_err);
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_FRM_ERR(i),
+ &enq->frm_err);
+}
+
+static void fbnic_reset_rxb_deq_stats(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_dequeue_stats *deq)
+{
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_INTF_FRM_CNT_DST(i),
+ &deq->intf.frames);
+ fbnic_hw_stat_rst64(fbd, FBNIC_RXB_INTF_BYTE_CNT_DST_L(i), 4,
+ &deq->intf.bytes);
+
+ fbnic_hw_stat_rst32(fbd, FBNIC_RXB_PBUF_FRM_CNT_DST(i),
+ &deq->pbuf.frames);
+ fbnic_hw_stat_rst64(fbd, FBNIC_RXB_PBUF_BYTE_CNT_DST_L(i), 4,
+ &deq->pbuf.bytes);
+}
+
+static void fbnic_reset_rxb_stats(struct fbnic_dev *fbd,
+ struct fbnic_rxb_stats *rxb)
+{
+ int i;
+
+ for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++)
+ fbnic_reset_rxb_fifo_stats(fbd, i, &rxb->fifo[i]);
+
+ for (i = 0; i < FBNIC_RXB_INTF_INDICES; i++) {
+ fbnic_reset_rxb_enq_stats(fbd, i, &rxb->enq[i]);
+ fbnic_reset_rxb_deq_stats(fbd, i, &rxb->deq[i]);
+ }
+}
+
+static void fbnic_get_rxb_fifo_stats32(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_fifo_stats *fifo)
+{
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_DROP_FRMS_STS(i),
+ &fifo->drop.frames);
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_TRUN_FRMS_STS(i),
+ &fifo->trunc.frames);
+
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_TRANS_DROP_STS(i),
+ &fifo->trans_drop);
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_TRANS_ECN_STS(i),
+ &fifo->trans_ecn);
+
+ fifo->level.value = rd32(fbd, FBNIC_RXB_PBUF_FIFO_LEVEL(i));
+}
+
+static void fbnic_get_rxb_fifo_stats(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_fifo_stats *fifo)
+{
+ fbnic_hw_stat_rd64(fbd, FBNIC_RXB_DROP_BYTES_STS_L(i), 1,
+ &fifo->drop.bytes);
+ fbnic_hw_stat_rd64(fbd, FBNIC_RXB_TRUN_BYTES_STS_L(i), 1,
+ &fifo->trunc.bytes);
+
+ fbnic_get_rxb_fifo_stats32(fbd, i, fifo);
+}
+
+static void fbnic_get_rxb_enq_stats32(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_enqueue_stats *enq)
+{
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_DRBO_FRM_CNT_SRC(i),
+ &enq->drbo.frames);
+
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_INTEGRITY_ERR(i),
+ &enq->integrity_err);
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_MAC_ERR(i),
+ &enq->mac_err);
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_PARSER_ERR(i),
+ &enq->parser_err);
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_FRM_ERR(i),
+ &enq->frm_err);
+}
+
+static void fbnic_get_rxb_enq_stats(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_enqueue_stats *enq)
+{
+ fbnic_hw_stat_rd64(fbd, FBNIC_RXB_DRBO_BYTE_CNT_SRC_L(i), 4,
+ &enq->drbo.bytes);
+
+ fbnic_get_rxb_enq_stats32(fbd, i, enq);
+}
+
+static void fbnic_get_rxb_deq_stats32(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_dequeue_stats *deq)
+{
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_INTF_FRM_CNT_DST(i),
+ &deq->intf.frames);
+ fbnic_hw_stat_rd32(fbd, FBNIC_RXB_PBUF_FRM_CNT_DST(i),
+ &deq->pbuf.frames);
+}
+
+static void fbnic_get_rxb_deq_stats(struct fbnic_dev *fbd, int i,
+ struct fbnic_rxb_dequeue_stats *deq)
+{
+ fbnic_hw_stat_rd64(fbd, FBNIC_RXB_INTF_BYTE_CNT_DST_L(i), 4,
+ &deq->intf.bytes);
+ fbnic_hw_stat_rd64(fbd, FBNIC_RXB_PBUF_BYTE_CNT_DST_L(i), 4,
+ &deq->pbuf.bytes);
+
+ fbnic_get_rxb_deq_stats32(fbd, i, deq);
+}
+
+static void fbnic_get_rxb_stats32(struct fbnic_dev *fbd,
+ struct fbnic_rxb_stats *rxb)
+{
+ int i;
+
+ for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++)
+ fbnic_get_rxb_fifo_stats32(fbd, i, &rxb->fifo[i]);
+
+ for (i = 0; i < FBNIC_RXB_INTF_INDICES; i++) {
+ fbnic_get_rxb_enq_stats32(fbd, i, &rxb->enq[i]);
+ fbnic_get_rxb_deq_stats32(fbd, i, &rxb->deq[i]);
+ }
+}
+
+static void fbnic_get_rxb_stats(struct fbnic_dev *fbd,
+ struct fbnic_rxb_stats *rxb)
+{
+ int i;
+
+ for (i = 0; i < FBNIC_RXB_FIFO_INDICES; i++)
+ fbnic_get_rxb_fifo_stats(fbd, i, &rxb->fifo[i]);
+
+ for (i = 0; i < FBNIC_RXB_INTF_INDICES; i++) {
+ fbnic_get_rxb_enq_stats(fbd, i, &rxb->enq[i]);
+ fbnic_get_rxb_deq_stats(fbd, i, &rxb->deq[i]);
+ }
+}
+
+static void fbnic_reset_hw_rxq_stats(struct fbnic_dev *fbd,
+ struct fbnic_hw_q_stats *hw_q)
+{
+ int i;
+
+ for (i = 0; i < fbd->max_num_queues; i++, hw_q++) {
+ u32 base = FBNIC_QUEUE(i);
+
+ fbnic_hw_stat_rst32(fbd,
+ base + FBNIC_QUEUE_RDE_PKT_ERR_CNT,
+ &hw_q->rde_pkt_err);
+ fbnic_hw_stat_rst32(fbd,
+ base + FBNIC_QUEUE_RDE_CQ_DROP_CNT,
+ &hw_q->rde_pkt_cq_drop);
+ fbnic_hw_stat_rst32(fbd,
+ base + FBNIC_QUEUE_RDE_BDQ_DROP_CNT,
+ &hw_q->rde_pkt_bdq_drop);
+ }
+}
+
+static void fbnic_get_hw_rxq_stats32(struct fbnic_dev *fbd,
+ struct fbnic_hw_q_stats *hw_q)
+{
+ int i;
+
+ for (i = 0; i < fbd->max_num_queues; i++, hw_q++) {
+ u32 base = FBNIC_QUEUE(i);
+
+ fbnic_hw_stat_rd32(fbd,
+ base + FBNIC_QUEUE_RDE_PKT_ERR_CNT,
+ &hw_q->rde_pkt_err);
+ fbnic_hw_stat_rd32(fbd,
+ base + FBNIC_QUEUE_RDE_CQ_DROP_CNT,
+ &hw_q->rde_pkt_cq_drop);
+ fbnic_hw_stat_rd32(fbd,
+ base + FBNIC_QUEUE_RDE_BDQ_DROP_CNT,
+ &hw_q->rde_pkt_bdq_drop);
+ }
+}
+
+void fbnic_get_hw_q_stats(struct fbnic_dev *fbd,
+ struct fbnic_hw_q_stats *hw_q)
+{
+ spin_lock(&fbd->hw_stats_lock);
+ fbnic_get_hw_rxq_stats32(fbd, hw_q);
+ spin_unlock(&fbd->hw_stats_lock);
+}
+
static void fbnic_reset_pcie_stats_asic(struct fbnic_dev *fbd,
struct fbnic_pcie_stats *pcie)
{
@@ -203,18 +512,40 @@ static void fbnic_get_pcie_stats_asic64(struct fbnic_dev *fbd,
void fbnic_reset_hw_stats(struct fbnic_dev *fbd)
{
+ spin_lock(&fbd->hw_stats_lock);
+ fbnic_reset_tmi_stats(fbd, &fbd->hw_stats.tmi);
+ fbnic_reset_tti_stats(fbd, &fbd->hw_stats.tti);
fbnic_reset_rpc_stats(fbd, &fbd->hw_stats.rpc);
+ fbnic_reset_rxb_stats(fbd, &fbd->hw_stats.rxb);
+ fbnic_reset_hw_rxq_stats(fbd, fbd->hw_stats.hw_q);
fbnic_reset_pcie_stats_asic(fbd, &fbd->hw_stats.pcie);
+ spin_unlock(&fbd->hw_stats_lock);
}
-void fbnic_get_hw_stats32(struct fbnic_dev *fbd)
+static void __fbnic_get_hw_stats32(struct fbnic_dev *fbd)
{
+ fbnic_get_tmi_stats32(fbd, &fbd->hw_stats.tmi);
+ fbnic_get_tti_stats32(fbd, &fbd->hw_stats.tti);
fbnic_get_rpc_stats32(fbd, &fbd->hw_stats.rpc);
+ fbnic_get_rxb_stats32(fbd, &fbd->hw_stats.rxb);
+ fbnic_get_hw_rxq_stats32(fbd, fbd->hw_stats.hw_q);
+}
+
+void fbnic_get_hw_stats32(struct fbnic_dev *fbd)
+{
+ spin_lock(&fbd->hw_stats_lock);
+ __fbnic_get_hw_stats32(fbd);
+ spin_unlock(&fbd->hw_stats_lock);
}
void fbnic_get_hw_stats(struct fbnic_dev *fbd)
{
- fbnic_get_hw_stats32(fbd);
+ spin_lock(&fbd->hw_stats_lock);
+ __fbnic_get_hw_stats32(fbd);
+ fbnic_get_tmi_stats(fbd, &fbd->hw_stats.tmi);
+ fbnic_get_tti_stats(fbd, &fbd->hw_stats.tti);
+ fbnic_get_rxb_stats(fbd, &fbd->hw_stats.rxb);
fbnic_get_pcie_stats_asic64(fbd, &fbd->hw_stats.pcie);
+ spin_unlock(&fbd->hw_stats_lock);
}
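
All of the rst32/rd32/rd64 helpers used above follow the delta-accumulation scheme visible in the hunk context (the stat->u.old_reg_value_64 bookkeeping): the last raw register value is cached and only the difference is added, so the software counter keeps growing across hardware clears and counter wrap. A sketch of the assumed 32-bit variant:

	/* Sketch: wrap-safe accumulation as assumed for fbnic_hw_stat_rd32().
	 * Unsigned subtraction yields the correct modulo-2^32 delta even when
	 * the hardware counter wrapped since the last read.
	 */
	static void hw_stat_rd32_sketch(u32 new_reg_value, u32 *old_reg_value,
					u64 *value)
	{
		*value += new_reg_value - *old_reg_value;
		*old_reg_value = new_reg_value;
	}
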
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h
index 78df56b87745..07e54bb75bf3 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h
@@ -17,6 +17,11 @@ struct fbnic_stat_counter {
bool reported;
};
+struct fbnic_hw_stat {
+ struct fbnic_stat_counter frames;
+ struct fbnic_stat_counter bytes;
+};
+
struct fbnic_eth_mac_stats {
struct fbnic_stat_counter FramesTransmittedOK;
struct fbnic_stat_counter FramesReceivedOK;
@@ -37,12 +42,49 @@ struct fbnic_mac_stats {
struct fbnic_eth_mac_stats eth_mac;
};
+struct fbnic_tmi_stats {
+ struct fbnic_hw_stat drop;
+ struct fbnic_stat_counter ptp_illegal_req, ptp_good_ts, ptp_bad_ts;
+};
+
+struct fbnic_tti_stats {
+ struct fbnic_hw_stat cm_drop, frame_drop, tbi_drop;
+};
+
struct fbnic_rpc_stats {
struct fbnic_stat_counter unkn_etype, unkn_ext_hdr;
struct fbnic_stat_counter ipv4_frag, ipv6_frag, ipv4_esp, ipv6_esp;
struct fbnic_stat_counter tcp_opt_err, out_of_hdr_err, ovr_size_err;
};
+struct fbnic_rxb_enqueue_stats {
+ struct fbnic_hw_stat drbo;
+ struct fbnic_stat_counter integrity_err, mac_err;
+ struct fbnic_stat_counter parser_err, frm_err;
+};
+
+struct fbnic_rxb_fifo_stats {
+ struct fbnic_hw_stat drop, trunc;
+ struct fbnic_stat_counter trans_drop, trans_ecn;
+ struct fbnic_stat_counter level;
+};
+
+struct fbnic_rxb_dequeue_stats {
+ struct fbnic_hw_stat intf, pbuf;
+};
+
+struct fbnic_rxb_stats {
+ struct fbnic_rxb_enqueue_stats enq[FBNIC_RXB_ENQUEUE_INDICES];
+ struct fbnic_rxb_fifo_stats fifo[FBNIC_RXB_FIFO_INDICES];
+ struct fbnic_rxb_dequeue_stats deq[FBNIC_RXB_DEQUEUE_INDICES];
+};
+
+struct fbnic_hw_q_stats {
+ struct fbnic_stat_counter rde_pkt_err;
+ struct fbnic_stat_counter rde_pkt_cq_drop;
+ struct fbnic_stat_counter rde_pkt_bdq_drop;
+};
+
struct fbnic_pcie_stats {
struct fbnic_stat_counter ob_rd_tlp, ob_rd_dword;
struct fbnic_stat_counter ob_wr_tlp, ob_wr_dword;
@@ -55,13 +97,19 @@ struct fbnic_pcie_stats {
struct fbnic_hw_stats {
struct fbnic_mac_stats mac;
+ struct fbnic_tmi_stats tmi;
+ struct fbnic_tti_stats tti;
struct fbnic_rpc_stats rpc;
+ struct fbnic_rxb_stats rxb;
+ struct fbnic_hw_q_stats hw_q[FBNIC_MAX_QUEUES];
struct fbnic_pcie_stats pcie;
};
u64 fbnic_stat_rd64(struct fbnic_dev *fbd, u32 reg, u32 offset);
void fbnic_reset_hw_stats(struct fbnic_dev *fbd);
+void fbnic_get_hw_q_stats(struct fbnic_dev *fbd,
+ struct fbnic_hw_q_stats *hw_q);
void fbnic_get_hw_stats32(struct fbnic_dev *fbd);
void fbnic_get_hw_stats(struct fbnic_dev *fbd);
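
fbnic_report_hw_stats() in the ethtool hunk dereferences base + stat->offset as a u64, which implies each descriptor's offset points at a fbnic_stat_counter's .value inside the per-block struct. A sketch of how such a table is presumably declared (the macro and entry names are hypothetical; the driver's real FBNIC_STAT-style helpers may differ):

	/* Sketch: descriptor entries resolved with offsetof() so one walker
	 * (fbnic_report_hw_stats) can serve every stats block.
	 */
	#define RXB_FIFO_STAT(fmt, field) {				\
		.string = fmt,						\
		.offset = offsetof(struct fbnic_rxb_fifo_stats,		\
				   field.value),			\
	}

	static const struct fbnic_stat rxb_fifo_stats_sketch[] = {
		RXB_FIFO_STAT("rxb_fifo%u_drop_frames", drop.frames),
		RXB_FIFO_STAT("rxb_fifo%u_trans_drop", trans_drop),
	};
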
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index 79a01fdd1dd1..d699f58dda21 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -403,12 +403,16 @@ static int fbnic_hwtstamp_set(struct net_device *netdev,
static void fbnic_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *stats64)
{
+ u64 rx_bytes, rx_packets, rx_dropped = 0, rx_errors = 0;
u64 tx_bytes, tx_packets, tx_dropped = 0;
- u64 rx_bytes, rx_packets, rx_dropped = 0;
struct fbnic_net *fbn = netdev_priv(dev);
+ struct fbnic_dev *fbd = fbn->fbd;
struct fbnic_queue_stats *stats;
+ u64 rx_over = 0, rx_missed = 0;
unsigned int start, i;
+ fbnic_get_hw_stats(fbd);
+
stats = &fbn->tx_stats;
tx_bytes = stats->bytes;
@@ -419,6 +423,12 @@ static void fbnic_get_stats64(struct net_device *dev,
+ /* Record drops from Tx HW Datapath */
+ tx_dropped += fbd->hw_stats.tmi.drop.frames.value +
+ fbd->hw_stats.tti.cm_drop.frames.value +
+ fbd->hw_stats.tti.frame_drop.frames.value +
+ fbd->hw_stats.tti.tbi_drop.frames.value;
+
stats64->tx_packets = tx_packets;
stats64->tx_dropped = tx_dropped;
for (i = 0; i < fbn->num_tx_queues; i++) {
struct fbnic_ring *txr = fbn->tx[i];
@@ -444,9 +454,34 @@ static void fbnic_get_stats64(struct net_device *dev,
rx_packets = stats->packets;
rx_dropped = stats->dropped;
+ spin_lock(&fbd->hw_stats_lock);
+ /* Record drops for the host FIFOs.
+ * 4: network to Host, 6: BMC to Host
+ * Exclude the BMC and MC FIFOs as those stats may contain drops
+ * due to unrelated items such as TCAM misses. They are still
+ * accessible through the ethtool stats.
+ */
+ i = FBNIC_RXB_FIFO_HOST;
+ rx_missed += fbd->hw_stats.rxb.fifo[i].drop.frames.value;
+ i = FBNIC_RXB_FIFO_BMC_TO_HOST;
+ rx_missed += fbd->hw_stats.rxb.fifo[i].drop.frames.value;
+
+ for (i = 0; i < fbd->max_num_queues; i++) {
+ /* Report packets dropped due to CQ/BDQ being full/empty */
+ rx_over += fbd->hw_stats.hw_q[i].rde_pkt_cq_drop.value;
+ rx_over += fbd->hw_stats.hw_q[i].rde_pkt_bdq_drop.value;
+
+ /* Report packets with errors */
+ rx_errors += fbd->hw_stats.hw_q[i].rde_pkt_err.value;
+ }
+ spin_unlock(&fbd->hw_stats_lock);
+
stats64->rx_bytes = rx_bytes;
stats64->rx_packets = rx_packets;
stats64->rx_dropped = rx_dropped;
+ stats64->rx_over_errors = rx_over;
+ stats64->rx_errors = rx_errors;
+ stats64->rx_missed_errors = rx_missed;
for (i = 0; i < fbn->num_rx_queues; i++) {
struct fbnic_ring *rxr = fbn->rx[i];
@@ -486,6 +521,7 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx,
{
struct fbnic_net *fbn = netdev_priv(dev);
struct fbnic_ring *rxr = fbn->rx[idx];
+ struct fbnic_dev *fbd = fbn->fbd;
struct fbnic_queue_stats *stats;
u64 bytes, packets, alloc_fail;
u64 csum_complete, csum_none;
@@ -509,6 +545,15 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx,
rx->alloc_fail = alloc_fail;
rx->csum_complete = csum_complete;
rx->csum_none = csum_none;
+
+ fbnic_get_hw_q_stats(fbd, fbd->hw_stats.hw_q);
+
+ spin_lock(&fbd->hw_stats_lock);
+ rx->hw_drop_overruns = fbd->hw_stats.hw_q[idx].rde_pkt_cq_drop.value +
+ fbd->hw_stats.hw_q[idx].rde_pkt_bdq_drop.value;
+ rx->hw_drops = fbd->hw_stats.hw_q[idx].rde_pkt_err.value +
+ rx->hw_drop_overruns;
+ spin_unlock(&fbd->hw_stats_lock);
}
static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx,
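
Worth noting how the new HW counters map onto rtnl_link_stats64 semantics above: FIFO drops in front of the host land in rx_missed_errors, CQ/BDQ exhaustion in rx_over_errors, and RDE packet errors in rx_errors, all snapshotted under hw_stats_lock. Condensed into a sketch that mirrors the hunk:

	/* Sketch (condensed from fbnic_get_stats64 above): fold the locked
	 * HW queue counters into the standard rtnl fields.
	 */
	static void fold_rx_hw_stats(struct fbnic_dev *fbd,
				     struct rtnl_link_stats64 *stats64)
	{
		u64 rx_over = 0, rx_errors = 0;
		unsigned int i;

		spin_lock(&fbd->hw_stats_lock);
		for (i = 0; i < fbd->max_num_queues; i++) {
			rx_over += fbd->hw_stats.hw_q[i].rde_pkt_cq_drop.value +
				   fbd->hw_stats.hw_q[i].rde_pkt_bdq_drop.value;
			rx_errors += fbd->hw_stats.hw_q[i].rde_pkt_err.value;
		}
		spin_unlock(&fbd->hw_stats_lock);

		stats64->rx_over_errors = rx_over;
		stats64->rx_errors = rx_errors;
	}
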
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
index 6cbbc2ee3e1f..1f76ebdd6ad1 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
@@ -292,6 +292,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
fbnic_devlink_register(fbd);
fbnic_dbg_fbd_init(fbd);
+ spin_lock_init(&fbd->hw_stats_lock);
/* Capture snapshot of hardware stats so netdev can calculate delta */
fbnic_reset_hw_stats(fbd);
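
The ordering here matters: fbnic_reset_hw_stats() now takes hw_stats_lock, so the lock init has to land before the first reset. In sketch form:

	/* probe-path ordering the hunk above establishes */
	spin_lock_init(&fbd->hw_stats_lock);	/* before any taker ... */
	fbnic_reset_hw_stats(fbd);		/* ... such as this reset */
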
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index 23760b613d3e..8b6b9b6efe18 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -2495,8 +2495,7 @@ static int lan743x_rx_process_buffer(struct lan743x_rx *rx)
/* save existing skb, allocate new skb and map to dma */
skb = buffer_info->skb;
- if (lan743x_rx_init_ring_element(rx, rx->last_head,
- GFP_ATOMIC | GFP_DMA)) {
+ if (lan743x_rx_init_ring_element(rx, rx->last_head, GFP_ATOMIC)) {
/* failed to allocate next skb.
* Memory is very low.
* Drop this packet and reuse buffer.
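
Dropping GFP_DMA is safe here because that flag only confines the allocation to the legacy ISA-style DMA zone; a PCIe NIC gets its addressability guarantees from the device DMA mask and the streaming mapping instead. A minimal sketch of the usual RX refill pattern under that assumption (hypothetical helper, not lan743x's exact code):

	/* Sketch: rely on dma_map_single() + the device DMA mask, not on
	 * GFP_DMA zone placement, for a PCIe RX buffer.
	 */
	static struct sk_buff *rx_refill_sketch(struct net_device *ndev,
						struct device *dma_dev,
						unsigned int len,
						dma_addr_t *dma)
	{
		struct sk_buff *skb = netdev_alloc_skb(ndev, len); /* GFP_ATOMIC */

		if (!skb)
			return NULL;

		*dma = dma_map_single(dma_dev, skb->data, len, DMA_FROM_DEVICE);
		if (dma_mapping_error(dma_dev, *dma)) {
			dev_kfree_skb_any(skb);
			return NULL;
		}
		return skb;
	}
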
diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c
index 0be44dcb3393..b07f5b099a2b 100644
--- a/drivers/net/ethernet/microchip/lan743x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan743x_ptp.c
@@ -463,10 +463,6 @@ static int lan743x_ptp_perout(struct lan743x_adapter *adapter, int on,
struct lan743x_ptp_perout *perout = &ptp->perout[index];
int ret = 0;
- /* Reject requests with unsupported flags */
- if (perout_request->flags & ~PTP_PEROUT_DUTY_CYCLE)
- return -EOPNOTSUPP;
-
if (on) {
perout_pin = ptp_find_pin(ptp->ptp_clock, PTP_PF_PEROUT,
perout_request->index);
@@ -942,12 +938,6 @@ static int lan743x_ptp_io_extts(struct lan743x_adapter *adapter, int on,
extts = &ptp->extts[index];
- if (extts_request->flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
if (on) {
extts_pin = ptp_find_pin(ptp->ptp_clock, PTP_PF_EXTTS, index);
if (extts_pin < 0)
@@ -1543,6 +1533,10 @@ int lan743x_ptp_open(struct lan743x_adapter *adapter)
ptp->ptp_clock_info.n_per_out = LAN743X_PTP_N_EVENT_CHAN;
ptp->ptp_clock_info.n_pins = n_pins;
ptp->ptp_clock_info.pps = LAN743X_PTP_N_PPS;
+ ptp->ptp_clock_info.supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
+ ptp->ptp_clock_info.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE;
ptp->ptp_clock_info.pin_config = ptp->pin_config;
ptp->ptp_clock_info.adjfine = lan743x_ptpci_adjfine;
ptp->ptp_clock_info.adjtime = lan743x_ptpci_adjtime;
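
This and the following PTP hunks share one theme: the driver declares its supported external-timestamp/periodic-output flags in ptp_clock_info, and the PTP core filters requests before ->enable() ever runs, so the open-coded flag checks can be deleted. Minimal sketch of the declaration side (device name and the enable op are placeholders):

	static int example_ptp_enable(struct ptp_clock_info *ptp,
				      struct ptp_clock_request *rq, int on);

	/* Sketch: with the supported_*_flags fields populated, requests
	 * carrying any other flag are rejected centrally by the PTP core.
	 */
	static const struct ptp_clock_info example_ptp_info = {
		.owner			= THIS_MODULE,
		.name			= "example-phc",
		.n_ext_ts		= 1,
		.n_per_out		= 1,
		.supported_extts_flags	= PTP_RISING_EDGE | PTP_FALLING_EDGE |
					  PTP_STRICT_FLAGS,
		.supported_perout_flags	= PTP_PEROUT_DUTY_CYCLE,
		.enable			= example_ptp_enable,
	};
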
diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
index 63905bb5a63a..098406e2e5bb 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c
@@ -815,10 +815,6 @@ static int lan966x_ptp_perout(struct ptp_clock_info *ptp,
bool pps = false;
int pin;
- if (rq->perout.flags & ~(PTP_PEROUT_DUTY_CYCLE |
- PTP_PEROUT_PHASE))
- return -EOPNOTSUPP;
-
pin = ptp_find_pin(phc->clock, PTP_PF_PEROUT, rq->perout.index);
if (pin == -1 || pin >= LAN966X_PHC_PINS_NUM)
return -EINVAL;
@@ -917,12 +913,6 @@ static int lan966x_ptp_extts(struct ptp_clock_info *ptp,
if (lan966x->ptp_ext_irq <= 0)
return -EOPNOTSUPP;
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
pin = ptp_find_pin(phc->clock, PTP_PF_EXTTS, rq->extts.index);
if (pin == -1 || pin >= LAN966X_PHC_PINS_NUM)
return -EINVAL;
@@ -978,6 +968,10 @@ static struct ptp_clock_info lan966x_ptp_clock_info = {
.n_per_out = LAN966X_PHC_PINS_NUM,
.n_ext_ts = LAN966X_PHC_PINS_NUM,
.n_pins = LAN966X_PHC_PINS_NUM,
+ .supported_extts_flags = PTP_RISING_EDGE |
+ PTP_STRICT_FLAGS,
+ .supported_perout_flags = PTP_PEROUT_DUTY_CYCLE |
+ PTP_PEROUT_PHASE,
};
static int lan966x_ptp_phc_init(struct lan966x *lan966x,
diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c
index cc1088988da0..d2a0a32f75ea 100644
--- a/drivers/net/ethernet/mscc/ocelot_ptp.c
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.c
@@ -211,11 +211,6 @@ int ocelot_ptp_enable(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
- /* Reject requests with unsupported flags */
- if (rq->perout.flags & ~(PTP_PEROUT_DUTY_CYCLE |
- PTP_PEROUT_PHASE))
- return -EOPNOTSUPP;
-
pin = ptp_find_pin(ocelot->ptp_clock, PTP_PF_PEROUT,
rq->perout.index);
if (pin == 0)
diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
index 055b55651a49..498eec8ae61d 100644
--- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c
+++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c
@@ -108,6 +108,8 @@ static struct ptp_clock_info ocelot_ptp_clock_info = {
.n_ext_ts = 0,
.n_per_out = OCELOT_PTP_PINS_NUM,
.n_pins = OCELOT_PTP_PINS_NUM,
+ .supported_perout_flags = PTP_PEROUT_DUTY_CYCLE |
+ PTP_PEROUT_PHASE,
.pps = 0,
.gettime64 = ocelot_ptp_gettime64,
.settime64 = ocelot_ptp_settime64,
diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
index f1c6c47564b1..08086eb76996 100644
--- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c
@@ -779,7 +779,7 @@ nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
case NFP_NET_META_CSUM:
meta->csum_type = CHECKSUM_COMPLETE;
meta->csum =
- (__force __wsum)__get_unaligned_cpu32(data);
+ (__force __wsum)get_unaligned((u32 *)data);
data += 4;
break;
case NFP_NET_META_RESYNC_INFO:
diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
index ebeb6ab4465c..ab3cd06ed63e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
+++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c
@@ -779,7 +779,7 @@ nfp_nfdk_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
case NFP_NET_META_CSUM:
meta->csum_type = CHECKSUM_COMPLETE;
meta->csum =
- (__force __wsum)__get_unaligned_cpu32(data);
+ (__force __wsum)get_unaligned((u32 *)data);
data += 4;
break;
case NFP_NET_META_RESYNC_INFO:
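
__get_unaligned_cpu32() was an arch-internal helper; the generic get_unaligned() infers the access width from the pointer type and compiles down to a plain load on architectures that tolerate unaligned access. Sketch (header location varies by tree vintage):

	#include <linux/unaligned.h>	/* <asm/unaligned.h> on older kernels */

	/* Sketch: native-endian u32 from a byte cursor with no alignment
	 * guarantee, as the metadata parsers above need.
	 */
	static u32 meta_read_u32(const void *data)
	{
		return get_unaligned((const u32 *)data);
	}
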
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index b7def3b54937..016b575861b9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -939,7 +939,6 @@ u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc);
u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
/* doorbell recovery mechanism */
-void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
void qed_db_recovery_execute(struct qed_hwfn *p_hwfn);
bool qed_edpm_enabled(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h
index f6cd1b3efdfd..27e91d0d39f8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dbg_hsi.h
@@ -1305,37 +1305,6 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
char *results_buf);
/**
- * qed_print_mcp_trace_results_cont(): Prints MCP Trace results, and
- * keeps the MCP trace meta data allocated, to support continuous MCP Trace
- * parsing. After the continuous parsing ends, mcp_trace_free_meta_data should
- * be called to free the meta data.
- *
- * @p_hwfn: HW device data.
- * @dump_buf: MVP trace dump buffer, starting from the header.
- * @results_buf: Buffer for printing the mcp trace results.
- *
- * Return: Error if the parsing fails, ok otherwise.
- */
-enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn,
- u32 *dump_buf,
- char *results_buf);
-
-/**
- * qed_print_mcp_trace_line(): Prints MCP Trace results for a single line
- *
- * @p_hwfn: HW device data.
- * @dump_buf: MCP trace dump buffer, starting from the header.
- * @num_dumped_bytes: Number of bytes that were dumped.
- * @results_buf: Buffer for printing the mcp trace results.
- *
- * Return: Error if the parsing fails, ok otherwise.
- */
-enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
- u8 *dump_buf,
- u32 num_dumped_bytes,
- char *results_buf);
-
-/**
* qed_mcp_trace_free_meta_data(): Frees the MCP Trace meta data.
* Should be called after continuous MCP Trace parsing.
*
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 464a72afb758..9c3d3dd2f847 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -7614,31 +7614,6 @@ enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn,
results_buf, &parsed_buf_size, true);
}
-enum dbg_status qed_print_mcp_trace_results_cont(struct qed_hwfn *p_hwfn,
- u32 *dump_buf,
- char *results_buf)
-{
- u32 parsed_buf_size;
-
- return qed_parse_mcp_trace_dump(p_hwfn, dump_buf, results_buf,
- &parsed_buf_size, false);
-}
-
-enum dbg_status qed_print_mcp_trace_line(struct qed_hwfn *p_hwfn,
- u8 *dump_buf,
- u32 num_dumped_bytes,
- char *results_buf)
-{
- u32 parsed_results_bytes;
-
- return qed_parse_mcp_trace_buf(p_hwfn,
- dump_buf,
- num_dumped_bytes,
- 0,
- num_dumped_bytes,
- results_buf, &parsed_results_bytes);
-}
-
/* Frees the specified MCP Trace meta data */
void qed_mcp_trace_free_meta_data(struct qed_hwfn *p_hwfn)
{
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 86a93cac2647..9659ce5b0712 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -255,25 +255,6 @@ static void qed_db_recovery_teardown(struct qed_hwfn *p_hwfn)
p_hwfn->db_recovery_info.db_recovery_counter = 0;
}
-/* Print the content of the doorbell recovery mechanism */
-void qed_db_recovery_dp(struct qed_hwfn *p_hwfn)
-{
- struct qed_db_recovery_entry *db_entry = NULL;
-
- DP_NOTICE(p_hwfn,
- "Displaying doorbell recovery database. Counter was %d\n",
- p_hwfn->db_recovery_info.db_recovery_counter);
-
- /* Protect the list */
- spin_lock_bh(&p_hwfn->db_recovery_info.lock);
- list_for_each_entry(db_entry,
- &p_hwfn->db_recovery_info.list, list_entry) {
- qed_db_recovery_dp_entry(p_hwfn, db_entry, "Printing");
- }
-
- spin_unlock_bh(&p_hwfn->db_recovery_info.lock);
-}
-
/* Ring the doorbell of a single doorbell recovery entry */
static void qed_db_recovery_ring(struct qed_hwfn *p_hwfn,
struct qed_db_recovery_entry *db_entry)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index ed1a84542ad2..10e355397cee 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -2666,58 +2666,6 @@ void qed_gft_config(struct qed_hwfn *p_hwfn,
void qed_enable_context_validation(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt);
-/**
- * qed_calc_session_ctx_validation(): Calcualte validation byte for
- * session context.
- *
- * @p_ctx_mem: Pointer to context memory.
- * @ctx_size: Context size.
- * @ctx_type: Context type.
- * @cid: Context cid.
- *
- * Return: Void.
- */
-void qed_calc_session_ctx_validation(void *p_ctx_mem,
- u16 ctx_size, u8 ctx_type, u32 cid);
-
-/**
- * qed_calc_task_ctx_validation(): Calcualte validation byte for task
- * context.
- *
- * @p_ctx_mem: Pointer to context memory.
- * @ctx_size: Context size.
- * @ctx_type: Context type.
- * @tid: Context tid.
- *
- * Return: Void.
- */
-void qed_calc_task_ctx_validation(void *p_ctx_mem,
- u16 ctx_size, u8 ctx_type, u32 tid);
-
-/**
- * qed_memset_session_ctx(): Memset session context to 0 while
- * preserving validation bytes.
- *
- * @p_ctx_mem: Pointer to context memory.
- * @ctx_size: Size to initialzie.
- * @ctx_type: Context type.
- *
- * Return: Void.
- */
-void qed_memset_session_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type);
-
-/**
- * qed_memset_task_ctx(): Memset task context to 0 while preserving
- * validation bytes.
- *
- * @p_ctx_mem: Pointer to context memory.
- * @ctx_size: size to initialzie.
- * @ctx_type: context type.
- *
- * Return: Void.
- */
-void qed_memset_task_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type);
-
#define NUM_STORMS 6
/**
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 9e5f0dbc8a07..9907973399dc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -69,17 +69,6 @@ int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn)
return 0;
}
-void qed_ptt_invalidate(struct qed_hwfn *p_hwfn)
-{
- struct qed_ptt *p_ptt;
- int i;
-
- for (i = 0; i < PXP_EXTERNAL_BAR_PF_WINDOW_NUM; i++) {
- p_ptt = &p_hwfn->p_ptt_pool->ptts[i];
- p_ptt->pxp.offset = QED_BAR_INVALID_OFFSET;
- }
-}
-
void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
{
kfree(p_hwfn->p_ptt_pool);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h
index e535983ce21b..3c98f58a184f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h
@@ -62,15 +62,6 @@ enum _dmae_cmd_crc_mask {
void qed_gtt_init(struct qed_hwfn *p_hwfn);
/**
- * qed_ptt_invalidate(): Forces all ptt entries to be re-configured
- *
- * @p_hwfn: HW device data.
- *
- * Return: Void.
- */
-void qed_ptt_invalidate(struct qed_hwfn *p_hwfn);
-
-/**
* qed_ptt_pool_alloc(): Allocate and initialize PTT pool.
*
* @p_hwfn: HW device data.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
index 407029a36fa1..aa20bb8caa9a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_fw_funcs.c
@@ -18,16 +18,6 @@
#define CDU_VALIDATION_DEFAULT_CFG CDU_CONTEXT_VALIDATION_DEFAULT_CFG
-static u16 con_region_offsets[3][NUM_OF_CONNECTION_TYPES] = {
- {400, 336, 352, 368, 304, 384, 416, 352}, /* region 3 offsets */
- {528, 496, 416, 512, 448, 512, 544, 480}, /* region 4 offsets */
- {608, 544, 496, 576, 576, 592, 624, 560} /* region 5 offsets */
-};
-
-static u16 task_region_offsets[1][NUM_OF_CONNECTION_TYPES] = {
- {240, 240, 112, 0, 0, 0, 0, 96} /* region 1 offsets */
-};
-
/* General constants */
#define QM_PQ_MEM_4KB(pq_size) (pq_size ? DIV_ROUND_UP((pq_size + 1) * \
QM_PQ_ELEMENT_SIZE, \
@@ -1576,134 +1566,6 @@ void qed_gft_config(struct qed_hwfn *p_hwfn,
qed_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_GFT, 1);
}
-DECLARE_CRC8_TABLE(cdu_crc8_table);
-
-/* Calculate and return CDU validation byte per connection type/region/cid */
-static u8 qed_calc_cdu_validation_byte(u8 conn_type, u8 region, u32 cid)
-{
- const u8 validation_cfg = CDU_VALIDATION_DEFAULT_CFG;
- u8 crc, validation_byte = 0;
- static u8 crc8_table_valid; /* automatically initialized to 0 */
- u32 validation_string = 0;
- __be32 data_to_crc;
-
- if (!crc8_table_valid) {
- crc8_populate_msb(cdu_crc8_table, 0x07);
- crc8_table_valid = 1;
- }
-
- /* The CRC is calculated on the String-to-compress:
- * [31:8] = {CID[31:20],CID[11:0]}
- * [7:4] = Region
- * [3:0] = Type
- */
- if ((validation_cfg >> CDU_CONTEXT_VALIDATION_CFG_USE_CID) & 1)
- validation_string |= (cid & 0xFFF00000) | ((cid & 0xFFF) << 8);
-
- if ((validation_cfg >> CDU_CONTEXT_VALIDATION_CFG_USE_REGION) & 1)
- validation_string |= ((region & 0xF) << 4);
-
- if ((validation_cfg >> CDU_CONTEXT_VALIDATION_CFG_USE_TYPE) & 1)
- validation_string |= (conn_type & 0xF);
-
- /* Convert to big-endian and calculate CRC8 */
- data_to_crc = cpu_to_be32(validation_string);
- crc = crc8(cdu_crc8_table, (u8 *)&data_to_crc, sizeof(data_to_crc),
- CRC8_INIT_VALUE);
-
- /* The validation byte [7:0] is composed:
- * for type A validation
- * [7] = active configuration bit
- * [6:0] = crc[6:0]
- *
- * for type B validation
- * [7] = active configuration bit
- * [6:3] = connection_type[3:0]
- * [2:0] = crc[2:0]
- */
- validation_byte |=
- ((validation_cfg >>
- CDU_CONTEXT_VALIDATION_CFG_USE_ACTIVE) & 1) << 7;
-
- if ((validation_cfg >>
- CDU_CONTEXT_VALIDATION_CFG_VALIDATION_TYPE_SHIFT) & 1)
- validation_byte |= ((conn_type & 0xF) << 3) | (crc & 0x7);
- else
- validation_byte |= crc & 0x7F;
-
- return validation_byte;
-}
-
-/* Calcualte and set validation bytes for session context */
-void qed_calc_session_ctx_validation(void *p_ctx_mem,
- u16 ctx_size, u8 ctx_type, u32 cid)
-{
- u8 *x_val_ptr, *t_val_ptr, *u_val_ptr, *p_ctx;
-
- p_ctx = (u8 * const)p_ctx_mem;
- x_val_ptr = &p_ctx[con_region_offsets[0][ctx_type]];
- t_val_ptr = &p_ctx[con_region_offsets[1][ctx_type]];
- u_val_ptr = &p_ctx[con_region_offsets[2][ctx_type]];
-
- memset(p_ctx, 0, ctx_size);
-
- *x_val_ptr = qed_calc_cdu_validation_byte(ctx_type, 3, cid);
- *t_val_ptr = qed_calc_cdu_validation_byte(ctx_type, 4, cid);
- *u_val_ptr = qed_calc_cdu_validation_byte(ctx_type, 5, cid);
-}
-
-/* Calcualte and set validation bytes for task context */
-void qed_calc_task_ctx_validation(void *p_ctx_mem,
- u16 ctx_size, u8 ctx_type, u32 tid)
-{
- u8 *p_ctx, *region1_val_ptr;
-
- p_ctx = (u8 * const)p_ctx_mem;
- region1_val_ptr = &p_ctx[task_region_offsets[0][ctx_type]];
-
- memset(p_ctx, 0, ctx_size);
-
- *region1_val_ptr = qed_calc_cdu_validation_byte(ctx_type, 1, tid);
-}
-
-/* Memset session context to 0 while preserving validation bytes */
-void qed_memset_session_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type)
-{
- u8 *x_val_ptr, *t_val_ptr, *u_val_ptr, *p_ctx;
- u8 x_val, t_val, u_val;
-
- p_ctx = (u8 * const)p_ctx_mem;
- x_val_ptr = &p_ctx[con_region_offsets[0][ctx_type]];
- t_val_ptr = &p_ctx[con_region_offsets[1][ctx_type]];
- u_val_ptr = &p_ctx[con_region_offsets[2][ctx_type]];
-
- x_val = *x_val_ptr;
- t_val = *t_val_ptr;
- u_val = *u_val_ptr;
-
- memset(p_ctx, 0, ctx_size);
-
- *x_val_ptr = x_val;
- *t_val_ptr = t_val;
- *u_val_ptr = u_val;
-}
-
-/* Memset task context to 0 while preserving validation bytes */
-void qed_memset_task_ctx(void *p_ctx_mem, u32 ctx_size, u8 ctx_type)
-{
- u8 *p_ctx, *region1_val_ptr;
- u8 region1_val;
-
- p_ctx = (u8 * const)p_ctx_mem;
- region1_val_ptr = &p_ctx[task_region_offsets[0][ctx_type]];
-
- region1_val = *region1_val_ptr;
-
- memset(p_ctx, 0, ctx_size);
-
- *region1_val_ptr = region1_val;
-}
-
/* Enable and configure context validation */
void qed_enable_context_validation(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt)
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 4eebd9cb40a3..8e62b109518b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2852,10 +2852,23 @@ static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
RTL_R32(tp, CSIDR) : ~0;
}
+static void rtl_csi_mod(struct rtl8169_private *tp, int addr,
+ u32 mask, u32 set)
+{
+ u32 val;
+
+ WARN(addr % 4, "Invalid CSI address %#x\n", addr);
+
+ netdev_notice_once(tp->dev,
+ "No native access to PCI extended config space, falling back to CSI\n");
+
+ val = rtl_csi_read(tp, addr);
+ rtl_csi_write(tp, addr, (val & ~mask) | set);
+}
+
static void rtl_disable_zrxdc_timeout(struct rtl8169_private *tp)
{
struct pci_dev *pdev = tp->pci_dev;
- u32 csi;
int rc;
u8 val;
@@ -2872,16 +2885,12 @@ static void rtl_disable_zrxdc_timeout(struct rtl8169_private *tp)
}
}
- netdev_notice_once(tp->dev,
- "No native access to PCI extended config space, falling back to CSI\n");
- csi = rtl_csi_read(tp, RTL_GEN3_RELATED_OFF);
- rtl_csi_write(tp, RTL_GEN3_RELATED_OFF, csi & ~RTL_GEN3_ZRXDC_NONCOMPL);
+ rtl_csi_mod(tp, RTL_GEN3_RELATED_OFF, RTL_GEN3_ZRXDC_NONCOMPL, 0);
}
static void rtl_set_aspm_entry_latency(struct rtl8169_private *tp, u8 val)
{
struct pci_dev *pdev = tp->pci_dev;
- u32 csi;
/* According to Realtek the value at config space address 0x070f
* controls the L0s/L1 entrance latency. We try standard ECAM access
@@ -2893,10 +2902,7 @@ static void rtl_set_aspm_entry_latency(struct rtl8169_private *tp, u8 val)
pci_write_config_byte(pdev, 0x070f, val) == PCIBIOS_SUCCESSFUL)
return;
- netdev_notice_once(tp->dev,
- "No native access to PCI extended config space, falling back to CSI\n");
- csi = rtl_csi_read(tp, 0x070c) & 0x00ffffff;
- rtl_csi_write(tp, 0x070c, csi | val << 24);
+ rtl_csi_mod(tp, 0x070c, 0xff000000, val << 24);
}
static void rtl_set_def_aspm_entry_latency(struct rtl8169_private *tp)
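
rtl_csi_mod() is a straight read-modify-write over the CSI window: clear the bits in mask, then OR in set. The two call sites map onto it mechanically, e.g. the old latency sequence "csi = read(0x070c) & 0x00ffffff; write(0x070c, csi | val << 24)" becomes rtl_csi_mod(tp, 0x070c, 0xff000000, val << 24). The underlying identity, as a sketch:

	/* Sketch: the RMW identity rtl_csi_mod() applies to a 32-bit
	 * register image; bits outside `mask` are preserved and the masked
	 * field is replaced by `set`.
	 */
	static inline u32 rmw32(u32 old, u32 mask, u32 set)
	{
		return (old & ~mask) | set;
	}
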
diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
index cf95e579c65d..748ca8b212ba 100644
--- a/drivers/net/ethernet/realtek/r8169_phy_config.c
+++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
@@ -50,6 +50,15 @@ static void r8168g_phy_param(struct phy_device *phydev, u16 parm,
phy_restore_page(phydev, oldpage, 0);
}
+static void rtl8125_phy_param(struct phy_device *phydev, u16 parm,
+ u16 mask, u16 val)
+{
+ phy_lock_mdio_bus(phydev);
+ __phy_write_mmd(phydev, MDIO_MMD_VEND2, 0xb87c, parm);
+ __phy_modify_mmd(phydev, MDIO_MMD_VEND2, 0xb87e, mask, val);
+ phy_unlock_mdio_bus(phydev);
+}
+
struct phy_reg {
u16 reg;
u16 val;
@@ -1004,12 +1013,8 @@ static void rtl8125a_2_hw_phy_config(struct rtl8169_private *tp,
phy_write_paged(phydev, 0xac5, 0x16, 0x01ff);
phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030);
- phy_write(phydev, 0x1f, 0x0b87);
- phy_write(phydev, 0x16, 0x80a2);
- phy_write(phydev, 0x17, 0x0153);
- phy_write(phydev, 0x16, 0x809c);
- phy_write(phydev, 0x17, 0x0153);
- phy_write(phydev, 0x1f, 0x0000);
+ rtl8125_phy_param(phydev, 0x80a2, 0xffff, 0x0153);
+ rtl8125_phy_param(phydev, 0x809c, 0xffff, 0x0153);
phy_write(phydev, 0x1f, 0x0a43);
phy_write(phydev, 0x13, 0x81B3);
@@ -1061,14 +1066,9 @@ static void rtl8125b_hw_phy_config(struct rtl8169_private *tp,
phy_modify_paged(phydev, 0xac4, 0x13, 0x00f0, 0x0090);
phy_modify_paged(phydev, 0xad3, 0x10, 0x0003, 0x0001);
- phy_write(phydev, 0x1f, 0x0b87);
- phy_write(phydev, 0x16, 0x80f5);
- phy_write(phydev, 0x17, 0x760e);
- phy_write(phydev, 0x16, 0x8107);
- phy_write(phydev, 0x17, 0x360e);
- phy_write(phydev, 0x16, 0x8551);
- phy_modify(phydev, 0x17, 0xff00, 0x0800);
- phy_write(phydev, 0x1f, 0x0000);
+ rtl8125_phy_param(phydev, 0x80f5, 0xffff, 0x760e);
+ rtl8125_phy_param(phydev, 0x8107, 0xffff, 0x360e);
+ rtl8125_phy_param(phydev, 0x8551, 0xff00, 0x0800);
phy_modify_paged(phydev, 0xbf0, 0x10, 0xe000, 0xa000);
phy_modify_paged(phydev, 0xbf4, 0x13, 0x0f00, 0x0300);
@@ -1110,12 +1110,8 @@ static void rtl8125bp_hw_phy_config(struct rtl8169_private *tp,
r8168g_phy_param(phydev, 0x8010, 0x0800, 0x0000);
- phy_write(phydev, 0x1f, 0x0b87);
- phy_write(phydev, 0x16, 0x8088);
- phy_modify(phydev, 0x17, 0xff00, 0x9000);
- phy_write(phydev, 0x16, 0x808f);
- phy_modify(phydev, 0x17, 0xff00, 0x9000);
- phy_write(phydev, 0x1f, 0x0000);
+ rtl8125_phy_param(phydev, 0x8088, 0xff00, 0x9000);
+ rtl8125_phy_param(phydev, 0x808f, 0xff00, 0x9000);
r8168g_phy_param(phydev, 0x8174, 0x2000, 0x1800);
diff --git a/drivers/net/ethernet/renesas/ravb_ptp.c b/drivers/net/ethernet/renesas/ravb_ptp.c
index b4365906669f..226c6c0ab945 100644
--- a/drivers/net/ethernet/renesas/ravb_ptp.c
+++ b/drivers/net/ethernet/renesas/ravb_ptp.c
@@ -176,12 +176,6 @@ static int ravb_ptp_extts(struct ptp_clock_info *ptp,
struct net_device *ndev = priv->ndev;
unsigned long flags;
- /* Reject requests with unsupported flags */
- if (req->flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE))
- return -EOPNOTSUPP;
-
if (req->index)
return -EINVAL;
@@ -212,10 +206,6 @@ static int ravb_ptp_perout(struct ptp_clock_info *ptp,
unsigned long flags;
int error = 0;
- /* Reject requests with unsupported flags */
- if (req->flags)
- return -EOPNOTSUPP;
-
if (req->index)
return -EINVAL;
@@ -287,6 +277,7 @@ static const struct ptp_clock_info ravb_ptp_info = {
.max_adj = 50000000,
.n_ext_ts = N_EXT_TS,
.n_per_out = N_PER_OUT,
+ .supported_extts_flags = PTP_RISING_EDGE | PTP_FALLING_EDGE,
.adjfine = ravb_ptp_adjfine,
.adjtime = ravb_ptp_adjtime,
.gettime64 = ravb_ptp_gettime64,
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index d5db26103d82..19985d13e243 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -1933,7 +1933,7 @@ static int ofdpa_port_fdb(struct ofdpa_port *ofdpa_port,
spin_unlock_irqrestore(&ofdpa->fdb_tbl_lock, lock_flags);
/* Check if adding and already exists, or removing and can't find */
- if (!found != !removing) {
+ if (!found == removing) {
kfree(fdb);
if (!found && removing)
return 0;
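
The rewritten rocker test is an equivalence, not a behaviour change: `removing` is a bool in this function, so `!found == removing` selects exactly the same two mismatch cases ("adding and already exists" or "removing and can't find") as the old `!found != !removing`; only for a raw int flag with values other than 0/1 would the `!`-normalised form be the safer one. A throwaway userspace check of the truth table, assuming 0/1 inputs:

	#include <assert.h>

	int main(void)
	{
		/* all four (found, removing) combinations agree */
		for (int found = 0; found <= 1; found++)
			for (int removing = 0; removing <= 1; removing++)
				assert((!found != !removing) ==
				       (!found == removing));
		return 0;
	}
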
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
index 37fe7c288878..84072c8ed741 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-anarion.c
@@ -65,13 +65,12 @@ anarion_config_dt(struct platform_device *pdev,
{
struct anarion_gmac *gmac;
void __iomem *ctl_block;
- int err;
ctl_block = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(ctl_block)) {
- err = PTR_ERR(ctl_block);
- dev_err(&pdev->dev, "Cannot get reset region (%d)!\n", err);
- return ERR_PTR(err);
+ dev_err(&pdev->dev, "Cannot get reset region (%pe)!\n",
+ ctl_block);
+ return ERR_CAST(ctl_block);
}
gmac = devm_kzalloc(&pdev->dev, sizeof(*gmac), GFP_KERNEL);
@@ -80,17 +79,11 @@ anarion_config_dt(struct platform_device *pdev,
gmac->ctl_block = ctl_block;
- switch (plat_dat->phy_interface) {
- case PHY_INTERFACE_MODE_RGMII:
- fallthrough;
- case PHY_INTERFACE_MODE_RGMII_ID:
- case PHY_INTERFACE_MODE_RGMII_RXID:
- case PHY_INTERFACE_MODE_RGMII_TXID:
+ if (phy_interface_mode_is_rgmii(plat_dat->phy_interface)) {
gmac->phy_intf_sel = GMAC_CONFIG_INTF_RGMII;
- break;
- default:
- dev_err(&pdev->dev, "Unsupported phy-mode (%d)\n",
- plat_dat->phy_interface);
+ } else {
+ dev_err(&pdev->dev, "Unsupported phy-mode (%s)\n",
+ phy_modes(plat_dat->phy_interface));
return ERR_PTR(-ENOTSUPP);
}
@@ -118,10 +111,9 @@ static int anarion_dwmac_probe(struct platform_device *pdev)
plat_dat->init = anarion_gmac_init;
plat_dat->exit = anarion_gmac_exit;
- anarion_gmac_init(pdev, gmac);
plat_dat->bsp_priv = gmac;
- return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+ return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
}
static const struct of_device_id anarion_dwmac_match[] = {
@@ -132,7 +124,6 @@ MODULE_DEVICE_TABLE(of, anarion_dwmac_match);
static struct platform_driver anarion_dwmac_driver = {
.probe = anarion_dwmac_probe,
- .remove = stmmac_pltfr_remove,
.driver = {
.name = "anarion-dwmac",
.pm = &stmmac_pltfr_pm_ops,
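
The anarion conversion relies on two properties of devm_stmmac_pltfr_probe(): it runs plat_dat->init (already set to anarion_gmac_init above) before probing the MAC, and it registers teardown through devres, so both the explicit init call and the .remove hook become redundant. Sketch of the resulting minimal glue shape (the callback and match-table names are hypothetical):

	/* Sketch, assuming devm_stmmac_pltfr_probe() runs plat_dat->init and
	 * registers removal via devres, as the conversions in this series
	 * rely on.
	 */
	static int example_dwmac_probe(struct platform_device *pdev)
	{
		struct plat_stmmacenet_data *plat_dat;
		struct stmmac_resources res;
		int ret;

		ret = stmmac_get_platform_resources(pdev, &res);
		if (ret)
			return ret;

		plat_dat = devm_stmmac_probe_config_dt(pdev, res.mac);
		if (IS_ERR(plat_dat))
			return PTR_ERR(plat_dat);

		plat_dat->init = example_gmac_init;	/* hypothetical callbacks */
		plat_dat->exit = example_gmac_exit;

		return devm_stmmac_pltfr_probe(pdev, plat_dat, &res);
	}

	static struct platform_driver example_dwmac_driver = {
		.probe = example_dwmac_probe,
		/* no .remove: teardown is device-managed */
		.driver = {
			.name = "example-dwmac",
			.pm = &stmmac_pltfr_pm_ops,
			.of_match_table = example_dwmac_match,
		},
	};
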
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index cd431f84f34f..583b5c071cd1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -29,21 +29,10 @@ struct tegra_eqos {
void __iomem *regs;
struct reset_control *rst;
- struct clk *clk_slave;
struct gpio_desc *reset;
};
-static struct clk *dwc_eth_find_clk(struct plat_stmmacenet_data *plat_dat,
- const char *name)
-{
- for (int i = 0; i < plat_dat->num_clks; i++)
- if (strcmp(plat_dat->clks[i].id, name) == 0)
- return plat_dat->clks[i].clk;
-
- return NULL;
-}
-
static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
struct plat_stmmacenet_data *plat_dat)
{
@@ -132,7 +121,7 @@ static int dwc_qos_probe(struct platform_device *pdev,
struct plat_stmmacenet_data *plat_dat,
struct stmmac_resources *stmmac_res)
{
- plat_dat->pclk = dwc_eth_find_clk(plat_dat, "phy_ref_clk");
+ plat_dat->pclk = stmmac_pltfr_find_clk(plat_dat, "phy_ref_clk");
return 0;
}
@@ -209,20 +198,6 @@ static void tegra_eqos_fix_speed(void *priv, int speed, unsigned int mode)
}
}
-static int tegra_eqos_init(struct platform_device *pdev, void *priv)
-{
- struct tegra_eqos *eqos = priv;
- unsigned long rate;
- u32 value;
-
- rate = clk_get_rate(eqos->clk_slave);
-
- value = (rate / 1000000) - 1;
- writel(value, eqos->regs + GMAC_1US_TIC_COUNTER);
-
- return 0;
-}
-
static int tegra_eqos_probe(struct platform_device *pdev,
struct plat_stmmacenet_data *plat_dat,
struct stmmac_resources *res)
@@ -237,12 +212,11 @@ static int tegra_eqos_probe(struct platform_device *pdev,
eqos->dev = &pdev->dev;
eqos->regs = res->addr;
- eqos->clk_slave = plat_dat->stmmac_clk;
if (!is_of_node(dev->fwnode))
goto bypass_clk_reset_gpio;
- plat_dat->clk_tx_i = dwc_eth_find_clk(plat_dat, "tx");
+ plat_dat->clk_tx_i = stmmac_pltfr_find_clk(plat_dat, "tx");
eqos->reset = devm_gpiod_get(&pdev->dev, "phy-reset", GPIOD_OUT_HIGH);
if (IS_ERR(eqos->reset)) {
@@ -277,17 +251,11 @@ static int tegra_eqos_probe(struct platform_device *pdev,
bypass_clk_reset_gpio:
plat_dat->fix_mac_speed = tegra_eqos_fix_speed;
plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate;
- plat_dat->init = tegra_eqos_init;
plat_dat->bsp_priv = eqos;
plat_dat->flags |= STMMAC_FLAG_SPH_DISABLE;
- err = tegra_eqos_init(pdev, eqos);
- if (err < 0)
- goto reset;
-
return 0;
-reset:
- reset_control_assert(eqos->rst);
+
reset_phy:
gpiod_set_value(eqos->reset, 1);
@@ -362,8 +330,8 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(&pdev->dev, ret, "Failed to enable clocks\n");
- plat_dat->stmmac_clk = dwc_eth_find_clk(plat_dat,
- data->stmmac_clk_name);
+ plat_dat->stmmac_clk = stmmac_pltfr_find_clk(plat_dat,
+ data->stmmac_clk_name);
if (data->probe)
ret = data->probe(pdev, plat_dat, &stmmac_res);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index 5d279fa54b3e..889e2bb6f7f5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -379,10 +379,6 @@ static int imx_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = imx_dwmac_init(pdev, dwmac);
- if (ret)
- goto err_dwmac_init;
-
if (dwmac->ops->fix_mac_speed) {
plat_dat->fix_mac_speed = dwmac->ops->fix_mac_speed;
} else if (!dwmac->ops->mac_rgmii_txclk_auto_adj) {
@@ -392,16 +388,10 @@ static int imx_dwmac_probe(struct platform_device *pdev)
dwmac->plat_dat->fix_soc_reset = dwmac->ops->fix_soc_reset;
- ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+ ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
if (ret)
- goto err_drv_probe;
-
- return 0;
+ imx_dwmac_clks_config(dwmac, false);
-err_drv_probe:
- imx_dwmac_exit(pdev, plat_dat->bsp_priv);
-err_dwmac_init:
- imx_dwmac_clks_config(dwmac, false);
return ret;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
index 066783d66422..15abe214131f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c
@@ -56,6 +56,7 @@ enum ingenic_mac_version {
struct ingenic_mac {
const struct ingenic_soc_info *soc_info;
+ struct plat_stmmacenet_data *plat_dat;
struct device *dev;
struct regmap *regmap;
@@ -70,13 +71,13 @@ struct ingenic_soc_info {
int (*set_mode)(struct plat_stmmacenet_data *plat_dat);
};
-static int ingenic_mac_init(struct plat_stmmacenet_data *plat_dat)
+static int ingenic_mac_init(struct platform_device *pdev, void *bsp_priv)
{
- struct ingenic_mac *mac = plat_dat->bsp_priv;
+ struct ingenic_mac *mac = bsp_priv;
int ret;
if (mac->soc_info->set_mode) {
- ret = mac->soc_info->set_mode(plat_dat);
+ ret = mac->soc_info->set_mode(mac->plat_dat);
if (ret)
return ret;
}
@@ -284,44 +285,14 @@ static int ingenic_mac_probe(struct platform_device *pdev)
mac->soc_info = data;
mac->dev = &pdev->dev;
+ mac->plat_dat = plat_dat;
plat_dat->bsp_priv = mac;
+ plat_dat->init = ingenic_mac_init;
- ret = ingenic_mac_init(plat_dat);
- if (ret)
- return ret;
-
- return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int ingenic_mac_suspend(struct device *dev)
-{
- int ret;
-
- ret = stmmac_suspend(dev);
-
- return ret;
+ return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
}
-static int ingenic_mac_resume(struct device *dev)
-{
- struct net_device *ndev = dev_get_drvdata(dev);
- struct stmmac_priv *priv = netdev_priv(ndev);
- int ret;
-
- ret = ingenic_mac_init(priv->plat);
- if (ret)
- return ret;
-
- ret = stmmac_resume(dev);
-
- return ret;
-}
-#endif /* CONFIG_PM_SLEEP */
-
-static SIMPLE_DEV_PM_OPS(ingenic_mac_pm_ops, ingenic_mac_suspend, ingenic_mac_resume);
-
static struct ingenic_soc_info jz4775_soc_info = {
.version = ID_JZ4775,
.mask = MACPHYC_TXCLK_SEL_MASK | MACPHYC_SOFT_RST_MASK | MACPHYC_PHY_INFT_MASK,
@@ -370,10 +341,9 @@ MODULE_DEVICE_TABLE(of, ingenic_mac_of_matches);
static struct platform_driver ingenic_mac_driver = {
.probe = ingenic_mac_probe,
- .remove = stmmac_pltfr_remove,
.driver = {
.name = "ingenic-mac",
- .pm = pm_ptr(&ingenic_mac_pm_ops),
+ .pm = &stmmac_pltfr_pm_ops,
.of_match_table = ingenic_mac_of_matches,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
index 599def7b3a64..4ea7b0a803d7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c
@@ -113,16 +113,7 @@ static int intel_eth_plat_probe(struct platform_device *pdev)
plat_dat->clk_tx_i = dwmac->tx_clk;
plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate;
-
plat_dat->bsp_priv = dwmac;
- plat_dat->eee_usecs_rate = plat_dat->clk_ptp_rate;
-
- if (plat_dat->eee_usecs_rate > 0) {
- u32 tx_lpi_usec;
-
- tx_lpi_usec = (plat_dat->eee_usecs_rate / 1000000) - 1;
- writel(tx_lpi_usec, stmmac_res.addr + GMAC_1US_TIC_COUNTER);
- }
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index c8bb9265bbb4..96c893dd262f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -300,11 +300,8 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data)
if (((data & SERDES_LINK_MODE_MASK) >> SERDES_LINK_MODE_SHIFT) ==
SERDES_LINK_MODE_2G5) {
dev_info(priv->device, "Link Speed Mode: 2.5Gbps\n");
- priv->plat->max_speed = 2500;
priv->plat->phy_interface = PHY_INTERFACE_MODE_2500BASEX;
priv->plat->mdio_bus_data->default_an_inband = false;
- } else {
- priv->plat->max_speed = 1000;
}
}
@@ -682,7 +679,6 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->axi->axi_blen[2] = 16;
plat->ptp_max_adj = plat->clk_ptp_rate;
- plat->eee_usecs_rate = plat->clk_ptp_rate;
/* Set system clock */
sprintf(clk_name, "%s-%s", "stmmac", pci_name(pdev));
@@ -1313,13 +1309,6 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
memset(&res, 0, sizeof(res));
res.addr = pcim_iomap_table(pdev)[0];
- if (plat->eee_usecs_rate > 0) {
- u32 tx_lpi_usec;
-
- tx_lpi_usec = (plat->eee_usecs_rate / 1000000) - 1;
- writel(tx_lpi_usec, res.addr + GMAC_1US_TIC_COUNTER);
- }
-
ret = stmmac_config_multi_msi(pdev, plat, &res);
if (ret) {
ret = stmmac_config_single_msi(pdev, plat, &res);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
index 0e4da216f942..e30bdf72331a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -106,12 +106,11 @@ struct qcom_ethqos {
struct platform_device *pdev;
void __iomem *rgmii_base;
void __iomem *mac_base;
- int (*configure_func)(struct qcom_ethqos *ethqos);
+ int (*configure_func)(struct qcom_ethqos *ethqos, int speed);
unsigned int link_clk_rate;
struct clk *link_clk;
struct phy *serdes_phy;
- int speed;
int serdes_speed;
phy_interface_t phy_mode;
@@ -385,7 +384,7 @@ static int ethqos_dll_configure(struct qcom_ethqos *ethqos)
return 0;
}
-static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
+static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos, int speed)
{
struct device *dev = &ethqos->pdev->dev;
int phase_shift;
@@ -412,7 +411,7 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
rgmii_updatel(ethqos, RGMII_CONFIG_INTF_SEL,
0, RGMII_IO_MACRO_CONFIG);
- switch (ethqos->speed) {
+ switch (speed) {
case SPEED_1000:
rgmii_updatel(ethqos, RGMII_CONFIG_DDR_MODE,
RGMII_CONFIG_DDR_MODE, RGMII_IO_MACRO_CONFIG);
@@ -532,14 +531,14 @@ static int ethqos_rgmii_macro_init(struct qcom_ethqos *ethqos)
loopback, RGMII_IO_MACRO_CONFIG);
break;
default:
- dev_err(dev, "Invalid speed %d\n", ethqos->speed);
+ dev_err(dev, "Invalid speed %d\n", speed);
return -EINVAL;
}
return 0;
}
-static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos)
+static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos, int speed)
{
struct device *dev = &ethqos->pdev->dev;
volatile unsigned int dll_lock;
@@ -562,7 +561,7 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos)
SDCC_DLL_CONFIG_PDN, SDCC_HC_REG_DLL_CONFIG);
if (ethqos->has_emac_ge_3) {
- if (ethqos->speed == SPEED_1000) {
+ if (speed == SPEED_1000) {
rgmii_writel(ethqos, 0x1800000, SDCC_TEST_CTL);
rgmii_writel(ethqos, 0x2C010800, SDCC_USR_CTL);
rgmii_writel(ethqos, 0xA001, SDCC_HC_REG_DLL_CONFIG2);
@@ -580,7 +579,7 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos)
rgmii_updatel(ethqos, SDCC_DLL_CONFIG_PDN, 0,
SDCC_HC_REG_DLL_CONFIG);
- if (ethqos->speed != SPEED_100 && ethqos->speed != SPEED_10) {
+ if (speed != SPEED_100 && speed != SPEED_10) {
/* Set DLL_EN */
rgmii_updatel(ethqos, SDCC_DLL_CONFIG_DLL_EN,
SDCC_DLL_CONFIG_DLL_EN, SDCC_HC_REG_DLL_CONFIG);
@@ -607,10 +606,10 @@ static int ethqos_configure_rgmii(struct qcom_ethqos *ethqos)
dev_err(dev, "Timeout while waiting for DLL lock\n");
}
- if (ethqos->speed == SPEED_1000)
+ if (speed == SPEED_1000)
ethqos_dll_configure(ethqos);
- ethqos_rgmii_macro_init(ethqos);
+ ethqos_rgmii_macro_init(ethqos, speed);
return 0;
}
@@ -626,7 +625,7 @@ static void ethqos_set_serdes_speed(struct qcom_ethqos *ethqos, int speed)
/* On interface toggle MAC registers gets reset.
* Configure MAC block for SGMII on ethernet phy link up
*/
-static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos)
+static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos, int speed)
{
struct net_device *dev = platform_get_drvdata(ethqos->pdev);
struct stmmac_priv *priv = netdev_priv(dev);
@@ -634,7 +633,7 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos)
val = readl(ethqos->mac_base + MAC_CTRL_REG);
- switch (ethqos->speed) {
+ switch (speed) {
case SPEED_2500:
val &= ~ETHQOS_MAC_CTRL_PORT_SEL;
rgmii_updatel(ethqos, RGMII_CONFIG2_RGMII_CLK_SEL_CFG,
@@ -673,17 +672,9 @@ static int ethqos_configure_sgmii(struct qcom_ethqos *ethqos)
return val;
}
-static void qcom_ethqos_speed_mode_2500(struct net_device *ndev, void *data)
+static int ethqos_configure(struct qcom_ethqos *ethqos, int speed)
{
- struct stmmac_priv *priv = netdev_priv(ndev);
-
- priv->plat->max_speed = 2500;
- priv->plat->phy_interface = PHY_INTERFACE_MODE_2500BASEX;
-}
-
-static int ethqos_configure(struct qcom_ethqos *ethqos)
-{
- return ethqos->configure_func(ethqos);
+ return ethqos->configure_func(ethqos, speed);
}
static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode)
@@ -691,9 +682,8 @@ static void ethqos_fix_mac_speed(void *priv, int speed, unsigned int mode)
struct qcom_ethqos *ethqos = priv;
qcom_ethqos_set_sgmii_loopback(ethqos, false);
- ethqos->speed = speed;
ethqos_update_link_clk(ethqos, speed);
- ethqos_configure(ethqos);
+ ethqos_configure(ethqos, speed);
}
static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv)
@@ -709,7 +699,7 @@ static int qcom_ethqos_serdes_powerup(struct net_device *ndev, void *priv)
if (ret)
return ret;
- return phy_set_speed(ethqos->serdes_phy, ethqos->speed);
+ return phy_set_speed(ethqos->serdes_phy, ethqos->serdes_speed);
}
static void qcom_ethqos_serdes_powerdown(struct net_device *ndev, void *priv)
@@ -803,8 +793,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
ethqos->configure_func = ethqos_configure_rgmii;
break;
case PHY_INTERFACE_MODE_2500BASEX:
- plat_dat->speed_mode_2500 = qcom_ethqos_speed_mode_2500;
- fallthrough;
case PHY_INTERFACE_MODE_SGMII:
ethqos->configure_func = ethqos_configure_sgmii;
break;
@@ -847,7 +835,6 @@ static int qcom_ethqos_probe(struct platform_device *pdev)
return dev_err_probe(dev, PTR_ERR(ethqos->serdes_phy),
"Failed to get serdes phy\n");
- ethqos->speed = SPEED_1000;
ethqos->serdes_speed = SPEED_1000;
ethqos_update_link_clk(ethqos, SPEED_1000);
ethqos_set_func_clk_en(ethqos);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index be57c6c12c1c..53d5ce1f6dc6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -23,12 +23,7 @@
#define DWMAC_50MHZ 50000000
-#define IS_PHY_IF_MODE_RGMII(iface) (iface == PHY_INTERFACE_MODE_RGMII || \
- iface == PHY_INTERFACE_MODE_RGMII_ID || \
- iface == PHY_INTERFACE_MODE_RGMII_RXID || \
- iface == PHY_INTERFACE_MODE_RGMII_TXID)
-
-#define IS_PHY_IF_MODE_GBIT(iface) (IS_PHY_IF_MODE_RGMII(iface) || \
+#define IS_PHY_IF_MODE_GBIT(iface) (phy_interface_mode_is_rgmii(iface) || \
iface == PHY_INTERFACE_MODE_GMII)
/* STiH4xx register definitions (STiH407/STiH410 families)
@@ -148,7 +143,7 @@ static void stih4xx_fix_retime_src(void *priv, int spd, unsigned int mode)
src = TX_RETIME_SRC_CLKGEN;
freq = DWMAC_50MHZ;
}
- } else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) {
+ } else if (phy_interface_mode_is_rgmii(dwmac->interface)) {
/* On GiGa the clk source can be either external or from clkgen */
freq = rgmii_clock(spd);
@@ -238,6 +233,29 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
return 0;
}
+static int sti_dwmac_init(struct platform_device *pdev, void *bsp_priv)
+{
+ struct sti_dwmac *dwmac = bsp_priv;
+ int ret;
+
+ ret = clk_prepare_enable(dwmac->clk);
+ if (ret)
+ return ret;
+
+ ret = sti_dwmac_set_mode(dwmac);
+ if (ret)
+ clk_disable_unprepare(dwmac->clk);
+
+ return ret;
+}
+
+static void sti_dwmac_exit(struct platform_device *pdev, void *bsp_priv)
+{
+ struct sti_dwmac *dwmac = bsp_priv;
+
+ clk_disable_unprepare(dwmac->clk);
+}
+
static int sti_dwmac_probe(struct platform_device *pdev)
{
struct plat_stmmacenet_data *plat_dat;
@@ -274,59 +292,12 @@ static int sti_dwmac_probe(struct platform_device *pdev)
plat_dat->bsp_priv = dwmac;
plat_dat->fix_mac_speed = data->fix_retime_src;
+ plat_dat->init = sti_dwmac_init;
+ plat_dat->exit = sti_dwmac_exit;
- ret = clk_prepare_enable(dwmac->clk);
- if (ret)
- return ret;
-
- ret = sti_dwmac_set_mode(dwmac);
- if (ret)
- goto disable_clk;
-
- ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret)
- goto disable_clk;
-
- return 0;
-
-disable_clk:
- clk_disable_unprepare(dwmac->clk);
-
- return ret;
-}
-
-static void sti_dwmac_remove(struct platform_device *pdev)
-{
- struct sti_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
-
- stmmac_dvr_remove(&pdev->dev);
-
- clk_disable_unprepare(dwmac->clk);
+ return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
}
-static int sti_dwmac_suspend(struct device *dev)
-{
- struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev);
- int ret = stmmac_suspend(dev);
-
- clk_disable_unprepare(dwmac->clk);
-
- return ret;
-}
-
-static int sti_dwmac_resume(struct device *dev)
-{
- struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev);
-
- clk_prepare_enable(dwmac->clk);
- sti_dwmac_set_mode(dwmac);
-
- return stmmac_resume(dev);
-}
-
-static DEFINE_SIMPLE_DEV_PM_OPS(sti_dwmac_pm_ops, sti_dwmac_suspend,
- sti_dwmac_resume);
-
static const struct sti_dwmac_of_data stih4xx_dwmac_data = {
.fix_retime_src = stih4xx_fix_retime_src,
};
@@ -339,10 +310,9 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match);
static struct platform_driver sti_dwmac_driver = {
.probe = sti_dwmac_probe,
- .remove = sti_dwmac_remove,
.driver = {
.name = "sti-dwmac",
- .pm = pm_sleep_ptr(&sti_dwmac_pm_ops),
+ .pm = &stmmac_pltfr_pm_ops,
.of_match_table = sti_dwmac_match,
},
};
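
Moving the clock bring-up into plat_dat->init/exit is what makes the bespoke remove, suspend, and resume callbacks above removable: devm_stmmac_pltfr_probe() runs init() before the core probe and undoes it on teardown, and the shared stmmac_pltfr_pm_ops calls the same pair around suspend/resume. A hedged sketch of the wiring for a generic glue driver (my_dwmac_* names are placeholders):

	plat_dat->bsp_priv = dwmac;
	plat_dat->init = my_dwmac_init;		/* clk enable + mode setup */
	plat_dat->exit = my_dwmac_exit;		/* clk disable */

	/* probe, remove, suspend and resume all route through the
	 * init()/exit() pair now, so no driver-private PM ops remain
	 */
	return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
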
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index c3d321192581..1eb16eec9c0d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -119,7 +119,7 @@ struct stm32_ops {
u32 syscfg_clr_off;
};
-static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac, bool resume)
+static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac)
{
int ret;
@@ -127,11 +127,9 @@ static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac, bool resume)
if (ret)
goto err_clk_tx;
- if (!dwmac->ops->clk_rx_enable_in_suspend || !resume) {
- ret = clk_prepare_enable(dwmac->clk_rx);
- if (ret)
- goto err_clk_rx;
- }
+ ret = clk_prepare_enable(dwmac->clk_rx);
+ if (ret)
+ goto err_clk_rx;
ret = clk_prepare_enable(dwmac->syscfg_clk);
if (ret)
@@ -148,15 +146,14 @@ static int stm32_dwmac_clk_enable(struct stm32_dwmac *dwmac, bool resume)
err_clk_eth_ck:
clk_disable_unprepare(dwmac->syscfg_clk);
err_syscfg_clk:
- if (!dwmac->ops->clk_rx_enable_in_suspend || !resume)
- clk_disable_unprepare(dwmac->clk_rx);
+ clk_disable_unprepare(dwmac->clk_rx);
err_clk_rx:
clk_disable_unprepare(dwmac->clk_tx);
err_clk_tx:
return ret;
}
-static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat, bool resume)
+static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat)
{
struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
int ret;
@@ -167,7 +164,7 @@ static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat, bool resume)
return ret;
}
- return stm32_dwmac_clk_enable(dwmac, resume);
+ return stm32_dwmac_clk_enable(dwmac);
}
static int stm32mp1_select_ethck_external(struct plat_stmmacenet_data *plat_dat)
@@ -382,12 +379,10 @@ static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat)
SYSCFG_MCU_ETH_MASK, val << 23);
}
-static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac, bool suspend)
+static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac)
{
clk_disable_unprepare(dwmac->clk_tx);
- if (!dwmac->ops->clk_rx_enable_in_suspend || !suspend)
- clk_disable_unprepare(dwmac->clk_rx);
-
+ clk_disable_unprepare(dwmac->clk_rx);
clk_disable_unprepare(dwmac->syscfg_clk);
if (dwmac->enable_eth_ck)
clk_disable_unprepare(dwmac->clk_eth_ck);
@@ -541,18 +536,32 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
plat_dat->flags |= STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP;
plat_dat->bsp_priv = dwmac;
- ret = stm32_dwmac_init(plat_dat, false);
+ ret = stm32_dwmac_init(plat_dat);
if (ret)
return ret;
+ /* If this platform requires the clock to be running in suspend,
+ * prepare and enable the receive clock an additional time to keep
+ * it running.
+ */
+ if (dwmac->ops->clk_rx_enable_in_suspend) {
+ ret = clk_prepare_enable(dwmac->clk_rx);
+ if (ret)
+ goto err_clk_disable;
+ }
+
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
if (ret)
- goto err_clk_disable;
+ goto err_clk_disable_suspend;
return 0;
+err_clk_disable_suspend:
+ if (dwmac->ops->clk_rx_enable_in_suspend)
+ clk_disable_unprepare(dwmac->clk_rx);
+
err_clk_disable:
- stm32_dwmac_clk_disable(dwmac, false);
+ stm32_dwmac_clk_disable(dwmac);
return ret;
}
@@ -565,7 +574,15 @@ static void stm32_dwmac_remove(struct platform_device *pdev)
stmmac_dvr_remove(&pdev->dev);
- stm32_dwmac_clk_disable(dwmac, false);
+ /* If this platform requires the clock to be running in suspend,
+ * we need to disable and unprepare the receive clock an additional
+ * time to balance the extra clk_prepare_enable() in the probe
+ * function.
+ */
+ if (dwmac->ops->clk_rx_enable_in_suspend)
+ clk_disable_unprepare(dwmac->clk_rx);
+
+ stm32_dwmac_clk_disable(dwmac);
if (dwmac->irq_pwr_wakeup >= 0) {
dev_pm_clear_wake_irq(&pdev->dev);
@@ -596,7 +613,7 @@ static int stm32_dwmac_suspend(struct device *dev)
if (ret)
return ret;
- stm32_dwmac_clk_disable(dwmac, true);
+ stm32_dwmac_clk_disable(dwmac);
if (dwmac->ops->suspend)
ret = dwmac->ops->suspend(dwmac);
@@ -614,7 +631,7 @@ static int stm32_dwmac_resume(struct device *dev)
if (dwmac->ops->resume)
dwmac->ops->resume(dwmac);
- ret = stm32_dwmac_init(priv->plat, true);
+ ret = stm32_dwmac_init(priv->plat);
if (ret)
return ret;
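
The stm32 rework replaces the resume/suspend booleans with the common clock framework's enable counting: the extra clk_prepare_enable() taken in probe keeps clk_rx's enable count above zero across the suspend path. A worked count, assuming clk_rx_enable_in_suspend is set:

	/* probe:   clk_prepare_enable(clk_rx) twice    -> count 2
	 * suspend: clk_disable_unprepare(clk_rx) once  -> count 1, the
	 *          clock keeps running for the wake-capable logic
	 * resume:  clk_prepare_enable(clk_rx) once     -> count 2
	 * remove:  clk_disable_unprepare(clk_rx) twice -> count 0, gated
	 */
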
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 85723a78793a..fd6518e252e3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -1239,14 +1239,10 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
if (ret)
return ret;
- ret = sun8i_dwmac_init(pdev, plat_dat->bsp_priv);
+ ret = stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res);
if (ret)
goto dwmac_syscon;
- ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret)
- goto dwmac_exit;
-
ndev = dev_get_drvdata(&pdev->dev);
priv = netdev_priv(ndev);
@@ -1283,9 +1279,7 @@ dwmac_mux:
clk_put(gmac->ephy_clk);
dwmac_remove:
pm_runtime_put_noidle(&pdev->dev);
- stmmac_dvr_remove(&pdev->dev);
-dwmac_exit:
- sun8i_dwmac_exit(pdev, gmac);
+ stmmac_pltfr_remove(pdev);
dwmac_syscon:
sun8i_dwmac_unset_syscon(gmac);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 42fe29a4e300..5f387ec27c8c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -31,7 +31,6 @@
#define GMAC_RXQ_CTRL3 0x000000ac
#define GMAC_INT_STATUS 0x000000b0
#define GMAC_INT_EN 0x000000b4
-#define GMAC_1US_TIC_COUNTER 0x000000dc
#define GMAC_PCS_BASE 0x000000e0
#define GMAC_PHYIF_CONTROL_STATUS 0x000000f8
#define GMAC_PMT 0x000000c0
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index c73eff6a56b8..43c869f64c39 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -709,6 +709,17 @@ devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac)
#endif /* CONFIG_OF */
EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt);
+struct clk *stmmac_pltfr_find_clk(struct plat_stmmacenet_data *plat_dat,
+ const char *name)
+{
+ for (int i = 0; i < plat_dat->num_clks; i++)
+ if (strcmp(plat_dat->clks[i].id, name) == 0)
+ return plat_dat->clks[i].clk;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(stmmac_pltfr_find_clk);
+
int stmmac_get_platform_resources(struct platform_device *pdev,
struct stmmac_resources *stmmac_res)
{
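
stmmac_pltfr_find_clk() is a linear lookup by consumer id over the driver's bulk-clock array, returning NULL when the name is absent. A usage sketch follows; the "ptp_ref" clock name is an assumption for illustration, not something this series defines:

	struct clk *ptp_clk;

	/* NULL is a valid answer; callers must treat the clock as optional */
	ptp_clk = stmmac_pltfr_find_clk(plat_dat, "ptp_ref");
	if (ptp_clk)
		plat_dat->clk_ptp_rate = clk_get_rate(ptp_clk);
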
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
index 72dc1a32e46d..6e6561e29d6e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
@@ -14,6 +14,9 @@
struct plat_stmmacenet_data *
devm_stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac);
+struct clk *stmmac_pltfr_find_clk(struct plat_stmmacenet_data *plat_dat,
+ const char *name);
+
int stmmac_get_platform_resources(struct platform_device *pdev,
struct stmmac_resources *stmmac_res);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 1e6d2335293d..988ce9119306 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -427,7 +427,7 @@ static void am65_cpsw_nuss_ndo_host_tx_timeout(struct net_device *ndev,
if (netif_tx_queue_stopped(netif_txq)) {
/* try to recover if stopped by us */
- txq_trans_update(netif_txq);
+ txq_trans_update(ndev, netif_txq);
netif_tx_wake_queue(netif_txq);
}
}
@@ -2679,7 +2679,9 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
goto of_node_put;
ret = of_get_mac_address(port_np, port->slave.mac_addr);
- if (ret) {
+ if (ret == -EPROBE_DEFER) {
+ goto of_node_put;
+ } else if (ret) {
am65_cpsw_am654_get_efuse_macid(port_np,
port->port_id,
port->slave.mac_addr);
@@ -2760,7 +2762,7 @@ am65_cpsw_nuss_init_port_ndev(struct am65_cpsw_common *common, u32 port_idx)
mutex_init(&ndev_priv->mm_lock);
port->qos.link_speed = SPEED_UNKNOWN;
SET_NETDEV_DEV(port->ndev, dev);
- port->ndev->dev.of_node = port->slave.port_np;
+ device_set_node(&port->ndev->dev, of_fwnode_handle(port->slave.port_np));
eth_hw_addr_set(port->ndev, port->slave.mac_addr);
@@ -3561,6 +3563,16 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
return ret;
}
+ am65_cpsw_nuss_get_ver(common);
+
+ ret = am65_cpsw_nuss_init_host_p(common);
+ if (ret)
+ goto err_pm_clear;
+
+ ret = am65_cpsw_nuss_init_slave_ports(common);
+ if (ret)
+ goto err_pm_clear;
+
node = of_get_child_by_name(dev->of_node, "mdio");
if (!node) {
dev_warn(dev, "MDIO node not found\n");
@@ -3577,16 +3589,6 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
}
of_node_put(node);
- am65_cpsw_nuss_get_ver(common);
-
- ret = am65_cpsw_nuss_init_host_p(common);
- if (ret)
- goto err_of_clear;
-
- ret = am65_cpsw_nuss_init_slave_ports(common);
- if (ret)
- goto err_of_clear;
-
/* init common data */
ale_params.dev = dev;
ale_params.ale_ageout = AM65_CPSW_ALE_AGEOUT_DEFAULT;
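
Two related fixes in the am65-cpsw hunk: of_get_mac_address() can return -EPROBE_DEFER when the address lives in an nvmem cell whose provider has not probed yet, and swallowing that error would lock the port onto the eFuse fallback address for good; moving the host-port and slave-port init ahead of the MDIO setup lets such a deferral unwind before the MDIO device is created. The deferral-aware fallback as a general sketch (use_efuse_mac() is a hypothetical stand-in for the eFuse helper):

	ret = of_get_mac_address(port_np, mac_addr);
	if (ret == -EPROBE_DEFER)
		return ret;		/* nvmem not ready: retry the probe later */
	else if (ret)
		use_efuse_mac(mac_addr);	/* permanent fallback */
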
diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig
index 47e3e8434b9e..e5fc942c28cc 100644
--- a/drivers/net/ethernet/wangxun/Kconfig
+++ b/drivers/net/ethernet/wangxun/Kconfig
@@ -40,7 +40,7 @@ config NGBE
will be called ngbe.
config TXGBE
- tristate "Wangxun(R) 10GbE PCI Express adapters support"
+ tristate "Wangxun(R) 10/25/40GbE PCI Express adapters support"
depends on PCI
depends on COMMON_CLK
depends on I2C_DESIGNWARE_PLATFORM
@@ -55,7 +55,7 @@ config TXGBE
select PCS_XPCS
select LIBWX
help
- This driver supports Wangxun(R) 10GbE PCI Express family of
+ This driver supports Wangxun(R) 10/25/40GbE PCI Express family of
adapters.
More specific information on configuring the driver is in
diff --git a/drivers/net/ethernet/wangxun/libwx/Makefile b/drivers/net/ethernet/wangxun/libwx/Makefile
index e9f0f1f2309b..9b78b604a94e 100644
--- a/drivers/net/ethernet/wangxun/libwx/Makefile
+++ b/drivers/net/ethernet/wangxun/libwx/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_LIBWX) += libwx.o
-libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o
+libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o wx_mbx.o wx_sriov.o
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index aed45abafb1b..3c3aa5f4ebbf 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -10,6 +10,7 @@
#include "wx_type.h"
#include "wx_lib.h"
+#include "wx_sriov.h"
#include "wx_hw.h"
static int wx_phy_read_reg_mdi(struct mii_bus *bus, int phy_addr, int devnum, int regnum)
@@ -964,11 +965,28 @@ static void wx_sync_mac_table(struct wx *wx)
}
}
+static void wx_full_sync_mac_table(struct wx *wx)
+{
+ int i;
+
+ for (i = 0; i < wx->mac.num_rar_entries; i++) {
+ if (wx->mac_table[i].state & WX_MAC_STATE_IN_USE) {
+ wx_set_rar(wx, i,
+ wx->mac_table[i].addr,
+ wx->mac_table[i].pools,
+ WX_PSR_MAC_SWC_AD_H_AV);
+ } else {
+ wx_clear_rar(wx, i);
+ }
+ wx->mac_table[i].state &= ~(WX_MAC_STATE_MODIFIED);
+ }
+}
+
/* this function destroys the first RAR entry */
void wx_mac_set_default_filter(struct wx *wx, u8 *addr)
{
memcpy(&wx->mac_table[0].addr, addr, ETH_ALEN);
- wx->mac_table[0].pools = 1ULL;
+ wx->mac_table[0].pools = BIT(VMDQ_P(0));
wx->mac_table[0].state = (WX_MAC_STATE_DEFAULT | WX_MAC_STATE_IN_USE);
wx_set_rar(wx, 0, wx->mac_table[0].addr,
wx->mac_table[0].pools,
@@ -993,7 +1011,7 @@ void wx_flush_sw_mac_table(struct wx *wx)
}
EXPORT_SYMBOL(wx_flush_sw_mac_table);
-static int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool)
+int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool)
{
u32 i;
@@ -1024,7 +1042,7 @@ static int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool)
return -ENOMEM;
}
-static int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool)
+int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool)
{
u32 i;
@@ -1206,6 +1224,35 @@ static void wx_update_mc_addr_list(struct wx *wx, struct net_device *netdev)
wx_dbg(wx, "Update mc addr list Complete\n");
}
+static void wx_restore_vf_multicasts(struct wx *wx)
+{
+ u32 i, j, vector_bit, vector_reg;
+ struct vf_data_storage *vfinfo;
+
+ for (i = 0; i < wx->num_vfs; i++) {
+ u32 vmolr = rd32(wx, WX_PSR_VM_L2CTL(i));
+
+ vfinfo = &wx->vfinfo[i];
+ for (j = 0; j < vfinfo->num_vf_mc_hashes; j++) {
+ wx->addr_ctrl.mta_in_use++;
+ vector_reg = WX_PSR_MC_TBL_REG(vfinfo->vf_mc_hashes[j]);
+ vector_bit = WX_PSR_MC_TBL_BIT(vfinfo->vf_mc_hashes[j]);
+ wr32m(wx, WX_PSR_MC_TBL(vector_reg),
+ BIT(vector_bit), BIT(vector_bit));
+ /* errata 5: maintain a copy of the reg table conf */
+ wx->mac.mta_shadow[vector_reg] |= BIT(vector_bit);
+ }
+ if (vfinfo->num_vf_mc_hashes)
+ vmolr |= WX_PSR_VM_L2CTL_ROMPE;
+ else
+ vmolr &= ~WX_PSR_VM_L2CTL_ROMPE;
+ wr32(wx, WX_PSR_VM_L2CTL(i), vmolr);
+ }
+
+ /* Restore any VF macvlans */
+ wx_full_sync_mac_table(wx);
+}
+
/**
* wx_write_mc_addr_list - write multicast addresses to MTA
* @netdev: network interface device structure
@@ -1223,6 +1270,9 @@ static int wx_write_mc_addr_list(struct net_device *netdev)
wx_update_mc_addr_list(wx, netdev);
+ if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags))
+ wx_restore_vf_multicasts(wx);
+
return netdev_mc_count(netdev);
}
@@ -1243,7 +1293,7 @@ int wx_set_mac(struct net_device *netdev, void *p)
if (retval)
return retval;
- wx_del_mac_filter(wx, wx->mac.addr, 0);
+ wx_del_mac_filter(wx, wx->mac.addr, VMDQ_P(0));
eth_hw_addr_set(netdev, addr->sa_data);
memcpy(wx->mac.addr, addr->sa_data, netdev->addr_len);
@@ -1345,6 +1395,10 @@ static int wx_hpbthresh(struct wx *wx)
/* Calculate delay value for device */
dv_id = WX_DV(link, tc);
+ /* Loopback switch introduces additional latency */
+ if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags))
+ dv_id += WX_B2BT(tc);
+
/* Delay value is calculated in bit times; convert to KB */
kb = WX_BT2KB(dv_id);
rx_pba = rd32(wx, WX_RDB_PB_SZ(0)) >> WX_RDB_PB_SZ_SHIFT;
@@ -1400,12 +1454,107 @@ static void wx_pbthresh_setup(struct wx *wx)
wx->fc.low_water = 0;
}
+static void wx_set_ethertype_anti_spoofing(struct wx *wx, bool enable, int vf)
+{
+ u32 pfvfspoof, reg_offset, vf_shift;
+
+ vf_shift = WX_VF_IND_SHIFT(vf);
+ reg_offset = WX_VF_REG_OFFSET(vf);
+
+ pfvfspoof = rd32(wx, WX_TDM_ETYPE_AS(reg_offset));
+ if (enable)
+ pfvfspoof |= BIT(vf_shift);
+ else
+ pfvfspoof &= ~BIT(vf_shift);
+ wr32(wx, WX_TDM_ETYPE_AS(reg_offset), pfvfspoof);
+}
+
+int wx_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
+{
+ u32 index = WX_VF_REG_OFFSET(vf), vf_bit = WX_VF_IND_SHIFT(vf);
+ struct wx *wx = netdev_priv(netdev);
+ u32 regval;
+
+ if (vf >= wx->num_vfs)
+ return -EINVAL;
+
+ wx->vfinfo[vf].spoofchk_enabled = setting;
+
+ regval = (setting << vf_bit);
+ wr32m(wx, WX_TDM_MAC_AS(index), regval | BIT(vf_bit), regval);
+
+ if (wx->vfinfo[vf].vlan_count)
+ wr32m(wx, WX_TDM_VLAN_AS(index), regval | BIT(vf_bit), regval);
+
+ return 0;
+}
+
+static void wx_configure_virtualization(struct wx *wx)
+{
+ u16 pool = wx->num_rx_pools;
+ u32 reg_offset, vf_shift;
+ u32 i;
+
+ if (!test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags))
+ return;
+
+ wr32m(wx, WX_PSR_VM_CTL,
+ WX_PSR_VM_CTL_POOL_MASK | WX_PSR_VM_CTL_REPLEN,
+ FIELD_PREP(WX_PSR_VM_CTL_POOL_MASK, VMDQ_P(0)) |
+ WX_PSR_VM_CTL_REPLEN);
+ while (pool--)
+ wr32m(wx, WX_PSR_VM_L2CTL(pool),
+ WX_PSR_VM_L2CTL_AUPE, WX_PSR_VM_L2CTL_AUPE);
+
+ if (wx->mac.type == wx_mac_em) {
+ vf_shift = BIT(VMDQ_P(0));
+ /* Enable only the PF pools for Tx/Rx */
+ wr32(wx, WX_RDM_VF_RE(0), vf_shift);
+ wr32(wx, WX_TDM_VF_TE(0), vf_shift);
+ } else {
+ vf_shift = WX_VF_IND_SHIFT(VMDQ_P(0));
+ reg_offset = WX_VF_REG_OFFSET(VMDQ_P(0));
+
+ /* Enable only the PF pools for Tx/Rx */
+ wr32(wx, WX_RDM_VF_RE(reg_offset), GENMASK(31, vf_shift));
+ wr32(wx, WX_RDM_VF_RE(reg_offset ^ 1), reg_offset - 1);
+ wr32(wx, WX_TDM_VF_TE(reg_offset), GENMASK(31, vf_shift));
+ wr32(wx, WX_TDM_VF_TE(reg_offset ^ 1), reg_offset - 1);
+ }
+
+ /* clear VLAN promisc flag so VFTA will be updated if necessary */
+ clear_bit(WX_FLAG_VLAN_PROMISC, wx->flags);
+
+ for (i = 0; i < wx->num_vfs; i++) {
+ if (!wx->vfinfo[i].spoofchk_enabled)
+ wx_set_vf_spoofchk(wx->netdev, i, false);
+ /* enable ethertype anti spoofing if hw supports it */
+ wx_set_ethertype_anti_spoofing(wx, true, i);
+ }
+}
+
static void wx_configure_port(struct wx *wx)
{
u32 value, i;
- value = WX_CFG_PORT_CTL_D_VLAN | WX_CFG_PORT_CTL_QINQ;
+ if (wx->mac.type == wx_mac_em) {
+ value = (wx->num_vfs == 0) ?
+ WX_CFG_PORT_CTL_NUM_VT_NONE :
+ WX_CFG_PORT_CTL_NUM_VT_8;
+ } else {
+ if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags)) {
+ if (wx->ring_feature[RING_F_RSS].indices == 4)
+ value = WX_CFG_PORT_CTL_NUM_VT_32;
+ else
+ value = WX_CFG_PORT_CTL_NUM_VT_64;
+ } else {
+ value = 0;
+ }
+ }
+
+ value |= WX_CFG_PORT_CTL_D_VLAN | WX_CFG_PORT_CTL_QINQ;
wr32m(wx, WX_CFG_PORT_CTL,
+ WX_CFG_PORT_CTL_NUM_VT_MASK |
WX_CFG_PORT_CTL_D_VLAN |
WX_CFG_PORT_CTL_QINQ,
value);
@@ -1466,6 +1615,83 @@ static void wx_vlan_strip_control(struct wx *wx, bool enable)
}
}
+static void wx_vlan_promisc_enable(struct wx *wx)
+{
+ u32 vlnctrl, i, vind, bits, reg_idx;
+
+ vlnctrl = rd32(wx, WX_PSR_VLAN_CTL);
+ if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags)) {
+ /* we need to keep the VLAN filter on in SRIOV */
+ vlnctrl |= WX_PSR_VLAN_CTL_VFE;
+ wr32(wx, WX_PSR_VLAN_CTL, vlnctrl);
+ } else {
+ vlnctrl &= ~WX_PSR_VLAN_CTL_VFE;
+ wr32(wx, WX_PSR_VLAN_CTL, vlnctrl);
+ return;
+ }
+ /* We are already in VLAN promisc, nothing to do */
+ if (test_bit(WX_FLAG_VLAN_PROMISC, wx->flags))
+ return;
+ /* Set flag so we don't redo unnecessary work */
+ set_bit(WX_FLAG_VLAN_PROMISC, wx->flags);
+ /* Add PF to all active pools */
+ for (i = WX_PSR_VLAN_SWC_ENTRIES; --i;) {
+ wr32(wx, WX_PSR_VLAN_SWC_IDX, i);
+ vind = WX_VF_IND_SHIFT(VMDQ_P(0));
+ reg_idx = WX_VF_REG_OFFSET(VMDQ_P(0));
+ bits = rd32(wx, WX_PSR_VLAN_SWC_VM(reg_idx));
+ bits |= BIT(vind);
+ wr32(wx, WX_PSR_VLAN_SWC_VM(reg_idx), bits);
+ }
+ /* Set all bits in the VLAN filter table array */
+ for (i = 0; i < wx->mac.vft_size; i++)
+ wr32(wx, WX_PSR_VLAN_TBL(i), U32_MAX);
+}
+
+static void wx_scrub_vfta(struct wx *wx)
+{
+ u32 i, vid, bits, vfta, vind, vlvf, reg_idx;
+
+ for (i = WX_PSR_VLAN_SWC_ENTRIES; --i;) {
+ wr32(wx, WX_PSR_VLAN_SWC_IDX, i);
+ vlvf = rd32(wx, WX_PSR_VLAN_SWC_IDX);
+ /* pull VLAN ID from VLVF */
+ vid = vlvf & ~WX_PSR_VLAN_SWC_VIEN;
+ if (vlvf & WX_PSR_VLAN_SWC_VIEN) {
+ /* if PF is part of this then continue */
+ if (test_bit(vid, wx->active_vlans))
+ continue;
+ }
+ /* remove PF from the pool */
+ vind = WX_VF_IND_SHIFT(VMDQ_P(0));
+ reg_idx = WX_VF_REG_OFFSET(VMDQ_P(0));
+ bits = rd32(wx, WX_PSR_VLAN_SWC_VM(reg_idx));
+ bits &= ~BIT(vind);
+ wr32(wx, WX_PSR_VLAN_SWC_VM(reg_idx), bits);
+ }
+ /* extract values from vft_shadow and write back to VFTA */
+ for (i = 0; i < wx->mac.vft_size; i++) {
+ vfta = wx->mac.vft_shadow[i];
+ wr32(wx, WX_PSR_VLAN_TBL(i), vfta);
+ }
+}
+
+static void wx_vlan_promisc_disable(struct wx *wx)
+{
+ u32 vlnctrl;
+
+ /* configure vlan filtering */
+ vlnctrl = rd32(wx, WX_PSR_VLAN_CTL);
+ vlnctrl |= WX_PSR_VLAN_CTL_VFE;
+ wr32(wx, WX_PSR_VLAN_CTL, vlnctrl);
+ /* We are not in VLAN promisc, nothing to do */
+ if (!test_bit(WX_FLAG_VLAN_PROMISC, wx->flags))
+ return;
+ /* Set flag so we don't redo unnecessary work */
+ clear_bit(WX_FLAG_VLAN_PROMISC, wx->flags);
+ wx_scrub_vfta(wx);
+}
+
void wx_set_rx_mode(struct net_device *netdev)
{
struct wx *wx = netdev_priv(netdev);
@@ -1478,7 +1704,7 @@ void wx_set_rx_mode(struct net_device *netdev)
/* Check for Promiscuous and All Multicast modes */
fctrl = rd32(wx, WX_PSR_CTL);
fctrl &= ~(WX_PSR_CTL_UPE | WX_PSR_CTL_MPE);
- vmolr = rd32(wx, WX_PSR_VM_L2CTL(0));
+ vmolr = rd32(wx, WX_PSR_VM_L2CTL(VMDQ_P(0)));
vmolr &= ~(WX_PSR_VM_L2CTL_UPE |
WX_PSR_VM_L2CTL_MPE |
WX_PSR_VM_L2CTL_ROPE |
@@ -1499,7 +1725,10 @@ void wx_set_rx_mode(struct net_device *netdev)
fctrl |= WX_PSR_CTL_UPE | WX_PSR_CTL_MPE;
/* the PF doesn't want packets routed to the VF, so clear UPE */
vmolr |= WX_PSR_VM_L2CTL_MPE;
- vlnctrl &= ~WX_PSR_VLAN_CTL_VFE;
+ if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags) &&
+ test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags))
+ vlnctrl |= WX_PSR_VLAN_CTL_VFE;
+ features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
}
if (netdev->flags & IFF_ALLMULTI) {
@@ -1522,7 +1751,7 @@ void wx_set_rx_mode(struct net_device *netdev)
* sufficient space to store all the addresses then enable
* unicast promiscuous mode
*/
- count = wx_write_uc_addr_list(netdev, 0);
+ count = wx_write_uc_addr_list(netdev, VMDQ_P(0));
if (count < 0) {
vmolr &= ~WX_PSR_VM_L2CTL_ROPE;
vmolr |= WX_PSR_VM_L2CTL_UPE;
@@ -1540,7 +1769,7 @@ void wx_set_rx_mode(struct net_device *netdev)
wr32(wx, WX_PSR_VLAN_CTL, vlnctrl);
wr32(wx, WX_PSR_CTL, fctrl);
- wr32(wx, WX_PSR_VM_L2CTL(0), vmolr);
+ wr32(wx, WX_PSR_VM_L2CTL(VMDQ_P(0)), vmolr);
if ((features & NETIF_F_HW_VLAN_CTAG_RX) &&
(features & NETIF_F_HW_VLAN_STAG_RX))
@@ -1548,6 +1777,10 @@ void wx_set_rx_mode(struct net_device *netdev)
else
wx_vlan_strip_control(wx, false);
+ if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
+ wx_vlan_promisc_disable(wx);
+ else
+ wx_vlan_promisc_enable(wx);
}
EXPORT_SYMBOL(wx_set_rx_mode);
@@ -1797,6 +2030,13 @@ static void wx_setup_reta(struct wx *wx)
u32 random_key_size = WX_RSS_KEY_SIZE / 4;
u32 i, j;
+ if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) {
+ if (wx->mac.type == wx_mac_em)
+ rss_i = 1;
+ else
+ rss_i = rss_i < 4 ? 4 : rss_i;
+ }
+
/* Fill out hash function seeds */
for (i = 0; i < random_key_size; i++)
wr32(wx, WX_RDB_RSSRK(i), wx->rss_key[i]);
@@ -1814,10 +2054,42 @@ static void wx_setup_reta(struct wx *wx)
wx_store_reta(wx);
}
+#define WX_RDB_RSS_PL_2 FIELD_PREP(GENMASK(31, 29), 1)
+#define WX_RDB_RSS_PL_4 FIELD_PREP(GENMASK(31, 29), 2)
+static void wx_setup_psrtype(struct wx *wx)
+{
+ int rss_i = wx->ring_feature[RING_F_RSS].indices;
+ u32 psrtype;
+ int pool;
+
+ psrtype = WX_RDB_PL_CFG_L4HDR |
+ WX_RDB_PL_CFG_L3HDR |
+ WX_RDB_PL_CFG_L2HDR |
+ WX_RDB_PL_CFG_TUN_OUTL2HDR |
+ WX_RDB_PL_CFG_TUN_TUNHDR;
+
+ if (wx->mac.type == wx_mac_em) {
+ for_each_set_bit(pool, &wx->fwd_bitmask, 8)
+ wr32(wx, WX_RDB_PL_CFG(VMDQ_P(pool)), psrtype);
+ } else {
+ if (rss_i > 3)
+ psrtype |= WX_RDB_RSS_PL_4;
+ else if (rss_i > 1)
+ psrtype |= WX_RDB_RSS_PL_2;
+
+ for_each_set_bit(pool, &wx->fwd_bitmask, 32)
+ wr32(wx, WX_RDB_PL_CFG(VMDQ_P(pool)), psrtype);
+ }
+}
+
static void wx_setup_mrqc(struct wx *wx)
{
u32 rss_field = 0;
+ /* VT and RSS do not coexist */
+ if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags))
+ return;
+
/* Disable indicating checksum in descriptor, enables RSS hash */
wr32m(wx, WX_PSR_CTL, WX_PSR_CTL_PCSD, WX_PSR_CTL_PCSD);
@@ -1847,16 +2119,11 @@ static void wx_setup_mrqc(struct wx *wx)
**/
void wx_configure_rx(struct wx *wx)
{
- u32 psrtype, i;
int ret;
+ u32 i;
wx_disable_rx(wx);
-
- psrtype = WX_RDB_PL_CFG_L4HDR |
- WX_RDB_PL_CFG_L3HDR |
- WX_RDB_PL_CFG_L2HDR |
- WX_RDB_PL_CFG_TUN_TUNHDR;
- wr32(wx, WX_RDB_PL_CFG(0), psrtype);
+ wx_setup_psrtype(wx);
/* enable hw crc stripping */
wr32m(wx, WX_RSC_CTL, WX_RSC_CTL_CRC_STRIP, WX_RSC_CTL_CRC_STRIP);
@@ -1904,6 +2171,7 @@ void wx_configure(struct wx *wx)
{
wx_set_rxpba(wx);
wx_pbthresh_setup(wx);
+ wx_configure_virtualization(wx);
wx_configure_port(wx);
wx_set_rx_mode(wx->netdev);
@@ -2262,7 +2530,7 @@ static int wx_set_vlvf(struct wx *wx, u32 vlan, u32 vind, bool vlan_on,
*
* Turn on/off specified VLAN in the VLAN filter table.
**/
-static int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on)
+int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on)
{
u32 bitindex, vfta, targetbit;
bool vfta_changed = false;
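
Most of the wx_hw.c churn replaces a hard-coded pool 0 with VMDQ_P(0): once SR-IOV is active the PF's default pool sits after the VF pools, so RAR writes, VMOLR accesses and the unicast list must all be pool-relative. As an illustration of the translation this implies (not the macro's actual definition, which lives in the wangxun headers):

	/* relative pool p -> absolute pool, shifted past the VF pools */
	#define DEMO_VMDQ_P(wx, p) \
		((wx)->ring_feature[RING_F_VMDQ].offset + (p))

	/* wx_sriov.c sets offset = num_vfs, so with 4 VFs the PF's
	 * default pool DEMO_VMDQ_P(wx, 0) is pool 4 while the VFs own
	 * pools 0..3
	 */
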
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
index b883342bb576..91c1d6135045 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
@@ -26,9 +26,12 @@ void wx_init_eeprom_params(struct wx *wx);
void wx_get_mac_addr(struct wx *wx, u8 *mac_addr);
void wx_init_rx_addrs(struct wx *wx);
void wx_mac_set_default_filter(struct wx *wx, u8 *addr);
+int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool);
+int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool);
void wx_flush_sw_mac_table(struct wx *wx);
int wx_set_mac(struct net_device *netdev, void *p);
void wx_disable_rx(struct wx *wx);
+int wx_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
int wx_disable_sec_rx_path(struct wx *wx);
void wx_enable_sec_rx_path(struct wx *wx);
void wx_set_rx_mode(struct net_device *netdev);
@@ -42,6 +45,7 @@ int wx_stop_adapter(struct wx *wx);
void wx_reset_misc(struct wx *wx);
int wx_get_pcie_msix_counts(struct wx *wx, u16 *msix_count, u16 max_msix_count);
int wx_sw_init(struct wx *wx);
+int wx_set_vfta(struct wx *wx, u32 vlan, u32 vind, bool vlan_on);
int wx_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid);
int wx_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid);
int wx_fc_enable(struct wx *wx, bool tx_pause, bool rx_pause);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index e69eaa65e0de..18422b940dbe 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -1620,6 +1620,65 @@ void wx_napi_disable_all(struct wx *wx)
}
EXPORT_SYMBOL(wx_napi_disable_all);
+static bool wx_set_vmdq_queues(struct wx *wx)
+{
+ u16 vmdq_i = wx->ring_feature[RING_F_VMDQ].limit;
+ u16 rss_i = wx->ring_feature[RING_F_RSS].limit;
+ u16 rss_m = WX_RSS_DISABLED_MASK;
+ u16 vmdq_m = 0;
+
+ /* only proceed if VMDq is enabled */
+ if (!test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags))
+ return false;
+ /* Add starting offset to total pool count */
+ vmdq_i += wx->ring_feature[RING_F_VMDQ].offset;
+
+ if (wx->mac.type == wx_mac_sp) {
+ /* double check we are limited to maximum pools */
+ vmdq_i = min_t(u16, 64, vmdq_i);
+
+ /* 64 pool mode with 2 queues per pool, or
+ * 16/32/64 pool mode with 1 queue per pool
+ */
+ if (vmdq_i > 32 || rss_i < 4) {
+ vmdq_m = WX_VMDQ_2Q_MASK;
+ rss_m = WX_RSS_2Q_MASK;
+ rss_i = min_t(u16, rss_i, 2);
+ /* 32 pool mode with 4 queues per pool */
+ } else {
+ vmdq_m = WX_VMDQ_4Q_MASK;
+ rss_m = WX_RSS_4Q_MASK;
+ rss_i = 4;
+ }
+ } else {
+ /* double check we are limited to maximum pools */
+ vmdq_i = min_t(u16, 8, vmdq_i);
+
+ /* when VMDQ on, disable RSS */
+ rss_i = 1;
+ }
+
+ /* remove the starting offset from the pool count */
+ vmdq_i -= wx->ring_feature[RING_F_VMDQ].offset;
+
+ /* save features for later use */
+ wx->ring_feature[RING_F_VMDQ].indices = vmdq_i;
+ wx->ring_feature[RING_F_VMDQ].mask = vmdq_m;
+
+ /* limit RSS based on user input and save for later use */
+ wx->ring_feature[RING_F_RSS].indices = rss_i;
+ wx->ring_feature[RING_F_RSS].mask = rss_m;
+
+ wx->queues_per_pool = rss_i; /* may be the same as num_rx_queues_per_pool */
+ wx->num_rx_pools = vmdq_i;
+ wx->num_rx_queues_per_pool = rss_i;
+
+ wx->num_rx_queues = vmdq_i * rss_i;
+ wx->num_tx_queues = vmdq_i * rss_i;
+
+ return true;
+}
+
/**
* wx_set_rss_queues: Allocate queues for RSS
* @wx: board private structure to initialize
@@ -1634,6 +1693,10 @@ static void wx_set_rss_queues(struct wx *wx)
/* set the RSS mask for the MAC's queue limit */
f = &wx->ring_feature[RING_F_RSS];
+ if (wx->mac.type == wx_mac_sp)
+ f->mask = WX_RSS_64Q_MASK;
+ else
+ f->mask = WX_RSS_8Q_MASK;
f->indices = f->limit;
if (!(test_bit(WX_FLAG_FDIR_CAPABLE, wx->flags)))
@@ -1666,6 +1729,9 @@ static void wx_set_num_queues(struct wx *wx)
wx->num_tx_queues = 1;
wx->queues_per_pool = 1;
+ if (wx_set_vmdq_queues(wx))
+ return;
+
wx_set_rss_queues(wx);
}
@@ -1746,6 +1812,10 @@ static int wx_set_interrupt_capability(struct wx *wx)
if (ret == 0 || (ret == -ENOMEM))
return ret;
+ /* Disable VMDq support */
+ dev_warn(&wx->pdev->dev, "Disabling VMDq support\n");
+ clear_bit(WX_FLAG_VMDQ_ENABLED, wx->flags);
+
/* Disable RSS */
dev_warn(&wx->pdev->dev, "Disabling RSS support\n");
wx->ring_feature[RING_F_RSS].limit = 1;
@@ -1772,6 +1842,49 @@ static int wx_set_interrupt_capability(struct wx *wx)
return 0;
}
+static bool wx_cache_ring_vmdq(struct wx *wx)
+{
+ struct wx_ring_feature *vmdq = &wx->ring_feature[RING_F_VMDQ];
+ struct wx_ring_feature *rss = &wx->ring_feature[RING_F_RSS];
+ u16 reg_idx;
+ int i;
+
+ /* only proceed if VMDq is enabled */
+ if (!test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags))
+ return false;
+
+ if (wx->mac.type == wx_mac_sp) {
+ /* start at VMDq register offset for SR-IOV enabled setups */
+ reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
+ for (i = 0; i < wx->num_rx_queues; i++, reg_idx++) {
+ /* If we are greater than indices move to next pool */
+ if ((reg_idx & ~vmdq->mask) >= rss->indices)
+ reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+ wx->rx_ring[i]->reg_idx = reg_idx;
+ }
+ reg_idx = vmdq->offset * __ALIGN_MASK(1, ~vmdq->mask);
+ for (i = 0; i < wx->num_tx_queues; i++, reg_idx++) {
+ /* If we are greater than indices move to next pool */
+ if ((reg_idx & rss->mask) >= rss->indices)
+ reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+ wx->tx_ring[i]->reg_idx = reg_idx;
+ }
+ } else {
+ /* start at VMDq register offset for SR-IOV enabled setups */
+ reg_idx = vmdq->offset;
+ for (i = 0; i < wx->num_rx_queues; i++)
+ /* one queue per pool: Rx rings map 1:1 after the VMDq offset */
+ wx->rx_ring[i]->reg_idx = reg_idx + i;
+
+ reg_idx = vmdq->offset;
+ for (i = 0; i < wx->num_tx_queues; i++)
+ /* one queue per pool: Tx rings map 1:1 after the VMDq offset */
+ wx->tx_ring[i]->reg_idx = reg_idx + i;
+ }
+
+ return true;
+}
+
/**
* wx_cache_ring_rss - Descriptor ring to register mapping for RSS
* @wx: board private structure to initialize
@@ -1783,6 +1896,9 @@ static void wx_cache_ring_rss(struct wx *wx)
{
u16 i;
+ if (wx_cache_ring_vmdq(wx))
+ return;
+
for (i = 0; i < wx->num_rx_queues; i++)
wx->rx_ring[i]->reg_idx = i;
@@ -2181,7 +2297,8 @@ static void wx_set_ivar(struct wx *wx, s8 direction,
wr32(wx, WX_PX_MISC_IVAR, ivar);
} else {
/* tx or rx causes */
- msix_vector += 1; /* offset for queue vectors */
+ if (!(wx->mac.type == wx_mac_em && wx->num_vfs == 7))
+ msix_vector += 1; /* offset for queue vectors */
msix_vector |= WX_PX_IVAR_ALLOC_VAL;
index = ((16 * (queue & 1)) + (8 * direction));
ivar = rd32(wx, WX_PX_IVAR(queue >> 1));
@@ -2233,10 +2350,17 @@ void wx_configure_vectors(struct wx *wx)
{
struct pci_dev *pdev = wx->pdev;
u32 eitrsel = 0;
- u16 v_idx;
+ u16 v_idx, i;
if (pdev->msix_enabled) {
/* Populate MSIX to EITR Select */
+ if (wx->mac.type == wx_mac_sp) {
+ if (wx->num_vfs >= 32)
+ eitrsel = BIT(wx->num_vfs % 32) - 1;
+ } else if (wx->mac.type == wx_mac_em) {
+ for (i = 0; i < wx->num_vfs; i++)
+ eitrsel |= BIT(i);
+ }
wr32(wx, WX_PX_ITRSEL, eitrsel);
/* use EIAM to auto-mask when MSI-X interrupt is asserted
* this saves a register write for every interrupt
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_mbx.c b/drivers/net/ethernet/wangxun/libwx/wx_mbx.c
new file mode 100644
index 000000000000..73af5f11c3bd
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_mbx.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */
+
+#include <linux/pci.h>
+#include "wx_type.h"
+#include "wx_mbx.h"
+
+/**
+ * wx_obtain_mbx_lock_pf - obtain mailbox lock
+ * @wx: pointer to the HW structure
+ * @vf: the VF index
+ *
+ * Return: return 0 on success and -EBUSY on failure
+ **/
+static int wx_obtain_mbx_lock_pf(struct wx *wx, u16 vf)
+{
+ int count = 5;
+ u32 mailbox;
+
+ while (count--) {
+ /* Take ownership of the buffer */
+ wr32(wx, WX_PXMAILBOX(vf), WX_PXMAILBOX_PFU);
+
+ /* reserve mailbox for vf use */
+ mailbox = rd32(wx, WX_PXMAILBOX(vf));
+ if (mailbox & WX_PXMAILBOX_PFU)
+ return 0;
+ else if (count)
+ udelay(10);
+ }
+ wx_err(wx, "Failed to obtain mailbox lock for VF%d", vf);
+
+ return -EBUSY;
+}
+
+static int wx_check_for_bit_pf(struct wx *wx, u32 mask, int index)
+{
+ u32 mbvficr = rd32(wx, WX_MBVFICR(index));
+
+ if (!(mbvficr & mask))
+ return -EBUSY;
+ wr32(wx, WX_MBVFICR(index), mask);
+
+ return 0;
+}
+
+/**
+ * wx_check_for_ack_pf - checks to see if the VF has acked
+ * @wx: pointer to the HW structure
+ * @vf: the VF index
+ *
+ * Return: return 0 if the VF has set the ACK bit, otherwise -EBUSY
+ **/
+int wx_check_for_ack_pf(struct wx *wx, u16 vf)
+{
+ u32 index = vf / 16, vf_bit = vf % 16;
+
+ return wx_check_for_bit_pf(wx,
+ FIELD_PREP(WX_MBVFICR_VFACK_MASK,
+ BIT(vf_bit)),
+ index);
+}
+
+/**
+ * wx_check_for_msg_pf - checks to see if the VF has sent mail
+ * @wx: pointer to the HW structure
+ * @vf: the VF index
+ *
+ * Return: return 0 if the VF has set the request bit, otherwise -EBUSY
+ **/
+int wx_check_for_msg_pf(struct wx *wx, u16 vf)
+{
+ u32 index = vf / 16, vf_bit = vf % 16;
+
+ return wx_check_for_bit_pf(wx,
+ FIELD_PREP(WX_MBVFICR_VFREQ_MASK,
+ BIT(vf_bit)),
+ index);
+}
+
+/**
+ * wx_write_mbx_pf - Places a message in the mailbox
+ * @wx: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @vf: the VF index
+ *
+ * Return: return 0 on success and -EINVAL/-EBUSY on failure
+ **/
+int wx_write_mbx_pf(struct wx *wx, u32 *msg, u16 size, u16 vf)
+{
+ struct wx_mbx_info *mbx = &wx->mbx;
+ int ret, i;
+
+ /* mbx->size is up to 15 */
+ if (size > mbx->size) {
+ wx_err(wx, "Invalid mailbox message size %d", size);
+ return -EINVAL;
+ }
+
+ /* lock the mailbox to prevent pf/vf race condition */
+ ret = wx_obtain_mbx_lock_pf(wx, vf);
+ if (ret)
+ return ret;
+
+ /* flush msg and acks as we are overwriting the message buffer */
+ wx_check_for_msg_pf(wx, vf);
+ wx_check_for_ack_pf(wx, vf);
+
+ /* copy the caller specified message to the mailbox memory buffer */
+ for (i = 0; i < size; i++)
+ wr32a(wx, WX_PXMBMEM(vf), i, msg[i]);
+
+ /* Interrupt VF to tell it a message has been sent and release buffer */
+ /* set mirrored mailbox flags */
+ wr32a(wx, WX_PXMBMEM(vf), WX_VXMAILBOX_SIZE, WX_PXMAILBOX_STS);
+ wr32(wx, WX_PXMAILBOX(vf), WX_PXMAILBOX_STS);
+
+ return 0;
+}
+
+/**
+ * wx_read_mbx_pf - Read a message from the mailbox
+ * @wx: pointer to the HW structure
+ * @msg: The message buffer
+ * @size: Length of buffer
+ * @vf: the VF index
+ *
+ * Return: return 0 on success and -EBUSY on failure
+ **/
+int wx_read_mbx_pf(struct wx *wx, u32 *msg, u16 size, u16 vf)
+{
+ struct wx_mbx_info *mbx = &wx->mbx;
+ int ret;
+ u16 i;
+
+ /* limit the read to the mailbox size (at most 15 words) */
+ if (size > mbx->size)
+ size = mbx->size;
+
+ /* lock the mailbox to prevent pf/vf race condition */
+ ret = wx_obtain_mbx_lock_pf(wx, vf);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < size; i++)
+ msg[i] = rd32a(wx, WX_PXMBMEM(vf), i);
+
+ /* Acknowledge the message and release buffer */
+ /* set mirrored mailbox flags */
+ wr32a(wx, WX_PXMBMEM(vf), WX_VXMAILBOX_SIZE, WX_PXMAILBOX_ACK);
+ wr32(wx, WX_PXMAILBOX(vf), WX_PXMAILBOX_ACK);
+
+ return 0;
+}
+
+/**
+ * wx_check_for_rst_pf - checks to see if the VF has reset
+ * @wx: pointer to the HW structure
+ * @vf: the VF index
+ *
+ * Return: return 0 on success and -EBUSY on failure
+ **/
+int wx_check_for_rst_pf(struct wx *wx, u16 vf)
+{
+ u32 reg_offset = WX_VF_REG_OFFSET(vf);
+ u32 vf_shift = WX_VF_IND_SHIFT(vf);
+ u32 vflre = 0;
+
+ vflre = rd32(wx, WX_VFLRE(reg_offset));
+ if (!(vflre & BIT(vf_shift)))
+ return -EBUSY;
+ wr32(wx, WX_VFLREC(reg_offset), BIT(vf_shift));
+
+ return 0;
+}
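
Together these helpers implement the PF side of an owned-buffer protocol: take the PFU lock, flush stale REQ/ACK latches, copy up to 15 words through PXMBMEM, then signal STS (message sent) or ACK (message consumed). A hedged sketch of the polling loop a caller builds on top; demo_handle_msg() is hypothetical, the real dispatcher being wx_rcv_msg_from_vf() in wx_sriov.c below:

	static void demo_service_vf(struct wx *wx, u16 vf)
	{
		u32 msg[WX_VXMAILBOX_SIZE];

		if (!wx_check_for_msg_pf(wx, vf) &&
		    !wx_read_mbx_pf(wx, msg, WX_VXMAILBOX_SIZE, vf)) {
			/* echo the command back with ACK or NACK set */
			if (demo_handle_msg(wx, msg, vf))
				msg[0] |= WX_VT_MSGTYPE_NACK;
			else
				msg[0] |= WX_VT_MSGTYPE_ACK;
			wx_write_mbx_pf(wx, msg, 1, vf);
		}
	}
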
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_mbx.h b/drivers/net/ethernet/wangxun/libwx/wx_mbx.h
new file mode 100644
index 000000000000..05aae138dbc3
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_mbx.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */
+#ifndef _WX_MBX_H_
+#define _WX_MBX_H_
+
+#define WX_VXMAILBOX_SIZE 15
+
+/* PF Registers */
+#define WX_PXMAILBOX(i) (0x600 + (4 * (i))) /* i=[0,63] */
+#define WX_PXMAILBOX_STS BIT(0) /* Initiate message send to VF */
+#define WX_PXMAILBOX_ACK BIT(1) /* Ack message recv'd from VF */
+#define WX_PXMAILBOX_PFU BIT(3) /* PF owns the mailbox buffer */
+
+#define WX_PXMBMEM(i) (0x5000 + (64 * (i))) /* i=[0,63] */
+
+#define WX_VFLRE(i) (0x4A0 + (4 * (i))) /* i=[0,1] */
+#define WX_VFLREC(i) (0x4A8 + (4 * (i))) /* i=[0,1] */
+
+/* SR-IOV specific macros */
+#define WX_MBVFICR(i) (0x480 + (4 * (i))) /* i=[0,3] */
+#define WX_MBVFICR_VFREQ_MASK GENMASK(15, 0)
+#define WX_MBVFICR_VFACK_MASK GENMASK(31, 16)
+
+#define WX_VT_MSGTYPE_ACK BIT(31)
+#define WX_VT_MSGTYPE_NACK BIT(30)
+#define WX_VT_MSGTYPE_CTS BIT(29)
+#define WX_VT_MSGINFO_SHIFT 16
+#define WX_VT_MSGINFO_MASK GENMASK(23, 16)
+
+enum wx_pfvf_api_rev {
+ wx_mbox_api_null,
+ wx_mbox_api_13 = 4, /* API version 1.3 */
+ wx_mbox_api_unknown, /* indicates that API version is not known */
+};
+
+/* mailbox API */
+#define WX_VF_RESET 0x01 /* VF requests reset */
+#define WX_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */
+#define WX_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */
+#define WX_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */
+#define WX_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */
+#define WX_VF_SET_MACVLAN 0x06 /* VF requests PF unicast filter */
+#define WX_VF_API_NEGOTIATE 0x08 /* negotiate API version */
+#define WX_VF_GET_QUEUES 0x09 /* get queue configuration */
+#define WX_VF_GET_RETA 0x0a /* VF request for RETA */
+#define WX_VF_GET_RSS_KEY 0x0b /* get RSS key */
+#define WX_VF_UPDATE_XCAST_MODE 0x0c
+#define WX_VF_GET_LINK_STATE 0x10 /* get vf link state */
+#define WX_VF_GET_FW_VERSION 0x11 /* get fw version */
+
+#define WX_VF_BACKUP 0x8001 /* VF requests backup */
+
+#define WX_PF_CONTROL_MSG BIT(8) /* PF control message */
+#define WX_PF_NOFITY_VF_LINK_STATUS 0x1
+#define WX_PF_NOFITY_VF_NET_NOT_RUNNING BIT(31)
+
+#define WX_VF_TX_QUEUES 1 /* number of Tx queues supported */
+#define WX_VF_RX_QUEUES 2 /* number of Rx queues supported */
+#define WX_VF_TRANS_VLAN 3 /* Indication of port vlan */
+#define WX_VF_DEF_QUEUE 4 /* Default queue offset */
+
+#define WX_VF_PERMADDR_MSG_LEN 4
+
+enum wxvf_xcast_modes {
+ WXVF_XCAST_MODE_NONE = 0,
+ WXVF_XCAST_MODE_MULTI,
+ WXVF_XCAST_MODE_ALLMULTI,
+ WXVF_XCAST_MODE_PROMISC,
+};
+
+int wx_write_mbx_pf(struct wx *wx, u32 *msg, u16 size, u16 vf);
+int wx_read_mbx_pf(struct wx *wx, u32 *msg, u16 size, u16 vf);
+int wx_check_for_rst_pf(struct wx *wx, u16 mbx_id);
+int wx_check_for_msg_pf(struct wx *wx, u16 mbx_id);
+int wx_check_for_ack_pf(struct wx *wx, u16 mbx_id);
+
+#endif /* _WX_MBX_H_ */
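
Read together, the masks above imply this layout for the first mailbox word; a reading aid inferred from the definitions, not quoted from a datasheet:

	/* bit 31   WX_VT_MSGTYPE_ACK   PF: message handled
	 * bit 30   WX_VT_MSGTYPE_NACK  PF: message rejected
	 * bit 29   WX_VT_MSGTYPE_CTS   PF is clear-to-send
	 * 23..16   WX_VT_MSGINFO       command-specific argument
	 * 15..0    command, e.g. WX_VF_SET_VLAN
	 */
	u32 info = (msgbuf[0] & WX_VT_MSGINFO_MASK) >> WX_VT_MSGINFO_SHIFT;
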
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
new file mode 100644
index 000000000000..52e6a6faf715
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
@@ -0,0 +1,909 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */
+
+#include <linux/etherdevice.h>
+#include <linux/pci.h>
+
+#include "wx_type.h"
+#include "wx_hw.h"
+#include "wx_mbx.h"
+#include "wx_sriov.h"
+
+static void wx_vf_configuration(struct pci_dev *pdev, int event_mask)
+{
+ bool enable = !!WX_VF_ENABLE_CHECK(event_mask);
+ struct wx *wx = pci_get_drvdata(pdev);
+ u32 vfn = WX_VF_NUM_GET(event_mask);
+
+ if (enable)
+ eth_zero_addr(wx->vfinfo[vfn].vf_mac_addr);
+}
+
+static int wx_alloc_vf_macvlans(struct wx *wx, u8 num_vfs)
+{
+ struct vf_macvlans *mv_list;
+ int num_vf_macvlans, i;
+
+ /* Initialize list of VF macvlans */
+ INIT_LIST_HEAD(&wx->vf_mvs.mvlist);
+
+ num_vf_macvlans = wx->mac.num_rar_entries -
+ (WX_MAX_PF_MACVLANS + 1 + num_vfs);
+ if (num_vf_macvlans <= 0)
+ return -EINVAL;
+
+ mv_list = kcalloc(num_vf_macvlans, sizeof(struct vf_macvlans),
+ GFP_KERNEL);
+ if (!mv_list)
+ return -ENOMEM;
+
+ for (i = 0; i < num_vf_macvlans; i++) {
+ mv_list[i].vf = -1;
+ mv_list[i].free = true;
+ list_add(&mv_list[i].mvlist, &wx->vf_mvs.mvlist);
+ }
+ wx->mv_list = mv_list;
+
+ return 0;
+}
+
+static void wx_sriov_clear_data(struct wx *wx)
+{
+ /* set num VFs to 0 to prevent access to vfinfo */
+ wx->num_vfs = 0;
+
+ /* free VF control structures */
+ kfree(wx->vfinfo);
+ wx->vfinfo = NULL;
+
+ /* free macvlan list */
+ kfree(wx->mv_list);
+ wx->mv_list = NULL;
+
+ /* set default pool back to 0 */
+ wr32m(wx, WX_PSR_VM_CTL, WX_PSR_VM_CTL_POOL_MASK, 0);
+ wx->ring_feature[RING_F_VMDQ].offset = 0;
+
+ clear_bit(WX_FLAG_SRIOV_ENABLED, wx->flags);
+ /* Disable VMDq flag so the device is put back in non-VMDq mode */
+ if (wx->ring_feature[RING_F_VMDQ].limit == 1)
+ clear_bit(WX_FLAG_VMDQ_ENABLED, wx->flags);
+}
+
+static int __wx_enable_sriov(struct wx *wx, u8 num_vfs)
+{
+ int i, ret = 0;
+ u32 value = 0;
+
+ set_bit(WX_FLAG_SRIOV_ENABLED, wx->flags);
+ wx_err(wx, "SR-IOV enabled with %d VFs\n", num_vfs);
+
+ /* Enable VMDq flag so device will be set in VM mode */
+ set_bit(WX_FLAG_VMDQ_ENABLED, wx->flags);
+ if (!wx->ring_feature[RING_F_VMDQ].limit)
+ wx->ring_feature[RING_F_VMDQ].limit = 1;
+ wx->ring_feature[RING_F_VMDQ].offset = num_vfs;
+
+ wx->vfinfo = kcalloc(num_vfs, sizeof(struct vf_data_storage),
+ GFP_KERNEL);
+ if (!wx->vfinfo)
+ return -ENOMEM;
+
+ ret = wx_alloc_vf_macvlans(wx, num_vfs);
+ if (ret)
+ return ret;
+
+ /* Initialize default switching mode VEB */
+ wr32m(wx, WX_PSR_CTL, WX_PSR_CTL_SW_EN, WX_PSR_CTL_SW_EN);
+
+ for (i = 0; i < num_vfs; i++) {
+ /* enable spoof checking for all VFs */
+ wx->vfinfo[i].spoofchk_enabled = true;
+ wx->vfinfo[i].link_enable = true;
+ /* untrust all VFs */
+ wx->vfinfo[i].trusted = false;
+ /* set the default xcast mode */
+ wx->vfinfo[i].xcast_mode = WXVF_XCAST_MODE_NONE;
+ }
+
+ if (wx->mac.type == wx_mac_em) {
+ value = WX_CFG_PORT_CTL_NUM_VT_8;
+ } else {
+ if (num_vfs < 32)
+ value = WX_CFG_PORT_CTL_NUM_VT_32;
+ else
+ value = WX_CFG_PORT_CTL_NUM_VT_64;
+ }
+ wr32m(wx, WX_CFG_PORT_CTL,
+ WX_CFG_PORT_CTL_NUM_VT_MASK,
+ value);
+
+ return ret;
+}
+
+static void wx_sriov_reinit(struct wx *wx)
+{
+ rtnl_lock();
+ wx->setup_tc(wx->netdev, netdev_get_num_tc(wx->netdev));
+ rtnl_unlock();
+}
+
+void wx_disable_sriov(struct wx *wx)
+{
+ if (!pci_vfs_assigned(wx->pdev))
+ pci_disable_sriov(wx->pdev);
+ else
+ wx_err(wx, "Unloading driver while VFs are assigned.\n");
+
+ /* clear flags and free allocated data */
+ wx_sriov_clear_data(wx);
+}
+EXPORT_SYMBOL(wx_disable_sriov);
+
+static int wx_pci_sriov_enable(struct pci_dev *dev,
+ int num_vfs)
+{
+ struct wx *wx = pci_get_drvdata(dev);
+ int err = 0, i;
+
+ err = __wx_enable_sriov(wx, num_vfs);
+ if (err)
+ return err;
+
+ wx->num_vfs = num_vfs;
+ for (i = 0; i < wx->num_vfs; i++)
+ wx_vf_configuration(dev, (i | WX_VF_ENABLE));
+
+ /* reset before enabling SRIOV to avoid mailbox issues */
+ wx_sriov_reinit(wx);
+
+ err = pci_enable_sriov(dev, num_vfs);
+ if (err) {
+ wx_err(wx, "Failed to enable PCI sriov: %d\n", err);
+ goto err_out;
+ }
+
+ return num_vfs;
+err_out:
+ wx_sriov_clear_data(wx);
+ return err;
+}
+
+static void wx_pci_sriov_disable(struct pci_dev *dev)
+{
+ struct wx *wx = pci_get_drvdata(dev);
+
+ wx_disable_sriov(wx);
+ wx_sriov_reinit(wx);
+}
+
+int wx_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct wx *wx = pci_get_drvdata(pdev);
+ int err;
+
+ if (!num_vfs) {
+ if (!pci_vfs_assigned(pdev)) {
+ wx_pci_sriov_disable(pdev);
+ return 0;
+ }
+
+ wx_err(wx, "can't free VFs because some are assigned to VMs.\n");
+ return -EBUSY;
+ }
+
+ err = wx_pci_sriov_enable(pdev, num_vfs);
+ if (err)
+ return err;
+
+ return num_vfs;
+}
+EXPORT_SYMBOL(wx_pci_sriov_configure);
+
+static int wx_set_vf_mac(struct wx *wx, u16 vf, const u8 *mac_addr)
+{
+ u8 hw_addr[ETH_ALEN];
+ int ret = 0;
+
+ ether_addr_copy(hw_addr, mac_addr);
+ wx_del_mac_filter(wx, wx->vfinfo[vf].vf_mac_addr, vf);
+ ret = wx_add_mac_filter(wx, hw_addr, vf);
+ if (ret >= 0)
+ ether_addr_copy(wx->vfinfo[vf].vf_mac_addr, mac_addr);
+ else
+ eth_zero_addr(wx->vfinfo[vf].vf_mac_addr);
+
+ return ret;
+}
+
+static void wx_set_vmolr(struct wx *wx, u16 vf, bool aupe)
+{
+ u32 vmolr = rd32(wx, WX_PSR_VM_L2CTL(vf));
+
+ vmolr |= WX_PSR_VM_L2CTL_BAM;
+ if (aupe)
+ vmolr |= WX_PSR_VM_L2CTL_AUPE;
+ else
+ vmolr &= ~WX_PSR_VM_L2CTL_AUPE;
+ wr32(wx, WX_PSR_VM_L2CTL(vf), vmolr);
+}
+
+static void wx_set_vmvir(struct wx *wx, u16 vid, u16 qos, u16 vf)
+{
+ u32 vmvir = vid | (qos << VLAN_PRIO_SHIFT) |
+ WX_TDM_VLAN_INS_VLANA_DEFAULT;
+
+ wr32(wx, WX_TDM_VLAN_INS(vf), vmvir);
+}
+
+static int wx_set_vf_vlan(struct wx *wx, int add, int vid, u16 vf)
+{
+ if (!vid && !add)
+ return 0;
+
+ return wx_set_vfta(wx, vid, vf, (bool)add);
+}
+
+static void wx_set_vlan_anti_spoofing(struct wx *wx, bool enable, int vf)
+{
+ u32 index = WX_VF_REG_OFFSET(vf), vf_bit = WX_VF_IND_SHIFT(vf);
+ u32 pfvfspoof;
+
+ pfvfspoof = rd32(wx, WX_TDM_VLAN_AS(index));
+ if (enable)
+ pfvfspoof |= BIT(vf_bit);
+ else
+ pfvfspoof &= ~BIT(vf_bit);
+ wr32(wx, WX_TDM_VLAN_AS(index), pfvfspoof);
+}
+
+static void wx_write_qde(struct wx *wx, u32 vf, u32 qde)
+{
+ struct wx_ring_feature *vmdq = &wx->ring_feature[RING_F_VMDQ];
+ u32 q_per_pool = __ALIGN_MASK(1, ~vmdq->mask);
+ u32 reg = 0, n = vf * q_per_pool / 32;
+ u32 i;
+
+ reg = rd32(wx, WX_RDM_PF_QDE(n));
+ for (i = (vf * q_per_pool - n * 32);
+ i < ((vf + 1) * q_per_pool - n * 32);
+ i++) {
+ if (qde == 1)
+ reg |= qde << i;
+ else
+ reg &= qde << i;
+ }
+
+ wr32(wx, WX_RDM_PF_QDE(n), reg);
+}
+
+static void wx_clear_vmvir(struct wx *wx, u32 vf)
+{
+ wr32(wx, WX_TDM_VLAN_INS(vf), 0);
+}
+
+static void wx_ping_vf(struct wx *wx, int vf)
+{
+ u32 ping = WX_PF_CONTROL_MSG;
+
+ if (wx->vfinfo[vf].clear_to_send)
+ ping |= WX_VT_MSGTYPE_CTS;
+ wx_write_mbx_pf(wx, &ping, 1, vf);
+}
+
+static void wx_set_vf_rx_tx(struct wx *wx, int vf)
+{
+ u32 index = WX_VF_REG_OFFSET(vf), vf_bit = WX_VF_IND_SHIFT(vf);
+ u32 reg_cur_tx, reg_cur_rx, reg_req_tx, reg_req_rx;
+
+ reg_cur_tx = rd32(wx, WX_TDM_VF_TE(index));
+ reg_cur_rx = rd32(wx, WX_RDM_VF_RE(index));
+
+ if (wx->vfinfo[vf].link_enable) {
+ reg_req_tx = reg_cur_tx | BIT(vf_bit);
+ reg_req_rx = reg_cur_rx | BIT(vf_bit);
+ /* Enable particular VF */
+ if (reg_cur_tx != reg_req_tx)
+ wr32(wx, WX_TDM_VF_TE(index), reg_req_tx);
+ if (reg_cur_rx != reg_req_rx)
+ wr32(wx, WX_RDM_VF_RE(index), reg_req_rx);
+ } else {
+ reg_req_tx = BIT(vf_bit);
+ reg_req_rx = BIT(vf_bit);
+ /* Disable particular VF */
+ if (reg_cur_tx & reg_req_tx)
+ wr32(wx, WX_TDM_VFTE_CLR(index), reg_req_tx);
+ if (reg_cur_rx & reg_req_rx)
+ wr32(wx, WX_RDM_VFRE_CLR(index), reg_req_rx);
+ }
+}
+
+static int wx_get_vf_queues(struct wx *wx, u32 *msgbuf, u32 vf)
+{
+ struct wx_ring_feature *vmdq = &wx->ring_feature[RING_F_VMDQ];
+ unsigned int default_tc = 0;
+
+ msgbuf[WX_VF_TX_QUEUES] = __ALIGN_MASK(1, ~vmdq->mask);
+ msgbuf[WX_VF_RX_QUEUES] = __ALIGN_MASK(1, ~vmdq->mask);
+
+ if (wx->vfinfo[vf].pf_vlan || wx->vfinfo[vf].pf_qos)
+ msgbuf[WX_VF_TRANS_VLAN] = 1;
+ else
+ msgbuf[WX_VF_TRANS_VLAN] = 0;
+
+ /* notify VF of default queue */
+ msgbuf[WX_VF_DEF_QUEUE] = default_tc;
+
+ return 0;
+}
+
+static void wx_vf_reset_event(struct wx *wx, u16 vf)
+{
+ struct vf_data_storage *vfinfo = &wx->vfinfo[vf];
+ u8 num_tcs = netdev_get_num_tc(wx->netdev);
+
+ /* add PF assigned VLAN */
+ wx_set_vf_vlan(wx, true, vfinfo->pf_vlan, vf);
+
+ /* reset offloads to defaults */
+ wx_set_vmolr(wx, vf, !vfinfo->pf_vlan);
+
+ /* set outgoing tags for VFs */
+ if (!vfinfo->pf_vlan && !vfinfo->pf_qos && !num_tcs) {
+ wx_clear_vmvir(wx, vf);
+ } else {
+ if (vfinfo->pf_qos || !num_tcs)
+ wx_set_vmvir(wx, vfinfo->pf_vlan,
+ vfinfo->pf_qos, vf);
+ else
+ wx_set_vmvir(wx, vfinfo->pf_vlan,
+ wx->default_up, vf);
+ }
+
+ /* reset multicast table array for vf */
+ wx->vfinfo[vf].num_vf_mc_hashes = 0;
+
+ /* Flush and reset the mta with the new values */
+ wx_set_rx_mode(wx->netdev);
+
+ wx_del_mac_filter(wx, wx->vfinfo[vf].vf_mac_addr, vf);
+ /* reset VF api back to unknown */
+ wx->vfinfo[vf].vf_api = wx_mbox_api_null;
+}
+
+static void wx_vf_reset_msg(struct wx *wx, u16 vf)
+{
+ const u8 *vf_mac = wx->vfinfo[vf].vf_mac_addr;
+ struct net_device *dev = wx->netdev;
+ u32 msgbuf[5] = {0, 0, 0, 0, 0};
+ u8 *addr = (u8 *)(&msgbuf[1]);
+ u32 reg = 0, index, vf_bit;
+ int pf_max_frame;
+
+ /* reset the filters for the device */
+ wx_vf_reset_event(wx, vf);
+
+ /* set vf mac address */
+ if (!is_zero_ether_addr(vf_mac))
+ wx_set_vf_mac(wx, vf, vf_mac);
+
+ index = WX_VF_REG_OFFSET(vf);
+ vf_bit = WX_VF_IND_SHIFT(vf);
+
+ /* force drop enable for all VF Rx queues */
+ wx_write_qde(wx, vf, 1);
+
+ /* set transmit and receive for vf */
+ wx_set_vf_rx_tx(wx, vf);
+
+ pf_max_frame = dev->mtu + ETH_HLEN;
+
+ if (pf_max_frame > ETH_FRAME_LEN)
+ reg = BIT(vf_bit);
+ wr32(wx, WX_RDM_VFRE_CLR(index), reg);
+
+ /* enable VF mailbox for further messages */
+ wx->vfinfo[vf].clear_to_send = true;
+
+ /* reply to reset with ack and vf mac address */
+ msgbuf[0] = WX_VF_RESET;
+ if (!is_zero_ether_addr(vf_mac)) {
+ msgbuf[0] |= WX_VT_MSGTYPE_ACK;
+ memcpy(addr, vf_mac, ETH_ALEN);
+ } else {
+ msgbuf[0] |= WX_VT_MSGTYPE_NACK;
+ wx_err(wx, "VF %d has no MAC address assigned", vf);
+ }
+
+ msgbuf[3] = wx->mac.mc_filter_type;
+ wx_write_mbx_pf(wx, msgbuf, WX_VF_PERMADDR_MSG_LEN, vf);
+}
+
+static int wx_set_vf_mac_addr(struct wx *wx, u32 *msgbuf, u16 vf)
+{
+ const u8 *new_mac = ((u8 *)(&msgbuf[1]));
+ int ret;
+
+ if (!is_valid_ether_addr(new_mac)) {
+ wx_err(wx, "VF %d attempted to set invalid mac\n", vf);
+ return -EINVAL;
+ }
+
+ if (wx->vfinfo[vf].pf_set_mac &&
+ memcmp(wx->vfinfo[vf].vf_mac_addr, new_mac, ETH_ALEN)) {
+ wx_err(wx,
+ "VF %d attempt to set a MAC but it already had a MAC.",
+ vf);
+ return -EBUSY;
+ }
+
+ ret = wx_set_vf_mac(wx, vf, new_mac);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static void wx_set_vf_multicasts(struct wx *wx, u32 *msgbuf, u32 vf)
+{
+ struct vf_data_storage *vfinfo = &wx->vfinfo[vf];
+ u16 entries = (msgbuf[0] & WX_VT_MSGINFO_MASK)
+ >> WX_VT_MSGINFO_SHIFT;
+ u32 vmolr = rd32(wx, WX_PSR_VM_L2CTL(vf));
+ u32 vector_bit, vector_reg, mta_reg, i;
+ u16 *hash_list = (u16 *)&msgbuf[1];
+
+ /* only so many hash values supported */
+ entries = min_t(u16, entries, WX_MAX_VF_MC_ENTRIES);
+ vfinfo->num_vf_mc_hashes = entries;
+
+ for (i = 0; i < entries; i++)
+ vfinfo->vf_mc_hashes[i] = hash_list[i];
+
+ for (i = 0; i < vfinfo->num_vf_mc_hashes; i++) {
+ vector_reg = WX_PSR_MC_TBL_REG(vfinfo->vf_mc_hashes[i]);
+ vector_bit = WX_PSR_MC_TBL_BIT(vfinfo->vf_mc_hashes[i]);
+ mta_reg = wx->mac.mta_shadow[vector_reg];
+ mta_reg |= BIT(vector_bit);
+ wx->mac.mta_shadow[vector_reg] = mta_reg;
+ wr32(wx, WX_PSR_MC_TBL(vector_reg), mta_reg);
+ }
+ vmolr |= WX_PSR_VM_L2CTL_ROMPE;
+ wr32(wx, WX_PSR_VM_L2CTL(vf), vmolr);
+}
+
+static void wx_set_vf_lpe(struct wx *wx, u32 max_frame, u32 vf)
+{
+ u32 index, vf_bit, vfre;
+ u32 max_frs, reg_val;
+
+ /* determine VF receive enable location */
+ index = WX_VF_REG_OFFSET(vf);
+ vf_bit = WX_VF_IND_SHIFT(vf);
+
+ vfre = rd32(wx, WX_RDM_VF_RE(index));
+ vfre |= BIT(vf_bit);
+ wr32(wx, WX_RDM_VF_RE(index), vfre);
+
+ /* pull current max frame size from hardware */
+ max_frs = DIV_ROUND_UP(max_frame, 1024);
+ reg_val = rd32(wx, WX_MAC_WDG_TIMEOUT) & WX_MAC_WDG_TIMEOUT_WTO_MASK;
+ if (max_frs > (reg_val + WX_MAC_WDG_TIMEOUT_WTO_DELTA))
+ wr32(wx, WX_MAC_WDG_TIMEOUT,
+ max_frs - WX_MAC_WDG_TIMEOUT_WTO_DELTA);
+}
+
+static int wx_find_vlvf_entry(struct wx *wx, u32 vlan)
+{
+ int regindex;
+ u32 vlvf;
+
+ /* short cut the special case */
+ if (vlan == 0)
+ return 0;
+
+ /* Search for the vlan id in the VLVF entries */
+ for (regindex = 1; regindex < WX_PSR_VLAN_SWC_ENTRIES; regindex++) {
+ wr32(wx, WX_PSR_VLAN_SWC_IDX, regindex);
+ vlvf = rd32(wx, WX_PSR_VLAN_SWC);
+ if ((vlvf & VLAN_VID_MASK) == vlan)
+ break;
+ }
+
+ /* Return a negative value if not found */
+ if (regindex >= WX_PSR_VLAN_SWC_ENTRIES)
+ regindex = -EINVAL;
+
+ return regindex;
+}
+
+static int wx_set_vf_macvlan(struct wx *wx,
+ u16 vf, int index, unsigned char *mac_addr)
+{
+ struct vf_macvlans *entry;
+ struct list_head *pos;
+ int retval = 0;
+
+ if (index <= 1) {
+ list_for_each(pos, &wx->vf_mvs.mvlist) {
+ entry = list_entry(pos, struct vf_macvlans, mvlist);
+ if (entry->vf == vf) {
+ entry->vf = -1;
+ entry->free = true;
+ entry->is_macvlan = false;
+ wx_del_mac_filter(wx, entry->vf_macvlan, vf);
+ }
+ }
+ }
+
+ if (!index)
+ return 0;
+
+ entry = NULL;
+ list_for_each(pos, &wx->vf_mvs.mvlist) {
+ entry = list_entry(pos, struct vf_macvlans, mvlist);
+ if (entry->free)
+ break;
+ }
+
+ if (!entry || !entry->free)
+ return -ENOSPC;
+
+ retval = wx_add_mac_filter(wx, mac_addr, vf);
+ if (retval >= 0) {
+ entry->free = false;
+ entry->is_macvlan = true;
+ entry->vf = vf;
+ memcpy(entry->vf_macvlan, mac_addr, ETH_ALEN);
+ }
+
+ return retval;
+}
+
+static int wx_set_vf_vlan_msg(struct wx *wx, u32 *msgbuf, u16 vf)
+{
+ int add = (msgbuf[0] & WX_VT_MSGINFO_MASK) >> WX_VT_MSGINFO_SHIFT;
+ int vid = (msgbuf[1] & WX_PSR_VLAN_SWC_VLANID_MASK);
+ int ret;
+
+ if (add)
+ wx->vfinfo[vf].vlan_count++;
+ else if (wx->vfinfo[vf].vlan_count)
+ wx->vfinfo[vf].vlan_count--;
+
+ /* in case of promiscuous mode any VLAN filter set for a VF must
+ * also have the PF pool added to it.
+ */
+ if (add && wx->netdev->flags & IFF_PROMISC)
+ wx_set_vf_vlan(wx, add, vid, VMDQ_P(0));
+
+ ret = wx_set_vf_vlan(wx, add, vid, vf);
+ if (!ret && wx->vfinfo[vf].spoofchk_enabled)
+ wx_set_vlan_anti_spoofing(wx, true, vf);
+
+ /* Go through all the checks to see if the VLAN filter should
+ * be wiped completely.
+ */
+ if (!add && wx->netdev->flags & IFF_PROMISC) {
+ u32 bits = 0, vlvf;
+ int reg_ndx;
+
+ reg_ndx = wx_find_vlvf_entry(wx, vid);
+ if (reg_ndx < 0)
+ return -ENOSPC;
+ wr32(wx, WX_PSR_VLAN_SWC_IDX, reg_ndx);
+ vlvf = rd32(wx, WX_PSR_VLAN_SWC);
+ /* See if any other pools are set for this VLAN filter
+ * entry other than the PF.
+ */
+ if (VMDQ_P(0) < 32) {
+ bits = rd32(wx, WX_PSR_VLAN_SWC_VM_L);
+ bits &= ~BIT(VMDQ_P(0));
+ if (wx->mac.type != wx_mac_em)
+ bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_H);
+ } else {
+ if (wx->mac.type != wx_mac_em)
+ bits = rd32(wx, WX_PSR_VLAN_SWC_VM_H);
+ bits &= ~BIT(VMDQ_P(0) % 32);
+ bits |= rd32(wx, WX_PSR_VLAN_SWC_VM_L);
+ }
+ /* If the filter was removed then ensure PF pool bit
+ * is cleared if the PF only added itself to the pool
+ * because the PF is in promiscuous mode.
+ */
+ if ((vlvf & VLAN_VID_MASK) == vid && !bits)
+ wx_set_vf_vlan(wx, add, vid, VMDQ_P(0));
+ }
+
+ return 0;
+}
+
+static int wx_set_vf_macvlan_msg(struct wx *wx, u32 *msgbuf, u16 vf)
+{
+ int index = (msgbuf[0] & WX_VT_MSGINFO_MASK) >>
+ WX_VT_MSGINFO_SHIFT;
+ u8 *new_mac = ((u8 *)(&msgbuf[1]));
+ int err;
+
+ if (wx->vfinfo[vf].pf_set_mac && index > 0) {
+ wx_err(wx, "VF %d request MACVLAN filter but is denied\n", vf);
+ return -EINVAL;
+ }
+
+ /* A non-zero index indicates the VF is setting a filter */
+ if (index) {
+ if (!is_valid_ether_addr(new_mac)) {
+ wx_err(wx, "VF %d attempted to set invalid mac\n", vf);
+ return -EINVAL;
+ }
+ /* If the VF is allowed to set MAC filters then turn off
+ * anti-spoofing to avoid false positives.
+ */
+ if (wx->vfinfo[vf].spoofchk_enabled)
+ wx_set_vf_spoofchk(wx->netdev, vf, false);
+ }
+
+ err = wx_set_vf_macvlan(wx, vf, index, new_mac);
+ if (err == -ENOSPC)
+ wx_err(wx,
+ "VF %d requested a MACVLAN filter but there is no space\n",
+ vf);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int wx_negotiate_vf_api(struct wx *wx, u32 *msgbuf, u32 vf)
+{
+ int api = msgbuf[1];
+
+ switch (api) {
+ case wx_mbox_api_13:
+ wx->vfinfo[vf].vf_api = api;
+ return 0;
+ default:
+ wx_err(wx, "VF %d requested invalid api version %u\n", vf, api);
+ return -EINVAL;
+ }
+}
+
+static int wx_get_vf_link_state(struct wx *wx, u32 *msgbuf, u32 vf)
+{
+ msgbuf[1] = wx->vfinfo[vf].link_enable;
+
+ return 0;
+}
+
+static int wx_get_fw_version(struct wx *wx, u32 *msgbuf, u32 vf)
+{
+ unsigned long fw_version = 0UL;
+ int ret = 0;
+
+ ret = kstrtoul(wx->eeprom_id, 16, &fw_version);
+ if (ret)
+ return -EOPNOTSUPP;
+ msgbuf[1] = fw_version;
+
+ return 0;
+}
+
+static int wx_update_vf_xcast_mode(struct wx *wx, u32 *msgbuf, u32 vf)
+{
+ int xcast_mode = msgbuf[1];
+ u32 vmolr, disable, enable;
+
+ if (wx->vfinfo[vf].xcast_mode == xcast_mode)
+ return 0;
+
+ switch (xcast_mode) {
+ case WXVF_XCAST_MODE_NONE:
+ disable = WX_PSR_VM_L2CTL_BAM | WX_PSR_VM_L2CTL_ROMPE |
+ WX_PSR_VM_L2CTL_MPE | WX_PSR_VM_L2CTL_UPE |
+ WX_PSR_VM_L2CTL_VPE;
+ enable = 0;
+ break;
+ case WXVF_XCAST_MODE_MULTI:
+ disable = WX_PSR_VM_L2CTL_MPE | WX_PSR_VM_L2CTL_UPE |
+ WX_PSR_VM_L2CTL_VPE;
+ enable = WX_PSR_VM_L2CTL_BAM | WX_PSR_VM_L2CTL_ROMPE;
+ break;
+ case WXVF_XCAST_MODE_ALLMULTI:
+ disable = WX_PSR_VM_L2CTL_UPE | WX_PSR_VM_L2CTL_VPE;
+ enable = WX_PSR_VM_L2CTL_BAM | WX_PSR_VM_L2CTL_ROMPE |
+ WX_PSR_VM_L2CTL_MPE;
+ break;
+ case WXVF_XCAST_MODE_PROMISC:
+ disable = 0;
+ enable = WX_PSR_VM_L2CTL_BAM | WX_PSR_VM_L2CTL_ROMPE |
+ WX_PSR_VM_L2CTL_MPE | WX_PSR_VM_L2CTL_UPE |
+ WX_PSR_VM_L2CTL_VPE;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ vmolr = rd32(wx, WX_PSR_VM_L2CTL(vf));
+ vmolr &= ~disable;
+ vmolr |= enable;
+ wr32(wx, WX_PSR_VM_L2CTL(vf), vmolr);
+
+ wx->vfinfo[vf].xcast_mode = xcast_mode;
+ msgbuf[1] = xcast_mode;
+
+ return 0;
+}
+
+static void wx_rcv_msg_from_vf(struct wx *wx, u16 vf)
+{
+ u16 mbx_size = WX_VXMAILBOX_SIZE;
+ u32 msgbuf[WX_VXMAILBOX_SIZE];
+ int retval;
+
+ retval = wx_read_mbx_pf(wx, msgbuf, mbx_size, vf);
+ if (retval) {
+ wx_err(wx, "Error receiving message from VF\n");
+ return;
+ }
+
+ /* this is a message we already processed, do nothing */
+ if (msgbuf[0] & (WX_VT_MSGTYPE_ACK | WX_VT_MSGTYPE_NACK))
+ return;
+
+ if (msgbuf[0] == WX_VF_RESET) {
+ wx_vf_reset_msg(wx, vf);
+ return;
+ }
+
+ /* Until the VF completes a virtual function reset it should not be
+ * allowed to start any configuration.
+ */
+ if (!wx->vfinfo[vf].clear_to_send) {
+ msgbuf[0] |= WX_VT_MSGTYPE_NACK;
+ wx_write_mbx_pf(wx, msgbuf, 1, vf);
+ return;
+ }
+
+ switch ((msgbuf[0] & U16_MAX)) {
+ case WX_VF_SET_MAC_ADDR:
+ retval = wx_set_vf_mac_addr(wx, msgbuf, vf);
+ break;
+ case WX_VF_SET_MULTICAST:
+ wx_set_vf_multicasts(wx, msgbuf, vf);
+ retval = 0;
+ break;
+ case WX_VF_SET_VLAN:
+ retval = wx_set_vf_vlan_msg(wx, msgbuf, vf);
+ break;
+ case WX_VF_SET_LPE:
+ wx_set_vf_lpe(wx, msgbuf[1], vf);
+ retval = 0;
+ break;
+ case WX_VF_SET_MACVLAN:
+ retval = wx_set_vf_macvlan_msg(wx, msgbuf, vf);
+ break;
+ case WX_VF_API_NEGOTIATE:
+ retval = wx_negotiate_vf_api(wx, msgbuf, vf);
+ break;
+ case WX_VF_GET_QUEUES:
+ retval = wx_get_vf_queues(wx, msgbuf, vf);
+ break;
+ case WX_VF_GET_LINK_STATE:
+ retval = wx_get_vf_link_state(wx, msgbuf, vf);
+ break;
+ case WX_VF_GET_FW_VERSION:
+ retval = wx_get_fw_version(wx, msgbuf, vf);
+ break;
+ case WX_VF_UPDATE_XCAST_MODE:
+ retval = wx_update_vf_xcast_mode(wx, msgbuf, vf);
+ break;
+ case WX_VF_BACKUP:
+ break;
+ default:
+ wx_err(wx, "Unhandled Msg %8.8x\n", msgbuf[0]);
+ break;
+ }
+
+ /* notify the VF of the results of what it sent us */
+ if (retval)
+ msgbuf[0] |= WX_VT_MSGTYPE_NACK;
+ else
+ msgbuf[0] |= WX_VT_MSGTYPE_ACK;
+
+ msgbuf[0] |= WX_VT_MSGTYPE_CTS;
+
+ wx_write_mbx_pf(wx, msgbuf, mbx_size, vf);
+}
+
+static void wx_rcv_ack_from_vf(struct wx *wx, u16 vf)
+{
+ u32 msg = WX_VT_MSGTYPE_NACK;
+
+ /* if the device isn't clear to send, it shouldn't be reading either */
+ if (!wx->vfinfo[vf].clear_to_send)
+ wx_write_mbx_pf(wx, &msg, 1, vf);
+}
+
+void wx_msg_task(struct wx *wx)
+{
+ u16 vf;
+
+ for (vf = 0; vf < wx->num_vfs; vf++) {
+ /* process any reset requests */
+ if (!wx_check_for_rst_pf(wx, vf))
+ wx_vf_reset_event(wx, vf);
+
+ /* process any messages pending */
+ if (!wx_check_for_msg_pf(wx, vf))
+ wx_rcv_msg_from_vf(wx, vf);
+
+ /* process any acks */
+ if (!wx_check_for_ack_pf(wx, vf))
+ wx_rcv_ack_from_vf(wx, vf);
+ }
+}
+EXPORT_SYMBOL(wx_msg_task);
+
+void wx_disable_vf_rx_tx(struct wx *wx)
+{
+ wr32(wx, WX_TDM_VFTE_CLR(0), U32_MAX);
+ wr32(wx, WX_RDM_VFRE_CLR(0), U32_MAX);
+ if (wx->mac.type != wx_mac_em) {
+ wr32(wx, WX_TDM_VFTE_CLR(1), U32_MAX);
+ wr32(wx, WX_RDM_VFRE_CLR(1), U32_MAX);
+ }
+}
+EXPORT_SYMBOL(wx_disable_vf_rx_tx);
+
+void wx_ping_all_vfs_with_link_status(struct wx *wx, bool link_up)
+{
+ u32 msgbuf[2] = {0, 0};
+ u16 i;
+
+ if (!wx->num_vfs)
+ return;
+ msgbuf[0] = WX_PF_NOFITY_VF_LINK_STATUS | WX_PF_CONTROL_MSG;
+ if (link_up)
+ msgbuf[1] = FIELD_PREP(GENMASK(31, 1), wx->speed) | link_up;
+ if (wx->notify_down)
+ msgbuf[1] |= WX_PF_NOFITY_VF_NET_NOT_RUNNING;
+ for (i = 0; i < wx->num_vfs; i++) {
+ if (wx->vfinfo[i].clear_to_send)
+ msgbuf[0] |= WX_VT_MSGTYPE_CTS;
+ wx_write_mbx_pf(wx, msgbuf, 2, i);
+ }
+}
+EXPORT_SYMBOL(wx_ping_all_vfs_with_link_status);
+
+static void wx_set_vf_link_state(struct wx *wx, int vf, int state)
+{
+ wx->vfinfo[vf].link_state = state;
+ switch (state) {
+ case IFLA_VF_LINK_STATE_AUTO:
+ if (netif_running(wx->netdev))
+ wx->vfinfo[vf].link_enable = true;
+ else
+ wx->vfinfo[vf].link_enable = false;
+ break;
+ case IFLA_VF_LINK_STATE_ENABLE:
+ wx->vfinfo[vf].link_enable = true;
+ break;
+ case IFLA_VF_LINK_STATE_DISABLE:
+ wx->vfinfo[vf].link_enable = false;
+ break;
+ }
+ /* restart the VF */
+ wx->vfinfo[vf].clear_to_send = false;
+ wx_ping_vf(wx, vf);
+
+ wx_set_vf_rx_tx(wx, vf);
+}
+
+void wx_set_all_vfs(struct wx *wx)
+{
+ int i;
+
+ for (i = 0; i < wx->num_vfs; i++)
+ wx_set_vf_link_state(wx, i, wx->vfinfo[i].link_state);
+}
+EXPORT_SYMBOL(wx_set_all_vfs);
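
The reply convention at the tail of wx_rcv_msg_from_vf() — NACK the
message on error, ACK it otherwise, and always set clear-to-send — can
be modelled in isolation. A minimal stand-alone sketch; the _MODEL
names and bit positions are illustrative, not the driver's real
definitions:

    #include <stdio.h>

    #define MSGTYPE_ACK_MODEL  (1u << 31)
    #define MSGTYPE_NACK_MODEL (1u << 30)
    #define MSGTYPE_CTS_MODEL  (1u << 29)

    /* mirror the reply flagging done after the opcode switch */
    static unsigned int reply_model(unsigned int msg, int retval)
    {
        msg |= retval ? MSGTYPE_NACK_MODEL : MSGTYPE_ACK_MODEL;
        msg |= MSGTYPE_CTS_MODEL; /* PF is clear-to-send in every reply */
        return msg;
    }

    int main(void)
    {
        unsigned int msg = 0x0006; /* opcode lives in the low 16 bits */

        printf("ok reply:  0x%08x\n", reply_model(msg, 0));
        printf("err reply: 0x%08x\n", reply_model(msg, -22));
        return 0;
    }
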
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.h b/drivers/net/ethernet/wangxun/libwx/wx_sriov.h
new file mode 100644
index 000000000000..8a3a47bb5815
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2015 - 2025 Beijing WangXun Technology Co., Ltd. */
+
+#ifndef _WX_SRIOV_H_
+#define _WX_SRIOV_H_
+
+#define WX_VF_ENABLE_CHECK(_m) FIELD_GET(BIT(31), (_m))
+#define WX_VF_NUM_GET(_m) FIELD_GET(GENMASK(5, 0), (_m))
+#define WX_VF_ENABLE BIT(31)
+
+void wx_disable_sriov(struct wx *wx);
+int wx_pci_sriov_configure(struct pci_dev *pdev, int num_vfs);
+void wx_msg_task(struct wx *wx);
+void wx_disable_vf_rx_tx(struct wx *wx);
+void wx_ping_all_vfs_with_link_status(struct wx *wx, bool link_up);
+void wx_set_all_vfs(struct wx *wx);
+
+#endif /* _WX_SRIOV_H_ */
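
WX_VF_REG_OFFSET() and WX_VF_IND_SHIFT() in wx_type.h below reduce to
vf / 32 and vf % 32: they pick the 32-bit enable register and the bit
within it for a given VF. A stand-alone sketch of that arithmetic,
assuming only the GENMASK field positions shown in this series:

    #include <stdio.h>

    int main(void)
    {
        for (unsigned int vf = 0; vf < 64; vf += 31) {
            unsigned int index = vf >> 5;   /* FIELD_GET(GENMASK(15, 5)) */
            unsigned int bit   = vf & 0x1f; /* FIELD_GET(GENMASK(4, 0)) */

            printf("vf %2u -> register %u, bit %u\n", vf, index, bit);
        }
        return 0;
    }
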
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 4c545b2aa997..8cfed5d4ebf7 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -20,8 +20,13 @@
/* MSI-X capability fields masks */
#define WX_PCIE_MSIX_TBL_SZ_MASK 0x7FF
#define WX_PCI_LINK_STATUS 0xB2
+#define WX_MAX_PF_MACVLANS 15
+#define WX_MAX_VF_MC_ENTRIES 30
/**************** Global Registers ****************************/
+#define WX_VF_REG_OFFSET(_v) FIELD_GET(GENMASK(15, 5), (_v))
+#define WX_VF_IND_SHIFT(_v) FIELD_GET(GENMASK(4, 0), (_v))
+
/* chip control Registers */
#define WX_MIS_PWR 0x10000
#define WX_MIS_RST 0x1000C
@@ -76,6 +81,9 @@
#define WX_MAC_LXONOFFRXC 0x11E0C
/*********************** Receive DMA registers **************************/
+#define WX_RDM_VF_RE(_i) (0x12004 + ((_i) * 4))
+#define WX_RDM_PF_QDE(_i) (0x12080 + ((_i) * 4))
+#define WX_RDM_VFRE_CLR(_i) (0x120A0 + ((_i) * 4))
#define WX_RDM_DRP_PKT 0x12500
#define WX_RDM_PKT_CNT 0x12504
#define WX_RDM_BYTE_CNT_LSB 0x12508
@@ -84,12 +92,17 @@
/************************* Port Registers ************************************/
/* port cfg Registers */
#define WX_CFG_PORT_CTL 0x14400
+#define WX_CFG_PORT_CTL_PFRSTD BIT(14)
#define WX_CFG_PORT_CTL_DRV_LOAD BIT(3)
#define WX_CFG_PORT_CTL_QINQ BIT(2)
#define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/
#define WX_CFG_TAG_TPID(_i) (0x14430 + ((_i) * 4))
#define WX_CFG_PORT_CTL_NUM_VT_MASK GENMASK(13, 12) /* number of TVs */
+#define WX_CFG_PORT_CTL_NUM_VT_NONE 0
+#define WX_CFG_PORT_CTL_NUM_VT_8 FIELD_PREP(GENMASK(13, 12), 1)
+#define WX_CFG_PORT_CTL_NUM_VT_32 FIELD_PREP(GENMASK(13, 12), 2)
+#define WX_CFG_PORT_CTL_NUM_VT_64 FIELD_PREP(GENMASK(13, 12), 3)
/* GPIO Registers */
#define WX_GPIO_DR 0x14800
@@ -112,6 +125,11 @@
/*********************** Transmit DMA registers **************************/
/* transmit global control */
#define WX_TDM_CTL 0x18000
+#define WX_TDM_VF_TE(_i) (0x18004 + ((_i) * 4))
+#define WX_TDM_MAC_AS(_i) (0x18060 + ((_i) * 4))
+#define WX_TDM_VLAN_AS(_i) (0x18070 + ((_i) * 4))
+#define WX_TDM_VFTE_CLR(_i) (0x180A0 + ((_i) * 4))
+
/* TDM CTL BIT */
#define WX_TDM_CTL_TE BIT(0) /* Transmit Enable */
#define WX_TDM_PB_THRE(_i) (0x18020 + ((_i) * 4))
@@ -165,6 +183,7 @@
/******************************* PSR Registers *******************************/
/* psr control */
#define WX_PSR_CTL 0x15000
+#define WX_PSR_VM_CTL 0x151B0
/* Header split receive */
#define WX_PSR_CTL_SW_EN BIT(18)
#define WX_PSR_CTL_RSC_ACK BIT(17)
@@ -201,12 +220,17 @@
#define WX_PSR_1588_CTL_VALID BIT(0)
/* mcast/ucast overflow tbl */
#define WX_PSR_MC_TBL(_i) (0x15200 + ((_i) * 4))
+#define WX_PSR_MC_TBL_REG(_i) FIELD_GET(GENMASK(11, 5), (_i))
+#define WX_PSR_MC_TBL_BIT(_i) FIELD_GET(GENMASK(4, 0), (_i))
#define WX_PSR_UC_TBL(_i) (0x15400 + ((_i) * 4))
+#define WX_PSR_VM_CTL_REPLEN BIT(30) /* replication enabled */
+#define WX_PSR_VM_CTL_POOL_MASK GENMASK(12, 7)
/* VM L2 control */
#define WX_PSR_VM_L2CTL(_i) (0x15600 + ((_i) * 4))
#define WX_PSR_VM_L2CTL_UPE BIT(4) /* unicast promiscuous */
#define WX_PSR_VM_L2CTL_VACC BIT(6) /* accept nomatched vlan */
+#define WX_PSR_VM_L2CTL_VPE BIT(7) /* vlan promiscuous mode */
#define WX_PSR_VM_L2CTL_AUPE BIT(8) /* accept untagged packets */
#define WX_PSR_VM_L2CTL_ROMPE BIT(9) /* accept packets in MTA tbl */
#define WX_PSR_VM_L2CTL_ROPE BIT(10) /* accept packets in UC tbl */
@@ -245,10 +269,12 @@
#define WX_PSR_VLAN_SWC 0x16220
#define WX_PSR_VLAN_SWC_VM_L 0x16224
#define WX_PSR_VLAN_SWC_VM_H 0x16228
+#define WX_PSR_VLAN_SWC_VM(_i) (0x16224 + ((_i) * 4))
#define WX_PSR_VLAN_SWC_IDX 0x16230 /* 64 vlan entries */
/* VLAN pool filtering masks */
#define WX_PSR_VLAN_SWC_VIEN BIT(31) /* filter is valid */
#define WX_PSR_VLAN_SWC_ENTRIES 64
+#define WX_PSR_VLAN_SWC_VLANID_MASK GENMASK(11, 0)
/********************************* RSEC **************************************/
/* general rsec */
@@ -259,6 +285,13 @@
#define WX_RSC_ST 0x17004
#define WX_RSC_ST_RSEC_RDY BIT(0)
+/*********************** Transmit DMA registers **************************/
+/* transmit global control */
+#define WX_TDM_ETYPE_AS(_i) (0x18058 + ((_i) * 4))
+#define WX_TDM_VLAN_INS(_i) (0x18100 + ((_i) * 4))
+/* Per VF Port VLAN insertion rules */
+#define WX_TDM_VLAN_INS_VLANA_DEFAULT BIT(30) /* Always use default VLAN */
+
/****************************** TDB ******************************************/
#define WX_TDB_PB_SZ(_i) (0x1CC00 + ((_i) * 4))
#define WX_TXPKT_SIZE_MAX 0xA /* Max Tx Packet size */
@@ -328,6 +361,9 @@
#define WX_MAC_WDG_TIMEOUT 0x1100C
#define WX_MAC_RX_FLOW_CTRL 0x11090
#define WX_MAC_RX_FLOW_CTRL_RFE BIT(0) /* receive fc enable */
+
+#define WX_MAC_WDG_TIMEOUT_WTO_MASK GENMASK(3, 0)
+#define WX_MAC_WDG_TIMEOUT_WTO_DELTA 2
/* MDIO Registers */
#define WX_MSCA 0x11200
#define WX_MSCA_RA(v) FIELD_PREP(U16_MAX, v)
@@ -417,6 +453,15 @@ enum WX_MSCA_CMD_value {
/* Number of 80 microseconds we wait for PCI Express master disable */
#define WX_PCI_MASTER_DISABLE_TIMEOUT 80000
+#define WX_RSS_64Q_MASK 0x3F
+#define WX_RSS_8Q_MASK 0x7
+#define WX_RSS_4Q_MASK 0x3
+#define WX_RSS_2Q_MASK 0x1
+#define WX_RSS_DISABLED_MASK 0x0
+
+#define WX_VMDQ_4Q_MASK 0x7C
+#define WX_VMDQ_2Q_MASK 0x7E
+
/****************** Manageablility Host Interface defines ********************/
#define WX_HI_MAX_BLOCK_BYTE_LENGTH 256 /* Num of bytes in range */
#define WX_HI_COMMAND_TIMEOUT 1000 /* Process HI command limit */
@@ -484,7 +529,7 @@ enum WX_MSCA_CMD_value {
#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 128
#define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
-#define VMDQ_P(p) p
+#define VMDQ_P(p) ((p) + wx->ring_feature[RING_F_VMDQ].offset)
/* Supported Rx Buffer Sizes */
#define WX_RXBUFFER_256 256 /* Used for skb receive header */
@@ -778,6 +823,10 @@ struct wx_bus_info {
u16 device;
};
+struct wx_mbx_info {
+ u16 size;
+};
+
struct wx_thermal_sensor_data {
s16 temp;
s16 alarm_thresh;
@@ -1051,6 +1100,7 @@ struct wx_ring_feature {
enum wx_ring_f_enum {
RING_F_NONE = 0,
+ RING_F_VMDQ,
RING_F_RSS,
RING_F_FDIR,
RING_F_ARRAY_SIZE /* must be last in enum set */
@@ -1106,8 +1156,38 @@ enum wx_state {
WX_STATE_NBITS /* must be last */
};
+struct vf_data_storage {
+ struct pci_dev *vfdev;
+ unsigned char vf_mac_addr[ETH_ALEN];
+ bool spoofchk_enabled;
+ bool link_enable;
+ bool trusted;
+ int xcast_mode;
+ unsigned int vf_api;
+ bool clear_to_send;
+ u16 pf_vlan; /* When set, guest VLAN config not allowed. */
+ u16 pf_qos;
+ bool pf_set_mac;
+
+ u16 vf_mc_hashes[WX_MAX_VF_MC_ENTRIES];
+ u16 num_vf_mc_hashes;
+ u16 vlan_count;
+ int link_state;
+};
+
+struct vf_macvlans {
+ struct list_head mvlist;
+ int vf;
+ bool free;
+ bool is_macvlan;
+ u8 vf_macvlan[ETH_ALEN];
+};
+
enum wx_pf_flags {
WX_FLAG_SWFW_RING,
+ WX_FLAG_VMDQ_ENABLED,
+ WX_FLAG_VLAN_PROMISC,
+ WX_FLAG_SRIOV_ENABLED,
WX_FLAG_FDIR_CAPABLE,
WX_FLAG_FDIR_HASH,
WX_FLAG_FDIR_PERFECT,
@@ -1128,6 +1208,7 @@ struct wx {
struct pci_dev *pdev;
struct net_device *netdev;
struct wx_bus_info bus;
+ struct wx_mbx_info mbx;
struct wx_mac_info mac;
enum em_mac_type mac_type;
enum sp_media_type media_type;
@@ -1151,6 +1232,7 @@ struct wx {
u8 swfw_index;
/* PHY stuff */
+ bool notify_down;
unsigned int link;
int speed;
int duplex;
@@ -1182,6 +1264,8 @@ struct wx {
struct wx_ring *tx_ring[64] ____cacheline_aligned_in_smp;
struct wx_ring *rx_ring[64];
struct wx_q_vector *q_vector[64];
+ int num_rx_pools;
+ int num_rx_queues_per_pool;
unsigned int queues_per_pool;
struct msix_entry *msix_q_entries;
@@ -1203,6 +1287,7 @@ struct wx {
u32 wol;
u16 bd_number;
+ bool default_up;
struct wx_hw_stats stats;
u64 tx_busy;
@@ -1211,10 +1296,16 @@ struct wx {
u64 hw_csum_rx_good;
u64 hw_csum_rx_error;
u64 alloc_rx_buff_failed;
+ unsigned int num_vfs;
+ struct vf_data_storage *vfinfo;
+ struct vf_macvlans vf_mvs;
+ struct vf_macvlans *mv_list;
+ unsigned long fwd_bitmask;
u32 atr_sample_rate;
void (*atr)(struct wx_ring *ring, struct wx_tx_buffer *first, u8 ptype);
void (*configure_fdir)(struct wx *wx);
+ int (*setup_tc)(struct net_device *netdev, u8 tc);
void (*do_reset)(struct net_device *netdev);
int (*ptp_setup_sdp)(struct wx *wx);
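
The VMDQ_P() change above is the load-bearing one for SR-IOV: pool
indices used by the PF are now shifted by the VMDq feature offset, so
VMDQ_P(0) names the first PF-owned pool after the VF pools. A minimal
sketch, assuming the offset equals the number of enabled VFs; the
_MODEL names are hypothetical:

    #include <stdio.h>

    struct ring_feature_model { int offset; };

    #define VMDQ_P_MODEL(f, p) ((p) + (f).offset)

    int main(void)
    {
        struct ring_feature_model vmdq = { .offset = 8 }; /* e.g. 8 VFs */

        /* the PF's "pool 0" lands past the eight VF pools */
        printf("PF default pool: %d\n", VMDQ_P_MODEL(vmdq, 0));
        return 0;
    }
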
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index 91b3055a5a9f..9c18203c8ce1 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -15,6 +15,8 @@
#include "../libwx/wx_hw.h"
#include "../libwx/wx_lib.h"
#include "../libwx/wx_ptp.h"
+#include "../libwx/wx_mbx.h"
+#include "../libwx/wx_sriov.h"
#include "ngbe_type.h"
#include "ngbe_mdio.h"
#include "ngbe_hw.h"
@@ -129,6 +131,10 @@ static int ngbe_sw_init(struct wx *wx)
wx->tx_work_limit = NGBE_DEFAULT_TX_WORK;
wx->rx_work_limit = NGBE_DEFAULT_RX_WORK;
+ wx->mbx.size = WX_VXMAILBOX_SIZE;
+ wx->setup_tc = ngbe_setup_tc;
+ set_bit(0, &wx->fwd_bitmask);
+
return 0;
}
@@ -200,12 +206,10 @@ static irqreturn_t ngbe_intr(int __always_unused irq, void *data)
return IRQ_HANDLED;
}
-static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data)
+static irqreturn_t __ngbe_msix_misc(struct wx *wx, u32 eicr)
{
- struct wx *wx = data;
- u32 eicr;
-
- eicr = wx_misc_isb(wx, WX_ISB_MISC);
+ if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
+ wx_msg_task(wx);
if (unlikely(eicr & NGBE_PX_MISC_IC_TIMESYNC))
wx_ptp_check_pps_event(wx);
@@ -217,6 +221,35 @@ static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data)
return IRQ_HANDLED;
}
+static irqreturn_t ngbe_msix_misc(int __always_unused irq, void *data)
+{
+ struct wx *wx = data;
+ u32 eicr;
+
+ eicr = wx_misc_isb(wx, WX_ISB_MISC);
+
+ return __ngbe_msix_misc(wx, eicr);
+}
+
+static irqreturn_t ngbe_misc_and_queue(int __always_unused irq, void *data)
+{
+ struct wx_q_vector *q_vector;
+ struct wx *wx = data;
+ u32 eicr;
+
+ eicr = wx_misc_isb(wx, WX_ISB_MISC);
+ if (!eicr) {
+ /* queue */
+ q_vector = wx->q_vector[0];
+ napi_schedule_irqoff(&q_vector->napi);
+ if (netif_running(wx->netdev))
+ ngbe_irq_enable(wx, true);
+ return IRQ_HANDLED;
+ }
+
+ return __ngbe_msix_misc(wx, eicr);
+}
+
/**
* ngbe_request_msix_irqs - Initialize MSI-X interrupts
* @wx: board private structure
@@ -249,8 +282,16 @@ static int ngbe_request_msix_irqs(struct wx *wx)
}
}
- err = request_irq(wx->msix_entry->vector,
- ngbe_msix_other, 0, netdev->name, wx);
+ /* Due to the hardware design, when num_vfs < 7 the PF can use vector 0
+ * for misc interrupts and vector 1 for the queue. But when num_vfs == 7,
+ * vector[1] is assigned to VF6, so misc and queue must share interrupt
+ * vector[0].
+ */
+ if (wx->num_vfs == 7)
+ err = request_irq(wx->msix_entry->vector,
+ ngbe_misc_and_queue, 0, netdev->name, wx);
+ else
+ err = request_irq(wx->msix_entry->vector,
+ ngbe_msix_misc, 0, netdev->name, wx);
if (err) {
wx_err(wx, "request_irq for msix_other failed: %d\n", err);
@@ -302,6 +343,22 @@ static void ngbe_disable_device(struct wx *wx)
struct net_device *netdev = wx->netdev;
u32 i;
+ if (wx->num_vfs) {
+ /* Clear EITR Select mapping */
+ wr32(wx, WX_PX_ITRSEL, 0);
+
+ /* Mark all the VFs as inactive */
+ for (i = 0; i < wx->num_vfs; i++)
+ wx->vfinfo[i].clear_to_send = 0;
+ wx->notify_down = true;
+ /* ping all the active vfs to let them know we are going down */
+ wx_ping_all_vfs_with_link_status(wx, false);
+ wx->notify_down = false;
+
+ /* Disable all VFTE/VFRE TX/RX */
+ wx_disable_vf_rx_tx(wx);
+ }
+
/* disable all enabled rx queues */
for (i = 0; i < wx->num_rx_queues; i++)
/* this call also flushes the previous write */
@@ -324,12 +381,19 @@ static void ngbe_disable_device(struct wx *wx)
wx_update_stats(wx);
}
+static void ngbe_reset(struct wx *wx)
+{
+ wx_flush_sw_mac_table(wx);
+ wx_mac_set_default_filter(wx, wx->mac.addr);
+ if (test_bit(WX_STATE_PTP_RUNNING, wx->state))
+ wx_ptp_reset(wx);
+}
+
void ngbe_down(struct wx *wx)
{
phylink_stop(wx->phylink);
ngbe_disable_device(wx);
- if (test_bit(WX_STATE_PTP_RUNNING, wx->state))
- wx_ptp_reset(wx);
+ ngbe_reset(wx);
wx_clean_all_tx_rings(wx);
wx_clean_all_rx_rings(wx);
}
@@ -352,6 +416,11 @@ void ngbe_up(struct wx *wx)
ngbe_sfp_modules_txrx_powerctl(wx, true);
phylink_start(wx->phylink);
+ /* Set PF Reset Done bit so PF/VF Mail Ops can work */
+ wr32m(wx, WX_CFG_PORT_CTL,
+ WX_CFG_PORT_CTL_PFRSTD, WX_CFG_PORT_CTL_PFRSTD);
+ if (wx->num_vfs)
+ wx_ping_all_vfs_with_link_status(wx, false);
}
/**
@@ -596,6 +665,10 @@ static int ngbe_probe(struct pci_dev *pdev,
goto err_pci_release_regions;
}
+ /* The Emerald supports up to 8 VFs per PF, but the physical
+ * function also needs one pool for basic networking.
+ */
+ pci_sriov_set_totalvfs(pdev, NGBE_MAX_VFS_DRV_LIMIT);
wx->driver_name = ngbe_driver_name;
ngbe_set_ethtool_ops(netdev);
netdev->netdev_ops = &ngbe_netdev_ops;
@@ -744,6 +817,7 @@ static void ngbe_remove(struct pci_dev *pdev)
struct net_device *netdev;
netdev = wx->netdev;
+ wx_disable_sriov(wx);
unregister_netdev(netdev);
phylink_destroy(wx->phylink);
pci_release_selected_regions(pdev,
@@ -803,6 +877,7 @@ static struct pci_driver ngbe_driver = {
.suspend = ngbe_suspend,
.resume = ngbe_resume,
.shutdown = ngbe_shutdown,
+ .sriov_configure = wx_pci_sriov_configure,
};
module_pci_driver(ngbe_driver);
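
The vector-sharing rule in ngbe_request_msix_irqs() is easy to
tabulate: only at the 7-VF limit does the queue interrupt fold back
onto vector 0. A stand-alone sketch of that decision:

    #include <stdio.h>

    int main(void)
    {
        for (int num_vfs = 5; num_vfs <= 7; num_vfs++) {
            int shared = (num_vfs == 7); /* vector 1 belongs to VF6 */

            printf("num_vfs=%d: misc on vector 0, queue on vector %d%s\n",
                   num_vfs, shared ? 0 : 1, shared ? " (shared)" : "");
        }
        return 0;
    }
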
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
index ea1d7e9a91f3..c63bb6e6f405 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
@@ -9,6 +9,7 @@
#include "../libwx/wx_type.h"
#include "../libwx/wx_ptp.h"
#include "../libwx/wx_hw.h"
+#include "../libwx/wx_sriov.h"
#include "ngbe_type.h"
#include "ngbe_mdio.h"
@@ -70,6 +71,8 @@ static void ngbe_mac_link_down(struct phylink_config *config,
wx->speed = SPEED_UNKNOWN;
if (test_bit(WX_STATE_PTP_RUNNING, wx->state))
wx_ptp_reset_cyclecounter(wx);
+ /* ping all the active vfs to let them know we are going down */
+ wx_ping_all_vfs_with_link_status(wx, false);
}
static void ngbe_mac_link_up(struct phylink_config *config,
@@ -114,6 +117,8 @@ static void ngbe_mac_link_up(struct phylink_config *config,
wx->last_rx_ptp_check = jiffies;
if (test_bit(WX_STATE_PTP_RUNNING, wx->state))
wx_ptp_reset_cyclecounter(wx);
+ /* ping all the active vfs to let them know we are going up */
+ wx_ping_all_vfs_with_link_status(wx, true);
}
static const struct phylink_mac_ops ngbe_mac_ops = {
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index 992adbb98c7d..bb74263f0498 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -73,12 +73,14 @@
#define NGBE_PX_MISC_IEN_TIMESYNC BIT(11)
#define NGBE_PX_MISC_IEN_ETH_LK BIT(18)
#define NGBE_PX_MISC_IEN_INT_ERR BIT(20)
+#define NGBE_PX_MISC_IC_VF_MBOX BIT(23)
#define NGBE_PX_MISC_IEN_GPIO BIT(26)
#define NGBE_PX_MISC_IEN_MASK ( \
NGBE_PX_MISC_IEN_DEV_RST | \
NGBE_PX_MISC_IEN_TIMESYNC | \
NGBE_PX_MISC_IEN_ETH_LK | \
NGBE_PX_MISC_IEN_INT_ERR | \
+ NGBE_PX_MISC_IC_VF_MBOX | \
NGBE_PX_MISC_IEN_GPIO)
/* Extended Interrupt Cause Read */
@@ -134,6 +136,7 @@
#define NGBE_MAX_RXD 8192
#define NGBE_MIN_RXD 128
+#define NGBE_MAX_VFS_DRV_LIMIT 7
extern char ngbe_driver_name[];
void ngbe_down(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
index 8658a51ee810..3b9e831cf0ef 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_irq.c
@@ -7,6 +7,7 @@
#include "../libwx/wx_type.h"
#include "../libwx/wx_lib.h"
#include "../libwx/wx_hw.h"
+#include "../libwx/wx_sriov.h"
#include "txgbe_type.h"
#include "txgbe_phy.h"
#include "txgbe_irq.h"
@@ -109,8 +110,17 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data)
struct wx *wx = txgbe->wx;
u32 eicr;
- if (wx->pdev->msix_enabled)
+ if (wx->pdev->msix_enabled) {
+ eicr = wx_misc_isb(wx, WX_ISB_MISC);
+ if (!eicr)
+ return IRQ_NONE;
+ txgbe->eicr = eicr;
+ if (eicr & TXGBE_PX_MISC_IC_VF_MBOX) {
+ wx_msg_task(txgbe->wx);
+ wx_intr_enable(wx, TXGBE_INTR_MISC);
+ }
return IRQ_WAKE_THREAD;
+ }
eicr = wx_misc_isb(wx, WX_ISB_VEC0);
if (!eicr) {
@@ -129,6 +139,11 @@ static irqreturn_t txgbe_misc_irq_handle(int irq, void *data)
q_vector = wx->q_vector[0];
napi_schedule_irqoff(&q_vector->napi);
+ eicr = wx_misc_isb(wx, WX_ISB_MISC);
+ if (!eicr)
+ return IRQ_NONE;
+ txgbe->eicr = eicr;
+
return IRQ_WAKE_THREAD;
}
@@ -140,7 +155,7 @@ static irqreturn_t txgbe_misc_irq_thread_fn(int irq, void *data)
unsigned int sub_irq;
u32 eicr;
- eicr = wx_misc_isb(wx, WX_ISB_MISC);
+ eicr = txgbe->eicr;
if (eicr & (TXGBE_PX_MISC_ETH_LK | TXGBE_PX_MISC_ETH_LKDN |
TXGBE_PX_MISC_ETH_AN)) {
sub_irq = irq_find_mapping(txgbe->misc.domain, TXGBE_IRQ_LINK);
@@ -183,7 +198,7 @@ int txgbe_setup_misc_irq(struct txgbe *txgbe)
if (wx->mac.type == wx_mac_aml)
goto skip_sp_irq;
- txgbe->misc.nirqs = 1;
+ txgbe->misc.nirqs = TXGBE_IRQ_MAX;
txgbe->misc.domain = irq_domain_add_simple(NULL, txgbe->misc.nirqs, 0,
&txgbe_misc_irq_domain_ops, txgbe);
if (!txgbe->misc.domain)
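
Caching eicr in the hard handler matters because the ISB word is
read-to-clear: if the threaded handler re-read it, the causes latched
by the hard handler would already be gone. A simplified single-threaded
model of that hand-off; read_isb_misc() merely stands in for
wx_misc_isb():

    #include <stdio.h>

    static unsigned int isb_misc = 0x00040000; /* pretend a cause is pending */

    static unsigned int read_isb_misc(void)
    {
        unsigned int v = isb_misc;

        isb_misc = 0; /* reading clears the hardware latch */
        return v;
    }

    struct txgbe_model { unsigned int eicr; };

    int main(void)
    {
        struct txgbe_model txgbe;

        txgbe.eicr = read_isb_misc(); /* hard handler latches the snapshot */
        printf("thread_fn sees 0x%08x\n", txgbe.eicr);
        printf("a re-read would see 0x%08x\n", read_isb_misc());
        return 0;
    }
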
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index 38206a46693b..d40db6b42200 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -15,6 +15,8 @@
#include "../libwx/wx_lib.h"
#include "../libwx/wx_ptp.h"
#include "../libwx/wx_hw.h"
+#include "../libwx/wx_mbx.h"
+#include "../libwx/wx_sriov.h"
#include "txgbe_type.h"
#include "txgbe_hw.h"
#include "txgbe_phy.h"
@@ -117,6 +119,12 @@ static void txgbe_up_complete(struct wx *wx)
/* enable transmits */
netif_tx_start_all_queues(netdev);
+
+ /* Set PF Reset Done bit so PF/VF Mail Ops can work */
+ wr32m(wx, WX_CFG_PORT_CTL, WX_CFG_PORT_CTL_PFRSTD,
+ WX_CFG_PORT_CTL_PFRSTD);
+ /* update setting rx tx for all active vfs */
+ wx_set_all_vfs(wx);
}
static void txgbe_reset(struct wx *wx)
@@ -165,6 +173,16 @@ static void txgbe_disable_device(struct wx *wx)
wx_err(wx, "%s: invalid bus lan id %d\n",
__func__, wx->bus.func);
+ if (wx->num_vfs) {
+ /* Clear EITR Select mapping */
+ wr32(wx, WX_PX_ITRSEL, 0);
+ /* Mark all the VFs as inactive */
+ for (i = 0; i < wx->num_vfs; i++)
+ wx->vfinfo[i].clear_to_send = 0;
+ /* update setting rx tx for all active vfs */
+ wx_set_all_vfs(wx);
+ }
+
if (!(((wx->subsystem_device_id & WX_NCSI_MASK) == WX_NCSI_SUP) ||
((wx->subsystem_device_id & WX_WOL_MASK) == WX_WOL_SUP))) {
/* disable MAC transmitter */
@@ -307,12 +325,15 @@ static int txgbe_sw_init(struct wx *wx)
/* set default ring sizes */
wx->tx_ring_count = TXGBE_DEFAULT_TXD;
wx->rx_ring_count = TXGBE_DEFAULT_RXD;
+ wx->mbx.size = WX_VXMAILBOX_SIZE;
/* set default work limits */
wx->tx_work_limit = TXGBE_DEFAULT_TX_WORK;
wx->rx_work_limit = TXGBE_DEFAULT_RX_WORK;
+ wx->setup_tc = txgbe_setup_tc;
wx->do_reset = txgbe_do_reset;
+ set_bit(0, &wx->fwd_bitmask);
switch (wx->mac.type) {
case wx_mac_sp:
@@ -604,6 +625,10 @@ static int txgbe_probe(struct pci_dev *pdev,
goto err_pci_release_regions;
}
+ /* The Sapphire supports up to 63 VFs per PF, but the physical
+ * function also needs one pool for basic networking.
+ */
+ pci_sriov_set_totalvfs(pdev, TXGBE_MAX_VFS_DRV_LIMIT);
wx->driver_name = txgbe_driver_name;
txgbe_set_ethtool_ops(netdev);
netdev->netdev_ops = &txgbe_netdev_ops;
@@ -795,6 +820,7 @@ static void txgbe_remove(struct pci_dev *pdev)
struct net_device *netdev;
netdev = wx->netdev;
+ wx_disable_sriov(wx);
unregister_netdev(netdev);
txgbe_remove_phy(txgbe);
@@ -817,11 +843,12 @@ static struct pci_driver txgbe_driver = {
.probe = txgbe_probe,
.remove = txgbe_remove,
.shutdown = txgbe_shutdown,
+ .sriov_configure = wx_pci_sriov_configure,
};
module_pci_driver(txgbe_driver);
MODULE_DEVICE_TABLE(pci, txgbe_pci_tbl);
MODULE_AUTHOR("Beijing WangXun Technology Co., Ltd, <software@trustnetic.com>");
-MODULE_DESCRIPTION("WangXun(R) 10 Gigabit PCI Express Network Driver");
+MODULE_DESCRIPTION("WangXun(R) 10/25/40 Gigabit PCI Express Network Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
index 85f022ceef4f..1863cfd27ee7 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
@@ -16,6 +16,8 @@
#include "../libwx/wx_type.h"
#include "../libwx/wx_lib.h"
#include "../libwx/wx_ptp.h"
+#include "../libwx/wx_sriov.h"
+#include "../libwx/wx_mbx.h"
#include "../libwx/wx_hw.h"
#include "txgbe_type.h"
#include "txgbe_phy.h"
@@ -184,6 +186,8 @@ static void txgbe_mac_link_down(struct phylink_config *config,
wx->speed = SPEED_UNKNOWN;
if (test_bit(WX_STATE_PTP_RUNNING, wx->state))
wx_ptp_reset_cyclecounter(wx);
+ /* ping all the active vfs to let them know we are going down */
+ wx_ping_all_vfs_with_link_status(wx, false);
}
static void txgbe_mac_link_up(struct phylink_config *config,
@@ -225,6 +229,8 @@ static void txgbe_mac_link_up(struct phylink_config *config,
wx->last_rx_ptp_check = jiffies;
if (test_bit(WX_STATE_PTP_RUNNING, wx->state))
wx_ptp_reset_cyclecounter(wx);
+ /* ping all the active vfs to let them know we are going up */
+ wx_ping_all_vfs_with_link_status(wx, true);
}
static int txgbe_mac_prepare(struct phylink_config *config, unsigned int mode,
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index 9c1c26234cad..5937cbc6bd05 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -77,11 +77,13 @@
#define TXGBE_PX_MISC_ETH_LK BIT(18)
#define TXGBE_PX_MISC_ETH_AN BIT(19)
#define TXGBE_PX_MISC_INT_ERR BIT(20)
+#define TXGBE_PX_MISC_IC_VF_MBOX BIT(23)
#define TXGBE_PX_MISC_GPIO BIT(26)
#define TXGBE_PX_MISC_IEN_MASK \
(TXGBE_PX_MISC_ETH_LKDN | TXGBE_PX_MISC_DEV_RST | \
TXGBE_PX_MISC_ETH_EVENT | TXGBE_PX_MISC_ETH_LK | \
- TXGBE_PX_MISC_ETH_AN | TXGBE_PX_MISC_INT_ERR)
+ TXGBE_PX_MISC_ETH_AN | TXGBE_PX_MISC_INT_ERR | \
+ TXGBE_PX_MISC_IC_VF_MBOX)
/* Port cfg registers */
#define TXGBE_CFG_PORT_ST 0x14404
@@ -174,6 +176,8 @@
#define TXGBE_SP_RX_PB_SIZE 512
#define TXGBE_SP_TDB_PB_SZ (160 * 1024) /* 160KB Packet Buffer */
+#define TXGBE_MAX_VFS_DRV_LIMIT 63
+
#define TXGBE_DEFAULT_ATR_SAMPLE_RATE 20
/* Software ATR hash keys */
@@ -348,6 +352,7 @@ struct txgbe {
struct clk *clk;
struct gpio_chip *gpio;
unsigned int link_irq;
+ u32 eicr;
/* flow director */
struct hlist_head fdir_filter_list;
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 66e38ce9cd1d..ffc15a432689 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1946,22 +1946,14 @@ static __net_init int geneve_init_net(struct net *net)
return 0;
}
-static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
+static void __net_exit geneve_exit_rtnl_net(struct net *net,
+ struct list_head *dev_to_kill)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *geneve, *next;
list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
- geneve_dellink(geneve->dev, head);
-}
-
-static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct net *net;
-
- list_for_each_entry(net, net_list, exit_list)
- geneve_destroy_tunnels(net, dev_to_kill);
+ geneve_dellink(geneve->dev, dev_to_kill);
}
static void __net_exit geneve_exit_net(struct net *net)
@@ -1973,7 +1965,7 @@ static void __net_exit geneve_exit_net(struct net *net)
static struct pernet_operations geneve_net_ops = {
.init = geneve_init_net,
- .exit_batch_rtnl = geneve_exit_batch_rtnl,
+ .exit_rtnl = geneve_exit_rtnl_net,
.exit = geneve_exit_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
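
The pernet conversion here is mechanical: .exit_batch_rtnl handed the
driver a list of namespaces to iterate, while .exit_rtnl is invoked by
the core once per namespace under RTNL. A stand-alone sketch of the
shape of the new callback; the _model types are hypothetical:

    #include <stdio.h>

    struct net_model { const char *name; };

    /* per-namespace teardown; the loop over namespaces lives in the core */
    static void exit_rtnl_model(struct net_model *net)
    {
        printf("tearing down tunnels in %s\n", net->name);
    }

    int main(void)
    {
        struct net_model nets[] = { { "init_net" }, { "netns-a" } };

        for (unsigned int i = 0; i < 2; i++) /* what the core now does */
            exit_rtnl_model(&nets[i]);
        return 0;
    }
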
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index ef793607890d..d4dec741c7f4 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -2475,23 +2475,19 @@ static int __net_init gtp_net_init(struct net *net)
return 0;
}
-static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
+static void __net_exit gtp_net_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- struct net *net;
-
- list_for_each_entry(net, net_list, exit_list) {
- struct gtp_net *gn = net_generic(net, gtp_net_id);
- struct gtp_dev *gtp, *gtp_next;
+ struct gtp_net *gn = net_generic(net, gtp_net_id);
+ struct gtp_dev *gtp, *gtp_next;
- list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
- gtp_dellink(gtp->dev, dev_to_kill);
- }
+ list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
+ gtp_dellink(gtp->dev, dev_to_kill);
}
static struct pernet_operations gtp_net_ops = {
.init = gtp_net_init,
- .exit_batch_rtnl = gtp_net_exit_batch_rtnl,
+ .exit_rtnl = gtp_net_exit_rtnl,
.id = &gtp_net_id,
.size = sizeof(struct gtp_net),
};
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 9e366f275406..5fda7a0fcce0 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -1074,7 +1074,7 @@ static int baycom_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
return 0;
case HDLCDRVCTL_DRIVERNAME:
- strscpy_pad(hi.data.drivername, "baycom_epp", sizeof(hi.data.drivername));
+ strscpy_pad(hi.data.drivername, "baycom_epp");
break;
case HDLCDRVCTL_GETMODE:
@@ -1091,8 +1091,7 @@ static int baycom_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
return baycom_setmode(bc, hi.data.modename);
case HDLCDRVCTL_MODELIST:
- strscpy_pad(hi.data.modename, "intclk,extclk,intmodem,extmodem,divider=x",
- sizeof(hi.data.modename));
+ strscpy_pad(hi.data.modename, "intclk,extclk,intmodem,extmodem,divider=x");
break;
case HDLCDRVCTL_MODEMPARMASK:
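
Dropping the size argument works because strscpy_pad() can recover the
destination length with sizeof() when handed a true array. A
stand-alone model of that deduction; scpy_pad_model() stands in for the
kernel macro and is not its implementation:

    #include <stdio.h>
    #include <string.h>

    /* only valid when dst is an array, exactly as in the call sites above */
    #define scpy_pad_model(dst, src) do {            \
            strncpy(dst, src, sizeof(dst) - 1);      \
            (dst)[sizeof(dst) - 1] = '\0';           \
    } while (0)

    int main(void)
    {
        char drivername[32];

        scpy_pad_model(drivername, "baycom_epp"); /* no explicit size */
        printf("%s\n", drivername);
        return 0;
    }
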
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index ca62188a317a..e3e65772c599 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -219,7 +219,7 @@ void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type)
unsigned int ipvlan_mac_hash(const unsigned char *addr)
{
- u32 hash = jhash_1word(__get_unaligned_cpu32(addr+2),
+ u32 hash = jhash_1word(get_unaligned((u32 *)(addr + 2)),
ipvlan_jhash_secret);
return hash & IPVLAN_MAC_FILTER_MASK;
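
get_unaligned() infers the access width from the pointer type and is
safe at any alignment, unlike the legacy __get_unaligned_cpu32()
internal. A stand-alone model of what such a load boils down to;
unaligned_load32() is a stand-in, not the kernel helper:

    #include <stdio.h>
    #include <string.h>

    static unsigned int unaligned_load32(const void *p)
    {
        unsigned int v;

        memcpy(&v, p, sizeof(v)); /* compilers lower this to a single load */
        return v;
    }

    int main(void)
    {
        unsigned char mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

        /* hash input: the last four bytes of the MAC, as in the code above */
        printf("0x%08x\n", unaligned_load32(mac + 2));
        return 0;
    }
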
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d0dfa6bca6cc..7045b1d58754 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -254,7 +254,7 @@ static u32 macvlan_hash_mix(const struct macvlan_dev *vlan)
static unsigned int mc_hash(const struct macvlan_dev *vlan,
const unsigned char *addr)
{
- u32 val = __get_unaligned_cpu32(addr + 2);
+ u32 val = get_unaligned((u32 *)(addr + 2));
val ^= macvlan_hash_mix(vlan);
return hash_32(val, MACVLAN_MC_FILTER_BITS);
diff --git a/drivers/net/mdio/Kconfig b/drivers/net/mdio/Kconfig
index 4a7a303be2f7..058fcdaf6c18 100644
--- a/drivers/net/mdio/Kconfig
+++ b/drivers/net/mdio/Kconfig
@@ -185,6 +185,13 @@ config MDIO_IPQ8064
This driver supports the MDIO interface found in the network
interface units of the IPQ8064 SoC
+config MDIO_REALTEK_RTL9300
+ tristate "Realtek RTL9300 MDIO interface support"
+ depends on MACH_REALTEK_RTL || COMPILE_TEST
+ help
+ This driver supports the MDIO interface found in the Realtek
+ RTL9300 family of Ethernet switches with integrated SoC.
+
config MDIO_REGMAP
tristate
help
diff --git a/drivers/net/mdio/Makefile b/drivers/net/mdio/Makefile
index 1015f0db4531..c23778e73890 100644
--- a/drivers/net/mdio/Makefile
+++ b/drivers/net/mdio/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o
obj-$(CONFIG_MDIO_MSCC_MIIM) += mdio-mscc-miim.o
obj-$(CONFIG_MDIO_MVUSB) += mdio-mvusb.o
obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o
+obj-$(CONFIG_MDIO_REALTEK_RTL9300) += mdio-realtek-rtl9300.o
obj-$(CONFIG_MDIO_REGMAP) += mdio-regmap.o
obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o
obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o
diff --git a/drivers/net/mdio/mdio-realtek-rtl9300.c b/drivers/net/mdio/mdio-realtek-rtl9300.c
new file mode 100644
index 000000000000..33694c3ff9a7
--- /dev/null
+++ b/drivers/net/mdio/mdio-realtek-rtl9300.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MDIO controller for RTL9300 switches with integrated SoC.
+ *
+ * The MDIO communication is abstracted by the switch. At the software level
+ * communication uses the switch port to address the PHY. We work out the
+ * mapping based on the MDIO bus described in device tree and phandles on the
+ * ethernet-ports property.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitmap.h>
+#include <linux/bits.h>
+#include <linux/find.h>
+#include <linux/mdio.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mod_devicetable.h>
+#include <linux/mutex.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+
+#define SMI_GLB_CTRL 0xca00
+#define GLB_CTRL_INTF_SEL(intf) BIT(16 + (intf))
+#define SMI_PORT0_15_POLLING_SEL 0xca08
+#define SMI_ACCESS_PHY_CTRL_0 0xcb70
+#define SMI_ACCESS_PHY_CTRL_1 0xcb74
+#define PHY_CTRL_REG_ADDR GENMASK(24, 20)
+#define PHY_CTRL_PARK_PAGE GENMASK(19, 15)
+#define PHY_CTRL_MAIN_PAGE GENMASK(14, 3)
+#define PHY_CTRL_WRITE BIT(2)
+#define PHY_CTRL_READ 0
+#define PHY_CTRL_TYPE_C45 BIT(1)
+#define PHY_CTRL_TYPE_C22 0
+#define PHY_CTRL_CMD BIT(0)
+#define PHY_CTRL_FAIL BIT(25)
+#define SMI_ACCESS_PHY_CTRL_2 0xcb78
+#define PHY_CTRL_INDATA GENMASK(31, 16)
+#define PHY_CTRL_DATA GENMASK(15, 0)
+#define SMI_ACCESS_PHY_CTRL_3 0xcb7c
+#define PHY_CTRL_MMD_DEVAD GENMASK(20, 16)
+#define PHY_CTRL_MMD_REG GENMASK(15, 0)
+#define SMI_PORT0_5_ADDR_CTRL 0xcb80
+
+#define MAX_PORTS 28
+#define MAX_SMI_BUSSES 4
+#define MAX_SMI_ADDR 0x1f
+
+struct rtl9300_mdio_priv {
+ struct regmap *regmap;
+ struct mutex lock; /* protect HW access */
+ DECLARE_BITMAP(valid_ports, MAX_PORTS);
+ u8 smi_bus[MAX_PORTS];
+ u8 smi_addr[MAX_PORTS];
+ bool smi_bus_is_c45[MAX_SMI_BUSSES];
+ struct mii_bus *bus[MAX_SMI_BUSSES];
+};
+
+struct rtl9300_mdio_chan {
+ struct rtl9300_mdio_priv *priv;
+ u8 mdio_bus;
+};
+
+static int rtl9300_mdio_phy_to_port(struct mii_bus *bus, int phy_id)
+{
+ struct rtl9300_mdio_chan *chan = bus->priv;
+ struct rtl9300_mdio_priv *priv;
+ int i;
+
+ priv = chan->priv;
+
+ for_each_set_bit(i, priv->valid_ports, MAX_PORTS)
+ if (priv->smi_bus[i] == chan->mdio_bus &&
+ priv->smi_addr[i] == phy_id)
+ return i;
+
+ return -ENOENT;
+}
+
+static int rtl9300_mdio_wait_ready(struct rtl9300_mdio_priv *priv)
+{
+ struct regmap *regmap = priv->regmap;
+ u32 val;
+
+ lockdep_assert_held(&priv->lock);
+
+ return regmap_read_poll_timeout(regmap, SMI_ACCESS_PHY_CTRL_1,
+ val, !(val & PHY_CTRL_CMD), 10, 1000);
+}
+
+static int rtl9300_mdio_read_c22(struct mii_bus *bus, int phy_id, int regnum)
+{
+ struct rtl9300_mdio_chan *chan = bus->priv;
+ struct rtl9300_mdio_priv *priv;
+ struct regmap *regmap;
+ int port;
+ u32 val;
+ int err;
+
+ priv = chan->priv;
+ regmap = priv->regmap;
+
+ port = rtl9300_mdio_phy_to_port(bus, phy_id);
+ if (port < 0)
+ return port;
+
+ mutex_lock(&priv->lock);
+ err = rtl9300_mdio_wait_ready(priv);
+ if (err)
+ goto out_err;
+
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_2, FIELD_PREP(PHY_CTRL_INDATA, port));
+ if (err)
+ goto out_err;
+
+ val = FIELD_PREP(PHY_CTRL_REG_ADDR, regnum) |
+ FIELD_PREP(PHY_CTRL_PARK_PAGE, 0x1f) |
+ FIELD_PREP(PHY_CTRL_MAIN_PAGE, 0xfff) |
+ PHY_CTRL_READ | PHY_CTRL_TYPE_C22 | PHY_CTRL_CMD;
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_1, val);
+ if (err)
+ goto out_err;
+
+ err = rtl9300_mdio_wait_ready(priv);
+ if (err)
+ goto out_err;
+
+ err = regmap_read(regmap, SMI_ACCESS_PHY_CTRL_2, &val);
+ if (err)
+ goto out_err;
+
+ mutex_unlock(&priv->lock);
+ return FIELD_GET(PHY_CTRL_DATA, val);
+
+out_err:
+ mutex_unlock(&priv->lock);
+ return err;
+}
+
+static int rtl9300_mdio_write_c22(struct mii_bus *bus, int phy_id, int regnum, u16 value)
+{
+ struct rtl9300_mdio_chan *chan = bus->priv;
+ struct rtl9300_mdio_priv *priv;
+ struct regmap *regmap;
+ int port;
+ u32 val;
+ int err;
+
+ priv = chan->priv;
+ regmap = priv->regmap;
+
+ port = rtl9300_mdio_phy_to_port(bus, phy_id);
+ if (port < 0)
+ return port;
+
+ mutex_lock(&priv->lock);
+ err = rtl9300_mdio_wait_ready(priv);
+ if (err)
+ goto out_err;
+
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_0, BIT(port));
+ if (err)
+ goto out_err;
+
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_2, FIELD_PREP(PHY_CTRL_INDATA, value));
+ if (err)
+ goto out_err;
+
+ val = FIELD_PREP(PHY_CTRL_REG_ADDR, regnum) |
+ FIELD_PREP(PHY_CTRL_PARK_PAGE, 0x1f) |
+ FIELD_PREP(PHY_CTRL_MAIN_PAGE, 0xfff) |
+ PHY_CTRL_WRITE | PHY_CTRL_TYPE_C22 | PHY_CTRL_CMD;
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_1, val);
+ if (err)
+ goto out_err;
+
+ err = regmap_read_poll_timeout(regmap, SMI_ACCESS_PHY_CTRL_1,
+ val, !(val & PHY_CTRL_CMD), 10, 100);
+ if (err)
+ goto out_err;
+
+ if (val & PHY_CTRL_FAIL) {
+ err = -ENXIO;
+ goto out_err;
+ }
+
+ mutex_unlock(&priv->lock);
+ return 0;
+
+out_err:
+ mutex_unlock(&priv->lock);
+ return err;
+}
+
+static int rtl9300_mdio_read_c45(struct mii_bus *bus, int phy_id, int dev_addr, int regnum)
+{
+ struct rtl9300_mdio_chan *chan = bus->priv;
+ struct rtl9300_mdio_priv *priv;
+ struct regmap *regmap;
+ int port;
+ u32 val;
+ int err;
+
+ priv = chan->priv;
+ regmap = priv->regmap;
+
+ port = rtl9300_mdio_phy_to_port(bus, phy_id);
+ if (port < 0)
+ return port;
+
+ mutex_lock(&priv->lock);
+ err = rtl9300_mdio_wait_ready(priv);
+ if (err)
+ goto out_err;
+
+ val = FIELD_PREP(PHY_CTRL_INDATA, port);
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_2, val);
+ if (err)
+ goto out_err;
+
+ val = FIELD_PREP(PHY_CTRL_MMD_DEVAD, dev_addr) |
+ FIELD_PREP(PHY_CTRL_MMD_REG, regnum);
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_3, val);
+ if (err)
+ goto out_err;
+
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_1,
+ PHY_CTRL_READ | PHY_CTRL_TYPE_C45 | PHY_CTRL_CMD);
+ if (err)
+ goto out_err;
+
+ err = rtl9300_mdio_wait_ready(priv);
+ if (err)
+ goto out_err;
+
+ err = regmap_read(regmap, SMI_ACCESS_PHY_CTRL_2, &val);
+ if (err)
+ goto out_err;
+
+ mutex_unlock(&priv->lock);
+ return FIELD_GET(PHY_CTRL_DATA, val);
+
+out_err:
+ mutex_unlock(&priv->lock);
+ return err;
+}
+
+static int rtl9300_mdio_write_c45(struct mii_bus *bus, int phy_id, int dev_addr,
+ int regnum, u16 value)
+{
+ struct rtl9300_mdio_chan *chan = bus->priv;
+ struct rtl9300_mdio_priv *priv;
+ struct regmap *regmap;
+ int port;
+ u32 val;
+ int err;
+
+ priv = chan->priv;
+ regmap = priv->regmap;
+
+ port = rtl9300_mdio_phy_to_port(bus, phy_id);
+ if (port < 0)
+ return port;
+
+ mutex_lock(&priv->lock);
+ err = rtl9300_mdio_wait_ready(priv);
+ if (err)
+ goto out_err;
+
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_0, BIT(port));
+ if (err)
+ goto out_err;
+
+ val = FIELD_PREP(PHY_CTRL_INDATA, value);
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_2, val);
+ if (err)
+ goto out_err;
+
+ val = FIELD_PREP(PHY_CTRL_MMD_DEVAD, dev_addr) |
+ FIELD_PREP(PHY_CTRL_MMD_REG, regnum);
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_3, val);
+ if (err)
+ goto out_err;
+
+ err = regmap_write(regmap, SMI_ACCESS_PHY_CTRL_1,
+ PHY_CTRL_TYPE_C45 | PHY_CTRL_WRITE | PHY_CTRL_CMD);
+ if (err)
+ goto out_err;
+
+ err = regmap_read_poll_timeout(regmap, SMI_ACCESS_PHY_CTRL_1,
+ val, !(val & PHY_CTRL_CMD), 10, 100);
+ if (err)
+ goto out_err;
+
+ if (val & PHY_CTRL_FAIL) {
+ err = -ENXIO;
+ goto out_err;
+ }
+
+ mutex_unlock(&priv->lock);
+ return 0;
+
+out_err:
+ mutex_unlock(&priv->lock);
+ return err;
+}
+
+static int rtl9300_mdiobus_init(struct rtl9300_mdio_priv *priv)
+{
+ u32 glb_ctrl_mask = 0, glb_ctrl_val = 0;
+ struct regmap *regmap = priv->regmap;
+ u32 port_addr[5] = { 0 };
+ u32 poll_sel[2] = { 0 };
+ int i, err;
+
+ /* Associate the port with the SMI interface and PHY */
+ for_each_set_bit(i, priv->valid_ports, MAX_PORTS) {
+ int pos;
+
+ pos = (i % 6) * 5;
+ port_addr[i / 6] |= (priv->smi_addr[i] & 0x1f) << pos;
+
+ pos = (i % 16) * 2;
+ poll_sel[i / 16] |= (priv->smi_bus[i] & 0x3) << pos;
+ }
+
+ /* Put the interfaces into C45 mode if required */
+ glb_ctrl_mask = GENMASK(19, 16);
+ for (i = 0; i < MAX_SMI_BUSSES; i++)
+ if (priv->smi_bus_is_c45[i])
+ glb_ctrl_val |= GLB_CTRL_INTF_SEL(i);
+
+ err = regmap_bulk_write(regmap, SMI_PORT0_5_ADDR_CTRL,
+ port_addr, 5);
+ if (err)
+ return err;
+
+ err = regmap_bulk_write(regmap, SMI_PORT0_15_POLLING_SEL,
+ poll_sel, 2);
+ if (err)
+ return err;
+
+ err = regmap_update_bits(regmap, SMI_GLB_CTRL,
+ glb_ctrl_mask, glb_ctrl_val);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int rtl9300_mdiobus_probe_one(struct device *dev, struct rtl9300_mdio_priv *priv,
+ struct fwnode_handle *node)
+{
+ struct rtl9300_mdio_chan *chan;
+ struct fwnode_handle *child;
+ struct mii_bus *bus;
+ u32 mdio_bus;
+ int err;
+
+ err = fwnode_property_read_u32(node, "reg", &mdio_bus);
+ if (err)
+ return err;
+
+ /* The MDIO accesses from the kernel work with the PHY polling unit in
+ * the switch. We need to tell the PPU to operate either in GPHY (i.e.
+ * clause 22) or 10GPHY mode (i.e. clause 45).
+ *
+ * We select 10GPHY mode if there is at least one PHY that declares
+ * compatible = "ethernet-phy-ieee802.3-c45". This does mean we can't
+ * support both c45 and c22 on the same MDIO bus.
+ */
+ fwnode_for_each_child_node(node, child)
+ if (fwnode_device_is_compatible(child, "ethernet-phy-ieee802.3-c45"))
+ priv->smi_bus_is_c45[mdio_bus] = true;
+
+ bus = devm_mdiobus_alloc_size(dev, sizeof(*chan));
+ if (!bus)
+ return -ENOMEM;
+
+ bus->name = "Realtek Switch MDIO Bus";
+ if (priv->smi_bus_is_c45[mdio_bus]) {
+ bus->read_c45 = rtl9300_mdio_read_c45;
+ bus->write_c45 = rtl9300_mdio_write_c45;
+ } else {
+ bus->read = rtl9300_mdio_read_c22;
+ bus->write = rtl9300_mdio_write_c22;
+ }
+ bus->parent = dev;
+ chan = bus->priv;
+ chan->mdio_bus = mdio_bus;
+ chan->priv = priv;
+
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s-%d", dev_name(dev), mdio_bus);
+
+ err = devm_of_mdiobus_register(dev, bus, to_of_node(node));
+ if (err)
+ return dev_err_probe(dev, err, "cannot register MDIO bus\n");
+
+ return 0;
+}
+
+/* The mdio-controller is part of a switch block so we parse the sibling
+ * ethernet-ports node and build a mapping of the switch port to MDIO bus/addr
+ * based on the phy-handle.
+ */
+static int rtl9300_mdiobus_map_ports(struct device *dev)
+{
+ struct rtl9300_mdio_priv *priv = dev_get_drvdata(dev);
+ struct device *parent = dev->parent;
+ struct fwnode_handle *port;
+ int err;
+
+ struct fwnode_handle *ports __free(fwnode_handle) =
+ device_get_named_child_node(parent, "ethernet-ports");
+ if (!ports)
+ return dev_err_probe(dev, -EINVAL, "%pfwP missing ethernet-ports\n",
+ dev_fwnode(parent));
+
+ fwnode_for_each_child_node(ports, port) {
+ struct device_node *mdio_dn;
+ u32 addr;
+ u32 bus;
+ u32 pn;
+
+ struct device_node *phy_dn __free(device_node) =
+ of_parse_phandle(to_of_node(port), "phy-handle", 0);
+ /* skip ports without phys */
+ if (!phy_dn)
+ continue;
+
+ mdio_dn = phy_dn->parent;
+ /* only map ports that are connected to this mdio-controller */
+ if (mdio_dn->parent != dev->of_node)
+ continue;
+
+ err = fwnode_property_read_u32(port, "reg", &pn);
+ if (err)
+ return err;
+
+ if (pn >= MAX_PORTS)
+ return dev_err_probe(dev, -EINVAL, "illegal port number %d\n", pn);
+
+ if (test_bit(pn, priv->valid_ports))
+ return dev_err_probe(dev, -EINVAL, "duplicated port number %d\n", pn);
+
+ err = of_property_read_u32(mdio_dn, "reg", &bus);
+ if (err)
+ return err;
+
+ if (bus >= MAX_SMI_BUSSES)
+ return dev_err_probe(dev, -EINVAL, "illegal smi bus number %d\n", bus);
+
+ err = of_property_read_u32(phy_dn, "reg", &addr);
+ if (err)
+ return err;
+
+ __set_bit(pn, priv->valid_ports);
+ priv->smi_bus[pn] = bus;
+ priv->smi_addr[pn] = addr;
+ }
+
+ return 0;
+}
+
+static int rtl9300_mdiobus_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct rtl9300_mdio_priv *priv;
+ struct fwnode_handle *child;
+ int err;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ err = devm_mutex_init(dev, &priv->lock);
+ if (err)
+ return err;
+
+ priv->regmap = syscon_node_to_regmap(dev->parent->of_node);
+ if (IS_ERR(priv->regmap))
+ return PTR_ERR(priv->regmap);
+
+ platform_set_drvdata(pdev, priv);
+
+ err = rtl9300_mdiobus_map_ports(dev);
+ if (err)
+ return err;
+
+ device_for_each_child_node(dev, child) {
+ err = rtl9300_mdiobus_probe_one(dev, priv, child);
+ if (err)
+ return err;
+ }
+
+ err = rtl9300_mdiobus_init(priv);
+ if (err)
+ return dev_err_probe(dev, err, "failed to initialise MDIO bus controller\n");
+
+ return 0;
+}
+
+static const struct of_device_id rtl9300_mdio_ids[] = {
+ { .compatible = "realtek,rtl9301-mdio" },
+ {}
+};
+MODULE_DEVICE_TABLE(of, rtl9300_mdio_ids);
+
+static struct platform_driver rtl9300_mdio_driver = {
+ .probe = rtl9300_mdiobus_probe,
+ .driver = {
+ .name = "mdio-rtl9300",
+ .of_match_table = rtl9300_mdio_ids,
+ },
+};
+
+module_platform_driver(rtl9300_mdio_driver);
+
+MODULE_DESCRIPTION("RTL9300 MDIO driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ovpn/Makefile b/drivers/net/ovpn/Makefile
new file mode 100644
index 000000000000..229be66167e1
--- /dev/null
+++ b/drivers/net/ovpn/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# ovpn -- OpenVPN data channel offload in kernel space
+#
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+obj-$(CONFIG_OVPN) := ovpn.o
+ovpn-y += bind.o
+ovpn-y += crypto.o
+ovpn-y += crypto_aead.o
+ovpn-y += main.o
+ovpn-y += io.o
+ovpn-y += netlink.o
+ovpn-y += netlink-gen.o
+ovpn-y += peer.o
+ovpn-y += pktid.o
+ovpn-y += socket.o
+ovpn-y += stats.o
+ovpn-y += tcp.o
+ovpn-y += udp.o
diff --git a/drivers/net/ovpn/bind.c b/drivers/net/ovpn/bind.c
new file mode 100644
index 000000000000..24d2788a277e
--- /dev/null
+++ b/drivers/net/ovpn/bind.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2012-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/socket.h>
+
+#include "ovpnpriv.h"
+#include "bind.h"
+#include "peer.h"
+
+/**
+ * ovpn_bind_from_sockaddr - create binding object from a sockaddr
+ * @ss: the sockaddr to encapsulate
+ *
+ * Return: the newly allocated bind on success, or an ERR_PTR on failure
+ */
+struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *ss)
+{
+ struct ovpn_bind *bind;
+ size_t sa_len;
+
+ if (ss->ss_family == AF_INET)
+ sa_len = sizeof(struct sockaddr_in);
+ else if (ss->ss_family == AF_INET6)
+ sa_len = sizeof(struct sockaddr_in6);
+ else
+ return ERR_PTR(-EAFNOSUPPORT);
+
+ bind = kzalloc(sizeof(*bind), GFP_ATOMIC);
+ if (unlikely(!bind))
+ return ERR_PTR(-ENOMEM);
+
+ memcpy(&bind->remote, ss, sa_len);
+
+ return bind;
+}
+
+/**
+ * ovpn_bind_reset - assign new binding to peer
+ * @peer: the peer whose binding has to be replaced
+ * @new: the new bind to assign
+ */
+void ovpn_bind_reset(struct ovpn_peer *peer, struct ovpn_bind *new)
+{
+ lockdep_assert_held(&peer->lock);
+
+ kfree_rcu(rcu_replace_pointer(peer->bind, new,
+ lockdep_is_held(&peer->lock)), rcu);
+}
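
ovpn_bind_reset() leans on a common RCU idiom: publish the new pointer
first, then defer freeing the displaced one until readers have moved
on. A stand-alone model of that publish-then-free ordering;
free_deferred() stands in for kfree_rcu() and frees immediately, since
there is no real RCU here:

    #include <stdio.h>
    #include <stdlib.h>

    struct bind_model { int id; };

    static void free_deferred(struct bind_model *old)
    {
        /* kfree_rcu() would wait out a grace period first */
        if (old)
            printf("freeing old bind %d\n", old->id);
        free(old);
    }

    int main(void)
    {
        struct bind_model *cur = malloc(sizeof(*cur));
        struct bind_model *new_bind = malloc(sizeof(*new_bind));
        struct bind_model *old;

        cur->id = 1;
        new_bind->id = 2;

        old = cur;       /* rcu_replace_pointer(): return the old value... */
        cur = new_bind;  /* ...after publishing the new one */
        free_deferred(old);

        printf("current bind %d\n", cur->id);
        free(cur);
        return 0;
    }
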
diff --git a/drivers/net/ovpn/bind.h b/drivers/net/ovpn/bind.h
new file mode 100644
index 000000000000..4e0b8398bfd9
--- /dev/null
+++ b/drivers/net/ovpn/bind.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2012-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPNBIND_H_
+#define _NET_OVPN_OVPNBIND_H_
+
+#include <net/ip.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+
+struct ovpn_peer;
+
+/**
+ * union ovpn_sockaddr - basic transport layer address
+ * @in4: IPv4 address
+ * @in6: IPv6 address
+ */
+union ovpn_sockaddr {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+};
+
+/**
+ * struct ovpn_bind - remote peer binding
+ * @remote: the remote peer sockaddr
+ * @local: local endpoint used to talk to the peer
+ * @local.ipv4: local IPv4 used to talk to the peer
+ * @local.ipv6: local IPv6 used to talk to the peer
+ * @rcu: used to schedule RCU cleanup job
+ */
+struct ovpn_bind {
+ union ovpn_sockaddr remote; /* remote sockaddr */
+
+ union {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ } local;
+
+ struct rcu_head rcu;
+};
+
+/**
+ * ovpn_bind_skb_src_match - match packet source with binding
+ * @bind: the binding to match
+ * @skb: the packet to match
+ *
+ * Return: true if the packet source matches the remote peer sockaddr
+ * in the binding
+ */
+static inline bool ovpn_bind_skb_src_match(const struct ovpn_bind *bind,
+ const struct sk_buff *skb)
+{
+ const union ovpn_sockaddr *remote;
+
+ if (unlikely(!bind))
+ return false;
+
+ remote = &bind->remote;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (unlikely(remote->in4.sin_family != AF_INET))
+ return false;
+
+ if (unlikely(remote->in4.sin_addr.s_addr != ip_hdr(skb)->saddr))
+ return false;
+
+ if (unlikely(remote->in4.sin_port != udp_hdr(skb)->source))
+ return false;
+ break;
+ case htons(ETH_P_IPV6):
+ if (unlikely(remote->in6.sin6_family != AF_INET6))
+ return false;
+
+ if (unlikely(!ipv6_addr_equal(&remote->in6.sin6_addr,
+ &ipv6_hdr(skb)->saddr)))
+ return false;
+
+ if (unlikely(remote->in6.sin6_port != udp_hdr(skb)->source))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+struct ovpn_bind *ovpn_bind_from_sockaddr(const struct sockaddr_storage *sa);
+void ovpn_bind_reset(struct ovpn_peer *peer, struct ovpn_bind *bind);
+
+#endif /* _NET_OVPN_OVPNBIND_H_ */
diff --git a/drivers/net/ovpn/crypto.c b/drivers/net/ovpn/crypto.c
new file mode 100644
index 000000000000..90580e32052f
--- /dev/null
+++ b/drivers/net/ovpn/crypto.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/types.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/ovpn.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "pktid.h"
+#include "crypto_aead.h"
+#include "crypto.h"
+
+static void ovpn_ks_destroy_rcu(struct rcu_head *head)
+{
+ struct ovpn_crypto_key_slot *ks;
+
+ ks = container_of(head, struct ovpn_crypto_key_slot, rcu);
+ ovpn_aead_crypto_key_slot_destroy(ks);
+}
+
+void ovpn_crypto_key_slot_release(struct kref *kref)
+{
+ struct ovpn_crypto_key_slot *ks;
+
+ ks = container_of(kref, struct ovpn_crypto_key_slot, refcount);
+ call_rcu(&ks->rcu, ovpn_ks_destroy_rcu);
+}
+
+/* can only be invoked when all peer references have been dropped (i.e. RCU
+ * release routine)
+ */
+void ovpn_crypto_state_release(struct ovpn_crypto_state *cs)
+{
+ struct ovpn_crypto_key_slot *ks;
+
+ ks = rcu_access_pointer(cs->slots[0]);
+ if (ks) {
+ RCU_INIT_POINTER(cs->slots[0], NULL);
+ ovpn_crypto_key_slot_put(ks);
+ }
+
+ ks = rcu_access_pointer(cs->slots[1]);
+ if (ks) {
+ RCU_INIT_POINTER(cs->slots[1], NULL);
+ ovpn_crypto_key_slot_put(ks);
+ }
+}
+
+/* removes the key matching the specified id from the crypto context */
+bool ovpn_crypto_kill_key(struct ovpn_crypto_state *cs, u8 key_id)
+{
+ struct ovpn_crypto_key_slot *ks = NULL;
+
+ spin_lock_bh(&cs->lock);
+ if (rcu_access_pointer(cs->slots[0])->key_id == key_id) {
+ ks = rcu_replace_pointer(cs->slots[0], NULL,
+ lockdep_is_held(&cs->lock));
+ } else if (rcu_access_pointer(cs->slots[1])->key_id == key_id) {
+ ks = rcu_replace_pointer(cs->slots[1], NULL,
+ lockdep_is_held(&cs->lock));
+ }
+ spin_unlock_bh(&cs->lock);
+
+ if (ks)
+ ovpn_crypto_key_slot_put(ks);
+
+ /* let the caller know if a key was actually killed */
+ return ks;
+}
+
+/* Reset the ovpn_crypto_state object in a way that is atomic
+ * to RCU readers.
+ */
+int ovpn_crypto_state_reset(struct ovpn_crypto_state *cs,
+ const struct ovpn_peer_key_reset *pkr)
+{
+ struct ovpn_crypto_key_slot *old = NULL, *new;
+ u8 idx;
+
+ if (pkr->slot != OVPN_KEY_SLOT_PRIMARY &&
+ pkr->slot != OVPN_KEY_SLOT_SECONDARY)
+ return -EINVAL;
+
+ new = ovpn_aead_crypto_key_slot_new(&pkr->key);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+
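+	/* the logical slot (primary/secondary) maps to a physical index:
+	 * primary lives at primary_idx, secondary at the opposite position
+	 */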
+ spin_lock_bh(&cs->lock);
+ idx = cs->primary_idx;
+ switch (pkr->slot) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ old = rcu_replace_pointer(cs->slots[idx], new,
+ lockdep_is_held(&cs->lock));
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ old = rcu_replace_pointer(cs->slots[!idx], new,
+ lockdep_is_held(&cs->lock));
+ break;
+ }
+ spin_unlock_bh(&cs->lock);
+
+ if (old)
+ ovpn_crypto_key_slot_put(old);
+
+ return 0;
+}
+
+void ovpn_crypto_key_slot_delete(struct ovpn_crypto_state *cs,
+ enum ovpn_key_slot slot)
+{
+ struct ovpn_crypto_key_slot *ks = NULL;
+ u8 idx;
+
+ if (slot != OVPN_KEY_SLOT_PRIMARY &&
+ slot != OVPN_KEY_SLOT_SECONDARY) {
+ pr_warn("Invalid slot to release: %u\n", slot);
+ return;
+ }
+
+ spin_lock_bh(&cs->lock);
+ idx = cs->primary_idx;
+ switch (slot) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ ks = rcu_replace_pointer(cs->slots[idx], NULL,
+ lockdep_is_held(&cs->lock));
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ ks = rcu_replace_pointer(cs->slots[!idx], NULL,
+ lockdep_is_held(&cs->lock));
+ break;
+ }
+ spin_unlock_bh(&cs->lock);
+
+ if (!ks) {
+ pr_debug("Key slot already released: %u\n", slot);
+ return;
+ }
+
+ pr_debug("deleting key slot %u, key_id=%u\n", slot, ks->key_id);
+ ovpn_crypto_key_slot_put(ks);
+}
+
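+/* Swap the primary and secondary key slots without moving any pointer:
+ * flipping primary_idx is enough and stays atomic for RCU readers that
+ * sample the index
+ */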
+void ovpn_crypto_key_slots_swap(struct ovpn_crypto_state *cs)
+{
+ const struct ovpn_crypto_key_slot *old_primary, *old_secondary;
+ u8 idx;
+
+ spin_lock_bh(&cs->lock);
+ idx = cs->primary_idx;
+ old_primary = rcu_dereference_protected(cs->slots[idx],
+ lockdep_is_held(&cs->lock));
+ old_secondary = rcu_dereference_protected(cs->slots[!idx],
+ lockdep_is_held(&cs->lock));
+ /* perform real swap by switching the index of the primary key */
+ WRITE_ONCE(cs->primary_idx, !cs->primary_idx);
+
+ pr_debug("key swapped: (old primary) %d <-> (new primary) %d\n",
+ old_primary ? old_primary->key_id : -1,
+ old_secondary ? old_secondary->key_id : -1);
+
+ spin_unlock_bh(&cs->lock);
+}
+
+/**
+ * ovpn_crypto_config_get - populate keyconf object with non-sensitive key data
+ * @cs: the crypto state to extract the key data from
+ * @slot: the specific slot to inspect
+ * @keyconf: the output object to populate
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_crypto_config_get(struct ovpn_crypto_state *cs,
+ enum ovpn_key_slot slot,
+ struct ovpn_key_config *keyconf)
+{
+ struct ovpn_crypto_key_slot *ks;
+ int idx;
+
+ switch (slot) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ idx = cs->primary_idx;
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ idx = !cs->primary_idx;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ rcu_read_lock();
+ ks = rcu_dereference(cs->slots[idx]);
+ if (!ks) {
+ rcu_read_unlock();
+ return -ENOENT;
+ }
+
+ keyconf->cipher_alg = ovpn_aead_crypto_alg(ks);
+ keyconf->key_id = ks->key_id;
+ rcu_read_unlock();
+
+ return 0;
+}
diff --git a/drivers/net/ovpn/crypto.h b/drivers/net/ovpn/crypto.h
new file mode 100644
index 000000000000..0e284fec3a75
--- /dev/null
+++ b/drivers/net/ovpn/crypto.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPNCRYPTO_H_
+#define _NET_OVPN_OVPNCRYPTO_H_
+
+#include "pktid.h"
+#include "proto.h"
+
+/* info needed for both encrypt and decrypt directions */
+struct ovpn_key_direction {
+ const u8 *cipher_key;
+ size_t cipher_key_size;
+ const u8 *nonce_tail; /* per-key nonce tail, appended to the packet ID */
+ size_t nonce_tail_size; /* expected to be OVPN_NONCE_TAIL_SIZE */
+};
+
+/* all info for a particular symmetric key (primary or secondary) */
+struct ovpn_key_config {
+ enum ovpn_cipher_alg cipher_alg;
+ u8 key_id;
+ struct ovpn_key_direction encrypt;
+ struct ovpn_key_direction decrypt;
+};
+
+/* used to pass settings from netlink to the crypto engine */
+struct ovpn_peer_key_reset {
+ enum ovpn_key_slot slot;
+ struct ovpn_key_config key;
+};
+
+struct ovpn_crypto_key_slot {
+ u8 key_id;
+
+ struct crypto_aead *encrypt;
+ struct crypto_aead *decrypt;
+ u8 nonce_tail_xmit[OVPN_NONCE_TAIL_SIZE];
+ u8 nonce_tail_recv[OVPN_NONCE_TAIL_SIZE];
+
+ struct ovpn_pktid_recv pid_recv ____cacheline_aligned_in_smp;
+ struct ovpn_pktid_xmit pid_xmit ____cacheline_aligned_in_smp;
+ struct kref refcount;
+ struct rcu_head rcu;
+};
+
+struct ovpn_crypto_state {
+ struct ovpn_crypto_key_slot __rcu *slots[2];
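+	/* index in slots[] of the current primary key; flipped on key swap */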
+ u8 primary_idx;
+
+ /* protects primary and secondary slots */
+ spinlock_t lock;
+};
+
+static inline bool ovpn_crypto_key_slot_hold(struct ovpn_crypto_key_slot *ks)
+{
+ return kref_get_unless_zero(&ks->refcount);
+}
+
+static inline void ovpn_crypto_state_init(struct ovpn_crypto_state *cs)
+{
+ RCU_INIT_POINTER(cs->slots[0], NULL);
+ RCU_INIT_POINTER(cs->slots[1], NULL);
+ cs->primary_idx = 0;
+ spin_lock_init(&cs->lock);
+}
+
+static inline struct ovpn_crypto_key_slot *
+ovpn_crypto_key_id_to_slot(const struct ovpn_crypto_state *cs, u8 key_id)
+{
+ struct ovpn_crypto_key_slot *ks;
+ u8 idx;
+
+ if (unlikely(!cs))
+ return NULL;
+
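+	/* try the primary slot first, then fall back to the secondary one */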
+ rcu_read_lock();
+ idx = READ_ONCE(cs->primary_idx);
+ ks = rcu_dereference(cs->slots[idx]);
+ if (ks && ks->key_id == key_id) {
+ if (unlikely(!ovpn_crypto_key_slot_hold(ks)))
+ ks = NULL;
+ goto out;
+ }
+
+ ks = rcu_dereference(cs->slots[!idx]);
+ if (ks && ks->key_id == key_id) {
+ if (unlikely(!ovpn_crypto_key_slot_hold(ks)))
+ ks = NULL;
+ goto out;
+ }
+
+ /* when both key slots are occupied but no matching key ID is found, ks
+ * has to be reset to NULL to avoid carrying a stale pointer
+ */
+ ks = NULL;
+out:
+ rcu_read_unlock();
+
+ return ks;
+}
+
+static inline struct ovpn_crypto_key_slot *
+ovpn_crypto_key_slot_primary(const struct ovpn_crypto_state *cs)
+{
+ struct ovpn_crypto_key_slot *ks;
+
+ rcu_read_lock();
+ ks = rcu_dereference(cs->slots[cs->primary_idx]);
+ if (unlikely(ks && !ovpn_crypto_key_slot_hold(ks)))
+ ks = NULL;
+ rcu_read_unlock();
+
+ return ks;
+}
+
+void ovpn_crypto_key_slot_release(struct kref *kref);
+
+static inline void ovpn_crypto_key_slot_put(struct ovpn_crypto_key_slot *ks)
+{
+ kref_put(&ks->refcount, ovpn_crypto_key_slot_release);
+}
+
+int ovpn_crypto_state_reset(struct ovpn_crypto_state *cs,
+ const struct ovpn_peer_key_reset *pkr);
+
+void ovpn_crypto_key_slot_delete(struct ovpn_crypto_state *cs,
+ enum ovpn_key_slot slot);
+
+void ovpn_crypto_state_release(struct ovpn_crypto_state *cs);
+
+void ovpn_crypto_key_slots_swap(struct ovpn_crypto_state *cs);
+
+int ovpn_crypto_config_get(struct ovpn_crypto_state *cs,
+ enum ovpn_key_slot slot,
+ struct ovpn_key_config *keyconf);
+
+bool ovpn_crypto_kill_key(struct ovpn_crypto_state *cs, u8 key_id);
+
+#endif /* _NET_OVPN_OVPNCRYPTO_H_ */
diff --git a/drivers/net/ovpn/crypto_aead.c b/drivers/net/ovpn/crypto_aead.c
new file mode 100644
index 000000000000..74ee639ac868
--- /dev/null
+++ b/drivers/net/ovpn/crypto_aead.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <crypto/aead.h>
+#include <linux/skbuff.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "io.h"
+#include "pktid.h"
+#include "crypto_aead.h"
+#include "crypto.h"
+#include "peer.h"
+#include "proto.h"
+#include "skb.h"
+
+#define OVPN_AUTH_TAG_SIZE 16
+#define OVPN_AAD_SIZE (OVPN_OPCODE_SIZE + OVPN_NONCE_WIRE_SIZE)
+
+#define ALG_NAME_AES "gcm(aes)"
+#define ALG_NAME_CHACHAPOLY "rfc7539(chacha20,poly1305)"
+
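+/* The full 12-byte AEAD nonce is the 4-byte packet ID followed by the 8-byte
+ * per-key nonce tail; only the packet ID is sent on the wire
+ * (OVPN_NONCE_WIRE_SIZE), the tail is part of the configured key material and
+ * known to both sides
+ */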
+static int ovpn_aead_encap_overhead(const struct ovpn_crypto_key_slot *ks)
+{
+ return OVPN_OPCODE_SIZE + /* OP header size */
+ sizeof(u32) + /* Packet ID */
+ crypto_aead_authsize(ks->encrypt); /* Auth Tag */
+}
+
+int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks,
+ struct sk_buff *skb)
+{
+ const unsigned int tag_size = crypto_aead_authsize(ks->encrypt);
+ struct aead_request *req;
+ struct sk_buff *trailer;
+ struct scatterlist *sg;
+ int nfrags, ret;
+ u32 pktid, op;
+ u8 *iv;
+
+ ovpn_skb_cb(skb)->peer = peer;
+ ovpn_skb_cb(skb)->ks = ks;
+
+	/* Sample AEAD header format:
+	 * 48000001 00000005 7e7046bd 444a7e28 cc6387b1 64a4d6c1 380275a...
+	 * [ OP32 ] [seq # ] [             auth tag            ] [ payload ... ]
+	 *          [4-byte
+	 *          IV head]
+	 */
+
+ /* check that there's enough headroom in the skb for packet
+ * encapsulation
+ */
+ if (unlikely(skb_cow_head(skb, OVPN_HEAD_ROOM)))
+ return -ENOBUFS;
+
+ /* get number of skb frags and ensure that packet data is writable */
+ nfrags = skb_cow_data(skb, 0, &trailer);
+ if (unlikely(nfrags < 0))
+ return nfrags;
+
+ if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2)))
+ return -ENOSPC;
+
+ /* sg may be required by async crypto */
+ ovpn_skb_cb(skb)->sg = kmalloc(sizeof(*ovpn_skb_cb(skb)->sg) *
+ (nfrags + 2), GFP_ATOMIC);
+ if (unlikely(!ovpn_skb_cb(skb)->sg))
+ return -ENOMEM;
+
+ sg = ovpn_skb_cb(skb)->sg;
+
+ /* sg table:
+	 * 0: op, wire nonce (AD, len=OVPN_OPCODE_SIZE+OVPN_NONCE_WIRE_SIZE),
+ * 1, 2, 3, ..., n: payload,
+ * n+1: auth_tag (len=tag_size)
+ */
+ sg_init_table(sg, nfrags + 2);
+
+ /* build scatterlist to encrypt packet payload */
+ ret = skb_to_sgvec_nomark(skb, sg + 1, 0, skb->len);
+ if (unlikely(nfrags != ret))
+ return -EINVAL;
+
+ /* append auth_tag onto scatterlist */
+ __skb_push(skb, tag_size);
+ sg_set_buf(sg + nfrags + 1, skb->data, tag_size);
+
+	/* obtain the packet ID, which is used both as the first
+	 * 4 bytes of the nonce and as the last 4 bytes of the associated data.
+ */
+ ret = ovpn_pktid_xmit_next(&ks->pid_xmit, &pktid);
+ if (unlikely(ret < 0))
+ return ret;
+
+ /* iv may be required by async crypto */
+ ovpn_skb_cb(skb)->iv = kmalloc(OVPN_NONCE_SIZE, GFP_ATOMIC);
+ if (unlikely(!ovpn_skb_cb(skb)->iv))
+ return -ENOMEM;
+
+ iv = ovpn_skb_cb(skb)->iv;
+
+	/* concatenate the 4-byte packet ID and the 8-byte nonce tail into the
+	 * 12-byte AEAD nonce
+ */
+ ovpn_pktid_aead_write(pktid, ks->nonce_tail_xmit, iv);
+
+ /* make space for packet id and push it to the front */
+ __skb_push(skb, OVPN_NONCE_WIRE_SIZE);
+ memcpy(skb->data, iv, OVPN_NONCE_WIRE_SIZE);
+
+ /* add packet op as head of additional data */
+ op = ovpn_opcode_compose(OVPN_DATA_V2, ks->key_id, peer->id);
+ __skb_push(skb, OVPN_OPCODE_SIZE);
+ BUILD_BUG_ON(sizeof(op) != OVPN_OPCODE_SIZE);
+ *((__force __be32 *)skb->data) = htonl(op);
+
+ /* AEAD Additional data */
+ sg_set_buf(sg, skb->data, OVPN_AAD_SIZE);
+
+ req = aead_request_alloc(ks->encrypt, GFP_ATOMIC);
+ if (unlikely(!req))
+ return -ENOMEM;
+
+ ovpn_skb_cb(skb)->req = req;
+
+ /* setup async crypto operation */
+ aead_request_set_tfm(req, ks->encrypt);
+ aead_request_set_callback(req, 0, ovpn_encrypt_post, skb);
+ aead_request_set_crypt(req, sg, sg,
+ skb->len - ovpn_aead_encap_overhead(ks), iv);
+ aead_request_set_ad(req, OVPN_AAD_SIZE);
+
+ /* encrypt it */
+ return crypto_aead_encrypt(req);
+}
+
+int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks,
+ struct sk_buff *skb)
+{
+ const unsigned int tag_size = crypto_aead_authsize(ks->decrypt);
+ int ret, payload_len, nfrags;
+ unsigned int payload_offset;
+ struct aead_request *req;
+ struct sk_buff *trailer;
+ struct scatterlist *sg;
+ u8 *iv;
+
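+	/* on-wire layout: opcode, wire nonce (together the AAD), auth tag,
+	 * then the encrypted payload
+	 */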
+ payload_offset = OVPN_AAD_SIZE + tag_size;
+ payload_len = skb->len - payload_offset;
+
+ ovpn_skb_cb(skb)->payload_offset = payload_offset;
+ ovpn_skb_cb(skb)->peer = peer;
+ ovpn_skb_cb(skb)->ks = ks;
+
+ /* sanity check on packet size, payload size must be >= 0 */
+ if (unlikely(payload_len < 0))
+ return -EINVAL;
+
+ /* Prepare the skb data buffer to be accessed up until the auth tag.
+ * This is required because this area is directly mapped into the sg
+ * list.
+ */
+ if (unlikely(!pskb_may_pull(skb, payload_offset)))
+ return -ENODATA;
+
+ /* get number of skb frags and ensure that packet data is writable */
+ nfrags = skb_cow_data(skb, 0, &trailer);
+ if (unlikely(nfrags < 0))
+ return nfrags;
+
+ if (unlikely(nfrags + 2 > (MAX_SKB_FRAGS + 2)))
+ return -ENOSPC;
+
+ /* sg may be required by async crypto */
+ ovpn_skb_cb(skb)->sg = kmalloc(sizeof(*ovpn_skb_cb(skb)->sg) *
+ (nfrags + 2), GFP_ATOMIC);
+ if (unlikely(!ovpn_skb_cb(skb)->sg))
+ return -ENOMEM;
+
+ sg = ovpn_skb_cb(skb)->sg;
+
+ /* sg table:
+ * 0: op, wire nonce (AD, len=OVPN_OPCODE_SIZE+OVPN_NONCE_WIRE_SIZE),
+ * 1, 2, 3, ..., n: payload,
+ * n+1: auth_tag (len=tag_size)
+ */
+ sg_init_table(sg, nfrags + 2);
+
+ /* packet op is head of additional data */
+ sg_set_buf(sg, skb->data, OVPN_AAD_SIZE);
+
+ /* build scatterlist to decrypt packet payload */
+ ret = skb_to_sgvec_nomark(skb, sg + 1, payload_offset, payload_len);
+ if (unlikely(nfrags != ret))
+ return -EINVAL;
+
+ /* append auth_tag onto scatterlist */
+ sg_set_buf(sg + nfrags + 1, skb->data + OVPN_AAD_SIZE, tag_size);
+
+ /* iv may be required by async crypto */
+ ovpn_skb_cb(skb)->iv = kmalloc(OVPN_NONCE_SIZE, GFP_ATOMIC);
+ if (unlikely(!ovpn_skb_cb(skb)->iv))
+ return -ENOMEM;
+
+ iv = ovpn_skb_cb(skb)->iv;
+
+ /* copy nonce into IV buffer */
+ memcpy(iv, skb->data + OVPN_OPCODE_SIZE, OVPN_NONCE_WIRE_SIZE);
+ memcpy(iv + OVPN_NONCE_WIRE_SIZE, ks->nonce_tail_recv,
+ OVPN_NONCE_TAIL_SIZE);
+
+ req = aead_request_alloc(ks->decrypt, GFP_ATOMIC);
+ if (unlikely(!req))
+ return -ENOMEM;
+
+ ovpn_skb_cb(skb)->req = req;
+
+ /* setup async crypto operation */
+ aead_request_set_tfm(req, ks->decrypt);
+ aead_request_set_callback(req, 0, ovpn_decrypt_post, skb);
+ aead_request_set_crypt(req, sg, sg, payload_len + tag_size, iv);
+
+ aead_request_set_ad(req, OVPN_AAD_SIZE);
+
+ /* decrypt it */
+ return crypto_aead_decrypt(req);
+}
+
+/* Initialize a struct crypto_aead object */
+static struct crypto_aead *ovpn_aead_init(const char *title,
+ const char *alg_name,
+ const unsigned char *key,
+ unsigned int keylen)
+{
+ struct crypto_aead *aead;
+ int ret;
+
+ aead = crypto_alloc_aead(alg_name, 0, 0);
+ if (IS_ERR(aead)) {
+ ret = PTR_ERR(aead);
+ pr_err("%s crypto_alloc_aead failed, err=%d\n", title, ret);
+ aead = NULL;
+ goto error;
+ }
+
+ ret = crypto_aead_setkey(aead, key, keylen);
+ if (ret) {
+ pr_err("%s crypto_aead_setkey size=%u failed, err=%d\n", title,
+ keylen, ret);
+ goto error;
+ }
+
+ ret = crypto_aead_setauthsize(aead, OVPN_AUTH_TAG_SIZE);
+ if (ret) {
+ pr_err("%s crypto_aead_setauthsize failed, err=%d\n", title,
+ ret);
+ goto error;
+ }
+
+ /* basic AEAD assumption */
+ if (crypto_aead_ivsize(aead) != OVPN_NONCE_SIZE) {
+ pr_err("%s IV size must be %d\n", title, OVPN_NONCE_SIZE);
+ ret = -EINVAL;
+ goto error;
+ }
+
+ pr_debug("********* Cipher %s (%s)\n", alg_name, title);
+ pr_debug("*** IV size=%u\n", crypto_aead_ivsize(aead));
+ pr_debug("*** req size=%u\n", crypto_aead_reqsize(aead));
+ pr_debug("*** block size=%u\n", crypto_aead_blocksize(aead));
+ pr_debug("*** auth size=%u\n", crypto_aead_authsize(aead));
+ pr_debug("*** alignmask=0x%x\n", crypto_aead_alignmask(aead));
+
+ return aead;
+
+error:
+ crypto_free_aead(aead);
+ return ERR_PTR(ret);
+}
+
+void ovpn_aead_crypto_key_slot_destroy(struct ovpn_crypto_key_slot *ks)
+{
+ if (!ks)
+ return;
+
+ crypto_free_aead(ks->encrypt);
+ crypto_free_aead(ks->decrypt);
+ kfree(ks);
+}
+
+struct ovpn_crypto_key_slot *
+ovpn_aead_crypto_key_slot_new(const struct ovpn_key_config *kc)
+{
+ struct ovpn_crypto_key_slot *ks = NULL;
+ const char *alg_name;
+ int ret;
+
+ /* validate crypto alg */
+ switch (kc->cipher_alg) {
+ case OVPN_CIPHER_ALG_AES_GCM:
+ alg_name = ALG_NAME_AES;
+ break;
+ case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+ alg_name = ALG_NAME_CHACHAPOLY;
+ break;
+ default:
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ if (kc->encrypt.nonce_tail_size != OVPN_NONCE_TAIL_SIZE ||
+ kc->decrypt.nonce_tail_size != OVPN_NONCE_TAIL_SIZE)
+ return ERR_PTR(-EINVAL);
+
+ /* build the key slot */
+ ks = kmalloc(sizeof(*ks), GFP_KERNEL);
+ if (!ks)
+ return ERR_PTR(-ENOMEM);
+
+ ks->encrypt = NULL;
+ ks->decrypt = NULL;
+ kref_init(&ks->refcount);
+ ks->key_id = kc->key_id;
+
+ ks->encrypt = ovpn_aead_init("encrypt", alg_name,
+ kc->encrypt.cipher_key,
+ kc->encrypt.cipher_key_size);
+ if (IS_ERR(ks->encrypt)) {
+ ret = PTR_ERR(ks->encrypt);
+ ks->encrypt = NULL;
+ goto destroy_ks;
+ }
+
+ ks->decrypt = ovpn_aead_init("decrypt", alg_name,
+ kc->decrypt.cipher_key,
+ kc->decrypt.cipher_key_size);
+ if (IS_ERR(ks->decrypt)) {
+ ret = PTR_ERR(ks->decrypt);
+ ks->decrypt = NULL;
+ goto destroy_ks;
+ }
+
+ memcpy(ks->nonce_tail_xmit, kc->encrypt.nonce_tail,
+ OVPN_NONCE_TAIL_SIZE);
+ memcpy(ks->nonce_tail_recv, kc->decrypt.nonce_tail,
+ OVPN_NONCE_TAIL_SIZE);
+
+ /* init packet ID generation/validation */
+ ovpn_pktid_xmit_init(&ks->pid_xmit);
+ ovpn_pktid_recv_init(&ks->pid_recv);
+
+ return ks;
+
+destroy_ks:
+ ovpn_aead_crypto_key_slot_destroy(ks);
+ return ERR_PTR(ret);
+}
+
+enum ovpn_cipher_alg ovpn_aead_crypto_alg(struct ovpn_crypto_key_slot *ks)
+{
+ const char *alg_name;
+
+ if (!ks->encrypt)
+ return OVPN_CIPHER_ALG_NONE;
+
+ alg_name = crypto_tfm_alg_name(crypto_aead_tfm(ks->encrypt));
+
+ if (!strcmp(alg_name, ALG_NAME_AES))
+ return OVPN_CIPHER_ALG_AES_GCM;
+ else if (!strcmp(alg_name, ALG_NAME_CHACHAPOLY))
+ return OVPN_CIPHER_ALG_CHACHA20_POLY1305;
+ else
+ return OVPN_CIPHER_ALG_NONE;
+}
diff --git a/drivers/net/ovpn/crypto_aead.h b/drivers/net/ovpn/crypto_aead.h
new file mode 100644
index 000000000000..65a2ff307898
--- /dev/null
+++ b/drivers/net/ovpn/crypto_aead.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPNAEAD_H_
+#define _NET_OVPN_OVPNAEAD_H_
+
+#include "crypto.h"
+
+#include <asm/types.h>
+#include <linux/skbuff.h>
+
+int ovpn_aead_encrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks,
+ struct sk_buff *skb);
+int ovpn_aead_decrypt(struct ovpn_peer *peer, struct ovpn_crypto_key_slot *ks,
+ struct sk_buff *skb);
+
+struct ovpn_crypto_key_slot *
+ovpn_aead_crypto_key_slot_new(const struct ovpn_key_config *kc);
+void ovpn_aead_crypto_key_slot_destroy(struct ovpn_crypto_key_slot *ks);
+
+enum ovpn_cipher_alg ovpn_aead_crypto_alg(struct ovpn_crypto_key_slot *ks);
+
+#endif /* _NET_OVPN_OVPNAEAD_H_ */
diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
new file mode 100644
index 000000000000..dd8a8055d967
--- /dev/null
+++ b/drivers/net/ovpn/io.c
@@ -0,0 +1,446 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <crypto/aead.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <net/gro_cells.h>
+#include <net/gso.h>
+#include <net/ip.h>
+
+#include "ovpnpriv.h"
+#include "peer.h"
+#include "io.h"
+#include "bind.h"
+#include "crypto.h"
+#include "crypto_aead.h"
+#include "netlink.h"
+#include "proto.h"
+#include "tcp.h"
+#include "udp.h"
+#include "skb.h"
+#include "socket.h"
+
+const unsigned char ovpn_keepalive_message[OVPN_KEEPALIVE_SIZE] = {
+ 0x2a, 0x18, 0x7b, 0xf3, 0x64, 0x1e, 0xb4, 0xcb,
+ 0x07, 0xed, 0x2d, 0x0a, 0x98, 0x1f, 0xc7, 0x48
+};
+
+/**
+ * ovpn_is_keepalive - check if skb contains a keepalive message
+ * @skb: packet to check
+ *
+ * Assumes that the first byte of skb->data is defined.
+ *
+ * Return: true if skb contains a keepalive or false otherwise
+ */
+static bool ovpn_is_keepalive(struct sk_buff *skb)
+{
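+	/* compare the first byte before the length test and pskb_may_pull(),
+	 * so that non-keepalive packets are rejected cheaply
+	 */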
+ if (*skb->data != ovpn_keepalive_message[0])
+ return false;
+
+ if (skb->len != OVPN_KEEPALIVE_SIZE)
+ return false;
+
+ if (!pskb_may_pull(skb, OVPN_KEEPALIVE_SIZE))
+ return false;
+
+ return !memcmp(skb->data, ovpn_keepalive_message, OVPN_KEEPALIVE_SIZE);
+}
+
+/* Called after decrypt to write the IP packet to the device.
+ * This method is expected to manage/free the skb.
+ */
+static void ovpn_netdev_write(struct ovpn_peer *peer, struct sk_buff *skb)
+{
+ unsigned int pkt_len;
+ int ret;
+
+ /* we can't guarantee the packet wasn't corrupted before entering the
+ * VPN, therefore we give other layers a chance to check that
+ */
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* skb hash for transport packet no longer valid after decapsulation */
+ skb_clear_hash(skb);
+
+ /* post-decrypt scrub -- prepare to inject encapsulated packet onto the
+ * interface, based on __skb_tunnel_rx() in dst.h
+ */
+ skb->dev = peer->ovpn->dev;
+ skb_set_queue_mapping(skb, 0);
+ skb_scrub_packet(skb, true);
+
+ /* network header reset in ovpn_decrypt_post() */
+ skb_reset_transport_header(skb);
+ skb_reset_inner_headers(skb);
+
+ /* cause packet to be "received" by the interface */
+ pkt_len = skb->len;
+ ret = gro_cells_receive(&peer->ovpn->gro_cells, skb);
+ if (likely(ret == NET_RX_SUCCESS)) {
+ /* update RX stats with the size of decrypted packet */
+ ovpn_peer_stats_increment_rx(&peer->vpn_stats, pkt_len);
+ dev_dstats_rx_add(peer->ovpn->dev, pkt_len);
+ }
+}
+
+void ovpn_decrypt_post(void *data, int ret)
+{
+ struct ovpn_crypto_key_slot *ks;
+ unsigned int payload_offset = 0;
+ struct sk_buff *skb = data;
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+ __be16 proto;
+ __be32 *pid;
+
+	/* crypto is happening asynchronously. This function will be called
+ * again later by the crypto callback with a proper return code
+ */
+ if (unlikely(ret == -EINPROGRESS))
+ return;
+
+ payload_offset = ovpn_skb_cb(skb)->payload_offset;
+ ks = ovpn_skb_cb(skb)->ks;
+ peer = ovpn_skb_cb(skb)->peer;
+
+ /* crypto is done, cleanup skb CB and its members */
+ kfree(ovpn_skb_cb(skb)->iv);
+ kfree(ovpn_skb_cb(skb)->sg);
+ aead_request_free(ovpn_skb_cb(skb)->req);
+
+ if (unlikely(ret < 0))
+ goto drop;
+
+ /* PID sits after the op */
+ pid = (__force __be32 *)(skb->data + OVPN_OPCODE_SIZE);
+ ret = ovpn_pktid_recv(&ks->pid_recv, ntohl(*pid), 0);
+ if (unlikely(ret < 0)) {
+ net_err_ratelimited("%s: PKT ID RX error for peer %u: %d\n",
+ netdev_name(peer->ovpn->dev), peer->id,
+ ret);
+ goto drop;
+ }
+
+ /* keep track of last received authenticated packet for keepalive */
+ WRITE_ONCE(peer->last_recv, ktime_get_real_seconds());
+
+ rcu_read_lock();
+ sock = rcu_dereference(peer->sock);
+ if (sock && sock->sock->sk->sk_protocol == IPPROTO_UDP)
+ /* check if this peer changed local or remote endpoint */
+ ovpn_peer_endpoints_update(peer, skb);
+ rcu_read_unlock();
+
+ /* point to encapsulated IP packet */
+ __skb_pull(skb, payload_offset);
+
+	/* check if this is a valid data packet that has to be delivered to the
+ * ovpn interface
+ */
+ skb_reset_network_header(skb);
+ proto = ovpn_ip_check_protocol(skb);
+ if (unlikely(!proto)) {
+ /* check if null packet */
+ if (unlikely(!pskb_may_pull(skb, 1))) {
+ net_info_ratelimited("%s: NULL packet received from peer %u\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id);
+ goto drop;
+ }
+
+ if (ovpn_is_keepalive(skb)) {
+ net_dbg_ratelimited("%s: ping received from peer %u\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id);
+ /* we drop the packet, but this is not a failure */
+ consume_skb(skb);
+ goto drop_nocount;
+ }
+
+ net_info_ratelimited("%s: unsupported protocol received from peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ goto drop;
+ }
+ skb->protocol = proto;
+
+ /* perform Reverse Path Filtering (RPF) */
+ if (unlikely(!ovpn_peer_check_by_src(peer->ovpn, skb, peer))) {
+ if (skb->protocol == htons(ETH_P_IPV6))
+ net_dbg_ratelimited("%s: RPF dropped packet from peer %u, src: %pI6c\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id, &ipv6_hdr(skb)->saddr);
+ else
+ net_dbg_ratelimited("%s: RPF dropped packet from peer %u, src: %pI4\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id, &ip_hdr(skb)->saddr);
+ goto drop;
+ }
+
+ ovpn_netdev_write(peer, skb);
+ /* skb is passed to upper layer - don't free it */
+ skb = NULL;
+drop:
+ if (unlikely(skb))
+ dev_dstats_rx_dropped(peer->ovpn->dev);
+ kfree_skb(skb);
+drop_nocount:
+ if (likely(peer))
+ ovpn_peer_put(peer);
+ if (likely(ks))
+ ovpn_crypto_key_slot_put(ks);
+}
+
+/* RX path entry point: decrypt packet and forward it to the device */
+void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb)
+{
+ struct ovpn_crypto_key_slot *ks;
+ u8 key_id;
+
+ ovpn_peer_stats_increment_rx(&peer->link_stats, skb->len);
+
+ /* get the key slot matching the key ID in the received packet */
+ key_id = ovpn_key_id_from_skb(skb);
+ ks = ovpn_crypto_key_id_to_slot(&peer->crypto, key_id);
+ if (unlikely(!ks)) {
+ net_info_ratelimited("%s: no available key for peer %u, key-id: %u\n",
+ netdev_name(peer->ovpn->dev), peer->id,
+ key_id);
+ dev_dstats_rx_dropped(peer->ovpn->dev);
+ kfree_skb(skb);
+ ovpn_peer_put(peer);
+ return;
+ }
+
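+	/* clear the skb control block before the crypto path populates it */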
+ memset(ovpn_skb_cb(skb), 0, sizeof(struct ovpn_cb));
+ ovpn_decrypt_post(skb, ovpn_aead_decrypt(peer, ks, skb));
+}
+
+void ovpn_encrypt_post(void *data, int ret)
+{
+ struct ovpn_crypto_key_slot *ks;
+ struct sk_buff *skb = data;
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+ unsigned int orig_len;
+
+ /* encryption is happening asynchronously. This function will be
+ * called later by the crypto callback with a proper return value
+ */
+ if (unlikely(ret == -EINPROGRESS))
+ return;
+
+ ks = ovpn_skb_cb(skb)->ks;
+ peer = ovpn_skb_cb(skb)->peer;
+
+ /* crypto is done, cleanup skb CB and its members */
+ kfree(ovpn_skb_cb(skb)->iv);
+ kfree(ovpn_skb_cb(skb)->sg);
+ aead_request_free(ovpn_skb_cb(skb)->req);
+
+ if (unlikely(ret == -ERANGE)) {
+ /* we ran out of IVs and we must kill the key as it can't be
+		 * used anymore
+ */
+ netdev_warn(peer->ovpn->dev,
+ "killing key %u for peer %u\n", ks->key_id,
+ peer->id);
+ if (ovpn_crypto_kill_key(&peer->crypto, ks->key_id))
+		/* notify userspace so that a new key can be negotiated */
+ ovpn_nl_key_swap_notify(peer, ks->key_id);
+
+ goto err;
+ }
+
+ if (unlikely(ret < 0))
+ goto err;
+
+ skb_mark_not_on_list(skb);
+ orig_len = skb->len;
+
+ rcu_read_lock();
+ sock = rcu_dereference(peer->sock);
+ if (unlikely(!sock))
+ goto err_unlock;
+
+ switch (sock->sock->sk->sk_protocol) {
+ case IPPROTO_UDP:
+ ovpn_udp_send_skb(peer, sock->sock, skb);
+ break;
+ case IPPROTO_TCP:
+ ovpn_tcp_send_skb(peer, sock->sock, skb);
+ break;
+ default:
+ /* no transport configured yet */
+ goto err_unlock;
+ }
+
+ ovpn_peer_stats_increment_tx(&peer->link_stats, orig_len);
+ /* keep track of last sent packet for keepalive */
+ WRITE_ONCE(peer->last_sent, ktime_get_real_seconds());
+ /* skb passed down the stack - don't free it */
+ skb = NULL;
+err_unlock:
+ rcu_read_unlock();
+err:
+ if (unlikely(skb))
+ dev_dstats_tx_dropped(peer->ovpn->dev);
+ if (likely(peer))
+ ovpn_peer_put(peer);
+ if (likely(ks))
+ ovpn_crypto_key_slot_put(ks);
+ kfree_skb(skb);
+}
+
+static bool ovpn_encrypt_one(struct ovpn_peer *peer, struct sk_buff *skb)
+{
+ struct ovpn_crypto_key_slot *ks;
+
+ /* get primary key to be used for encrypting data */
+ ks = ovpn_crypto_key_slot_primary(&peer->crypto);
+ if (unlikely(!ks))
+ return false;
+
+ /* take a reference to the peer because the crypto code may run async.
+ * ovpn_encrypt_post() will release it upon completion
+ */
+ if (unlikely(!ovpn_peer_hold(peer))) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ ovpn_crypto_key_slot_put(ks);
+ return false;
+ }
+
+ memset(ovpn_skb_cb(skb), 0, sizeof(struct ovpn_cb));
+ ovpn_encrypt_post(skb, ovpn_aead_encrypt(peer, ks, skb));
+ return true;
+}
+
+/* encrypt the skb (or each segment of a GSO list) and send it to the given
+ * peer; consumes the caller's reference to the peer
+ */
+static void ovpn_send(struct ovpn_priv *ovpn, struct sk_buff *skb,
+ struct ovpn_peer *peer)
+{
+ struct sk_buff *curr, *next;
+
+ /* this might be a GSO-segmented skb list: process each skb
+ * independently
+ */
+ skb_list_walk_safe(skb, curr, next) {
+ if (unlikely(!ovpn_encrypt_one(peer, curr))) {
+ dev_dstats_tx_dropped(ovpn->dev);
+ kfree_skb(curr);
+ }
+ }
+
+ ovpn_peer_put(peer);
+}
+
+/* Send user data to the network
+ */
+netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ovpn_priv *ovpn = netdev_priv(dev);
+ struct sk_buff *segments, *curr, *next;
+ struct sk_buff_head skb_list;
+ struct ovpn_peer *peer;
+ __be16 proto;
+ int ret;
+
+ /* reset netfilter state */
+ nf_reset_ct(skb);
+
+ /* verify IP header size in network packet */
+ proto = ovpn_ip_check_protocol(skb);
+ if (unlikely(!proto || skb->protocol != proto))
+ goto drop;
+
+ if (skb_is_gso(skb)) {
+ segments = skb_gso_segment(skb, 0);
+ if (IS_ERR(segments)) {
+ ret = PTR_ERR(segments);
+ net_err_ratelimited("%s: cannot segment payload packet: %d\n",
+ netdev_name(dev), ret);
+ goto drop;
+ }
+
+ consume_skb(skb);
+ skb = segments;
+ }
+
+ /* from this moment on, "skb" might be a list */
+
+ __skb_queue_head_init(&skb_list);
+ skb_list_walk_safe(skb, curr, next) {
+ skb_mark_not_on_list(curr);
+
+ curr = skb_share_check(curr, GFP_ATOMIC);
+ if (unlikely(!curr)) {
+ net_err_ratelimited("%s: skb_share_check failed for payload packet\n",
+ netdev_name(dev));
+ dev_dstats_tx_dropped(ovpn->dev);
+ continue;
+ }
+
+ __skb_queue_tail(&skb_list, curr);
+ }
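+	/* NULL-terminate the queued skbs so they form a walkable list */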
+ skb_list.prev->next = NULL;
+
+ /* retrieve peer serving the destination IP of this packet */
+ peer = ovpn_peer_get_by_dst(ovpn, skb);
+ if (unlikely(!peer)) {
+ net_dbg_ratelimited("%s: no peer to send data to\n",
+ netdev_name(ovpn->dev));
+ goto drop;
+ }
+
+ ovpn_peer_stats_increment_tx(&peer->vpn_stats, skb->len);
+ ovpn_send(ovpn, skb_list.next, peer);
+
+ return NETDEV_TX_OK;
+
+drop:
+ dev_dstats_tx_dropped(ovpn->dev);
+ skb_tx_error(skb);
+ kfree_skb_list(skb);
+ return NET_XMIT_DROP;
+}
+
+/**
+ * ovpn_xmit_special - encrypt and transmit an out-of-band message to peer
+ * @peer: peer to send the message to
+ * @data: message content
+ * @len: message length
+ *
+ * Assumes that caller holds a reference to peer, which will be
+ * passed to ovpn_send()
+ */
+void ovpn_xmit_special(struct ovpn_peer *peer, const void *data,
+ const unsigned int len)
+{
+ struct ovpn_priv *ovpn;
+ struct sk_buff *skb;
+
+ ovpn = peer->ovpn;
+ if (unlikely(!ovpn)) {
+ ovpn_peer_put(peer);
+ return;
+ }
+
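+	/* allocate with extra room and reserve headroom so that the
+	 * encryption path can prepend its headers without reallocating
+	 */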
+ skb = alloc_skb(256 + len, GFP_ATOMIC);
+ if (unlikely(!skb)) {
+ ovpn_peer_put(peer);
+ return;
+ }
+
+ skb_reserve(skb, 128);
+ skb->priority = TC_PRIO_BESTEFFORT;
+ __skb_put_data(skb, data, len);
+
+ ovpn_send(ovpn, skb, peer);
+}
diff --git a/drivers/net/ovpn/io.h b/drivers/net/ovpn/io.h
new file mode 100644
index 000000000000..db9e10f9077c
--- /dev/null
+++ b/drivers/net/ovpn/io.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPN_H_
+#define _NET_OVPN_OVPN_H_
+
+/* DATA_V2 header size with AEAD encryption */
+#define OVPN_HEAD_ROOM (OVPN_OPCODE_SIZE + OVPN_NONCE_WIRE_SIZE + \
+ 16 /* AEAD TAG length */ + \
+ max(sizeof(struct udphdr), sizeof(struct tcphdr)) +\
+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr)))
+
+/* max padding required by encryption */
+#define OVPN_MAX_PADDING 16
+
+#define OVPN_KEEPALIVE_SIZE 16
+extern const unsigned char ovpn_keepalive_message[OVPN_KEEPALIVE_SIZE];
+
+netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev);
+
+void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb);
+void ovpn_xmit_special(struct ovpn_peer *peer, const void *data,
+ const unsigned int len);
+
+void ovpn_encrypt_post(void *data, int ret);
+void ovpn_decrypt_post(void *data, int ret);
+
+#endif /* _NET_OVPN_OVPN_H_ */
diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c
new file mode 100644
index 000000000000..0acb0934c1be
--- /dev/null
+++ b/drivers/net/ovpn/main.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ * James Yonan <james@openvpn.net>
+ */
+
+#include <linux/ethtool.h>
+#include <linux/genetlink.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <net/gro_cells.h>
+#include <net/ip.h>
+#include <net/rtnetlink.h>
+#include <uapi/linux/if_arp.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "netlink.h"
+#include "io.h"
+#include "peer.h"
+#include "proto.h"
+#include "tcp.h"
+#include "udp.h"
+
+static void ovpn_priv_free(struct net_device *net)
+{
+ struct ovpn_priv *ovpn = netdev_priv(net);
+
+ kfree(ovpn->peers);
+}
+
+static int ovpn_mp_alloc(struct ovpn_priv *ovpn)
+{
+ struct in_device *dev_v4;
+ int i;
+
+ if (ovpn->mode != OVPN_MODE_MP)
+ return 0;
+
+ dev_v4 = __in_dev_get_rtnl(ovpn->dev);
+ if (dev_v4) {
+ /* disable redirects as Linux gets confused by ovpn
+ * handling same-LAN routing.
+		 * This happens because a multipeer interface is used as a
+		 * relay point between hosts in the same subnet, while
+ * in a classic LAN this would not be needed because the
+ * two hosts would be able to talk directly.
+ */
+ IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
+ IPV4_DEVCONF_ALL(dev_net(ovpn->dev), SEND_REDIRECTS) = false;
+ }
+
+ /* the peer container is fairly large, therefore we allocate it only in
+ * MP mode
+ */
+ ovpn->peers = kzalloc(sizeof(*ovpn->peers), GFP_KERNEL);
+ if (!ovpn->peers)
+ return -ENOMEM;
+
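+	/* hash tables for peer lookup by ID, VPN address and transport
+	 * address
+	 */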
+ for (i = 0; i < ARRAY_SIZE(ovpn->peers->by_id); i++) {
+ INIT_HLIST_HEAD(&ovpn->peers->by_id[i]);
+ INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_vpn_addr4[i], i);
+ INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_vpn_addr6[i], i);
+ INIT_HLIST_NULLS_HEAD(&ovpn->peers->by_transp_addr[i], i);
+ }
+
+ return 0;
+}
+
+static int ovpn_net_init(struct net_device *dev)
+{
+ struct ovpn_priv *ovpn = netdev_priv(dev);
+ int err = gro_cells_init(&ovpn->gro_cells, dev);
+
+ if (err < 0)
+ return err;
+
+ err = ovpn_mp_alloc(ovpn);
+ if (err < 0) {
+ gro_cells_destroy(&ovpn->gro_cells);
+ return err;
+ }
+
+ return 0;
+}
+
+static void ovpn_net_uninit(struct net_device *dev)
+{
+ struct ovpn_priv *ovpn = netdev_priv(dev);
+
+ gro_cells_destroy(&ovpn->gro_cells);
+}
+
+static const struct net_device_ops ovpn_netdev_ops = {
+ .ndo_init = ovpn_net_init,
+ .ndo_uninit = ovpn_net_uninit,
+ .ndo_start_xmit = ovpn_net_xmit,
+};
+
+static const struct device_type ovpn_type = {
+ .name = OVPN_FAMILY_NAME,
+};
+
+static const struct nla_policy ovpn_policy[IFLA_OVPN_MAX + 1] = {
+ [IFLA_OVPN_MODE] = NLA_POLICY_RANGE(NLA_U8, OVPN_MODE_P2P,
+ OVPN_MODE_MP),
+};
+
+/**
+ * ovpn_dev_is_valid - check if the netdevice is of type 'ovpn'
+ * @dev: the interface to check
+ *
+ * Return: whether the netdevice is of type 'ovpn'
+ */
+bool ovpn_dev_is_valid(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ovpn_netdev_ops;
+}
+
+static void ovpn_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
+{
+ strscpy(info->driver, "ovpn", sizeof(info->driver));
+ strscpy(info->bus_info, "ovpn", sizeof(info->bus_info));
+}
+
+static const struct ethtool_ops ovpn_ethtool_ops = {
+ .get_drvinfo = ovpn_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+ .get_ts_info = ethtool_op_get_ts_info,
+};
+
+static void ovpn_setup(struct net_device *dev)
+{
+ netdev_features_t feat = NETIF_F_SG | NETIF_F_GSO |
+ NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA;
+
+ dev->needs_free_netdev = true;
+
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
+
+ dev->ethtool_ops = &ovpn_ethtool_ops;
+ dev->netdev_ops = &ovpn_netdev_ops;
+
+ dev->priv_destructor = ovpn_priv_free;
+
+ dev->hard_header_len = 0;
+ dev->addr_len = 0;
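+	/* default MTU: Ethernet payload size minus worst-case encap overhead */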
+ dev->mtu = ETH_DATA_LEN - OVPN_HEAD_ROOM;
+ dev->min_mtu = IPV4_MIN_MTU;
+ dev->max_mtu = IP_MAX_MTU - OVPN_HEAD_ROOM;
+
+ dev->type = ARPHRD_NONE;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ dev->priv_flags |= IFF_NO_QUEUE;
+
+ dev->lltx = true;
+ dev->features |= feat;
+ dev->hw_features |= feat;
+ dev->hw_enc_features |= feat;
+
+ dev->needed_headroom = ALIGN(OVPN_HEAD_ROOM, 4);
+ dev->needed_tailroom = OVPN_MAX_PADDING;
+
+ SET_NETDEV_DEVTYPE(dev, &ovpn_type);
+}
+
+static int ovpn_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct ovpn_priv *ovpn = netdev_priv(dev);
+ struct nlattr **data = params->data;
+ enum ovpn_mode mode = OVPN_MODE_P2P;
+
+ if (data && data[IFLA_OVPN_MODE]) {
+ mode = nla_get_u8(data[IFLA_OVPN_MODE]);
+ netdev_dbg(dev, "setting device mode: %u\n", mode);
+ }
+
+ ovpn->dev = dev;
+ ovpn->mode = mode;
+ spin_lock_init(&ovpn->lock);
+ INIT_DELAYED_WORK(&ovpn->keepalive_work, ovpn_peer_keepalive_work);
+
+ /* Set carrier explicitly after registration, this way state is
+ * clearly defined.
+ *
+ * In case of MP interfaces we keep the carrier always on.
+ *
+ * Carrier for P2P interfaces is initially off and it is then
+ * switched on and off when the remote peer is added or deleted.
+ */
+ if (ovpn->mode == OVPN_MODE_MP)
+ netif_carrier_on(dev);
+ else
+ netif_carrier_off(dev);
+
+ return register_netdevice(dev);
+}
+
+static void ovpn_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct ovpn_priv *ovpn = netdev_priv(dev);
+
+ cancel_delayed_work_sync(&ovpn->keepalive_work);
+ ovpn_peers_free(ovpn, NULL, OVPN_DEL_PEER_REASON_TEARDOWN);
+ unregister_netdevice_queue(dev, head);
+}
+
+static int ovpn_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ovpn_priv *ovpn = netdev_priv(dev);
+
+ if (nla_put_u8(skb, IFLA_OVPN_MODE, ovpn->mode))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static struct rtnl_link_ops ovpn_link_ops = {
+ .kind = "ovpn",
+ .netns_refund = false,
+ .priv_size = sizeof(struct ovpn_priv),
+ .setup = ovpn_setup,
+ .policy = ovpn_policy,
+ .maxtype = IFLA_OVPN_MAX,
+ .newlink = ovpn_newlink,
+ .dellink = ovpn_dellink,
+ .fill_info = ovpn_fill_info,
+};
+
+static int __init ovpn_init(void)
+{
+ int err = rtnl_link_register(&ovpn_link_ops);
+
+ if (err) {
+ pr_err("ovpn: can't register rtnl link ops: %d\n", err);
+ return err;
+ }
+
+ err = ovpn_nl_register();
+ if (err) {
+ pr_err("ovpn: can't register netlink family: %d\n", err);
+ goto unreg_rtnl;
+ }
+
+ ovpn_tcp_init();
+
+ return 0;
+
+unreg_rtnl:
+ rtnl_link_unregister(&ovpn_link_ops);
+ return err;
+}
+
+static __exit void ovpn_cleanup(void)
+{
+ ovpn_nl_unregister();
+ rtnl_link_unregister(&ovpn_link_ops);
+
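+	/* wait for pending call_rcu() callbacks (e.g. key slot destruction)
+	 * to complete before the module text goes away
+	 */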
+ rcu_barrier();
+}
+
+module_init(ovpn_init);
+module_exit(ovpn_cleanup);
+
+MODULE_DESCRIPTION("OpenVPN data channel offload (ovpn)");
+MODULE_AUTHOR("Antonio Quartulli <antonio@openvpn.net>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ovpn/main.h b/drivers/net/ovpn/main.h
new file mode 100644
index 000000000000..017cd0100765
--- /dev/null
+++ b/drivers/net/ovpn/main.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_MAIN_H_
+#define _NET_OVPN_MAIN_H_
+
+bool ovpn_dev_is_valid(const struct net_device *dev);
+
+#endif /* _NET_OVPN_MAIN_H_ */
diff --git a/drivers/net/ovpn/netlink-gen.c b/drivers/net/ovpn/netlink-gen.c
new file mode 100644
index 000000000000..58e1a4342378
--- /dev/null
+++ b/drivers/net/ovpn/netlink-gen.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/ovpn.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "netlink-gen.h"
+
+#include <uapi/linux/ovpn.h>
+
+/* Integer value ranges */
+static const struct netlink_range_validation ovpn_a_peer_id_range = {
+ .max = 16777215ULL,
+};
+
+static const struct netlink_range_validation ovpn_a_keyconf_peer_id_range = {
+ .max = 16777215ULL,
+};
+
+/* Common nested types */
+const struct nla_policy ovpn_keyconf_nl_policy[OVPN_A_KEYCONF_DECRYPT_DIR + 1] = {
+ [OVPN_A_KEYCONF_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_keyconf_peer_id_range),
+ [OVPN_A_KEYCONF_SLOT] = NLA_POLICY_MAX(NLA_U32, 1),
+ [OVPN_A_KEYCONF_KEY_ID] = NLA_POLICY_MAX(NLA_U32, 7),
+ [OVPN_A_KEYCONF_CIPHER_ALG] = NLA_POLICY_MAX(NLA_U32, 2),
+ [OVPN_A_KEYCONF_ENCRYPT_DIR] = NLA_POLICY_NESTED(ovpn_keydir_nl_policy),
+ [OVPN_A_KEYCONF_DECRYPT_DIR] = NLA_POLICY_NESTED(ovpn_keydir_nl_policy),
+};
+
+const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1] = {
+ [OVPN_A_KEYDIR_CIPHER_KEY] = NLA_POLICY_MAX_LEN(256),
+ [OVPN_A_KEYDIR_NONCE_TAIL] = NLA_POLICY_EXACT_LEN(OVPN_NONCE_TAIL_SIZE),
+};
+
+const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1] = {
+ [OVPN_A_PEER_ID] = NLA_POLICY_FULL_RANGE(NLA_U32, &ovpn_a_peer_id_range),
+ [OVPN_A_PEER_REMOTE_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_REMOTE_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID] = { .type = NLA_U32, },
+ [OVPN_A_PEER_REMOTE_PORT] = NLA_POLICY_MIN(NLA_BE16, 1),
+ [OVPN_A_PEER_SOCKET] = { .type = NLA_U32, },
+ [OVPN_A_PEER_SOCKET_NETNSID] = { .type = NLA_S32, },
+ [OVPN_A_PEER_VPN_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_VPN_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_LOCAL_IPV4] = { .type = NLA_BE32, },
+ [OVPN_A_PEER_LOCAL_IPV6] = NLA_POLICY_EXACT_LEN(16),
+ [OVPN_A_PEER_LOCAL_PORT] = NLA_POLICY_MIN(NLA_BE16, 1),
+ [OVPN_A_PEER_KEEPALIVE_INTERVAL] = { .type = NLA_U32, },
+ [OVPN_A_PEER_KEEPALIVE_TIMEOUT] = { .type = NLA_U32, },
+ [OVPN_A_PEER_DEL_REASON] = NLA_POLICY_MAX(NLA_U32, 4),
+ [OVPN_A_PEER_VPN_RX_BYTES] = { .type = NLA_UINT, },
+ [OVPN_A_PEER_VPN_TX_BYTES] = { .type = NLA_UINT, },
+ [OVPN_A_PEER_VPN_RX_PACKETS] = { .type = NLA_UINT, },
+ [OVPN_A_PEER_VPN_TX_PACKETS] = { .type = NLA_UINT, },
+ [OVPN_A_PEER_LINK_RX_BYTES] = { .type = NLA_UINT, },
+ [OVPN_A_PEER_LINK_TX_BYTES] = { .type = NLA_UINT, },
+ [OVPN_A_PEER_LINK_RX_PACKETS] = { .type = NLA_UINT, },
+ [OVPN_A_PEER_LINK_TX_PACKETS] = { .type = NLA_UINT, },
+};
+
+/* OVPN_CMD_PEER_NEW - do */
+static const struct nla_policy ovpn_peer_new_nl_policy[OVPN_A_PEER + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+ [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy),
+};
+
+/* OVPN_CMD_PEER_SET - do */
+static const struct nla_policy ovpn_peer_set_nl_policy[OVPN_A_PEER + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+ [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy),
+};
+
+/* OVPN_CMD_PEER_GET - do */
+static const struct nla_policy ovpn_peer_get_do_nl_policy[OVPN_A_PEER + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+ [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy),
+};
+
+/* OVPN_CMD_PEER_GET - dump */
+static const struct nla_policy ovpn_peer_get_dump_nl_policy[OVPN_A_IFINDEX + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+};
+
+/* OVPN_CMD_PEER_DEL - do */
+static const struct nla_policy ovpn_peer_del_nl_policy[OVPN_A_PEER + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+ [OVPN_A_PEER] = NLA_POLICY_NESTED(ovpn_peer_nl_policy),
+};
+
+/* OVPN_CMD_KEY_NEW - do */
+static const struct nla_policy ovpn_key_new_nl_policy[OVPN_A_KEYCONF + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+ [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy),
+};
+
+/* OVPN_CMD_KEY_GET - do */
+static const struct nla_policy ovpn_key_get_nl_policy[OVPN_A_KEYCONF + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+ [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy),
+};
+
+/* OVPN_CMD_KEY_SWAP - do */
+static const struct nla_policy ovpn_key_swap_nl_policy[OVPN_A_KEYCONF + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+ [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy),
+};
+
+/* OVPN_CMD_KEY_DEL - do */
+static const struct nla_policy ovpn_key_del_nl_policy[OVPN_A_KEYCONF + 1] = {
+ [OVPN_A_IFINDEX] = { .type = NLA_U32, },
+ [OVPN_A_KEYCONF] = NLA_POLICY_NESTED(ovpn_keyconf_nl_policy),
+};
+
+/* Ops table for ovpn */
+static const struct genl_split_ops ovpn_nl_ops[] = {
+ {
+ .cmd = OVPN_CMD_PEER_NEW,
+ .pre_doit = ovpn_nl_pre_doit,
+ .doit = ovpn_nl_peer_new_doit,
+ .post_doit = ovpn_nl_post_doit,
+ .policy = ovpn_peer_new_nl_policy,
+ .maxattr = OVPN_A_PEER,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = OVPN_CMD_PEER_SET,
+ .pre_doit = ovpn_nl_pre_doit,
+ .doit = ovpn_nl_peer_set_doit,
+ .post_doit = ovpn_nl_post_doit,
+ .policy = ovpn_peer_set_nl_policy,
+ .maxattr = OVPN_A_PEER,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = OVPN_CMD_PEER_GET,
+ .pre_doit = ovpn_nl_pre_doit,
+ .doit = ovpn_nl_peer_get_doit,
+ .post_doit = ovpn_nl_post_doit,
+ .policy = ovpn_peer_get_do_nl_policy,
+ .maxattr = OVPN_A_PEER,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = OVPN_CMD_PEER_GET,
+ .dumpit = ovpn_nl_peer_get_dumpit,
+ .policy = ovpn_peer_get_dump_nl_policy,
+ .maxattr = OVPN_A_IFINDEX,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = OVPN_CMD_PEER_DEL,
+ .pre_doit = ovpn_nl_pre_doit,
+ .doit = ovpn_nl_peer_del_doit,
+ .post_doit = ovpn_nl_post_doit,
+ .policy = ovpn_peer_del_nl_policy,
+ .maxattr = OVPN_A_PEER,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = OVPN_CMD_KEY_NEW,
+ .pre_doit = ovpn_nl_pre_doit,
+ .doit = ovpn_nl_key_new_doit,
+ .post_doit = ovpn_nl_post_doit,
+ .policy = ovpn_key_new_nl_policy,
+ .maxattr = OVPN_A_KEYCONF,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = OVPN_CMD_KEY_GET,
+ .pre_doit = ovpn_nl_pre_doit,
+ .doit = ovpn_nl_key_get_doit,
+ .post_doit = ovpn_nl_post_doit,
+ .policy = ovpn_key_get_nl_policy,
+ .maxattr = OVPN_A_KEYCONF,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = OVPN_CMD_KEY_SWAP,
+ .pre_doit = ovpn_nl_pre_doit,
+ .doit = ovpn_nl_key_swap_doit,
+ .post_doit = ovpn_nl_post_doit,
+ .policy = ovpn_key_swap_nl_policy,
+ .maxattr = OVPN_A_KEYCONF,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = OVPN_CMD_KEY_DEL,
+ .pre_doit = ovpn_nl_pre_doit,
+ .doit = ovpn_nl_key_del_doit,
+ .post_doit = ovpn_nl_post_doit,
+ .policy = ovpn_key_del_nl_policy,
+ .maxattr = OVPN_A_KEYCONF,
+ .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+ },
+};
+
+static const struct genl_multicast_group ovpn_nl_mcgrps[] = {
+ [OVPN_NLGRP_PEERS] = { "peers", },
+};
+
+struct genl_family ovpn_nl_family __ro_after_init = {
+ .name = OVPN_FAMILY_NAME,
+ .version = OVPN_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = ovpn_nl_ops,
+ .n_split_ops = ARRAY_SIZE(ovpn_nl_ops),
+ .mcgrps = ovpn_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(ovpn_nl_mcgrps),
+};
diff --git a/drivers/net/ovpn/netlink-gen.h b/drivers/net/ovpn/netlink-gen.h
new file mode 100644
index 000000000000..66a4e4a0a055
--- /dev/null
+++ b/drivers/net/ovpn/netlink-gen.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/ovpn.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_OVPN_GEN_H
+#define _LINUX_OVPN_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/ovpn.h>
+
+/* Common nested types */
+extern const struct nla_policy ovpn_keyconf_nl_policy[OVPN_A_KEYCONF_DECRYPT_DIR + 1];
+extern const struct nla_policy ovpn_keydir_nl_policy[OVPN_A_KEYDIR_NONCE_TAIL + 1];
+extern const struct nla_policy ovpn_peer_nl_policy[OVPN_A_PEER_LINK_TX_PACKETS + 1];
+
+int ovpn_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+void
+ovpn_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info);
+
+int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info);
+int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info);
+int ovpn_nl_peer_get_doit(struct sk_buff *skb, struct genl_info *info);
+int ovpn_nl_peer_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info);
+int ovpn_nl_key_new_doit(struct sk_buff *skb, struct genl_info *info);
+int ovpn_nl_key_get_doit(struct sk_buff *skb, struct genl_info *info);
+int ovpn_nl_key_swap_doit(struct sk_buff *skb, struct genl_info *info);
+int ovpn_nl_key_del_doit(struct sk_buff *skb, struct genl_info *info);
+
+enum {
+ OVPN_NLGRP_PEERS,
+};
+
+extern struct genl_family ovpn_nl_family;
+
+#endif /* _LINUX_OVPN_GEN_H */
diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c
new file mode 100644
index 000000000000..bea03913bfb1
--- /dev/null
+++ b/drivers/net/ovpn/netlink.c
@@ -0,0 +1,1258 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/ovpn.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "netlink.h"
+#include "netlink-gen.h"
+#include "bind.h"
+#include "crypto.h"
+#include "peer.h"
+#include "socket.h"
+
+MODULE_ALIAS_GENL_FAMILY(OVPN_FAMILY_NAME);
+
+/**
+ * ovpn_get_dev_from_attrs - retrieve the ovpn private data from the netdevice
+ * a netlink message is targeting
+ * @net: network namespace where to look for the interface
+ * @info: generic netlink info from the user request
+ * @tracker: tracker object to be used for the netdev reference acquisition
+ *
+ * Return: the ovpn private data, if found, or an error otherwise
+ */
+static struct ovpn_priv *
+ovpn_get_dev_from_attrs(struct net *net, const struct genl_info *info,
+ netdevice_tracker *tracker)
+{
+ struct ovpn_priv *ovpn;
+ struct net_device *dev;
+ int ifindex;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_IFINDEX))
+ return ERR_PTR(-EINVAL);
+
+ ifindex = nla_get_u32(info->attrs[OVPN_A_IFINDEX]);
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "ifindex does not match any interface");
+ return ERR_PTR(-ENODEV);
+ }
+
+ if (!ovpn_dev_is_valid(dev)) {
+ rcu_read_unlock();
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "specified interface is not ovpn");
+ NL_SET_BAD_ATTR(info->extack, info->attrs[OVPN_A_IFINDEX]);
+ return ERR_PTR(-EINVAL);
+ }
+
+ ovpn = netdev_priv(dev);
+ netdev_hold(dev, tracker, GFP_ATOMIC);
+ rcu_read_unlock();
+
+ return ovpn;
+}
+
+int ovpn_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
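+	/* user_ptr[1] doubles as storage for the netdev tracker, while
+	 * user_ptr[0] carries the ovpn private data to the doit handlers
+	 */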
+ netdevice_tracker *tracker = (netdevice_tracker *)&info->user_ptr[1];
+ struct ovpn_priv *ovpn = ovpn_get_dev_from_attrs(genl_info_net(info),
+ info, tracker);
+
+ if (IS_ERR(ovpn))
+ return PTR_ERR(ovpn);
+
+ info->user_ptr[0] = ovpn;
+
+ return 0;
+}
+
+void ovpn_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
+ struct genl_info *info)
+{
+ netdevice_tracker *tracker = (netdevice_tracker *)&info->user_ptr[1];
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+
+ if (ovpn)
+ netdev_put(ovpn->dev, tracker);
+}
+
+static bool ovpn_nl_attr_sockaddr_remote(struct nlattr **attrs,
+ struct sockaddr_storage *ss)
+{
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_in *sin;
+ struct in6_addr *in6;
+ __be16 port = 0;
+ __be32 *in;
+
+ ss->ss_family = AF_UNSPEC;
+
+ if (attrs[OVPN_A_PEER_REMOTE_PORT])
+ port = nla_get_be16(attrs[OVPN_A_PEER_REMOTE_PORT]);
+
+ if (attrs[OVPN_A_PEER_REMOTE_IPV4]) {
+ ss->ss_family = AF_INET;
+ in = nla_data(attrs[OVPN_A_PEER_REMOTE_IPV4]);
+ } else if (attrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ ss->ss_family = AF_INET6;
+ in6 = nla_data(attrs[OVPN_A_PEER_REMOTE_IPV6]);
+ } else {
+ return false;
+ }
+
+ switch (ss->ss_family) {
+ case AF_INET6:
+ /* If this is a regular IPv6 just break and move on,
+ * otherwise switch to AF_INET and extract the IPv4 accordingly
+ */
+ if (!ipv6_addr_v4mapped(in6)) {
+ sin6 = (struct sockaddr_in6 *)ss;
+ sin6->sin6_port = port;
+ memcpy(&sin6->sin6_addr, in6, sizeof(*in6));
+ break;
+ }
+
+ /* v4-mapped-v6 address */
+ ss->ss_family = AF_INET;
+ in = &in6->s6_addr32[3];
+ fallthrough;
+ case AF_INET:
+ sin = (struct sockaddr_in *)ss;
+ sin->sin_port = port;
+ sin->sin_addr.s_addr = *in;
+ break;
+ }
+
+ return true;
+}
+
+static u8 *ovpn_nl_attr_local_ip(struct nlattr **attrs)
+{
+ u8 *addr6;
+
+ if (!attrs[OVPN_A_PEER_LOCAL_IPV4] && !attrs[OVPN_A_PEER_LOCAL_IPV6])
+ return NULL;
+
+ if (attrs[OVPN_A_PEER_LOCAL_IPV4])
+ return nla_data(attrs[OVPN_A_PEER_LOCAL_IPV4]);
+
+ addr6 = nla_data(attrs[OVPN_A_PEER_LOCAL_IPV6]);
+	/* if this is an IPv4-mapped IPv6 address, extract the actual
+	 * v4 address from the last 4 bytes
+	 */
+ if (ipv6_addr_v4mapped((struct in6_addr *)addr6))
+ return addr6 + 12;
+
+ return addr6;
+}
+
+static sa_family_t ovpn_nl_family_get(struct nlattr *addr4,
+ struct nlattr *addr6)
+{
+ if (addr4)
+ return AF_INET;
+
+ if (addr6) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)nla_data(addr6)))
+ return AF_INET;
+ return AF_INET6;
+ }
+
+ return AF_UNSPEC;
+}
+
+static int ovpn_nl_peer_precheck(struct ovpn_priv *ovpn,
+ struct genl_info *info,
+ struct nlattr **attrs)
+{
+ sa_family_t local_fam, remote_fam;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
+ OVPN_A_PEER_ID))
+ return -EINVAL;
+
+ if (attrs[OVPN_A_PEER_REMOTE_IPV4] && attrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "cannot specify both remote IPv4 or IPv6 address");
+ return -EINVAL;
+ }
+
+ if (!attrs[OVPN_A_PEER_REMOTE_IPV4] &&
+ !attrs[OVPN_A_PEER_REMOTE_IPV6] && attrs[OVPN_A_PEER_REMOTE_PORT]) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "cannot specify remote port without IP address");
+ return -EINVAL;
+ }
+
+ if ((attrs[OVPN_A_PEER_REMOTE_IPV4] ||
+ attrs[OVPN_A_PEER_REMOTE_IPV6]) &&
+ !attrs[OVPN_A_PEER_REMOTE_PORT]) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "cannot specify remote IP address without port");
+ return -EINVAL;
+ }
+
+ if (!attrs[OVPN_A_PEER_REMOTE_IPV4] &&
+ attrs[OVPN_A_PEER_LOCAL_IPV4]) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "cannot specify local IPv4 address without remote");
+ return -EINVAL;
+ }
+
+ if (!attrs[OVPN_A_PEER_REMOTE_IPV6] &&
+ attrs[OVPN_A_PEER_LOCAL_IPV6]) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "cannot specify local IPV6 address without remote");
+ return -EINVAL;
+ }
+
+ /* check that local and remote address families are the same even
+ * after parsing v4mapped IPv6 addresses.
+ * (if addresses are not provided, family will be AF_UNSPEC and
+ * the check is skipped)
+ */
+ local_fam = ovpn_nl_family_get(attrs[OVPN_A_PEER_LOCAL_IPV4],
+ attrs[OVPN_A_PEER_LOCAL_IPV6]);
+ remote_fam = ovpn_nl_family_get(attrs[OVPN_A_PEER_REMOTE_IPV4],
+ attrs[OVPN_A_PEER_REMOTE_IPV6]);
+ if (local_fam != AF_UNSPEC && remote_fam != AF_UNSPEC &&
+ local_fam != remote_fam) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "mismatching local and remote address families");
+ return -EINVAL;
+ }
+
+ if (remote_fam != AF_INET6 && attrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "cannot specify scope id without remote IPv6 address");
+ return -EINVAL;
+ }
+
+ /* VPN IPs are needed only in MP mode for selecting the right peer */
+ if (ovpn->mode == OVPN_MODE_P2P && (attrs[OVPN_A_PEER_VPN_IPV4] ||
+ attrs[OVPN_A_PEER_VPN_IPV6])) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "unexpected VPN IP in P2P mode");
+ return -EINVAL;
+ }
+
+ if ((attrs[OVPN_A_PEER_KEEPALIVE_INTERVAL] &&
+ !attrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) ||
+ (!attrs[OVPN_A_PEER_KEEPALIVE_INTERVAL] &&
+ attrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "keepalive interval and timeout are required together");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * ovpn_nl_peer_modify - modify the peer attributes according to the incoming msg
+ * @peer: the peer to modify
+ * @info: generic netlink info from the user request
+ * @attrs: the attributes from the user request
+ *
+ * Return: a negative error code in case of failure, 0 on success, or 1 on
+ * success when the VPN IPs were modified (which requires rehashing in MP
+ * mode)
+ */
+static int ovpn_nl_peer_modify(struct ovpn_peer *peer, struct genl_info *info,
+ struct nlattr **attrs)
+{
+ struct sockaddr_storage ss = {};
+ void *local_ip = NULL;
+ u32 interv, timeout;
+ bool rehash = false;
+ int ret;
+
+ spin_lock_bh(&peer->lock);
+
+ if (ovpn_nl_attr_sockaddr_remote(attrs, &ss)) {
+ /* we carry the local IP in a generic container.
+ * ovpn_peer_reset_sockaddr() will properly interpret it
+ * based on ss.ss_family
+ */
+ local_ip = ovpn_nl_attr_local_ip(attrs);
+
+ /* set peer sockaddr */
+ ret = ovpn_peer_reset_sockaddr(peer, &ss, local_ip);
+ if (ret < 0) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot set peer sockaddr: %d",
+ ret);
+ goto err_unlock;
+ }
+ dst_cache_reset(&peer->dst_cache);
+ }
+
+ if (attrs[OVPN_A_PEER_VPN_IPV4]) {
+ rehash = true;
+ peer->vpn_addrs.ipv4.s_addr =
+ nla_get_in_addr(attrs[OVPN_A_PEER_VPN_IPV4]);
+ }
+
+ if (attrs[OVPN_A_PEER_VPN_IPV6]) {
+ rehash = true;
+ peer->vpn_addrs.ipv6 =
+ nla_get_in6_addr(attrs[OVPN_A_PEER_VPN_IPV6]);
+ }
+
+ /* when setting the keepalive, both parameters have to be configured */
+ if (attrs[OVPN_A_PEER_KEEPALIVE_INTERVAL] &&
+ attrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) {
+ interv = nla_get_u32(attrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]);
+ timeout = nla_get_u32(attrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]);
+ ovpn_peer_keepalive_set(peer, interv, timeout);
+ }
+
+ netdev_dbg(peer->ovpn->dev,
+ "modify peer id=%u endpoint=%pIScp VPN-IPv4=%pI4 VPN-IPv6=%pI6c\n",
+ peer->id, &ss,
+ &peer->vpn_addrs.ipv4.s_addr, &peer->vpn_addrs.ipv6);
+
+ spin_unlock_bh(&peer->lock);
+
+ return rehash ? 1 : 0;
+err_unlock:
+ spin_unlock_bh(&peer->lock);
+ return ret;
+}
+
+int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+ struct ovpn_socket *ovpn_sock;
+ struct socket *sock = NULL;
+ struct ovpn_peer *peer;
+ u32 sockfd, peer_id;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
+ return -EINVAL;
+
+ ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
+ ovpn_peer_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ ret = ovpn_nl_peer_precheck(ovpn, info, attrs);
+ if (ret < 0)
+ return ret;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
+ OVPN_A_PEER_SOCKET))
+ return -EINVAL;
+
+ /* in MP mode VPN IPs are required for selecting the right peer */
+ if (ovpn->mode == OVPN_MODE_MP && !attrs[OVPN_A_PEER_VPN_IPV4] &&
+ !attrs[OVPN_A_PEER_VPN_IPV6]) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "VPN IP must be provided in MP mode");
+ return -EINVAL;
+ }
+
+ peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
+ peer = ovpn_peer_new(ovpn, peer_id);
+ if (IS_ERR(peer)) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot create new peer object for peer %u: %ld",
+ peer_id, PTR_ERR(peer));
+ return PTR_ERR(peer);
+ }
+
+ /* lookup the fd in the kernel table and extract the socket object */
+ sockfd = nla_get_u32(attrs[OVPN_A_PEER_SOCKET]);
+ /* sockfd_lookup() increases sock's refcounter */
+ sock = sockfd_lookup(sockfd, &ret);
+ if (!sock) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot lookup peer socket (fd=%u): %d",
+ sockfd, ret);
+ ret = -ENOTSOCK;
+ goto peer_release;
+ }
+
+	/* The remote endpoint can be configured only when UDP is used as
+	 * the transport protocol, so that ovpn knows where to send packets.
+	 */
+ if (sock->sk->sk_protocol == IPPROTO_UDP &&
+ !attrs[OVPN_A_PEER_REMOTE_IPV4] &&
+ !attrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "missing remote IP address for UDP socket");
+ sockfd_put(sock);
+ ret = -EINVAL;
+ goto peer_release;
+ }
+
+ /* In case of TCP, the socket is connected to the peer and ovpn
+ * will just send bytes over it, without the need to specify a
+ * destination.
+ */
+ if (sock->sk->sk_protocol == IPPROTO_TCP &&
+ (attrs[OVPN_A_PEER_REMOTE_IPV4] ||
+ attrs[OVPN_A_PEER_REMOTE_IPV6])) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "unexpected remote IP address with TCP socket");
+ sockfd_put(sock);
+ ret = -EINVAL;
+ goto peer_release;
+ }
+
+ ovpn_sock = ovpn_socket_new(sock, peer);
+	/* at this point we unconditionally drop the reference to the socket:
+	 * - in case of error, the socket has to be dropped
+	 * - in case of success, the socket is configured and userspace
+	 *   keeps its own reference, so that it can trigger the final
+	 *   close()
+	 */
+ sockfd_put(sock);
+ if (IS_ERR(ovpn_sock)) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot encapsulate socket: %ld",
+ PTR_ERR(ovpn_sock));
+ ret = -ENOTSOCK;
+ goto peer_release;
+ }
+
+ rcu_assign_pointer(peer->sock, ovpn_sock);
+
+ ret = ovpn_nl_peer_modify(peer, info, attrs);
+ if (ret < 0)
+ goto sock_release;
+
+ ret = ovpn_peer_add(ovpn, peer);
+ if (ret < 0) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot add new peer (id=%u) to hashtable: %d",
+ peer->id, ret);
+ goto sock_release;
+ }
+
+ return 0;
+
+sock_release:
+ ovpn_socket_release(peer);
+peer_release:
+ /* release right away because peer was not yet hashed, thus it is not
+ * used in any context
+ */
+ ovpn_peer_release(peer);
+
+ return ret;
+}
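+
+/* Illustrative userspace flow (a sketch, not part of this patch):
+ *
+ *   int fd = socket(AF_INET, SOCK_DGRAM, 0);
+ *   bind(fd, ...);                          // local endpoint
+ *   // OVPN_CMD_PEER_NEW with OVPN_A_PEER_SOCKET = fd plus
+ *   // OVPN_A_PEER_REMOTE_IPV4 and OVPN_A_PEER_REMOTE_PORT
+ *
+ * For TCP the socket is expected to be already connected and the remote
+ * attributes must be omitted, as enforced above.
+ */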
+
+int ovpn_nl_peer_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+ u32 peer_id;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
+ return -EINVAL;
+
+ ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
+ ovpn_peer_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ ret = ovpn_nl_peer_precheck(ovpn, info, attrs);
+ if (ret < 0)
+ return ret;
+
+ if (attrs[OVPN_A_PEER_SOCKET]) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "socket cannot be modified");
+ return -EINVAL;
+ }
+
+ peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
+ peer = ovpn_peer_get_by_id(ovpn, peer_id);
+ if (!peer) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot find peer with id %u", peer_id);
+ return -ENOENT;
+ }
+
+ /* when using a TCP socket the remote IP is not expected */
+ rcu_read_lock();
+ sock = rcu_dereference(peer->sock);
+ if (sock && sock->sock->sk->sk_protocol == IPPROTO_TCP &&
+ (attrs[OVPN_A_PEER_REMOTE_IPV4] ||
+ attrs[OVPN_A_PEER_REMOTE_IPV6])) {
+ rcu_read_unlock();
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "unexpected remote IP address with TCP socket");
+ ovpn_peer_put(peer);
+ return -EINVAL;
+ }
+ rcu_read_unlock();
+
+ spin_lock_bh(&ovpn->lock);
+ ret = ovpn_nl_peer_modify(peer, info, attrs);
+ if (ret < 0) {
+ spin_unlock_bh(&ovpn->lock);
+ ovpn_peer_put(peer);
+ return ret;
+ }
+
+ /* ret == 1 means that VPN IPv4/6 has been modified and rehashing
+ * is required
+ */
+ if (ret > 0)
+ ovpn_peer_hash_vpn_ip(peer);
+ spin_unlock_bh(&ovpn->lock);
+ ovpn_peer_put(peer);
+
+ return 0;
+}
+
+static int ovpn_nl_send_peer(struct sk_buff *skb, const struct genl_info *info,
+ const struct ovpn_peer *peer, u32 portid, u32 seq,
+ int flags)
+{
+ const struct ovpn_bind *bind;
+ struct ovpn_socket *sock;
+ int ret = -EMSGSIZE;
+ struct nlattr *attr;
+ __be16 local_port;
+ void *hdr;
+ int id;
+
+ hdr = genlmsg_put(skb, portid, seq, &ovpn_nl_family, flags,
+ OVPN_CMD_PEER_GET);
+ if (!hdr)
+ return -ENOBUFS;
+
+ attr = nla_nest_start(skb, OVPN_A_PEER);
+ if (!attr)
+ goto err;
+
+ rcu_read_lock();
+ sock = rcu_dereference(peer->sock);
+ if (!sock) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+
+ if (!net_eq(genl_info_net(info), sock_net(sock->sock->sk))) {
+ id = peernet2id_alloc(genl_info_net(info),
+ sock_net(sock->sock->sk),
+ GFP_ATOMIC);
+ if (nla_put_s32(skb, OVPN_A_PEER_SOCKET_NETNSID, id))
+ goto err_unlock;
+ }
+ local_port = inet_sk(sock->sock->sk)->inet_sport;
+ rcu_read_unlock();
+
+ if (nla_put_u32(skb, OVPN_A_PEER_ID, peer->id))
+ goto err;
+
+ if (peer->vpn_addrs.ipv4.s_addr != htonl(INADDR_ANY))
+ if (nla_put_in_addr(skb, OVPN_A_PEER_VPN_IPV4,
+ peer->vpn_addrs.ipv4.s_addr))
+ goto err;
+
+ if (!ipv6_addr_equal(&peer->vpn_addrs.ipv6, &in6addr_any))
+ if (nla_put_in6_addr(skb, OVPN_A_PEER_VPN_IPV6,
+ &peer->vpn_addrs.ipv6))
+ goto err;
+
+ if (nla_put_u32(skb, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+ peer->keepalive_interval) ||
+ nla_put_u32(skb, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+ peer->keepalive_timeout))
+ goto err;
+
+ rcu_read_lock();
+ bind = rcu_dereference(peer->bind);
+ if (bind) {
+ if (bind->remote.in4.sin_family == AF_INET) {
+ if (nla_put_in_addr(skb, OVPN_A_PEER_REMOTE_IPV4,
+ bind->remote.in4.sin_addr.s_addr) ||
+ nla_put_net16(skb, OVPN_A_PEER_REMOTE_PORT,
+ bind->remote.in4.sin_port) ||
+ nla_put_in_addr(skb, OVPN_A_PEER_LOCAL_IPV4,
+ bind->local.ipv4.s_addr))
+ goto err_unlock;
+ } else if (bind->remote.in4.sin_family == AF_INET6) {
+ if (nla_put_in6_addr(skb, OVPN_A_PEER_REMOTE_IPV6,
+ &bind->remote.in6.sin6_addr) ||
+ nla_put_u32(skb, OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+ bind->remote.in6.sin6_scope_id) ||
+ nla_put_net16(skb, OVPN_A_PEER_REMOTE_PORT,
+ bind->remote.in6.sin6_port) ||
+ nla_put_in6_addr(skb, OVPN_A_PEER_LOCAL_IPV6,
+ &bind->local.ipv6))
+ goto err_unlock;
+ }
+ }
+ rcu_read_unlock();
+
+ if (nla_put_net16(skb, OVPN_A_PEER_LOCAL_PORT, local_port) ||
+ /* VPN RX stats */
+ nla_put_uint(skb, OVPN_A_PEER_VPN_RX_BYTES,
+ atomic64_read(&peer->vpn_stats.rx.bytes)) ||
+ nla_put_uint(skb, OVPN_A_PEER_VPN_RX_PACKETS,
+ atomic64_read(&peer->vpn_stats.rx.packets)) ||
+ /* VPN TX stats */
+ nla_put_uint(skb, OVPN_A_PEER_VPN_TX_BYTES,
+ atomic64_read(&peer->vpn_stats.tx.bytes)) ||
+ nla_put_uint(skb, OVPN_A_PEER_VPN_TX_PACKETS,
+ atomic64_read(&peer->vpn_stats.tx.packets)) ||
+ /* link RX stats */
+ nla_put_uint(skb, OVPN_A_PEER_LINK_RX_BYTES,
+ atomic64_read(&peer->link_stats.rx.bytes)) ||
+ nla_put_uint(skb, OVPN_A_PEER_LINK_RX_PACKETS,
+ atomic64_read(&peer->link_stats.rx.packets)) ||
+ /* link TX stats */
+ nla_put_uint(skb, OVPN_A_PEER_LINK_TX_BYTES,
+ atomic64_read(&peer->link_stats.tx.bytes)) ||
+ nla_put_uint(skb, OVPN_A_PEER_LINK_TX_PACKETS,
+ atomic64_read(&peer->link_stats.tx.packets)))
+ goto err;
+
+ nla_nest_end(skb, attr);
+ genlmsg_end(skb, hdr);
+
+ return 0;
+err_unlock:
+ rcu_read_unlock();
+err:
+ genlmsg_cancel(skb, hdr);
+ return ret;
+}
+
+int ovpn_nl_peer_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+ struct ovpn_peer *peer;
+ struct sk_buff *msg;
+ u32 peer_id;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
+ return -EINVAL;
+
+ ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
+ ovpn_peer_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
+ OVPN_A_PEER_ID))
+ return -EINVAL;
+
+ peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
+ peer = ovpn_peer_get_by_id(ovpn, peer_id);
+ if (!peer) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot find peer with id %u", peer_id);
+ return -ENOENT;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = ovpn_nl_send_peer(msg, info, peer, info->snd_portid,
+ info->snd_seq, 0);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ goto err;
+ }
+
+ ret = genlmsg_reply(msg, info);
+err:
+ ovpn_peer_put(peer);
+ return ret;
+}
+
+int ovpn_nl_peer_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ const struct genl_info *info = genl_info_dump(cb);
+ int bkt, last_idx = cb->args[1], dumped = 0;
+ netdevice_tracker tracker;
+ struct ovpn_priv *ovpn;
+ struct ovpn_peer *peer;
+
+ ovpn = ovpn_get_dev_from_attrs(sock_net(cb->skb->sk), info, &tracker);
+ if (IS_ERR(ovpn))
+ return PTR_ERR(ovpn);
+
+ if (ovpn->mode == OVPN_MODE_P2P) {
+ /* if we already dumped a peer it means we are done */
+ if (last_idx)
+ goto out;
+
+ rcu_read_lock();
+ peer = rcu_dereference(ovpn->peer);
+ if (peer) {
+ if (ovpn_nl_send_peer(skb, info, peer,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) == 0)
+ dumped++;
+ }
+ rcu_read_unlock();
+ } else {
+ rcu_read_lock();
+ hash_for_each_rcu(ovpn->peers->by_id, bkt, peer,
+ hash_entry_id) {
+ /* skip already dumped peers that were dumped by
+ * previous invocations
+ */
+ if (last_idx > 0) {
+ last_idx--;
+ continue;
+ }
+
+ if (ovpn_nl_send_peer(skb, info, peer,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ break;
+
+ /* count peers being dumped during this invocation */
+ dumped++;
+ }
+ rcu_read_unlock();
+ }
+
+out:
+ netdev_put(ovpn->dev, &tracker);
+
+	/* sum up peers dumped in this message, so that at the next invocation
+	 * we can continue from where we left off
+	 */
+ cb->args[1] += dumped;
+ return skb->len;
+}
+
+int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+ struct ovpn_peer *peer;
+ u32 peer_id;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
+ return -EINVAL;
+
+ ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
+ ovpn_peer_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
+ OVPN_A_PEER_ID))
+ return -EINVAL;
+
+ peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
+ peer = ovpn_peer_get_by_id(ovpn, peer_id);
+ if (!peer) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot find peer with id %u", peer_id);
+ return -ENOENT;
+ }
+
+ netdev_dbg(ovpn->dev, "del peer %u\n", peer->id);
+ ret = ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_USERSPACE);
+ ovpn_peer_put(peer);
+
+ return ret;
+}
+
+static int ovpn_nl_get_key_dir(struct genl_info *info, struct nlattr *key,
+ enum ovpn_cipher_alg cipher,
+ struct ovpn_key_direction *dir)
+{
+ struct nlattr *attrs[OVPN_A_KEYDIR_MAX + 1];
+ int ret;
+
+ ret = nla_parse_nested(attrs, OVPN_A_KEYDIR_MAX, key,
+ ovpn_keydir_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ switch (cipher) {
+ case OVPN_CIPHER_ALG_AES_GCM:
+ case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+ if (NL_REQ_ATTR_CHECK(info->extack, key, attrs,
+ OVPN_A_KEYDIR_CIPHER_KEY) ||
+ NL_REQ_ATTR_CHECK(info->extack, key, attrs,
+ OVPN_A_KEYDIR_NONCE_TAIL))
+ return -EINVAL;
+
+ dir->cipher_key = nla_data(attrs[OVPN_A_KEYDIR_CIPHER_KEY]);
+ dir->cipher_key_size = nla_len(attrs[OVPN_A_KEYDIR_CIPHER_KEY]);
+
+		/* These algorithms require a 96-bit nonce: construct it by
+		 * combining the 4-byte packet ID with the 8-byte nonce tail
+		 * provided by userspace
+		 */
+ dir->nonce_tail = nla_data(attrs[OVPN_A_KEYDIR_NONCE_TAIL]);
+ dir->nonce_tail_size = nla_len(attrs[OVPN_A_KEYDIR_NONCE_TAIL]);
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(info->extack, "unsupported cipher");
+ return -EINVAL;
+ }
+
+ return 0;
+}
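+
+/* Nonce layout sketch (illustrative): for AES-GCM and CHACHA20-POLY1305
+ * the 96-bit nonce used on the wire is:
+ *
+ *   [ 32-bit packet ID ][ 64-bit nonce tail from userspace ]
+ *
+ * i.e. 4 + 8 = 12 bytes, matching the nonce_tail/nonce_tail_size
+ * extracted above.
+ */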
+
+/**
+ * ovpn_nl_key_new_doit - configure a new key for the specified peer
+ * @skb: incoming netlink message
+ * @info: genetlink metadata
+ *
+ * This function allows the user to install a new key in the peer crypto
+ * state.
+ * Each peer has two 'slots', namely 'primary' and 'secondary', where
+ * keys can be installed. The key in the 'primary' slot is used for
+ * encryption, while both keys can be used for decryption by matching the
+ * key ID carried in the incoming packet.
+ *
+ * The user is responsible for rotating keys when necessary, and may fetch
+ * peer traffic statistics via netlink to better identify the right time to
+ * do so.
+ * The renegotiation follows these steps:
+ * 1. a new key is computed by the user and is installed in the 'secondary'
+ * slot
+ * 2. at user discretion (usually after a predetermined time) 'primary' and
+ * 'secondary' contents are swapped and the new key starts being used for
+ * encryption, while the old key is kept around for decryption of late
+ * packets.
+ *
+ * Return: 0 on success or a negative error code otherwise.
+ */
+int ovpn_nl_key_new_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[OVPN_A_KEYCONF_MAX + 1];
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+ struct ovpn_peer_key_reset pkr;
+ struct ovpn_peer *peer;
+ u32 peer_id;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF))
+ return -EINVAL;
+
+ ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
+ info->attrs[OVPN_A_KEYCONF],
+ ovpn_keyconf_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_PEER_ID))
+ return -EINVAL;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_SLOT) ||
+ NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_KEY_ID) ||
+ NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_CIPHER_ALG) ||
+ NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_ENCRYPT_DIR) ||
+ NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_DECRYPT_DIR))
+ return -EINVAL;
+
+ pkr.slot = nla_get_u32(attrs[OVPN_A_KEYCONF_SLOT]);
+ pkr.key.key_id = nla_get_u32(attrs[OVPN_A_KEYCONF_KEY_ID]);
+ pkr.key.cipher_alg = nla_get_u32(attrs[OVPN_A_KEYCONF_CIPHER_ALG]);
+
+ ret = ovpn_nl_get_key_dir(info, attrs[OVPN_A_KEYCONF_ENCRYPT_DIR],
+ pkr.key.cipher_alg, &pkr.key.encrypt);
+ if (ret < 0)
+ return ret;
+
+ ret = ovpn_nl_get_key_dir(info, attrs[OVPN_A_KEYCONF_DECRYPT_DIR],
+ pkr.key.cipher_alg, &pkr.key.decrypt);
+ if (ret < 0)
+ return ret;
+
+ peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]);
+ peer = ovpn_peer_get_by_id(ovpn, peer_id);
+ if (!peer) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "no peer with id %u to set key for",
+ peer_id);
+ return -ENOENT;
+ }
+
+ ret = ovpn_crypto_state_reset(&peer->crypto, &pkr);
+ if (ret < 0) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot install new key for peer %u",
+ peer_id);
+ goto out;
+ }
+
+ netdev_dbg(ovpn->dev, "new key installed (id=%u) for peer %u\n",
+ pkr.key.key_id, peer_id);
+out:
+ ovpn_peer_put(peer);
+ return ret;
+}
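+
+/* Illustrative rekeying sequence (a sketch, not part of this patch),
+ * driven entirely by userspace:
+ *
+ *   1. OVPN_CMD_KEY_NEW  (slot = secondary, new key material)
+ *   2. OVPN_CMD_KEY_SWAP (primary and secondary slots are exchanged)
+ *   3. OVPN_CMD_KEY_DEL  (slot = secondary, drops the old key once
+ *      late packets are no longer expected)
+ */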
+
+static int ovpn_nl_send_key(struct sk_buff *skb, const struct genl_info *info,
+ u32 peer_id, enum ovpn_key_slot slot,
+ const struct ovpn_key_config *keyconf)
+{
+ struct nlattr *attr;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq, &ovpn_nl_family,
+ 0, OVPN_CMD_KEY_GET);
+ if (!hdr)
+ return -ENOBUFS;
+
+ attr = nla_nest_start(skb, OVPN_A_KEYCONF);
+ if (!attr)
+ goto err;
+
+ if (nla_put_u32(skb, OVPN_A_KEYCONF_PEER_ID, peer_id))
+ goto err;
+
+ if (nla_put_u32(skb, OVPN_A_KEYCONF_SLOT, slot) ||
+ nla_put_u32(skb, OVPN_A_KEYCONF_KEY_ID, keyconf->key_id) ||
+ nla_put_u32(skb, OVPN_A_KEYCONF_CIPHER_ALG, keyconf->cipher_alg))
+ goto err;
+
+ nla_nest_end(skb, attr);
+ genlmsg_end(skb, hdr);
+
+ return 0;
+err:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+int ovpn_nl_key_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[OVPN_A_KEYCONF_MAX + 1];
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+ struct ovpn_key_config keyconf = { 0 };
+ enum ovpn_key_slot slot;
+ struct ovpn_peer *peer;
+ struct sk_buff *msg;
+ u32 peer_id;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF))
+ return -EINVAL;
+
+ ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
+ info->attrs[OVPN_A_KEYCONF],
+ ovpn_keyconf_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_PEER_ID))
+ return -EINVAL;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_SLOT))
+ return -EINVAL;
+
+ peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]);
+ peer = ovpn_peer_get_by_id(ovpn, peer_id);
+ if (!peer) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot find peer with id %u", peer_id);
+ return -ENOENT;
+ }
+
+ slot = nla_get_u32(attrs[OVPN_A_KEYCONF_SLOT]);
+
+ ret = ovpn_crypto_config_get(&peer->crypto, slot, &keyconf);
+ if (ret < 0) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "cannot extract key from slot %u for peer %u",
+ slot, peer_id);
+ goto err;
+ }
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = ovpn_nl_send_key(msg, info, peer->id, slot, &keyconf);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ goto err;
+ }
+
+ ret = genlmsg_reply(msg, info);
+err:
+ ovpn_peer_put(peer);
+ return ret;
+}
+
+int ovpn_nl_key_swap_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+ struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
+ struct ovpn_peer *peer;
+ u32 peer_id;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF))
+ return -EINVAL;
+
+ ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
+ info->attrs[OVPN_A_KEYCONF],
+ ovpn_keyconf_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_PEER_ID))
+ return -EINVAL;
+
+ peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]);
+ peer = ovpn_peer_get_by_id(ovpn, peer_id);
+ if (!peer) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "no peer with id %u to swap keys for",
+ peer_id);
+ return -ENOENT;
+ }
+
+ ovpn_crypto_key_slots_swap(&peer->crypto);
+ ovpn_peer_put(peer);
+
+ return 0;
+}
+
+int ovpn_nl_key_del_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *attrs[OVPN_A_KEYCONF_MAX + 1];
+ struct ovpn_priv *ovpn = info->user_ptr[0];
+ enum ovpn_key_slot slot;
+ struct ovpn_peer *peer;
+ u32 peer_id;
+ int ret;
+
+ if (GENL_REQ_ATTR_CHECK(info, OVPN_A_KEYCONF))
+ return -EINVAL;
+
+ ret = nla_parse_nested(attrs, OVPN_A_KEYCONF_MAX,
+ info->attrs[OVPN_A_KEYCONF],
+ ovpn_keyconf_nl_policy, info->extack);
+ if (ret)
+ return ret;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_PEER_ID))
+ return -EINVAL;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_KEYCONF], attrs,
+ OVPN_A_KEYCONF_SLOT))
+ return -EINVAL;
+
+ peer_id = nla_get_u32(attrs[OVPN_A_KEYCONF_PEER_ID]);
+ slot = nla_get_u32(attrs[OVPN_A_KEYCONF_SLOT]);
+
+ peer = ovpn_peer_get_by_id(ovpn, peer_id);
+ if (!peer) {
+ NL_SET_ERR_MSG_FMT_MOD(info->extack,
+ "no peer with id %u to delete key for",
+ peer_id);
+ return -ENOENT;
+ }
+
+ ovpn_crypto_key_slot_delete(&peer->crypto, slot);
+ ovpn_peer_put(peer);
+
+ return 0;
+}
+
+/**
+ * ovpn_nl_peer_del_notify - notify userspace about peer being deleted
+ * @peer: the peer being deleted
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_nl_peer_del_notify(struct ovpn_peer *peer)
+{
+ struct ovpn_socket *sock;
+ struct sk_buff *msg;
+ struct nlattr *attr;
+ int ret = -EMSGSIZE;
+ void *hdr;
+
+ netdev_info(peer->ovpn->dev, "deleting peer with id %u, reason %d\n",
+ peer->id, peer->delete_reason);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, 0, 0, &ovpn_nl_family, 0, OVPN_CMD_PEER_DEL_NTF);
+ if (!hdr) {
+ ret = -ENOBUFS;
+ goto err_free_msg;
+ }
+
+ if (nla_put_u32(msg, OVPN_A_IFINDEX, peer->ovpn->dev->ifindex))
+ goto err_cancel_msg;
+
+ attr = nla_nest_start(msg, OVPN_A_PEER);
+ if (!attr)
+ goto err_cancel_msg;
+
+ if (nla_put_u32(msg, OVPN_A_PEER_DEL_REASON, peer->delete_reason))
+ goto err_cancel_msg;
+
+ if (nla_put_u32(msg, OVPN_A_PEER_ID, peer->id))
+ goto err_cancel_msg;
+
+ nla_nest_end(msg, attr);
+
+ genlmsg_end(msg, hdr);
+
+ rcu_read_lock();
+ sock = rcu_dereference(peer->sock);
+ if (!sock) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+ genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
+ msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
+ rcu_read_unlock();
+
+ return 0;
+
+err_unlock:
+ rcu_read_unlock();
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+err_free_msg:
+ nlmsg_free(msg);
+ return ret;
+}
+
+/**
+ * ovpn_nl_key_swap_notify - notify userspace peer's key must be renewed
+ * @peer: the peer whose key needs to be renewed
+ * @key_id: the ID of the key that needs to be renewed
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_nl_key_swap_notify(struct ovpn_peer *peer, u8 key_id)
+{
+ struct ovpn_socket *sock;
+ struct nlattr *k_attr;
+ struct sk_buff *msg;
+ int ret = -EMSGSIZE;
+ void *hdr;
+
+ netdev_info(peer->ovpn->dev, "peer with id %u must rekey - primary key unusable.\n",
+ peer->id);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, 0, 0, &ovpn_nl_family, 0, OVPN_CMD_KEY_SWAP_NTF);
+ if (!hdr) {
+ ret = -ENOBUFS;
+ goto err_free_msg;
+ }
+
+ if (nla_put_u32(msg, OVPN_A_IFINDEX, peer->ovpn->dev->ifindex))
+ goto err_cancel_msg;
+
+ k_attr = nla_nest_start(msg, OVPN_A_KEYCONF);
+ if (!k_attr)
+ goto err_cancel_msg;
+
+ if (nla_put_u32(msg, OVPN_A_KEYCONF_PEER_ID, peer->id))
+ goto err_cancel_msg;
+
+	if (nla_put_u32(msg, OVPN_A_KEYCONF_KEY_ID, key_id))
+ goto err_cancel_msg;
+
+ nla_nest_end(msg, k_attr);
+ genlmsg_end(msg, hdr);
+
+ rcu_read_lock();
+ sock = rcu_dereference(peer->sock);
+ if (!sock) {
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+ genlmsg_multicast_netns(&ovpn_nl_family, sock_net(sock->sock->sk),
+ msg, 0, OVPN_NLGRP_PEERS, GFP_ATOMIC);
+ rcu_read_unlock();
+
+ return 0;
+err_unlock:
+ rcu_read_unlock();
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+err_free_msg:
+ nlmsg_free(msg);
+ return ret;
+}
+
+/**
+ * ovpn_nl_register - perform any needed registration in the NL subsystem
+ *
+ * Return: 0 on success, a negative error code otherwise
+ */
+int __init ovpn_nl_register(void)
+{
+ int ret = genl_register_family(&ovpn_nl_family);
+
+ if (ret) {
+ pr_err("ovpn: genl_register_family failed: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * ovpn_nl_unregister - undo any module wide netlink registration
+ */
+void ovpn_nl_unregister(void)
+{
+ genl_unregister_family(&ovpn_nl_family);
+}
diff --git a/drivers/net/ovpn/netlink.h b/drivers/net/ovpn/netlink.h
new file mode 100644
index 000000000000..8615dfc3c472
--- /dev/null
+++ b/drivers/net/ovpn/netlink.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_NETLINK_H_
+#define _NET_OVPN_NETLINK_H_
+
+int ovpn_nl_register(void);
+void ovpn_nl_unregister(void);
+
+int ovpn_nl_peer_del_notify(struct ovpn_peer *peer);
+int ovpn_nl_key_swap_notify(struct ovpn_peer *peer, u8 key_id);
+
+#endif /* _NET_OVPN_NETLINK_H_ */
diff --git a/drivers/net/ovpn/ovpnpriv.h b/drivers/net/ovpn/ovpnpriv.h
new file mode 100644
index 000000000000..5898f6adada7
--- /dev/null
+++ b/drivers/net/ovpn/ovpnpriv.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPNSTRUCT_H_
+#define _NET_OVPN_OVPNSTRUCT_H_
+
+#include <linux/workqueue.h>
+#include <net/gro_cells.h>
+#include <uapi/linux/if_link.h>
+#include <uapi/linux/ovpn.h>
+
+/**
+ * struct ovpn_peer_collection - container of peers for MultiPeer mode
+ * @by_id: table of peers indexed by ID
+ * @by_vpn_addr4: table of peers indexed by VPN IPv4 address (items can be
+ * rehashed on the fly due to peer IP change)
+ * @by_vpn_addr6: table of peers indexed by VPN IPv6 address (items can be
+ * rehashed on the fly due to peer IP change)
+ * @by_transp_addr: table of peers indexed by transport address (items can be
+ * rehashed on the fly due to peer IP change)
+ */
+struct ovpn_peer_collection {
+ DECLARE_HASHTABLE(by_id, 12);
+ struct hlist_nulls_head by_vpn_addr4[1 << 12];
+ struct hlist_nulls_head by_vpn_addr6[1 << 12];
+ struct hlist_nulls_head by_transp_addr[1 << 12];
+};
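+
+/* Sizing note (an observation, not a requirement): each table above has
+ * 2^12 = 4096 buckets, so on 64-bit systems the four tables together
+ * account for roughly 128 KiB of bucket heads per MP interface.
+ */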
+
+/**
+ * struct ovpn_priv - per ovpn interface state
+ * @dev: the actual netdev representing the tunnel
+ * @mode: device operation mode (e.g. p2p, mp, ...)
+ * @lock: protect this object
+ * @peers: data structures holding multi-peer references
+ * @peer: in P2P mode, this is the only remote peer
+ * @gro_cells: pointer to the Generic Receive Offload cell
+ * @keepalive_work: struct used to schedule keepalive periodic job
+ */
+struct ovpn_priv {
+ struct net_device *dev;
+ enum ovpn_mode mode;
+ spinlock_t lock; /* protect writing to the ovpn_priv object */
+ struct ovpn_peer_collection *peers;
+ struct ovpn_peer __rcu *peer;
+ struct gro_cells gro_cells;
+ struct delayed_work keepalive_work;
+};
+
+#endif /* _NET_OVPN_OVPNSTRUCT_H_ */
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
new file mode 100644
index 000000000000..a37f89fffb02
--- /dev/null
+++ b/drivers/net/ovpn/peer.c
@@ -0,0 +1,1365 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/skbuff.h>
+#include <linux/list.h>
+#include <linux/hashtable.h>
+#include <net/ip6_route.h>
+
+#include "ovpnpriv.h"
+#include "bind.h"
+#include "pktid.h"
+#include "crypto.h"
+#include "io.h"
+#include "main.h"
+#include "netlink.h"
+#include "peer.h"
+#include "socket.h"
+
+static void unlock_ovpn(struct ovpn_priv *ovpn,
+ struct llist_head *release_list)
+ __releases(&ovpn->lock)
+{
+ struct ovpn_peer *peer;
+
+ spin_unlock_bh(&ovpn->lock);
+
+ llist_for_each_entry(peer, release_list->first, release_entry) {
+ ovpn_socket_release(peer);
+ ovpn_peer_put(peer);
+ }
+}
+
+/**
+ * ovpn_peer_keepalive_set - configure keepalive values for peer
+ * @peer: the peer to configure
+ * @interval: outgoing keepalive interval
+ * @timeout: incoming keepalive timeout
+ */
+void ovpn_peer_keepalive_set(struct ovpn_peer *peer, u32 interval, u32 timeout)
+{
+ time64_t now = ktime_get_real_seconds();
+
+ netdev_dbg(peer->ovpn->dev,
+ "scheduling keepalive for peer %u: interval=%u timeout=%u\n",
+ peer->id, interval, timeout);
+
+ peer->keepalive_interval = interval;
+ WRITE_ONCE(peer->last_sent, now);
+ peer->keepalive_xmit_exp = now + interval;
+
+ peer->keepalive_timeout = timeout;
+ WRITE_ONCE(peer->last_recv, now);
+ peer->keepalive_recv_exp = now + timeout;
+
+ /* now that interval and timeout have been changed, kick
+ * off the worker so that the next delay can be recomputed
+ */
+ mod_delayed_work(system_wq, &peer->ovpn->keepalive_work, 0);
+}
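+
+/* Worked example (illustrative): with interval=10 and timeout=60, a
+ * keepalive is transmitted whenever nothing has been sent to the peer
+ * for 10s (keepalive_xmit_exp), while the peer is declared dead if
+ * nothing has been received from it for 60s (keepalive_recv_exp).
+ */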
+
+/**
+ * ovpn_peer_keepalive_send - periodic worker sending keepalive packets
+ * @work: pointer to the work member of the related peer object
+ *
+ * NOTE: the reference to peer is not dropped because it gets inherited
+ * by ovpn_xmit_special()
+ */
+static void ovpn_peer_keepalive_send(struct work_struct *work)
+{
+ struct ovpn_peer *peer = container_of(work, struct ovpn_peer,
+ keepalive_work);
+
+ local_bh_disable();
+ ovpn_xmit_special(peer, ovpn_keepalive_message,
+ sizeof(ovpn_keepalive_message));
+ local_bh_enable();
+}
+
+/**
+ * ovpn_peer_new - allocate and initialize a new peer object
+ * @ovpn: the openvpn instance inside which the peer should be created
+ * @id: the ID assigned to this peer
+ *
+ * Return: a pointer to the new peer on success or an error code otherwise
+ */
+struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
+{
+ struct ovpn_peer *peer;
+ int ret;
+
+ /* alloc and init peer object */
+ peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+ if (!peer)
+ return ERR_PTR(-ENOMEM);
+
+ peer->id = id;
+ peer->ovpn = ovpn;
+
+ peer->vpn_addrs.ipv4.s_addr = htonl(INADDR_ANY);
+ peer->vpn_addrs.ipv6 = in6addr_any;
+
+ RCU_INIT_POINTER(peer->bind, NULL);
+ ovpn_crypto_state_init(&peer->crypto);
+ spin_lock_init(&peer->lock);
+ kref_init(&peer->refcount);
+ ovpn_peer_stats_init(&peer->vpn_stats);
+ ovpn_peer_stats_init(&peer->link_stats);
+ INIT_WORK(&peer->keepalive_work, ovpn_peer_keepalive_send);
+
+ ret = dst_cache_init(&peer->dst_cache, GFP_KERNEL);
+ if (ret < 0) {
+ netdev_err(ovpn->dev,
+ "cannot initialize dst cache for peer %u\n",
+ peer->id);
+ kfree(peer);
+ return ERR_PTR(ret);
+ }
+
+ netdev_hold(ovpn->dev, &peer->dev_tracker, GFP_KERNEL);
+
+ return peer;
+}
+
+/**
+ * ovpn_peer_reset_sockaddr - recreate binding for peer
+ * @peer: peer to recreate the binding for
+ * @ss: sockaddr to use as remote endpoint for the binding
+ * @local_ip: local IP for the binding
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer,
+ const struct sockaddr_storage *ss,
+ const void *local_ip)
+{
+ struct ovpn_bind *bind;
+ size_t ip_len;
+
+ lockdep_assert_held(&peer->lock);
+
+ /* create new ovpn_bind object */
+ bind = ovpn_bind_from_sockaddr(ss);
+ if (IS_ERR(bind))
+ return PTR_ERR(bind);
+
+ if (local_ip) {
+ if (ss->ss_family == AF_INET) {
+ ip_len = sizeof(struct in_addr);
+ } else if (ss->ss_family == AF_INET6) {
+ ip_len = sizeof(struct in6_addr);
+ } else {
+			net_dbg_ratelimited("%s: invalid family %u for remote endpoint of peer %u\n",
+ netdev_name(peer->ovpn->dev),
+ ss->ss_family, peer->id);
+ kfree(bind);
+ return -EINVAL;
+ }
+
+ memcpy(&bind->local, local_ip, ip_len);
+ }
+
+ /* set binding */
+ ovpn_bind_reset(peer, bind);
+
+ return 0;
+}
+
+/* variable name __tbl2 needs to be different from __tbl1
+ * in the macro below to avoid confusing clang
+ */
+#define ovpn_get_hash_slot(_tbl, _key, _key_len) ({ \
+ typeof(_tbl) *__tbl2 = &(_tbl); \
+ jhash(_key, _key_len, 0) % HASH_SIZE(*__tbl2); \
+})
+
+#define ovpn_get_hash_head(_tbl, _key, _key_len) ({ \
+ typeof(_tbl) *__tbl1 = &(_tbl); \
+ &(*__tbl1)[ovpn_get_hash_slot(*__tbl1, _key, _key_len)];\
+})
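+
+/* Usage sketch (mirrors the call sites below):
+ *
+ *   head = ovpn_get_hash_head(ovpn->peers->by_id, &peer_id,
+ *			       sizeof(peer_id));
+ *
+ * jhash() reduces the key to a slot index modulo the table size, so both
+ * macros work on any array of heads declared in ovpn_peer_collection.
+ */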
+
+/**
+ * ovpn_peer_endpoints_update - update remote or local endpoint for peer
+ * @peer: peer to update the remote endpoint for
+ * @skb: incoming packet to retrieve the source/destination address from
+ */
+void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb)
+{
+ struct hlist_nulls_head *nhead;
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 *sa6;
+ bool reset_cache = false;
+ struct sockaddr_in *sa;
+ struct ovpn_bind *bind;
+ const void *local_ip;
+ size_t salen = 0;
+
+ spin_lock_bh(&peer->lock);
+ bind = rcu_dereference_protected(peer->bind,
+ lockdep_is_held(&peer->lock));
+ if (unlikely(!bind))
+ goto unlock;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ /* float check */
+ if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) {
+ /* unconditionally save local endpoint in case
+ * of float, as it may have changed as well
+ */
+ local_ip = &ip_hdr(skb)->daddr;
+ sa = (struct sockaddr_in *)&ss;
+ sa->sin_family = AF_INET;
+ sa->sin_addr.s_addr = ip_hdr(skb)->saddr;
+ sa->sin_port = udp_hdr(skb)->source;
+ salen = sizeof(*sa);
+ reset_cache = true;
+ break;
+ }
+
+ /* if no float happened, let's double check if the local endpoint
+ * has changed
+ */
+ if (unlikely(bind->local.ipv4.s_addr != ip_hdr(skb)->daddr)) {
+ net_dbg_ratelimited("%s: learning local IPv4 for peer %d (%pI4 -> %pI4)\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id, &bind->local.ipv4.s_addr,
+ &ip_hdr(skb)->daddr);
+ bind->local.ipv4.s_addr = ip_hdr(skb)->daddr;
+ reset_cache = true;
+ }
+ break;
+ case htons(ETH_P_IPV6):
+ /* float check */
+ if (unlikely(!ovpn_bind_skb_src_match(bind, skb))) {
+ /* unconditionally save local endpoint in case
+ * of float, as it may have changed as well
+ */
+ local_ip = &ipv6_hdr(skb)->daddr;
+ sa6 = (struct sockaddr_in6 *)&ss;
+ sa6->sin6_family = AF_INET6;
+ sa6->sin6_addr = ipv6_hdr(skb)->saddr;
+ sa6->sin6_port = udp_hdr(skb)->source;
+ sa6->sin6_scope_id = ipv6_iface_scope_id(&ipv6_hdr(skb)->saddr,
+ skb->skb_iif);
+ salen = sizeof(*sa6);
+ reset_cache = true;
+ break;
+ }
+
+ /* if no float happened, let's double check if the local endpoint
+ * has changed
+ */
+ if (unlikely(!ipv6_addr_equal(&bind->local.ipv6,
+ &ipv6_hdr(skb)->daddr))) {
+			net_dbg_ratelimited("%s: learning local IPv6 for peer %d (%pI6c -> %pI6c)\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id, &bind->local.ipv6,
+ &ipv6_hdr(skb)->daddr);
+ bind->local.ipv6 = ipv6_hdr(skb)->daddr;
+ reset_cache = true;
+ }
+ break;
+ default:
+ goto unlock;
+ }
+
+ if (unlikely(reset_cache))
+ dst_cache_reset(&peer->dst_cache);
+
+ /* if the peer did not float, we can bail out now */
+ if (likely(!salen))
+ goto unlock;
+
+ if (unlikely(ovpn_peer_reset_sockaddr(peer,
+ (struct sockaddr_storage *)&ss,
+ local_ip) < 0))
+ goto unlock;
+
+	net_dbg_ratelimited("%s: peer %d floated to %pIScp\n",
+ netdev_name(peer->ovpn->dev), peer->id, &ss);
+
+ spin_unlock_bh(&peer->lock);
+
+ /* rehashing is required only in MP mode as P2P has one peer
+ * only and thus there is no hashtable
+ */
+ if (peer->ovpn->mode == OVPN_MODE_MP) {
+ spin_lock_bh(&peer->ovpn->lock);
+ spin_lock_bh(&peer->lock);
+ bind = rcu_dereference_protected(peer->bind,
+ lockdep_is_held(&peer->lock));
+ if (unlikely(!bind)) {
+ spin_unlock_bh(&peer->lock);
+ spin_unlock_bh(&peer->ovpn->lock);
+ return;
+ }
+
+ /* This function may be invoked concurrently, therefore another
+ * float may have happened in parallel: perform rehashing
+ * using the peer->bind->remote directly as key
+ */
+
+ switch (bind->remote.in4.sin_family) {
+ case AF_INET:
+ salen = sizeof(*sa);
+ break;
+ case AF_INET6:
+ salen = sizeof(*sa6);
+ break;
+ }
+
+ /* remove old hashing */
+ hlist_nulls_del_init_rcu(&peer->hash_entry_transp_addr);
+ /* re-add with new transport address */
+ nhead = ovpn_get_hash_head(peer->ovpn->peers->by_transp_addr,
+ &bind->remote, salen);
+ hlist_nulls_add_head_rcu(&peer->hash_entry_transp_addr, nhead);
+ spin_unlock_bh(&peer->lock);
+ spin_unlock_bh(&peer->ovpn->lock);
+ }
+ return;
+unlock:
+ spin_unlock_bh(&peer->lock);
+}
+
+/**
+ * ovpn_peer_release_rcu - RCU callback performing last peer release steps
+ * @head: RCU member of the ovpn_peer
+ */
+static void ovpn_peer_release_rcu(struct rcu_head *head)
+{
+ struct ovpn_peer *peer = container_of(head, struct ovpn_peer, rcu);
+
+ /* this call will immediately free the dst_cache, therefore we
+ * perform it in the RCU callback, when all contexts are done
+ */
+ dst_cache_destroy(&peer->dst_cache);
+ kfree(peer);
+}
+
+/**
+ * ovpn_peer_release - release peer private members
+ * @peer: the peer to release
+ */
+void ovpn_peer_release(struct ovpn_peer *peer)
+{
+ ovpn_crypto_state_release(&peer->crypto);
+ spin_lock_bh(&peer->lock);
+ ovpn_bind_reset(peer, NULL);
+ spin_unlock_bh(&peer->lock);
+ call_rcu(&peer->rcu, ovpn_peer_release_rcu);
+ netdev_put(peer->ovpn->dev, &peer->dev_tracker);
+}
+
+/**
+ * ovpn_peer_release_kref - callback for kref_put
+ * @kref: the kref object belonging to the peer
+ */
+void ovpn_peer_release_kref(struct kref *kref)
+{
+ struct ovpn_peer *peer = container_of(kref, struct ovpn_peer, refcount);
+
+ ovpn_peer_release(peer);
+}
+
+/**
+ * ovpn_peer_skb_to_sockaddr - fill sockaddr with skb source address
+ * @skb: the packet to extract data from
+ * @ss: the sockaddr to fill
+ *
+ * Return: sockaddr length on success or -1 otherwise
+ */
+static int ovpn_peer_skb_to_sockaddr(struct sk_buff *skb,
+ struct sockaddr_storage *ss)
+{
+ struct sockaddr_in6 *sa6;
+ struct sockaddr_in *sa4;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ sa4 = (struct sockaddr_in *)ss;
+ sa4->sin_family = AF_INET;
+ sa4->sin_addr.s_addr = ip_hdr(skb)->saddr;
+ sa4->sin_port = udp_hdr(skb)->source;
+ return sizeof(*sa4);
+ case htons(ETH_P_IPV6):
+ sa6 = (struct sockaddr_in6 *)ss;
+ sa6->sin6_family = AF_INET6;
+ sa6->sin6_addr = ipv6_hdr(skb)->saddr;
+ sa6->sin6_port = udp_hdr(skb)->source;
+ return sizeof(*sa6);
+ }
+
+ return -1;
+}
+
+/**
+ * ovpn_nexthop_from_skb4 - retrieve IPv4 nexthop for outgoing skb
+ * @skb: the outgoing packet
+ *
+ * Return: the IPv4 of the nexthop
+ */
+static __be32 ovpn_nexthop_from_skb4(struct sk_buff *skb)
+{
+ const struct rtable *rt = skb_rtable(skb);
+
+ if (rt && rt->rt_uses_gateway)
+ return rt->rt_gw4;
+
+ return ip_hdr(skb)->daddr;
+}
+
+/**
+ * ovpn_nexthop_from_skb6 - retrieve IPv6 nexthop for outgoing skb
+ * @skb: the outgoing packet
+ *
+ * Return: the IPv6 of the nexthop
+ */
+static struct in6_addr ovpn_nexthop_from_skb6(struct sk_buff *skb)
+{
+ const struct rt6_info *rt = skb_rt6_info(skb);
+
+ if (!rt || !(rt->rt6i_flags & RTF_GATEWAY))
+ return ipv6_hdr(skb)->daddr;
+
+ return rt->rt6i_gateway;
+}
+
+/**
+ * ovpn_peer_get_by_vpn_addr4 - retrieve peer by its VPN IPv4 address
+ * @ovpn: the openvpn instance to search
+ * @addr: VPN IPv4 to use as search key
+ *
+ * Refcounter is not increased for the returned peer.
+ *
+ * Return: the peer if found or NULL otherwise
+ */
+static struct ovpn_peer *ovpn_peer_get_by_vpn_addr4(struct ovpn_priv *ovpn,
+ __be32 addr)
+{
+ struct hlist_nulls_head *nhead;
+ struct hlist_nulls_node *ntmp;
+ struct ovpn_peer *tmp;
+ unsigned int slot;
+
+begin:
+ slot = ovpn_get_hash_slot(ovpn->peers->by_vpn_addr4, &addr,
+ sizeof(addr));
+ nhead = &ovpn->peers->by_vpn_addr4[slot];
+
+ hlist_nulls_for_each_entry_rcu(tmp, ntmp, nhead, hash_entry_addr4)
+ if (addr == tmp->vpn_addrs.ipv4.s_addr)
+ return tmp;
+
+ /* item may have moved during lookup - check nulls and restart
+ * if that's the case
+ */
+ if (get_nulls_value(ntmp) != slot)
+ goto begin;
+
+ return NULL;
+}
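+
+/* A note on the nulls check above (assuming the tables are initialized
+ * with slot-encoded nulls values): the nulls marker terminating each list
+ * encodes the bucket index, so get_nulls_value() != slot means the entry
+ * was moved to another bucket by a concurrent rehash and the lookup must
+ * restart from the newly computed slot.
+ */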
+
+/**
+ * ovpn_peer_get_by_vpn_addr6 - retrieve peer by its VPN IPv6 address
+ * @ovpn: the openvpn instance to search
+ * @addr: VPN IPv6 to use as search key
+ *
+ * Refcounter is not increased for the returned peer.
+ *
+ * Return: the peer if found or NULL otherwise
+ */
+static struct ovpn_peer *ovpn_peer_get_by_vpn_addr6(struct ovpn_priv *ovpn,
+ struct in6_addr *addr)
+{
+ struct hlist_nulls_head *nhead;
+ struct hlist_nulls_node *ntmp;
+ struct ovpn_peer *tmp;
+ unsigned int slot;
+
+begin:
+ slot = ovpn_get_hash_slot(ovpn->peers->by_vpn_addr6, addr,
+ sizeof(*addr));
+ nhead = &ovpn->peers->by_vpn_addr6[slot];
+
+ hlist_nulls_for_each_entry_rcu(tmp, ntmp, nhead, hash_entry_addr6)
+ if (ipv6_addr_equal(addr, &tmp->vpn_addrs.ipv6))
+ return tmp;
+
+ /* item may have moved during lookup - check nulls and restart
+ * if that's the case
+ */
+ if (get_nulls_value(ntmp) != slot)
+ goto begin;
+
+ return NULL;
+}
+
+/**
+ * ovpn_peer_transp_match - check if sockaddr and peer binding match
+ * @peer: the peer to get the binding from
+ * @ss: the sockaddr to match
+ *
+ * Return: true if sockaddr and binding match or false otherwise
+ */
+static bool ovpn_peer_transp_match(const struct ovpn_peer *peer,
+ const struct sockaddr_storage *ss)
+{
+ struct ovpn_bind *bind = rcu_dereference(peer->bind);
+ struct sockaddr_in6 *sa6;
+ struct sockaddr_in *sa4;
+
+ if (unlikely(!bind))
+ return false;
+
+ if (ss->ss_family != bind->remote.in4.sin_family)
+ return false;
+
+ switch (ss->ss_family) {
+ case AF_INET:
+ sa4 = (struct sockaddr_in *)ss;
+ if (sa4->sin_addr.s_addr != bind->remote.in4.sin_addr.s_addr)
+ return false;
+ if (sa4->sin_port != bind->remote.in4.sin_port)
+ return false;
+ break;
+ case AF_INET6:
+ sa6 = (struct sockaddr_in6 *)ss;
+ if (!ipv6_addr_equal(&sa6->sin6_addr,
+ &bind->remote.in6.sin6_addr))
+ return false;
+ if (sa6->sin6_port != bind->remote.in6.sin6_port)
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * ovpn_peer_get_by_transp_addr_p2p - get peer by transport address in a P2P
+ * instance
+ * @ovpn: the openvpn instance to search
+ * @ss: the transport socket address
+ *
+ * Return: the peer if found or NULL otherwise
+ */
+static struct ovpn_peer *
+ovpn_peer_get_by_transp_addr_p2p(struct ovpn_priv *ovpn,
+ struct sockaddr_storage *ss)
+{
+ struct ovpn_peer *tmp, *peer = NULL;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(ovpn->peer);
+ if (likely(tmp && ovpn_peer_transp_match(tmp, ss) &&
+ ovpn_peer_hold(tmp)))
+ peer = tmp;
+ rcu_read_unlock();
+
+ return peer;
+}
+
+/**
+ * ovpn_peer_get_by_transp_addr - retrieve peer by transport address
+ * @ovpn: the openvpn instance to search
+ * @skb: the skb to retrieve the source transport address from
+ *
+ * Return: a pointer to the peer if found or NULL otherwise
+ */
+struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_priv *ovpn,
+ struct sk_buff *skb)
+{
+ struct ovpn_peer *tmp, *peer = NULL;
+ struct sockaddr_storage ss = { 0 };
+ struct hlist_nulls_head *nhead;
+ struct hlist_nulls_node *ntmp;
+ unsigned int slot;
+ ssize_t sa_len;
+
+ sa_len = ovpn_peer_skb_to_sockaddr(skb, &ss);
+ if (unlikely(sa_len < 0))
+ return NULL;
+
+ if (ovpn->mode == OVPN_MODE_P2P)
+ return ovpn_peer_get_by_transp_addr_p2p(ovpn, &ss);
+
+ rcu_read_lock();
+begin:
+ slot = ovpn_get_hash_slot(ovpn->peers->by_transp_addr, &ss, sa_len);
+ nhead = &ovpn->peers->by_transp_addr[slot];
+
+ hlist_nulls_for_each_entry_rcu(tmp, ntmp, nhead,
+ hash_entry_transp_addr) {
+ if (!ovpn_peer_transp_match(tmp, &ss))
+ continue;
+
+ if (!ovpn_peer_hold(tmp))
+ continue;
+
+ peer = tmp;
+ break;
+ }
+
+ /* item may have moved during lookup - check nulls and restart
+ * if that's the case
+ */
+ if (!peer && get_nulls_value(ntmp) != slot)
+ goto begin;
+ rcu_read_unlock();
+
+ return peer;
+}
+
+/**
+ * ovpn_peer_get_by_id_p2p - get peer by ID in a P2P instance
+ * @ovpn: the openvpn instance to search
+ * @peer_id: the ID of the peer to find
+ *
+ * Return: the peer if found or NULL otherwise
+ */
+static struct ovpn_peer *ovpn_peer_get_by_id_p2p(struct ovpn_priv *ovpn,
+ u32 peer_id)
+{
+ struct ovpn_peer *tmp, *peer = NULL;
+
+ rcu_read_lock();
+ tmp = rcu_dereference(ovpn->peer);
+ if (likely(tmp && tmp->id == peer_id && ovpn_peer_hold(tmp)))
+ peer = tmp;
+ rcu_read_unlock();
+
+ return peer;
+}
+
+/**
+ * ovpn_peer_get_by_id - retrieve peer by ID
+ * @ovpn: the openvpn instance to search
+ * @peer_id: the unique peer identifier to match
+ *
+ * Return: a pointer to the peer if found or NULL otherwise
+ */
+struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_priv *ovpn, u32 peer_id)
+{
+ struct ovpn_peer *tmp, *peer = NULL;
+ struct hlist_head *head;
+
+ if (ovpn->mode == OVPN_MODE_P2P)
+ return ovpn_peer_get_by_id_p2p(ovpn, peer_id);
+
+ head = ovpn_get_hash_head(ovpn->peers->by_id, &peer_id,
+ sizeof(peer_id));
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp, head, hash_entry_id) {
+ if (tmp->id != peer_id)
+ continue;
+
+ if (!ovpn_peer_hold(tmp))
+ continue;
+
+ peer = tmp;
+ break;
+ }
+ rcu_read_unlock();
+
+ return peer;
+}
+
+static void ovpn_peer_remove(struct ovpn_peer *peer,
+ enum ovpn_del_peer_reason reason,
+ struct llist_head *release_list)
+{
+ lockdep_assert_held(&peer->ovpn->lock);
+
+ switch (peer->ovpn->mode) {
+ case OVPN_MODE_MP:
+ /* prevent double remove */
+ if (hlist_unhashed(&peer->hash_entry_id))
+ return;
+
+ hlist_del_init_rcu(&peer->hash_entry_id);
+ hlist_nulls_del_init_rcu(&peer->hash_entry_addr4);
+ hlist_nulls_del_init_rcu(&peer->hash_entry_addr6);
+ hlist_nulls_del_init_rcu(&peer->hash_entry_transp_addr);
+ break;
+ case OVPN_MODE_P2P:
+ /* prevent double remove */
+ if (peer != rcu_access_pointer(peer->ovpn->peer))
+ return;
+
+ RCU_INIT_POINTER(peer->ovpn->peer, NULL);
+ /* in P2P mode the carrier is switched off when the peer is
+ * deleted so that third party protocols can react accordingly
+ */
+ netif_carrier_off(peer->ovpn->dev);
+ break;
+ }
+
+ peer->delete_reason = reason;
+ ovpn_nl_peer_del_notify(peer);
+
+ /* append to provided list for later socket release and ref drop */
+ llist_add(&peer->release_entry, release_list);
+}
+
+/**
+ * ovpn_peer_get_by_dst - Lookup peer to send skb to
+ * @ovpn: the private data representing the current VPN session
+ * @skb: the skb to extract the destination address from
+ *
+ * This function takes a tunnel packet and looks up the peer to send it to
+ * after encapsulation. The skb is expected to be the in-tunnel packet, without
+ * any OpenVPN related header.
+ *
+ * Assume that the IP header is accessible in the skb data.
+ *
+ * Return: the peer if found or NULL otherwise.
+ */
+struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn,
+ struct sk_buff *skb)
+{
+ struct ovpn_peer *peer = NULL;
+ struct in6_addr addr6;
+ __be32 addr4;
+
+ /* in P2P mode, no matter the destination, packets are always sent to
+ * the single peer listening on the other side
+ */
+ if (ovpn->mode == OVPN_MODE_P2P) {
+ rcu_read_lock();
+ peer = rcu_dereference(ovpn->peer);
+ if (unlikely(peer && !ovpn_peer_hold(peer)))
+ peer = NULL;
+ rcu_read_unlock();
+ return peer;
+ }
+
+ rcu_read_lock();
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ addr4 = ovpn_nexthop_from_skb4(skb);
+ peer = ovpn_peer_get_by_vpn_addr4(ovpn, addr4);
+ break;
+ case htons(ETH_P_IPV6):
+ addr6 = ovpn_nexthop_from_skb6(skb);
+ peer = ovpn_peer_get_by_vpn_addr6(ovpn, &addr6);
+ break;
+ }
+
+ if (unlikely(peer && !ovpn_peer_hold(peer)))
+ peer = NULL;
+ rcu_read_unlock();
+
+ return peer;
+}
+
+/**
+ * ovpn_nexthop_from_rt4 - look up the IPv4 nexthop for the given destination
+ * @ovpn: the private data representing the current VPN session
+ * @dest: the destination to be looked up
+ *
+ * Looks up in the IPv4 system routing table the IP of the nexthop to be used
+ * to reach the destination passed as argument. If no nexthop can be found, the
+ * destination itself is returned as it probably has to be used as nexthop.
+ *
+ * Return: the IP of the next hop if found or dest itself otherwise
+ */
+static __be32 ovpn_nexthop_from_rt4(struct ovpn_priv *ovpn, __be32 dest)
+{
+ struct rtable *rt;
+ struct flowi4 fl = {
+ .daddr = dest
+ };
+
+ rt = ip_route_output_flow(dev_net(ovpn->dev), &fl, NULL);
+ if (IS_ERR(rt)) {
+ net_dbg_ratelimited("%s: no route to host %pI4\n",
+ netdev_name(ovpn->dev), &dest);
+ /* if we end up here this packet is probably going to be
+ * thrown away later
+ */
+ return dest;
+ }
+
+ if (!rt->rt_uses_gateway)
+ goto out;
+
+ dest = rt->rt_gw4;
+out:
+ ip_rt_put(rt);
+ return dest;
+}
+
+/**
+ * ovpn_nexthop_from_rt6 - look up the IPv6 nexthop for the given destination
+ * @ovpn: the private data representing the current VPN session
+ * @dest: the destination to be looked up
+ *
+ * Looks up in the IPv6 system routing table the IP of the nexthop to be used
+ * to reach the destination passed as argument. If no nexthop can be found, the
+ * destination itself is returned as it probably has to be used as nexthop.
+ *
+ * Return: the IP of the next hop if found or dest itself otherwise
+ */
+static struct in6_addr ovpn_nexthop_from_rt6(struct ovpn_priv *ovpn,
+ struct in6_addr dest)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct dst_entry *entry;
+ struct rt6_info *rt;
+ struct flowi6 fl = {
+ .daddr = dest,
+ };
+
+ entry = ipv6_stub->ipv6_dst_lookup_flow(dev_net(ovpn->dev), NULL, &fl,
+ NULL);
+ if (IS_ERR(entry)) {
+ net_dbg_ratelimited("%s: no route to host %pI6c\n",
+ netdev_name(ovpn->dev), &dest);
+ /* if we end up here this packet is probably going to be
+ * thrown away later
+ */
+ return dest;
+ }
+
+ rt = dst_rt6_info(entry);
+
+ if (!(rt->rt6i_flags & RTF_GATEWAY))
+ goto out;
+
+ dest = rt->rt6i_gateway;
+out:
+ dst_release((struct dst_entry *)rt);
+#endif
+ return dest;
+}
+
+/**
+ * ovpn_peer_check_by_src - check that skb source is routed via peer
+ * @ovpn: the openvpn instance to search
+ * @skb: the packet to extract source address from
+ * @peer: the peer to check against the source address
+ *
+ * Return: true if the peer is matching or false otherwise
+ */
+bool ovpn_peer_check_by_src(struct ovpn_priv *ovpn, struct sk_buff *skb,
+ struct ovpn_peer *peer)
+{
+ bool match = false;
+ struct in6_addr addr6;
+ __be32 addr4;
+
+ if (ovpn->mode == OVPN_MODE_P2P) {
+ /* in P2P mode, no matter the destination, packets are always
+ * sent to the single peer listening on the other side
+ */
+ return peer == rcu_access_pointer(ovpn->peer);
+ }
+
+ /* This function performs a reverse path check, therefore we now
+ * lookup the nexthop we would use if we wanted to route a packet
+ * to the source IP. If the nexthop matches the sender we know the
+ * latter is valid and we allow the packet to come in
+ */
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ addr4 = ovpn_nexthop_from_rt4(ovpn, ip_hdr(skb)->saddr);
+ rcu_read_lock();
+ match = (peer == ovpn_peer_get_by_vpn_addr4(ovpn, addr4));
+ rcu_read_unlock();
+ break;
+ case htons(ETH_P_IPV6):
+ addr6 = ovpn_nexthop_from_rt6(ovpn, ipv6_hdr(skb)->saddr);
+ rcu_read_lock();
+ match = (peer == ovpn_peer_get_by_vpn_addr6(ovpn, &addr6));
+ rcu_read_unlock();
+ break;
+ }
+
+ return match;
+}
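+
+/* Worked example (illustrative addresses): if a packet arrives from VPN
+ * IP 10.8.0.2 via peer A, the code above resolves the nexthop for
+ * destination 10.8.0.2 and accepts the packet only if that nexthop maps
+ * back to peer A - a classic reverse path check.
+ */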
+
+void ovpn_peer_hash_vpn_ip(struct ovpn_peer *peer)
+{
+ struct hlist_nulls_head *nhead;
+
+ lockdep_assert_held(&peer->ovpn->lock);
+
+ /* rehashing makes sense only in multipeer mode */
+ if (peer->ovpn->mode != OVPN_MODE_MP)
+ return;
+
+ if (peer->vpn_addrs.ipv4.s_addr != htonl(INADDR_ANY)) {
+ /* remove potential old hashing */
+ hlist_nulls_del_init_rcu(&peer->hash_entry_addr4);
+
+ nhead = ovpn_get_hash_head(peer->ovpn->peers->by_vpn_addr4,
+ &peer->vpn_addrs.ipv4,
+ sizeof(peer->vpn_addrs.ipv4));
+ hlist_nulls_add_head_rcu(&peer->hash_entry_addr4, nhead);
+ }
+
+ if (!ipv6_addr_any(&peer->vpn_addrs.ipv6)) {
+ /* remove potential old hashing */
+ hlist_nulls_del_init_rcu(&peer->hash_entry_addr6);
+
+ nhead = ovpn_get_hash_head(peer->ovpn->peers->by_vpn_addr6,
+ &peer->vpn_addrs.ipv6,
+ sizeof(peer->vpn_addrs.ipv6));
+ hlist_nulls_add_head_rcu(&peer->hash_entry_addr6, nhead);
+ }
+}
+
+/**
+ * ovpn_peer_add_mp - add peer to related tables in a MP instance
+ * @ovpn: the instance to add the peer to
+ * @peer: the peer to add
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_add_mp(struct ovpn_priv *ovpn, struct ovpn_peer *peer)
+{
+ struct sockaddr_storage sa = { 0 };
+ struct hlist_nulls_head *nhead;
+ struct sockaddr_in6 *sa6;
+ struct sockaddr_in *sa4;
+ struct ovpn_bind *bind;
+ struct ovpn_peer *tmp;
+ size_t salen;
+ int ret = 0;
+
+ spin_lock_bh(&ovpn->lock);
+ /* do not add duplicates */
+ tmp = ovpn_peer_get_by_id(ovpn, peer->id);
+ if (tmp) {
+ ovpn_peer_put(tmp);
+ ret = -EEXIST;
+ goto out;
+ }
+
+ bind = rcu_dereference_protected(peer->bind, true);
+ /* peers connected via TCP have bind == NULL */
+ if (bind) {
+ switch (bind->remote.in4.sin_family) {
+ case AF_INET:
+ sa4 = (struct sockaddr_in *)&sa;
+
+ sa4->sin_family = AF_INET;
+ sa4->sin_addr.s_addr = bind->remote.in4.sin_addr.s_addr;
+ sa4->sin_port = bind->remote.in4.sin_port;
+ salen = sizeof(*sa4);
+ break;
+ case AF_INET6:
+ sa6 = (struct sockaddr_in6 *)&sa;
+
+ sa6->sin6_family = AF_INET6;
+ sa6->sin6_addr = bind->remote.in6.sin6_addr;
+ sa6->sin6_port = bind->remote.in6.sin6_port;
+ salen = sizeof(*sa6);
+ break;
+ default:
+ ret = -EPROTONOSUPPORT;
+ goto out;
+ }
+
+ nhead = ovpn_get_hash_head(ovpn->peers->by_transp_addr, &sa,
+ salen);
+ hlist_nulls_add_head_rcu(&peer->hash_entry_transp_addr, nhead);
+ }
+
+ hlist_add_head_rcu(&peer->hash_entry_id,
+ ovpn_get_hash_head(ovpn->peers->by_id, &peer->id,
+ sizeof(peer->id)));
+
+ ovpn_peer_hash_vpn_ip(peer);
+out:
+ spin_unlock_bh(&ovpn->lock);
+ return ret;
+}
+
+/**
+ * ovpn_peer_add_p2p - add peer to related tables in a P2P instance
+ * @ovpn: the instance to add the peer to
+ * @peer: the peer to add
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_add_p2p(struct ovpn_priv *ovpn, struct ovpn_peer *peer)
+{
+ LLIST_HEAD(release_list);
+ struct ovpn_peer *tmp;
+
+ spin_lock_bh(&ovpn->lock);
+ /* in p2p mode it is possible to have a single peer only, therefore the
+ * old one is released and substituted by the new one
+ */
+ tmp = rcu_dereference_protected(ovpn->peer,
+ lockdep_is_held(&ovpn->lock));
+ if (tmp)
+ ovpn_peer_remove(tmp, OVPN_DEL_PEER_REASON_TEARDOWN,
+ &release_list);
+
+ rcu_assign_pointer(ovpn->peer, peer);
+ /* in P2P mode the carrier is switched on when the peer is added */
+ netif_carrier_on(ovpn->dev);
+ unlock_ovpn(ovpn, &release_list);
+
+ return 0;
+}
+
+/**
+ * ovpn_peer_add - add peer to the related tables
+ * @ovpn: the openvpn instance the peer belongs to
+ * @peer: the peer object to add
+ *
+ * Assume refcounter was increased by caller
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_peer_add(struct ovpn_priv *ovpn, struct ovpn_peer *peer)
+{
+ switch (ovpn->mode) {
+ case OVPN_MODE_MP:
+ return ovpn_peer_add_mp(ovpn, peer);
+ case OVPN_MODE_P2P:
+ return ovpn_peer_add_p2p(ovpn, peer);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * ovpn_peer_del_mp - delete peer from related tables in a MP instance
+ * @peer: the peer to delete
+ * @reason: reason why the peer was deleted (sent to userspace)
+ * @release_list: list where the deleted peer should be appended
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_del_mp(struct ovpn_peer *peer,
+ enum ovpn_del_peer_reason reason,
+ struct llist_head *release_list)
+{
+ struct ovpn_peer *tmp;
+ int ret = -ENOENT;
+
+ lockdep_assert_held(&peer->ovpn->lock);
+
+ tmp = ovpn_peer_get_by_id(peer->ovpn, peer->id);
+ if (tmp == peer) {
+ ovpn_peer_remove(peer, reason, release_list);
+ ret = 0;
+ }
+
+ if (tmp)
+ ovpn_peer_put(tmp);
+
+ return ret;
+}
+
+/**
+ * ovpn_peer_del_p2p - delete peer from related tables in a P2P instance
+ * @peer: the peer to delete
+ * @reason: reason why the peer was deleted (sent to userspace)
+ * @release_list: list where the deleted peer should be appended
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_peer_del_p2p(struct ovpn_peer *peer,
+ enum ovpn_del_peer_reason reason,
+ struct llist_head *release_list)
+{
+ struct ovpn_peer *tmp;
+
+ lockdep_assert_held(&peer->ovpn->lock);
+
+ tmp = rcu_dereference_protected(peer->ovpn->peer,
+ lockdep_is_held(&peer->ovpn->lock));
+ if (tmp != peer)
+ return -ENOENT;
+
+ ovpn_peer_remove(peer, reason, release_list);
+
+ return 0;
+}
+
+/**
+ * ovpn_peer_del - delete peer from related tables
+ * @peer: the peer object to delete
+ * @reason: reason for deleting peer (will be sent to userspace)
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason)
+{
+ LLIST_HEAD(release_list);
+ int ret = -EOPNOTSUPP;
+
+ spin_lock_bh(&peer->ovpn->lock);
+ switch (peer->ovpn->mode) {
+ case OVPN_MODE_MP:
+ ret = ovpn_peer_del_mp(peer, reason, &release_list);
+ break;
+ case OVPN_MODE_P2P:
+ ret = ovpn_peer_del_p2p(peer, reason, &release_list);
+ break;
+ default:
+ break;
+ }
+ unlock_ovpn(peer->ovpn, &release_list);
+
+ return ret;
+}
+
+/**
+ * ovpn_peer_release_p2p - release peer upon P2P device teardown
+ * @ovpn: the instance being torn down
+ * @sk: if not NULL, release peer only if it's using this specific socket
+ * @reason: the reason for releasing the peer
+ */
+static void ovpn_peer_release_p2p(struct ovpn_priv *ovpn, struct sock *sk,
+ enum ovpn_del_peer_reason reason)
+{
+ struct ovpn_socket *ovpn_sock;
+ LLIST_HEAD(release_list);
+ struct ovpn_peer *peer;
+
+ spin_lock_bh(&ovpn->lock);
+ peer = rcu_dereference_protected(ovpn->peer,
+ lockdep_is_held(&ovpn->lock));
+ if (!peer) {
+ spin_unlock_bh(&ovpn->lock);
+ return;
+ }
+
+ if (sk) {
+ ovpn_sock = rcu_access_pointer(peer->sock);
+ if (!ovpn_sock || ovpn_sock->sock->sk != sk) {
+ spin_unlock_bh(&ovpn->lock);
+ ovpn_peer_put(peer);
+ return;
+ }
+ }
+
+ ovpn_peer_remove(peer, reason, &release_list);
+ unlock_ovpn(ovpn, &release_list);
+}
+
+static void ovpn_peers_release_mp(struct ovpn_priv *ovpn, struct sock *sk,
+ enum ovpn_del_peer_reason reason)
+{
+ struct ovpn_socket *ovpn_sock;
+ LLIST_HEAD(release_list);
+ struct ovpn_peer *peer;
+ struct hlist_node *tmp;
+ int bkt;
+
+ spin_lock_bh(&ovpn->lock);
+ hash_for_each_safe(ovpn->peers->by_id, bkt, tmp, peer, hash_entry_id) {
+ bool remove = true;
+
+ /* if a socket was passed as argument, skip all peers except
+ * those using it
+ */
+ if (sk) {
+ rcu_read_lock();
+ ovpn_sock = rcu_dereference(peer->sock);
+ remove = ovpn_sock && ovpn_sock->sock->sk == sk;
+ rcu_read_unlock();
+ }
+
+ if (remove)
+ ovpn_peer_remove(peer, reason, &release_list);
+ }
+ unlock_ovpn(ovpn, &release_list);
+}
+
+/**
+ * ovpn_peers_free - free all peers in the instance
+ * @ovpn: the instance whose peers should be released
+ * @sk: if not NULL, only peers using this socket are removed and the socket
+ * is released immediately
+ * @reason: the reason for releasing all peers
+ */
+void ovpn_peers_free(struct ovpn_priv *ovpn, struct sock *sk,
+ enum ovpn_del_peer_reason reason)
+{
+ switch (ovpn->mode) {
+ case OVPN_MODE_P2P:
+ ovpn_peer_release_p2p(ovpn, sk, reason);
+ break;
+ case OVPN_MODE_MP:
+ ovpn_peers_release_mp(ovpn, sk, reason);
+ break;
+ }
+}
+
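+/* Check the keepalive timers of a single peer against @now.
+ *
+ * Return: the earlier of the RX-expiry and TX-keepalive deadlines, or
+ * 0 if the peer expired or has no keepalive timers configured. The
+ * caller uses this value to re-arm the delayed worker.
+ */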
+static time64_t ovpn_peer_keepalive_work_single(struct ovpn_peer *peer,
+ time64_t now,
+ struct llist_head *release_list)
+{
+ time64_t last_recv, last_sent, next_run1, next_run2;
+ unsigned long timeout, interval;
+ bool expired;
+
+ spin_lock_bh(&peer->lock);
+ /* we expect both timers to be configured at the same time,
+ * therefore bail out if either is not set
+ */
+ if (!peer->keepalive_timeout || !peer->keepalive_interval) {
+ spin_unlock_bh(&peer->lock);
+ return 0;
+ }
+
+ /* check for peer timeout */
+ expired = false;
+ timeout = peer->keepalive_timeout;
+ last_recv = READ_ONCE(peer->last_recv);
+ if (now < last_recv + timeout) {
+ peer->keepalive_recv_exp = last_recv + timeout;
+ next_run1 = peer->keepalive_recv_exp;
+ } else if (peer->keepalive_recv_exp > now) {
+ next_run1 = peer->keepalive_recv_exp;
+ } else {
+ expired = true;
+ }
+
+ if (expired) {
+ /* peer is dead -> kill it and move on */
+ spin_unlock_bh(&peer->lock);
+ netdev_dbg(peer->ovpn->dev, "peer %u expired\n",
+ peer->id);
+ ovpn_peer_remove(peer, OVPN_DEL_PEER_REASON_EXPIRED,
+ release_list);
+ return 0;
+ }
+
+ /* check for peer keepalive */
+ expired = false;
+ interval = peer->keepalive_interval;
+ last_sent = READ_ONCE(peer->last_sent);
+ if (now < last_sent + interval) {
+ peer->keepalive_xmit_exp = last_sent + interval;
+ next_run2 = peer->keepalive_xmit_exp;
+ } else if (peer->keepalive_xmit_exp > now) {
+ next_run2 = peer->keepalive_xmit_exp;
+ } else {
+ expired = true;
+ next_run2 = now + interval;
+ }
+ spin_unlock_bh(&peer->lock);
+
+ if (expired) {
+ /* a keepalive packet is required */
+ netdev_dbg(peer->ovpn->dev,
+ "sending keepalive to peer %u\n",
+ peer->id);
+ if (schedule_work(&peer->keepalive_work))
+ ovpn_peer_hold(peer);
+ }
+
+ if (next_run1 < next_run2)
+ return next_run1;
+
+ return next_run2;
+}
+
+static time64_t ovpn_peer_keepalive_work_mp(struct ovpn_priv *ovpn,
+ time64_t now,
+ struct llist_head *release_list)
+{
+ time64_t tmp_next_run, next_run = 0;
+ struct hlist_node *tmp;
+ struct ovpn_peer *peer;
+ int bkt;
+
+ lockdep_assert_held(&ovpn->lock);
+
+ hash_for_each_safe(ovpn->peers->by_id, bkt, tmp, peer, hash_entry_id) {
+ tmp_next_run = ovpn_peer_keepalive_work_single(peer, now,
+ release_list);
+ if (!tmp_next_run)
+ continue;
+
+ /* the next worker run will be scheduled based on the shortest
+ * required interval across all peers
+ */
+ if (!next_run || tmp_next_run < next_run)
+ next_run = tmp_next_run;
+ }
+
+ return next_run;
+}
+
+static time64_t ovpn_peer_keepalive_work_p2p(struct ovpn_priv *ovpn,
+ time64_t now,
+ struct llist_head *release_list)
+{
+ struct ovpn_peer *peer;
+ time64_t next_run = 0;
+
+ lockdep_assert_held(&ovpn->lock);
+
+ peer = rcu_dereference_protected(ovpn->peer,
+ lockdep_is_held(&ovpn->lock));
+ if (peer)
+ next_run = ovpn_peer_keepalive_work_single(peer, now,
+ release_list);
+
+ return next_run;
+}
+
+/**
+ * ovpn_peer_keepalive_work - run keepalive logic on each known peer
+ * @work: pointer to the work member of the related ovpn object
+ *
+ * Each peer has two timers (if configured):
+ * 1. peer timeout: when no data is received for a certain interval,
+ * the peer is considered dead and it gets killed.
+ * 2. peer keepalive: when no data is sent to a certain peer for a
+ * certain interval, a special 'keepalive' packet is explicitly sent.
+ *
+ * This function iterates across the whole peer collection while
+ * checking the timers described above.
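+ *
+ * Example (illustrative values): with a 10 second keepalive interval
+ * and a 60 second timeout, a keepalive message is emitted after 10
+ * silent seconds on the TX side, while 60 silent seconds on the RX
+ * side mark the peer as dead.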
+ */
+void ovpn_peer_keepalive_work(struct work_struct *work)
+{
+ struct ovpn_priv *ovpn = container_of(work, struct ovpn_priv,
+ keepalive_work.work);
+ time64_t next_run = 0, now = ktime_get_real_seconds();
+ LLIST_HEAD(release_list);
+
+ spin_lock_bh(&ovpn->lock);
+ switch (ovpn->mode) {
+ case OVPN_MODE_MP:
+ next_run = ovpn_peer_keepalive_work_mp(ovpn, now,
+ &release_list);
+ break;
+ case OVPN_MODE_P2P:
+ next_run = ovpn_peer_keepalive_work_p2p(ovpn, now,
+ &release_list);
+ break;
+ }
+
+ /* prevent rearming if the interface is being destroyed */
+ if (next_run > 0 &&
+ READ_ONCE(ovpn->dev->reg_state) == NETREG_REGISTERED) {
+ netdev_dbg(ovpn->dev,
+ "scheduling keepalive work: now=%llu next_run=%llu delta=%llu\n",
+ now, next_run, next_run - now);
+ schedule_delayed_work(&ovpn->keepalive_work,
+ (next_run - now) * HZ);
+ }
+ unlock_ovpn(ovpn, &release_list);
+}
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
new file mode 100644
index 000000000000..a1423f2b09e0
--- /dev/null
+++ b/drivers/net/ovpn/peer.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPNPEER_H_
+#define _NET_OVPN_OVPNPEER_H_
+
+#include <net/dst_cache.h>
+#include <net/strparser.h>
+
+#include "crypto.h"
+#include "socket.h"
+#include "stats.h"
+
+/**
+ * struct ovpn_peer - the main remote peer object
+ * @ovpn: main openvpn instance this peer belongs to
+ * @dev_tracker: reference tracker for associated dev
+ * @id: unique identifier
+ * @vpn_addrs: IP addresses assigned over the tunnel
+ * @vpn_addrs.ipv4: IPv4 assigned to peer on the tunnel
+ * @vpn_addrs.ipv6: IPv6 assigned to peer on the tunnel
+ * @hash_entry_id: entry in the peer ID hashtable
+ * @hash_entry_addr4: entry in the peer IPv4 hashtable
+ * @hash_entry_addr6: entry in the peer IPv6 hashtable
+ * @hash_entry_transp_addr: entry in the peer transport address hashtable
+ * @sock: the socket being used to talk to this peer
+ * @tcp: keeps track of TCP specific state
+ * @tcp.strp: stream parser context (TCP only)
+ * @tcp.user_queue: received packets that have to go to userspace (TCP only)
+ * @tcp.out_queue: packets on hold while socket is taken by user (TCP only)
+ * @tcp.tx_in_progress: true if TX is already ongoing (TCP only)
+ * @tcp.out_msg.skb: packet scheduled for sending (TCP only)
+ * @tcp.out_msg.offset: offset where next send should start (TCP only)
+ * @tcp.out_msg.len: remaining data to send within packet (TCP only)
+ * @tcp.sk_cb.sk_data_ready: pointer to original cb (TCP only)
+ * @tcp.sk_cb.sk_write_space: pointer to original cb (TCP only)
+ * @tcp.sk_cb.prot: pointer to original prot object (TCP only)
+ * @tcp.sk_cb.ops: pointer to the original prot_ops object (TCP only)
+ * @tcp.defer_del_work: work for deferred peer deletion (TCP only)
+ * @crypto: the crypto configuration (ciphers, keys, etc..)
+ * @dst_cache: cache for dst_entry used to send to peer
+ * @bind: remote peer binding
+ * @keepalive_interval: seconds after which a new keepalive should be sent
+ * @keepalive_xmit_exp: future timestamp when next keepalive should be sent
+ * @last_sent: timestamp of the last successfully sent packet
+ * @keepalive_timeout: seconds after which an inactive peer is considered dead
+ * @keepalive_recv_exp: future timestamp when the peer should expire
+ * @last_recv: timestamp of the last authenticated received packet
+ * @vpn_stats: per-peer in-VPN TX/RX stats
+ * @link_stats: per-peer link/transport TX/RX stats
+ * @delete_reason: why peer was deleted (e.g. timeout, transport error, ..)
+ * @lock: protects binding to peer (bind) and keepalive* fields
+ * @refcount: reference counter
+ * @rcu: used to free peer in an RCU safe way
+ * @release_entry: entry for the socket release list
+ * @keepalive_work: used to schedule keepalive sending
+ */
+struct ovpn_peer {
+ struct ovpn_priv *ovpn;
+ netdevice_tracker dev_tracker;
+ u32 id;
+ struct {
+ struct in_addr ipv4;
+ struct in6_addr ipv6;
+ } vpn_addrs;
+ struct hlist_node hash_entry_id;
+ struct hlist_nulls_node hash_entry_addr4;
+ struct hlist_nulls_node hash_entry_addr6;
+ struct hlist_nulls_node hash_entry_transp_addr;
+ struct ovpn_socket __rcu *sock;
+
+ struct {
+ struct strparser strp;
+ struct sk_buff_head user_queue;
+ struct sk_buff_head out_queue;
+ bool tx_in_progress;
+
+ struct {
+ struct sk_buff *skb;
+ int offset;
+ int len;
+ } out_msg;
+
+ struct {
+ void (*sk_data_ready)(struct sock *sk);
+ void (*sk_write_space)(struct sock *sk);
+ struct proto *prot;
+ const struct proto_ops *ops;
+ } sk_cb;
+
+ struct work_struct defer_del_work;
+ } tcp;
+ struct ovpn_crypto_state crypto;
+ struct dst_cache dst_cache;
+ struct ovpn_bind __rcu *bind;
+ unsigned long keepalive_interval;
+ unsigned long keepalive_xmit_exp;
+ time64_t last_sent;
+ unsigned long keepalive_timeout;
+ unsigned long keepalive_recv_exp;
+ time64_t last_recv;
+ struct ovpn_peer_stats vpn_stats;
+ struct ovpn_peer_stats link_stats;
+ enum ovpn_del_peer_reason delete_reason;
+ spinlock_t lock; /* protects bind and keepalive* */
+ struct kref refcount;
+ struct rcu_head rcu;
+ struct llist_node release_entry;
+ struct work_struct keepalive_work;
+};
+
+/**
+ * ovpn_peer_hold - increase reference counter
+ * @peer: the peer whose counter should be increased
+ *
+ * Return: true if the counter was increased or false if it was zero already
+ */
+static inline bool ovpn_peer_hold(struct ovpn_peer *peer)
+{
+ return kref_get_unless_zero(&peer->refcount);
+}
+
+void ovpn_peer_release(struct ovpn_peer *peer);
+void ovpn_peer_release_kref(struct kref *kref);
+
+/**
+ * ovpn_peer_put - decrease reference counter
+ * @peer: the peer whose counter should be decreased
+ */
+static inline void ovpn_peer_put(struct ovpn_peer *peer)
+{
+ kref_put(&peer->refcount, ovpn_peer_release_kref);
+}
+
+struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id);
+int ovpn_peer_add(struct ovpn_priv *ovpn, struct ovpn_peer *peer);
+int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason);
+void ovpn_peers_free(struct ovpn_priv *ovpn, struct sock *sock,
+ enum ovpn_del_peer_reason reason);
+
+struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_priv *ovpn,
+ struct sk_buff *skb);
+struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_priv *ovpn, u32 peer_id);
+struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_priv *ovpn,
+ struct sk_buff *skb);
+void ovpn_peer_hash_vpn_ip(struct ovpn_peer *peer);
+bool ovpn_peer_check_by_src(struct ovpn_priv *ovpn, struct sk_buff *skb,
+ struct ovpn_peer *peer);
+
+void ovpn_peer_keepalive_set(struct ovpn_peer *peer, u32 interval, u32 timeout);
+void ovpn_peer_keepalive_work(struct work_struct *work);
+
+void ovpn_peer_endpoints_update(struct ovpn_peer *peer, struct sk_buff *skb);
+int ovpn_peer_reset_sockaddr(struct ovpn_peer *peer,
+ const struct sockaddr_storage *ss,
+ const void *local_ip);
+
+#endif /* _NET_OVPN_OVPNPEER_H_ */
diff --git a/drivers/net/ovpn/pktid.c b/drivers/net/ovpn/pktid.c
new file mode 100644
index 000000000000..2f29049897e3
--- /dev/null
+++ b/drivers/net/ovpn/pktid.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ * James Yonan <james@openvpn.net>
+ */
+
+#include <linux/atomic.h>
+#include <linux/jiffies.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/types.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "pktid.h"
+
+void ovpn_pktid_xmit_init(struct ovpn_pktid_xmit *pid)
+{
+ atomic_set(&pid->seq_num, 1);
+}
+
+void ovpn_pktid_recv_init(struct ovpn_pktid_recv *pr)
+{
+ memset(pr, 0, sizeof(*pr));
+ spin_lock_init(&pr->lock);
+}
+
+/* Packet replay detection.
+ * Allows ID backtrack of up to REPLAY_WINDOW_SIZE - 1.
+ */
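+/* Example (illustrative): with REPLAY_WINDOW_SIZE = 2048, after IDs
+ * 1..100 have been received, a late ID 98 arriving for the first time
+ * is accepted and recorded in the bitmap, a second copy of 98 is
+ * rejected as a replay, and any ID at or below id_floor is rejected
+ * outright.
+ */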
+int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time)
+{
+ const unsigned long now = jiffies;
+ int ret;
+
+ /* ID must not be zero */
+ if (unlikely(pkt_id == 0))
+ return -EINVAL;
+
+ spin_lock_bh(&pr->lock);
+
+ /* expire backtracks at or below pr->id after PKTID_RECV_EXPIRE time */
+ if (unlikely(time_after_eq(now, pr->expire)))
+ pr->id_floor = pr->id;
+
+ /* time changed? */
+ if (unlikely(pkt_time != pr->time)) {
+ if (pkt_time > pr->time) {
+ /* time moved forward, accept */
+ pr->base = 0;
+ pr->extent = 0;
+ pr->id = 0;
+ pr->time = pkt_time;
+ pr->id_floor = 0;
+ } else {
+ /* time moved backward, reject */
+ ret = -ETIME;
+ goto out;
+ }
+ }
+
+ if (likely(pkt_id == pr->id + 1)) {
+ /* well-formed ID sequence (incremented by 1) */
+ pr->base = REPLAY_INDEX(pr->base, -1);
+ pr->history[pr->base / 8] |= (1 << (pr->base % 8));
+ if (pr->extent < REPLAY_WINDOW_SIZE)
+ ++pr->extent;
+ pr->id = pkt_id;
+ } else if (pkt_id > pr->id) {
+ /* ID jumped forward by more than one */
+ const unsigned int delta = pkt_id - pr->id;
+
+ if (delta < REPLAY_WINDOW_SIZE) {
+ unsigned int i;
+
+ pr->base = REPLAY_INDEX(pr->base, -delta);
+ pr->history[pr->base / 8] |= (1 << (pr->base % 8));
+ pr->extent += delta;
+ if (pr->extent > REPLAY_WINDOW_SIZE)
+ pr->extent = REPLAY_WINDOW_SIZE;
+ for (i = 1; i < delta; ++i) {
+ unsigned int newb = REPLAY_INDEX(pr->base, i);
+
+ pr->history[newb / 8] &= ~BIT(newb % 8);
+ }
+ } else {
+ pr->base = 0;
+ pr->extent = REPLAY_WINDOW_SIZE;
+ memset(pr->history, 0, sizeof(pr->history));
+ pr->history[0] = 1;
+ }
+ pr->id = pkt_id;
+ } else {
+ /* ID backtrack */
+ const unsigned int delta = pr->id - pkt_id;
+
+ if (delta > pr->max_backtrack)
+ pr->max_backtrack = delta;
+ if (delta < pr->extent) {
+ if (pkt_id > pr->id_floor) {
+ const unsigned int ri = REPLAY_INDEX(pr->base,
+ delta);
+ u8 *p = &pr->history[ri / 8];
+ const u8 mask = (1 << (ri % 8));
+
+ if (*p & mask) {
+ ret = -EINVAL;
+ goto out;
+ }
+ *p |= mask;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ pr->expire = now + PKTID_RECV_EXPIRE;
+ ret = 0;
+out:
+ spin_unlock_bh(&pr->lock);
+ return ret;
+}
diff --git a/drivers/net/ovpn/pktid.h b/drivers/net/ovpn/pktid.h
new file mode 100644
index 000000000000..0262d026d15e
--- /dev/null
+++ b/drivers/net/ovpn/pktid.h
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ * James Yonan <james@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPNPKTID_H_
+#define _NET_OVPN_OVPNPKTID_H_
+
+#include "proto.h"
+
+/* If no packets are received for this length of time, set a backtrack
+ * floor at the highest packet ID received thus far.
+ */
+#define PKTID_RECV_EXPIRE (30 * HZ)
+
+/* Packet-ID state for transmitter */
+struct ovpn_pktid_xmit {
+ atomic_t seq_num;
+};
+
+/* replay window sizing in bytes = 2^REPLAY_WINDOW_ORDER */
+#define REPLAY_WINDOW_ORDER 8
+
+#define REPLAY_WINDOW_BYTES BIT(REPLAY_WINDOW_ORDER)
+#define REPLAY_WINDOW_SIZE (REPLAY_WINDOW_BYTES * 8)
+#define REPLAY_INDEX(base, i) (((base) + (i)) & (REPLAY_WINDOW_SIZE - 1))
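+
+/* REPLAY_INDEX() addresses the history bitmap as a circular buffer,
+ * e.g. (with the defaults above) REPLAY_INDEX(0, -1) == 2047: stepping
+ * backwards from bit 0 wraps around to the last bit of the window.
+ */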
+
+/* Packet-ID state for receiver.
+ * Other than the lock member, it can be zeroed to initialize.
+ */
+struct ovpn_pktid_recv {
+ /* "sliding window" bitmask of recent packet IDs received */
+ u8 history[REPLAY_WINDOW_BYTES];
+ /* bit position of deque base in history */
+ unsigned int base;
+ /* extent (in bits) of deque in history */
+ unsigned int extent;
+ /* expiration of history in jiffies */
+ unsigned long expire;
+ /* highest sequence number received */
+ u32 id;
+ /* highest time stamp received */
+ u32 time;
+ /* we will only accept backtrack IDs > id_floor */
+ u32 id_floor;
+ /* maximum backtrack delta observed so far */
+ unsigned int max_backtrack;
+ /* protects entire packet ID state */
+ spinlock_t lock;
+};
+
+/* Get the next packet ID for xmit */
+static inline int ovpn_pktid_xmit_next(struct ovpn_pktid_xmit *pid, u32 *pktid)
+{
+ const u32 seq_num = atomic_fetch_add_unless(&pid->seq_num, 1, 0);
+ /* when the 32bit space is exhausted, we return an error: the packet
+ * ID is used to create the cipher IV and we do not want to reuse the
+ * same value more than once
+ */
+ if (unlikely(!seq_num))
+ return -ERANGE;
+
+ *pktid = seq_num;
+
+ return 0;
+}
+
+/* Write 12-byte AEAD IV to dest */
+static inline void ovpn_pktid_aead_write(const u32 pktid,
+ const u8 nt[],
+ unsigned char *dest)
+{
+ *(__force __be32 *)(dest) = htonl(pktid);
+ BUILD_BUG_ON(4 + OVPN_NONCE_TAIL_SIZE != OVPN_NONCE_SIZE);
+ memcpy(dest + 4, nt, OVPN_NONCE_TAIL_SIZE);
+}
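+
+/* Sketch of the expected TX usage (identifiers are illustrative, not
+ * taken from this driver): fetch the next ID, treating a wrapped
+ * counter as a request to rekey, then build the 12-byte IV:
+ *
+ *	u32 pktid;
+ *
+ *	if (ovpn_pktid_xmit_next(&pid_xmit, &pktid) < 0)
+ *		return -ERANGE;
+ *	ovpn_pktid_aead_write(pktid, nonce_tail, iv);
+ */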
+
+void ovpn_pktid_xmit_init(struct ovpn_pktid_xmit *pid);
+void ovpn_pktid_recv_init(struct ovpn_pktid_recv *pr);
+
+int ovpn_pktid_recv(struct ovpn_pktid_recv *pr, u32 pkt_id, u32 pkt_time);
+
+#endif /* _NET_OVPN_OVPNPKTID_H_ */
diff --git a/drivers/net/ovpn/proto.h b/drivers/net/ovpn/proto.h
new file mode 100644
index 000000000000..b7d285b4d9c1
--- /dev/null
+++ b/drivers/net/ovpn/proto.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ * James Yonan <james@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_PROTO_H_
+#define _NET_OVPN_PROTO_H_
+
+#include "main.h"
+
+#include <linux/bitfield.h>
+#include <linux/skbuff.h>
+
+/* When the OpenVPN protocol is run in AEAD mode, use
+ * the OpenVPN packet ID as the AEAD nonce:
+ *
+ * 00000005 521c3b01 4308c041
+ * [seq # ] [ nonce_tail ]
+ * [ 12-byte full IV ] -> OVPN_NONCE_SIZE
+ * [4-bytes -> OVPN_NONCE_WIRE_SIZE
+ * on wire]
+ */
+
+/* nonce size (96bits) as required by AEAD ciphers */
+#define OVPN_NONCE_SIZE 12
+/* last 8 bytes of AEAD nonce: provided by userspace and usually derived
+ * from key material generated during TLS handshake
+ */
+#define OVPN_NONCE_TAIL_SIZE 8
+
+/* OpenVPN nonce size reduced by 8-byte nonce tail -- this is the
+ * size of the AEAD Associated Data (AD) sent over the wire
+ * and is normally the head of the IV
+ */
+#define OVPN_NONCE_WIRE_SIZE (OVPN_NONCE_SIZE - OVPN_NONCE_TAIL_SIZE)
+
+#define OVPN_OPCODE_SIZE 4 /* DATA_V2 opcode size */
+#define OVPN_OPCODE_KEYID_MASK 0x07000000
+#define OVPN_OPCODE_PKTTYPE_MASK 0xF8000000
+#define OVPN_OPCODE_PEERID_MASK 0x00FFFFFF
+
+/* packet opcodes of interest to us */
+#define OVPN_DATA_V1 6 /* data channel v1 packet */
+#define OVPN_DATA_V2 9 /* data channel v2 packet */
+
+#define OVPN_PEER_ID_UNDEF 0x00FFFFFF
+
+/**
+ * ovpn_opcode_from_skb - extract OP code from skb at specified offset
+ * @skb: the packet to extract the OP code from
+ * @offset: the offset in the data buffer where the OP code is located
+ *
+ * Note: this function assumes that the skb head was pulled enough
+ * to access the first 4 bytes.
+ *
+ * Return: the OP code
+ */
+static inline u8 ovpn_opcode_from_skb(const struct sk_buff *skb, u16 offset)
+{
+ u32 opcode = be32_to_cpu(*(__be32 *)(skb->data + offset));
+
+ return FIELD_GET(OVPN_OPCODE_PKTTYPE_MASK, opcode);
+}
+
+/**
+ * ovpn_peer_id_from_skb - extract peer ID from skb at specified offset
+ * @skb: the packet to extract the OP code from
+ * @offset: the offset in the data buffer where the OP code is located
+ *
+ * Note: this function assumes that the skb head was pulled enough
+ * to access the first 4 bytes.
+ *
+ * Return: the peer ID
+ */
+static inline u32 ovpn_peer_id_from_skb(const struct sk_buff *skb, u16 offset)
+{
+ u32 opcode = be32_to_cpu(*(__be32 *)(skb->data + offset));
+
+ return FIELD_GET(OVPN_OPCODE_PEERID_MASK, opcode);
+}
+
+/**
+ * ovpn_key_id_from_skb - extract key ID from the skb head
+ * @skb: the packet to extract the key ID from
+ *
+ * Note: this function assumes that the skb head was pulled enough
+ * to access the first 4 bytes.
+ *
+ * Return: the key ID
+ */
+static inline u8 ovpn_key_id_from_skb(const struct sk_buff *skb)
+{
+ u32 opcode = be32_to_cpu(*(__be32 *)skb->data);
+
+ return FIELD_GET(OVPN_OPCODE_KEYID_MASK, opcode);
+}
+
+/**
+ * ovpn_opcode_compose - combine OP code, key ID and peer ID to wire format
+ * @opcode: the OP code
+ * @key_id: the key ID
+ * @peer_id: the peer ID
+ *
+ * Return: a 4-byte integer obtained by combining all input values
+ * according to the OpenVPN wire format. This integer can then be
+ * written to the packet header.
+ */
+static inline u32 ovpn_opcode_compose(u8 opcode, u8 key_id, u32 peer_id)
+{
+ return FIELD_PREP(OVPN_OPCODE_PKTTYPE_MASK, opcode) |
+ FIELD_PREP(OVPN_OPCODE_KEYID_MASK, key_id) |
+ FIELD_PREP(OVPN_OPCODE_PEERID_MASK, peer_id);
+}
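+
+/* Example (illustrative): ovpn_opcode_compose(OVPN_DATA_V2, 1, 2)
+ * returns 0x49000002, i.e. opcode 9 in the top 5 bits, key ID 1 in the
+ * following 3 bits and peer ID 2 in the low 24 bits.
+ */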
+
+#endif /* _NET_OVPN_PROTO_H_ */
diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h
new file mode 100644
index 000000000000..64430880f1da
--- /dev/null
+++ b/drivers/net/ovpn/skb.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ * James Yonan <james@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_SKB_H_
+#define _NET_OVPN_SKB_H_
+
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/types.h>
+
+struct ovpn_cb {
+ struct ovpn_peer *peer;
+ struct ovpn_crypto_key_slot *ks;
+ struct aead_request *req;
+ struct scatterlist *sg;
+ u8 *iv;
+ unsigned int payload_offset;
+ bool nosignal;
+};
+
+static inline struct ovpn_cb *ovpn_skb_cb(struct sk_buff *skb)
+{
+ BUILD_BUG_ON(sizeof(struct ovpn_cb) > sizeof(skb->cb));
+ return (struct ovpn_cb *)skb->cb;
+}
+
+/* Return the Ethernet protocol (ETH_P_IP or ETH_P_IPV6) matching the
+ * IP version found in the skb header.
+ * Return 0 if the protocol is not IPv4/IPv6 or cannot be read.
+ */
+static inline __be16 ovpn_ip_check_protocol(struct sk_buff *skb)
+{
+ __be16 proto = 0;
+
+ /* skb could be non-linear,
+ * make sure IP header is in non-fragmented part
+ */
+ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+ return 0;
+
+ if (ip_hdr(skb)->version == 4) {
+ proto = htons(ETH_P_IP);
+ } else if (ip_hdr(skb)->version == 6) {
+ if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+ return 0;
+ proto = htons(ETH_P_IPV6);
+ }
+
+ return proto;
+}
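+
+/* Example (illustrative): for an skb whose first header nibble is 6,
+ * the function above returns htons(ETH_P_IPV6); a caller can use this
+ * value, e.g., to set skb->protocol before further processing.
+ */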
+
+#endif /* _NET_OVPN_SKB_H_ */
diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
new file mode 100644
index 000000000000..a83cbab72591
--- /dev/null
+++ b/drivers/net/ovpn/socket.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "io.h"
+#include "peer.h"
+#include "socket.h"
+#include "tcp.h"
+#include "udp.h"
+
+static void ovpn_socket_release_kref(struct kref *kref)
+{
+ struct ovpn_socket *sock = container_of(kref, struct ovpn_socket,
+ refcount);
+
+ if (sock->sock->sk->sk_protocol == IPPROTO_UDP)
+ ovpn_udp_socket_detach(sock);
+ else if (sock->sock->sk->sk_protocol == IPPROTO_TCP)
+ ovpn_tcp_socket_detach(sock);
+}
+
+/**
+ * ovpn_socket_put - decrease reference counter
+ * @peer: peer whose socket reference counter should be decreased
+ * @sock: the RCU protected peer socket
+ *
+ * This function is only used internally. Code wishing to release a
+ * reference to the ovpn_socket should use ovpn_socket_release()
+ *
+ * Return: true if the socket was released, false otherwise
+ */
+static bool ovpn_socket_put(struct ovpn_peer *peer, struct ovpn_socket *sock)
+{
+ return kref_put(&sock->refcount, ovpn_socket_release_kref);
+}
+
+/**
+ * ovpn_socket_release - release resources owned by socket user
+ * @peer: peer whose socket should be released
+ *
+ * This function should be invoked when the peer is being removed
+ * and wants to drop its link to the socket.
+ *
+ * In case of UDP, the detach routine will drop a reference to the
+ * ovpn netdev, pointed by the ovpn_socket.
+ *
+ * In case of TCP, releasing the socket will drop the reference
+ * to the peer it is linked to, thus allowing the peer to
+ * disappear as well.
+ *
+ * This function is expected to be invoked exactly once per peer.
+ *
+ * NOTE: this function may sleep
+ */
+void ovpn_socket_release(struct ovpn_peer *peer)
+{
+ struct ovpn_socket *sock;
+ bool released;
+
+ might_sleep();
+
+ sock = rcu_replace_pointer(peer->sock, NULL, true);
+ /* release may be invoked after socket was detached */
+ if (!sock)
+ return;
+
+ /* sanity check: we should not end up here if the socket
+ * was already closed
+ */
+ if (!sock->sock->sk) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return;
+ }
+
+ /* Drop the reference while holding the sock lock to prevent a
+ * concurrent ovpn_socket_new() call from interfering with a
+ * partially detached socket.
+ *
+ * Holding the lock ensures that a socket with refcnt 0 is fully
+ * detached before it can be picked by a concurrent reader.
+ */
+ lock_sock(sock->sock->sk);
+ released = ovpn_socket_put(peer, sock);
+ release_sock(sock->sock->sk);
+
+ /* align all readers with sk_user_data being NULL */
+ synchronize_rcu();
+
+ /* following cleanup should happen with lock released */
+ if (released) {
+ if (sock->sock->sk->sk_protocol == IPPROTO_UDP) {
+ netdev_put(sock->ovpn->dev, &sock->dev_tracker);
+ } else if (sock->sock->sk->sk_protocol == IPPROTO_TCP) {
+ /* wait for TCP jobs to terminate */
+ ovpn_tcp_socket_wait_finish(sock);
+ ovpn_peer_put(sock->peer);
+ }
+ /* we can call plain kfree() because we already waited one RCU
+ * period due to synchronize_rcu()
+ */
+ kfree(sock);
+ }
+}
+
+static bool ovpn_socket_hold(struct ovpn_socket *sock)
+{
+ return kref_get_unless_zero(&sock->refcount);
+}
+
+static int ovpn_socket_attach(struct ovpn_socket *sock, struct ovpn_peer *peer)
+{
+ if (sock->sock->sk->sk_protocol == IPPROTO_UDP)
+ return ovpn_udp_socket_attach(sock, peer->ovpn);
+ else if (sock->sock->sk->sk_protocol == IPPROTO_TCP)
+ return ovpn_tcp_socket_attach(sock, peer);
+
+ return -EOPNOTSUPP;
+}
+
+/**
+ * ovpn_socket_new - create a new socket and initialize it
+ * @sock: the kernel socket to embed
+ * @peer: the peer reachable via this socket
+ *
+ * Return: an openvpn socket on success or a negative error code otherwise
+ */
+struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
+{
+ struct ovpn_socket *ovpn_sock;
+ int ret;
+
+ lock_sock(sock->sk);
+
+ /* a TCP socket can only be owned by a single peer, therefore there
+ * can't be any other user
+ */
+ if (sock->sk->sk_protocol == IPPROTO_TCP && sock->sk->sk_user_data) {
+ ovpn_sock = ERR_PTR(-EBUSY);
+ goto sock_release;
+ }
+
+ /* a UDP socket can be shared across multiple peers, but we must make
+ * sure it is not owned by something else
+ */
+ if (sock->sk->sk_protocol == IPPROTO_UDP) {
+ u8 type = READ_ONCE(udp_sk(sock->sk)->encap_type);
+
+ /* socket owned by other encapsulation module */
+ if (type && type != UDP_ENCAP_OVPNINUDP) {
+ ovpn_sock = ERR_PTR(-EBUSY);
+ goto sock_release;
+ }
+
+ rcu_read_lock();
+ ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
+ if (ovpn_sock) {
+ /* socket owned by another ovpn instance, we can't use it */
+ if (ovpn_sock->ovpn != peer->ovpn) {
+ ovpn_sock = ERR_PTR(-EBUSY);
+ rcu_read_unlock();
+ goto sock_release;
+ }
+
+ /* this socket is already owned by this instance,
+ * therefore we can increase the refcounter and
+ * use it as expected
+ */
+ if (WARN_ON(!ovpn_socket_hold(ovpn_sock))) {
+ /* this should never happen because setting
+ * the refcnt to 0 and detaching the socket
+ * is expected to be atomic
+ */
+ ovpn_sock = ERR_PTR(-EAGAIN);
+ rcu_read_unlock();
+ goto sock_release;
+ }
+
+ rcu_read_unlock();
+ goto sock_release;
+ }
+ rcu_read_unlock();
+ }
+
+ /* socket is not owned: attach to this ovpn instance */
+
+ ovpn_sock = kzalloc(sizeof(*ovpn_sock), GFP_KERNEL);
+ if (!ovpn_sock) {
+ ovpn_sock = ERR_PTR(-ENOMEM);
+ goto sock_release;
+ }
+
+ ovpn_sock->sock = sock;
+ kref_init(&ovpn_sock->refcount);
+
+ ret = ovpn_socket_attach(ovpn_sock, peer);
+ if (ret < 0) {
+ kfree(ovpn_sock);
+ ovpn_sock = ERR_PTR(ret);
+ goto sock_release;
+ }
+
+ /* TCP sockets are per-peer, therefore they are linked to their unique
+ * peer
+ */
+ if (sock->sk->sk_protocol == IPPROTO_TCP) {
+ INIT_WORK(&ovpn_sock->tcp_tx_work, ovpn_tcp_tx_work);
+ ovpn_sock->peer = peer;
+ ovpn_peer_hold(peer);
+ } else if (sock->sk->sk_protocol == IPPROTO_UDP) {
+ /* in UDP we only link the ovpn instance since the socket is
+ * shared among multiple peers
+ */
+ ovpn_sock->ovpn = peer->ovpn;
+ netdev_hold(peer->ovpn->dev, &ovpn_sock->dev_tracker,
+ GFP_KERNEL);
+ }
+
+ rcu_assign_sk_user_data(sock->sk, ovpn_sock);
+sock_release:
+ release_sock(sock->sk);
+ return ovpn_sock;
+}
diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
new file mode 100644
index 000000000000..00d856b1a5d8
--- /dev/null
+++ b/drivers/net/ovpn/socket.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_SOCK_H_
+#define _NET_OVPN_SOCK_H_
+
+#include <linux/net.h>
+#include <linux/kref.h>
+#include <net/sock.h>
+
+struct ovpn_priv;
+struct ovpn_peer;
+
+/**
+ * struct ovpn_socket - a kernel socket referenced in the ovpn code
+ * @ovpn: ovpn instance owning this socket (UDP only)
+ * @dev_tracker: reference tracker for associated dev (UDP only)
+ * @peer: unique peer transmitting over this socket (TCP only)
+ * @sock: the low level sock object
+ * @refcount: amount of contexts currently referencing this object
+ * @work: member used to schedule release routine (it may block)
+ * @tcp_tx_work: work for deferring outgoing packet processing (TCP only)
+ */
+struct ovpn_socket {
+ union {
+ struct {
+ struct ovpn_priv *ovpn;
+ netdevice_tracker dev_tracker;
+ };
+ struct ovpn_peer *peer;
+ };
+
+ struct socket *sock;
+ struct kref refcount;
+ struct work_struct work;
+ struct work_struct tcp_tx_work;
+};
+
+struct ovpn_socket *ovpn_socket_new(struct socket *sock,
+ struct ovpn_peer *peer);
+void ovpn_socket_release(struct ovpn_peer *peer);
+
+#endif /* _NET_OVPN_SOCK_H_ */
diff --git a/drivers/net/ovpn/stats.c b/drivers/net/ovpn/stats.c
new file mode 100644
index 000000000000..d637143473bb
--- /dev/null
+++ b/drivers/net/ovpn/stats.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/atomic.h>
+
+#include "stats.h"
+
+void ovpn_peer_stats_init(struct ovpn_peer_stats *ps)
+{
+ atomic64_set(&ps->rx.bytes, 0);
+ atomic64_set(&ps->rx.packets, 0);
+
+ atomic64_set(&ps->tx.bytes, 0);
+ atomic64_set(&ps->tx.packets, 0);
+}
diff --git a/drivers/net/ovpn/stats.h b/drivers/net/ovpn/stats.h
new file mode 100644
index 000000000000..53433d8b6c33
--- /dev/null
+++ b/drivers/net/ovpn/stats.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: James Yonan <james@openvpn.net>
+ * Antonio Quartulli <antonio@openvpn.net>
+ * Lev Stipakov <lev@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_OVPNSTATS_H_
+#define _NET_OVPN_OVPNSTATS_H_
+
+/* one stat */
+struct ovpn_peer_stat {
+ atomic64_t bytes;
+ atomic64_t packets;
+};
+
+/* rx and tx stats combined */
+struct ovpn_peer_stats {
+ struct ovpn_peer_stat rx;
+ struct ovpn_peer_stat tx;
+};
+
+void ovpn_peer_stats_init(struct ovpn_peer_stats *ps);
+
+static inline void ovpn_peer_stats_increment(struct ovpn_peer_stat *stat,
+ const unsigned int n)
+{
+ atomic64_add(n, &stat->bytes);
+ atomic64_inc(&stat->packets);
+}
+
+static inline void ovpn_peer_stats_increment_rx(struct ovpn_peer_stats *stats,
+ const unsigned int n)
+{
+ ovpn_peer_stats_increment(&stats->rx, n);
+}
+
+static inline void ovpn_peer_stats_increment_tx(struct ovpn_peer_stats *stats,
+ const unsigned int n)
+{
+ ovpn_peer_stats_increment(&stats->tx, n);
+}
+
+#endif /* _NET_OVPN_OVPNSTATS_H_ */
diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
new file mode 100644
index 000000000000..7c42d84987ad
--- /dev/null
+++ b/drivers/net/ovpn/tcp.c
@@ -0,0 +1,598 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/skbuff.h>
+#include <net/hotdata.h>
+#include <net/inet_common.h>
+#include <net/ipv6.h>
+#include <net/tcp.h>
+#include <net/transp_v6.h>
+#include <net/route.h>
+#include <trace/events/sock.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "io.h"
+#include "peer.h"
+#include "proto.h"
+#include "skb.h"
+#include "tcp.h"
+
+#define OVPN_TCP_DEPTH_NESTING 2
+#if OVPN_TCP_DEPTH_NESTING == SINGLE_DEPTH_NESTING
+#error "OVPN TCP requires its own lockdep subclass"
+#endif
+
+static struct proto ovpn_tcp_prot __ro_after_init;
+static struct proto_ops ovpn_tcp_ops __ro_after_init;
+static struct proto ovpn_tcp6_prot __ro_after_init;
+static struct proto_ops ovpn_tcp6_ops __ro_after_init;
+
+static int ovpn_tcp_parse(struct strparser *strp, struct sk_buff *skb)
+{
+ struct strp_msg *rxm = strp_msg(skb);
+ __be16 blen;
+ u16 len;
+ int err;
+
+ /* when packets are written to the TCP stream, they are prepended with
+ * two bytes indicating the actual packet size.
+ * Parse accordingly and return the actual size (including the size
+ * header)
+ */
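+	/* Example (illustrative): a 1400 byte packet travels on the
+	 * stream as 0x05 0x78 followed by the 1400 payload bytes, so
+	 * this parser reports a 1402 byte message.
+	 */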
+
+ if (skb->len < rxm->offset + 2)
+ return 0;
+
+ err = skb_copy_bits(skb, rxm->offset, &blen, sizeof(blen));
+ if (err < 0)
+ return err;
+
+ len = be16_to_cpu(blen);
+ if (len < 2)
+ return -EINVAL;
+
+ return len + 2;
+}
+
+/* queue skb for sending to userspace via recvmsg on the socket */
+static void ovpn_tcp_to_userspace(struct ovpn_peer *peer, struct sock *sk,
+ struct sk_buff *skb)
+{
+ skb_set_owner_r(skb, sk);
+ memset(skb->cb, 0, sizeof(skb->cb));
+ skb_queue_tail(&peer->tcp.user_queue, skb);
+ peer->tcp.sk_cb.sk_data_ready(sk);
+}
+
+static void ovpn_tcp_rcv(struct strparser *strp, struct sk_buff *skb)
+{
+ struct ovpn_peer *peer = container_of(strp, struct ovpn_peer, tcp.strp);
+ struct strp_msg *msg = strp_msg(skb);
+ size_t pkt_len = msg->full_len - 2;
+ size_t off = msg->offset + 2;
+ u8 opcode;
+
+ /* ensure skb->data points to the beginning of the openvpn packet */
+ if (!pskb_pull(skb, off)) {
+ net_warn_ratelimited("%s: packet too small for peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ goto err;
+ }
+
+ /* strparser does not trim the skb for us, therefore we do it now */
+ if (pskb_trim(skb, pkt_len) != 0) {
+ net_warn_ratelimited("%s: trimming skb failed for peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ goto err;
+ }
+
+ /* we need the first 4 bytes of data to be accessible
+ * to extract the opcode and the key ID later on
+ */
+ if (!pskb_may_pull(skb, OVPN_OPCODE_SIZE)) {
+ net_warn_ratelimited("%s: packet too small to fetch opcode for peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ goto err;
+ }
+
+ /* DATA_V2 packets are handled in kernel, the rest goes to user space */
+ opcode = ovpn_opcode_from_skb(skb, 0);
+ if (unlikely(opcode != OVPN_DATA_V2)) {
+ if (opcode == OVPN_DATA_V1) {
+ net_warn_ratelimited("%s: DATA_V1 detected on the TCP stream\n",
+ netdev_name(peer->ovpn->dev));
+ goto err;
+ }
+
+ /* The packet size header must be there when sending the packet
+ * to userspace, therefore we put it back
+ */
+ skb_push(skb, 2);
+ ovpn_tcp_to_userspace(peer, strp->sk, skb);
+ return;
+ }
+
+ /* hold reference to peer as required by ovpn_recv().
+ *
+ * NOTE: in this context we should already be holding a reference to
+ * this peer, therefore ovpn_peer_hold() is not expected to fail
+ */
+ if (WARN_ON(!ovpn_peer_hold(peer)))
+ goto err;
+
+ ovpn_recv(peer, skb);
+ return;
+err:
+ dev_dstats_rx_dropped(peer->ovpn->dev);
+ kfree_skb(skb);
+ ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
+}
+
+static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+ int flags, int *addr_len)
+{
+ int err = 0, off, copied = 0, ret;
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
+ rcu_read_unlock();
+ return -EBADF;
+ }
+ peer = sock->peer;
+ rcu_read_unlock();
+
+ skb = __skb_recv_datagram(sk, &peer->tcp.user_queue, flags, &off, &err);
+ if (!skb) {
+ if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) {
+ ret = 0;
+ goto out;
+ }
+ ret = err;
+ goto out;
+ }
+
+ copied = len;
+ if (copied > skb->len)
+ copied = skb->len;
+ else if (copied < skb->len)
+ msg->msg_flags |= MSG_TRUNC;
+
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
+ if (unlikely(err)) {
+ kfree_skb(skb);
+ ret = err;
+ goto out;
+ }
+
+ if (flags & MSG_TRUNC)
+ copied = skb->len;
+ kfree_skb(skb);
+ ret = copied;
+out:
+ ovpn_peer_put(peer);
+ return ret;
+}
+
+void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock)
+{
+ struct ovpn_peer *peer = ovpn_sock->peer;
+ struct socket *sock = ovpn_sock->sock;
+
+ strp_stop(&peer->tcp.strp);
+ skb_queue_purge(&peer->tcp.user_queue);
+
+ /* restore CBs that were saved in ovpn_sock_set_tcp_cb() */
+ sock->sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
+ sock->sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
+ sock->sk->sk_prot = peer->tcp.sk_cb.prot;
+ sock->sk->sk_socket->ops = peer->tcp.sk_cb.ops;
+
+ rcu_assign_sk_user_data(sock->sk, NULL);
+}
+
+void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock)
+{
+ struct ovpn_peer *peer = sock->peer;
+
+ /* NOTE: we don't wait for peer->tcp.defer_del_work to finish:
+ * either the worker is not running or this function
+ * was invoked by that worker.
+ */
+
+ cancel_work_sync(&sock->tcp_tx_work);
+ strp_done(&peer->tcp.strp);
+
+ skb_queue_purge(&peer->tcp.out_queue);
+ kfree_skb(peer->tcp.out_msg.skb);
+ peer->tcp.out_msg.skb = NULL;
+}
+
+static void ovpn_tcp_send_sock(struct ovpn_peer *peer, struct sock *sk)
+{
+ struct sk_buff *skb = peer->tcp.out_msg.skb;
+ int ret, flags;
+
+ if (!skb)
+ return;
+
+ if (peer->tcp.tx_in_progress)
+ return;
+
+ peer->tcp.tx_in_progress = true;
+
+ do {
+ flags = ovpn_skb_cb(skb)->nosignal ? MSG_NOSIGNAL : 0;
+ ret = skb_send_sock_locked_with_flags(sk, skb,
+ peer->tcp.out_msg.offset,
+ peer->tcp.out_msg.len,
+ flags);
+ if (unlikely(ret < 0)) {
+ if (ret == -EAGAIN)
+ goto out;
+
+ net_warn_ratelimited("%s: TCP error to peer %u: %d\n",
+ netdev_name(peer->ovpn->dev),
+ peer->id, ret);
+
+ /* in case of TCP error we can't recover the VPN
+ * stream therefore we abort the connection
+ */
+ ovpn_peer_hold(peer);
+ schedule_work(&peer->tcp.defer_del_work);
+
+ /* we bail out immediately and keep tx_in_progress set
+ * to true. This way we prevent more TX attempts
+ * which would lead to more invocations of
+ * schedule_work()
+ */
+ return;
+ }
+
+ peer->tcp.out_msg.len -= ret;
+ peer->tcp.out_msg.offset += ret;
+ } while (peer->tcp.out_msg.len > 0);
+
+ if (!peer->tcp.out_msg.len) {
+ preempt_disable();
+ dev_dstats_tx_add(peer->ovpn->dev, skb->len);
+ preempt_enable();
+ }
+
+ kfree_skb(peer->tcp.out_msg.skb);
+ peer->tcp.out_msg.skb = NULL;
+ peer->tcp.out_msg.len = 0;
+ peer->tcp.out_msg.offset = 0;
+
+out:
+ peer->tcp.tx_in_progress = false;
+}
+
+void ovpn_tcp_tx_work(struct work_struct *work)
+{
+ struct ovpn_socket *sock;
+
+ sock = container_of(work, struct ovpn_socket, tcp_tx_work);
+
+ lock_sock(sock->sock->sk);
+ if (sock->peer)
+ ovpn_tcp_send_sock(sock->peer, sock->sock->sk);
+ release_sock(sock->sock->sk);
+}
+
+static void ovpn_tcp_send_sock_skb(struct ovpn_peer *peer, struct sock *sk,
+ struct sk_buff *skb)
+{
+ if (peer->tcp.out_msg.skb)
+ ovpn_tcp_send_sock(peer, sk);
+
+ if (peer->tcp.out_msg.skb) {
+ dev_dstats_tx_dropped(peer->ovpn->dev);
+ kfree_skb(skb);
+ return;
+ }
+
+ peer->tcp.out_msg.skb = skb;
+ peer->tcp.out_msg.len = skb->len;
+ peer->tcp.out_msg.offset = 0;
+ ovpn_tcp_send_sock(peer, sk);
+}
+
+void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+ struct sk_buff *skb)
+{
+ u16 len = skb->len;
+
+ *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);
+
+ spin_lock_nested(&sock->sk->sk_lock.slock, OVPN_TCP_DEPTH_NESTING);
+ if (sock_owned_by_user(sock->sk)) {
+ if (skb_queue_len(&peer->tcp.out_queue) >=
+ READ_ONCE(net_hotdata.max_backlog)) {
+ dev_dstats_tx_dropped(peer->ovpn->dev);
+ kfree_skb(skb);
+ goto unlock;
+ }
+ __skb_queue_tail(&peer->tcp.out_queue, skb);
+ } else {
+ ovpn_tcp_send_sock_skb(peer, sock->sk, skb);
+ }
+unlock:
+ spin_unlock(&sock->sk->sk_lock.slock);
+}
+
+static void ovpn_tcp_release(struct sock *sk)
+{
+ struct sk_buff_head queue;
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+ struct sk_buff *skb;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (!sock) {
+ rcu_read_unlock();
+ return;
+ }
+
+ peer = sock->peer;
+
+ /* during initialization this function is called before
+ * assigning sock->peer
+ */
+ if (unlikely(!peer || !ovpn_peer_hold(peer))) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+
+ __skb_queue_head_init(&queue);
+ skb_queue_splice_init(&peer->tcp.out_queue, &queue);
+
+ while ((skb = __skb_dequeue(&queue)))
+ ovpn_tcp_send_sock_skb(peer, sk, skb);
+
+ peer->tcp.sk_cb.prot->release_cb(sk);
+ ovpn_peer_put(peer);
+}
+
+static int ovpn_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ struct ovpn_socket *sock;
+ int ret, linear = PAGE_SIZE;
+ struct ovpn_peer *peer;
+ struct sk_buff *skb;
+
+ lock_sock(sk);
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!sock || !sock->peer || !ovpn_peer_hold(sock->peer))) {
+ rcu_read_unlock();
+ release_sock(sk);
+ return -EIO;
+ }
+ rcu_read_unlock();
+ peer = sock->peer;
+
+ if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_NOSIGNAL)) {
+ ret = -EOPNOTSUPP;
+ goto peer_free;
+ }
+
+ if (peer->tcp.out_msg.skb) {
+ ret = -EAGAIN;
+ goto peer_free;
+ }
+
+ if (size < linear)
+ linear = size;
+
+ skb = sock_alloc_send_pskb(sk, linear, size - linear,
+ msg->msg_flags & MSG_DONTWAIT, &ret, 0);
+ if (!skb) {
+ net_err_ratelimited("%s: skb alloc failed: %d\n",
+ netdev_name(peer->ovpn->dev), ret);
+ goto peer_free;
+ }
+
+ skb_put(skb, linear);
+ skb->len = size;
+ skb->data_len = size - linear;
+
+ ret = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+ if (ret) {
+ kfree_skb(skb);
+ net_err_ratelimited("%s: skb copy from iter failed: %d\n",
+ netdev_name(peer->ovpn->dev), ret);
+ goto peer_free;
+ }
+
+ ovpn_skb_cb(skb)->nosignal = msg->msg_flags & MSG_NOSIGNAL;
+ ovpn_tcp_send_sock_skb(peer, sk, skb);
+ ret = size;
+peer_free:
+ release_sock(sk);
+ ovpn_peer_put(peer);
+ return ret;
+}
+
+static int ovpn_tcp_disconnect(struct sock *sk, int flags)
+{
+ return -EBUSY;
+}
+
+static void ovpn_tcp_data_ready(struct sock *sk)
+{
+ struct ovpn_socket *sock;
+
+ trace_sk_data_ready(sk);
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (likely(sock && sock->peer))
+ strp_data_ready(&sock->peer->tcp.strp);
+ rcu_read_unlock();
+}
+
+static void ovpn_tcp_write_space(struct sock *sk)
+{
+ struct ovpn_socket *sock;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (likely(sock && sock->peer)) {
+ schedule_work(&sock->tcp_tx_work);
+ sock->peer->tcp.sk_cb.sk_write_space(sk);
+ }
+ rcu_read_unlock();
+}
+
+static void ovpn_tcp_build_protos(struct proto *new_prot,
+ struct proto_ops *new_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops);
+
+static void ovpn_tcp_peer_del_work(struct work_struct *work)
+{
+ struct ovpn_peer *peer = container_of(work, struct ovpn_peer,
+ tcp.defer_del_work);
+
+ ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
+ ovpn_peer_put(peer);
+}
+
+/* Set TCP encapsulation callbacks */
+int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
+ struct ovpn_peer *peer)
+{
+ struct socket *sock = ovpn_sock->sock;
+ struct strp_callbacks cb = {
+ .rcv_msg = ovpn_tcp_rcv,
+ .parse_msg = ovpn_tcp_parse,
+ };
+ int ret;
+
+ /* make sure no pre-existing encapsulation handler exists */
+ if (sock->sk->sk_user_data)
+ return -EBUSY;
+
+ /* only a fully connected socket is expected. Connection should be
+ * handled in userspace
+ */
+ if (sock->sk->sk_state != TCP_ESTABLISHED) {
+ net_err_ratelimited("%s: provided TCP socket is not in ESTABLISHED state: %d\n",
+ netdev_name(peer->ovpn->dev),
+ sock->sk->sk_state);
+ return -EINVAL;
+ }
+
+ ret = strp_init(&peer->tcp.strp, sock->sk, &cb);
+ if (ret < 0) {
+ DEBUG_NET_WARN_ON_ONCE(1);
+ return ret;
+ }
+
+ INIT_WORK(&peer->tcp.defer_del_work, ovpn_tcp_peer_del_work);
+
+ __sk_dst_reset(sock->sk);
+ skb_queue_head_init(&peer->tcp.user_queue);
+ skb_queue_head_init(&peer->tcp.out_queue);
+
+ /* save current CBs so that they can be restored upon socket release */
+ peer->tcp.sk_cb.sk_data_ready = sock->sk->sk_data_ready;
+ peer->tcp.sk_cb.sk_write_space = sock->sk->sk_write_space;
+ peer->tcp.sk_cb.prot = sock->sk->sk_prot;
+ peer->tcp.sk_cb.ops = sock->sk->sk_socket->ops;
+
+ /* assign our static CBs and prot/ops */
+ sock->sk->sk_data_ready = ovpn_tcp_data_ready;
+ sock->sk->sk_write_space = ovpn_tcp_write_space;
+
+ if (sock->sk->sk_family == AF_INET) {
+ sock->sk->sk_prot = &ovpn_tcp_prot;
+ sock->sk->sk_socket->ops = &ovpn_tcp_ops;
+ } else {
+ sock->sk->sk_prot = &ovpn_tcp6_prot;
+ sock->sk->sk_socket->ops = &ovpn_tcp6_ops;
+ }
+
+ /* avoid using task_frag */
+ sock->sk->sk_allocation = GFP_ATOMIC;
+ sock->sk->sk_use_task_frag = false;
+
+ /* enqueue the RX worker */
+ strp_check_rcv(&peer->tcp.strp);
+
+ return 0;
+}
+
+static void ovpn_tcp_close(struct sock *sk, long timeout)
+{
+ struct ovpn_socket *sock;
+ struct ovpn_peer *peer;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (!sock || !sock->peer || !ovpn_peer_hold(sock->peer)) {
+ rcu_read_unlock();
+ return;
+ }
+ peer = sock->peer;
+ rcu_read_unlock();
+
+ ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
+ peer->tcp.sk_cb.prot->close(sk, timeout);
+ ovpn_peer_put(peer);
+}
+
+static __poll_t ovpn_tcp_poll(struct file *file, struct socket *sock,
+ poll_table *wait)
+{
+ __poll_t mask = datagram_poll(file, sock, wait);
+ struct ovpn_socket *ovpn_sock;
+
+ rcu_read_lock();
+ ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
+ if (ovpn_sock && ovpn_sock->peer &&
+ !skb_queue_empty(&ovpn_sock->peer->tcp.user_queue))
+ mask |= EPOLLIN | EPOLLRDNORM;
+ rcu_read_unlock();
+
+ return mask;
+}
+
+static void ovpn_tcp_build_protos(struct proto *new_prot,
+ struct proto_ops *new_ops,
+ const struct proto *orig_prot,
+ const struct proto_ops *orig_ops)
+{
+ memcpy(new_prot, orig_prot, sizeof(*new_prot));
+ memcpy(new_ops, orig_ops, sizeof(*new_ops));
+ new_prot->recvmsg = ovpn_tcp_recvmsg;
+ new_prot->sendmsg = ovpn_tcp_sendmsg;
+ new_prot->disconnect = ovpn_tcp_disconnect;
+ new_prot->close = ovpn_tcp_close;
+ new_prot->release_cb = ovpn_tcp_release;
+ new_ops->poll = ovpn_tcp_poll;
+}
+
+/* Initialize TCP static objects */
+void __init ovpn_tcp_init(void)
+{
+ ovpn_tcp_build_protos(&ovpn_tcp_prot, &ovpn_tcp_ops, &tcp_prot,
+ &inet_stream_ops);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ ovpn_tcp_build_protos(&ovpn_tcp6_prot, &ovpn_tcp6_ops, &tcpv6_prot,
+ &inet6_stream_ops);
+#endif
+}
diff --git a/drivers/net/ovpn/tcp.h b/drivers/net/ovpn/tcp.h
new file mode 100644
index 000000000000..10aefa834cf3
--- /dev/null
+++ b/drivers/net/ovpn/tcp.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_TCP_H_
+#define _NET_OVPN_TCP_H_
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/types.h>
+
+#include "peer.h"
+#include "skb.h"
+#include "socket.h"
+
+void __init ovpn_tcp_init(void);
+
+int ovpn_tcp_socket_attach(struct ovpn_socket *ovpn_sock,
+ struct ovpn_peer *peer);
+void ovpn_tcp_socket_detach(struct ovpn_socket *ovpn_sock);
+void ovpn_tcp_socket_wait_finish(struct ovpn_socket *sock);
+
+/* Prepare skb and enqueue it for sending to peer.
+ *
+ * Preparation consists of prepending the skb payload with its size.
+ * Required by the OpenVPN protocol in order to extract packets from
+ * the TCP stream on the receiver side.
+ */
+void ovpn_tcp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+		       struct sk_buff *skb);
+void ovpn_tcp_tx_work(struct work_struct *work);
+
+#endif /* _NET_OVPN_TCP_H_ */
diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
new file mode 100644
index 000000000000..c9e189056f33
--- /dev/null
+++ b/drivers/net/ovpn/udp.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/udp.h>
+#include <net/addrconf.h>
+#include <net/dst_cache.h>
+#include <net/route.h>
+#include <net/ipv6_stubs.h>
+#include <net/transp_v6.h>
+#include <net/udp.h>
+#include <net/udp_tunnel.h>
+
+#include "ovpnpriv.h"
+#include "main.h"
+#include "bind.h"
+#include "io.h"
+#include "peer.h"
+#include "proto.h"
+#include "socket.h"
+#include "udp.h"
+
+/* Retrieve the corresponding ovpn object from a UDP socket.
+ * rcu_read_lock must be held on entry.
+ */
+static struct ovpn_socket *ovpn_socket_from_udp_sock(struct sock *sk)
+{
+ struct ovpn_socket *ovpn_sock;
+
+ if (unlikely(READ_ONCE(udp_sk(sk)->encap_type) != UDP_ENCAP_OVPNINUDP))
+ return NULL;
+
+ ovpn_sock = rcu_dereference_sk_user_data(sk);
+ if (unlikely(!ovpn_sock))
+ return NULL;
+
+ /* make sure that sk matches our stored transport socket */
+ if (unlikely(!ovpn_sock->sock || sk != ovpn_sock->sock->sk))
+ return NULL;
+
+ return ovpn_sock;
+}
+
+/**
+ * ovpn_udp_encap_recv - Start processing a received UDP packet.
+ * @sk: socket over which the packet was received
+ * @skb: the received packet
+ *
+ * If the first byte of the payload is:
+ * - DATA_V2 the packet is accepted for further processing,
+ * - DATA_V1 the packet is dropped as not supported,
+ * - anything else the packet is forwarded to the UDP stack for
+ * delivery to user space.
+ *
+ * Return:
+ * 0 if skb was consumed or dropped
+ * >0 if skb should be passed up to userspace as UDP (packet not consumed)
+ * <0 if skb should be resubmitted as proto -N (packet not consumed)
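+ *
+ * For example, a control channel (handshake) packet does not carry the
+ * DATA_V2 opcode: the function returns 1 and the packet is handed back
+ * to the UDP stack for delivery to the userspace OpenVPN process.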
+ */
+static int ovpn_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+ struct ovpn_socket *ovpn_sock;
+ struct ovpn_priv *ovpn;
+ struct ovpn_peer *peer;
+ u32 peer_id;
+ u8 opcode;
+
+ ovpn_sock = ovpn_socket_from_udp_sock(sk);
+ if (unlikely(!ovpn_sock)) {
+ net_err_ratelimited("ovpn: %s invoked on non ovpn socket\n",
+ __func__);
+ goto drop_noovpn;
+ }
+
+ ovpn = ovpn_sock->ovpn;
+ if (unlikely(!ovpn)) {
+ net_err_ratelimited("ovpn: cannot obtain ovpn object from UDP socket\n");
+ goto drop_noovpn;
+ }
+
+ /* Make sure the first 4 bytes of the skb data buffer after the UDP
+ * header are accessible.
+ * They are required to fetch the OP code, the key ID and the peer ID.
+ */
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct udphdr) +
+ OVPN_OPCODE_SIZE))) {
+ net_dbg_ratelimited("%s: packet too small from UDP socket\n",
+ netdev_name(ovpn->dev));
+ goto drop;
+ }
+
+ opcode = ovpn_opcode_from_skb(skb, sizeof(struct udphdr));
+ if (unlikely(opcode != OVPN_DATA_V2)) {
+ /* DATA_V1 is not supported */
+ if (opcode == OVPN_DATA_V1)
+ goto drop;
+
+ /* unknown or control packet: let it bubble up to userspace */
+ return 1;
+ }
+
+ peer_id = ovpn_peer_id_from_skb(skb, sizeof(struct udphdr));
+ /* some OpenVPN server implementations send data packets with the
+	 * peer-id set to UNDEF. In this case we skip the lookup by
+	 * peer-id and try the lookup by transport address instead
+ */
+ if (peer_id == OVPN_PEER_ID_UNDEF)
+ peer = ovpn_peer_get_by_transp_addr(ovpn, skb);
+ else
+ peer = ovpn_peer_get_by_id(ovpn, peer_id);
+
+ if (unlikely(!peer))
+ goto drop;
+
+ /* pop off outer UDP header */
+ __skb_pull(skb, sizeof(struct udphdr));
+ ovpn_recv(peer, skb);
+ return 0;
+
+drop:
+ dev_dstats_rx_dropped(ovpn->dev);
+drop_noovpn:
+ kfree_skb(skb);
+ return 0;
+}
+
+/**
+ * ovpn_udp4_output - send IPv4 packet over udp socket
+ * @peer: the destination peer
+ * @bind: the binding related to the destination peer
+ * @cache: dst cache
+ * @sk: the socket to send the packet over
+ * @skb: the packet to send
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_udp4_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
+ struct dst_cache *cache, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct rtable *rt;
+ struct flowi4 fl = {
+ .saddr = bind->local.ipv4.s_addr,
+ .daddr = bind->remote.in4.sin_addr.s_addr,
+ .fl4_sport = inet_sk(sk)->inet_sport,
+ .fl4_dport = bind->remote.in4.sin_port,
+ .flowi4_proto = sk->sk_protocol,
+ .flowi4_mark = sk->sk_mark,
+ };
+ int ret;
+
+ local_bh_disable();
+ rt = dst_cache_get_ip4(cache, &fl.saddr);
+ if (rt)
+ goto transmit;
+
+ if (unlikely(!inet_confirm_addr(sock_net(sk), NULL, 0, fl.saddr,
+ RT_SCOPE_HOST))) {
+ /* we may end up here when the cached address is not usable
+ * anymore. In this case we reset address/cache and perform a
+ * new lookup
+ */
+ fl.saddr = 0;
+ spin_lock_bh(&peer->lock);
+ bind->local.ipv4.s_addr = 0;
+ spin_unlock_bh(&peer->lock);
+ dst_cache_reset(cache);
+ }
+
+ rt = ip_route_output_flow(sock_net(sk), &fl, sk);
+ if (IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) {
+ fl.saddr = 0;
+ spin_lock_bh(&peer->lock);
+ bind->local.ipv4.s_addr = 0;
+ spin_unlock_bh(&peer->lock);
+ dst_cache_reset(cache);
+
+ rt = ip_route_output_flow(sock_net(sk), &fl, sk);
+ }
+
+ if (IS_ERR(rt)) {
+ ret = PTR_ERR(rt);
+ net_dbg_ratelimited("%s: no route to host %pISpc: %d\n",
+ netdev_name(peer->ovpn->dev),
+ &bind->remote.in4,
+ ret);
+ goto err;
+ }
+ dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
+
+transmit:
+ udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0,
+ ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
+ fl.fl4_dport, false, sk->sk_no_check_tx);
+ ret = 0;
+err:
+ local_bh_enable();
+ return ret;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/**
+ * ovpn_udp6_output - send IPv6 packet over udp socket
+ * @peer: the destination peer
+ * @bind: the binding related to the destination peer
+ * @cache: dst cache
+ * @sk: the socket to send the packet over
+ * @skb: the packet to send
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_udp6_output(struct ovpn_peer *peer, struct ovpn_bind *bind,
+ struct dst_cache *cache, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dst_entry *dst;
+ int ret;
+
+ struct flowi6 fl = {
+ .saddr = bind->local.ipv6,
+ .daddr = bind->remote.in6.sin6_addr,
+ .fl6_sport = inet_sk(sk)->inet_sport,
+ .fl6_dport = bind->remote.in6.sin6_port,
+ .flowi6_proto = sk->sk_protocol,
+ .flowi6_mark = sk->sk_mark,
+ .flowi6_oif = bind->remote.in6.sin6_scope_id,
+ };
+
+ local_bh_disable();
+ dst = dst_cache_get_ip6(cache, &fl.saddr);
+ if (dst)
+ goto transmit;
+
+ if (unlikely(!ipv6_chk_addr(sock_net(sk), &fl.saddr, NULL, 0))) {
+ /* we may end up here when the cached address is no longer usable.
+ * In this case we reset the address and the cache and perform a
+ * new lookup
+ */
+ fl.saddr = in6addr_any;
+ spin_lock_bh(&peer->lock);
+ bind->local.ipv6 = in6addr_any;
+ spin_unlock_bh(&peer->lock);
+ dst_cache_reset(cache);
+ }
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sk), sk, &fl, NULL);
+ if (IS_ERR(dst)) {
+ ret = PTR_ERR(dst);
+ net_dbg_ratelimited("%s: no route to host %pISpc: %d\n",
+ netdev_name(peer->ovpn->dev),
+ &bind->remote.in6, ret);
+ goto err;
+ }
+ dst_cache_set_ip6(cache, dst, &fl.saddr);
+
+transmit:
+ udp_tunnel6_xmit_skb(dst, sk, skb, skb->dev, &fl.saddr, &fl.daddr, 0,
+ ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
+ fl.fl6_dport, udp_get_no_check6_tx(sk));
+ ret = 0;
+err:
+ local_bh_enable();
+ return ret;
+}
+#endif
+
+/**
+ * ovpn_udp_output - transmit skb using udp-tunnel
+ * @peer: the destination peer
+ * @cache: dst cache
+ * @sk: the socket to send the packet over
+ * @skb: the packet to send
+ *
+ * rcu_read_lock should be held on entry.
+ * On return, the skb is consumed.
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_udp_output(struct ovpn_peer *peer, struct dst_cache *cache,
+ struct sock *sk, struct sk_buff *skb)
+{
+ struct ovpn_bind *bind;
+ int ret;
+
+ /* set sk to null if skb is already orphaned */
+ if (!skb->destructor)
+ skb->sk = NULL;
+
+ rcu_read_lock();
+ bind = rcu_dereference(peer->bind);
+ if (unlikely(!bind)) {
+ net_warn_ratelimited("%s: no bind for remote peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ ret = -ENODEV;
+ goto out;
+ }
+
+ switch (bind->remote.in4.sin_family) {
+ case AF_INET:
+ ret = ovpn_udp4_output(peer, bind, cache, sk, skb);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case AF_INET6:
+ ret = ovpn_udp6_output(peer, bind, cache, sk, skb);
+ break;
+#endif
+ default:
+ ret = -EAFNOSUPPORT;
+ break;
+ }
+
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
+/**
+ * ovpn_udp_send_skb - prepare the skb and send it via UDP
+ * @peer: the destination peer
+ * @sock: the RCU protected peer socket
+ * @skb: the packet to send
+ */
+void ovpn_udp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+ struct sk_buff *skb)
+{
+ int ret = -1;
+
+ skb->dev = peer->ovpn->dev;
+ /* no checksum performed at this layer */
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* get socket info */
+ if (unlikely(!sock)) {
+ net_warn_ratelimited("%s: no sock for remote peer %u\n",
+ netdev_name(peer->ovpn->dev), peer->id);
+ goto out;
+ }
+
+ /* crypto layer -> transport (UDP) */
+ ret = ovpn_udp_output(peer, &peer->dst_cache, sock->sk, skb);
+out:
+ if (unlikely(ret < 0)) {
+ kfree_skb(skb);
+ return;
+ }
+}
+
+static void ovpn_udp_encap_destroy(struct sock *sk)
+{
+ struct ovpn_socket *sock;
+ struct ovpn_priv *ovpn;
+
+ rcu_read_lock();
+ sock = rcu_dereference_sk_user_data(sk);
+ if (!sock || !sock->ovpn) {
+ rcu_read_unlock();
+ return;
+ }
+ ovpn = sock->ovpn;
+ rcu_read_unlock();
+
+ ovpn_peers_free(ovpn, sk, OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT);
+}
+
+/**
+ * ovpn_udp_socket_attach - set udp-tunnel CBs on socket and link it to ovpn
+ * @ovpn_sock: socket to configure
+ * @ovpn: the ovpn instance to link
+ *
+ * After invoking this function, the sock will be controlled by ovpn so that
+ * any incoming packet may be processed by ovpn first.
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
+ struct ovpn_priv *ovpn)
+{
+ struct udp_tunnel_sock_cfg cfg = {
+ .encap_type = UDP_ENCAP_OVPNINUDP,
+ .encap_rcv = ovpn_udp_encap_recv,
+ .encap_destroy = ovpn_udp_encap_destroy,
+ };
+ struct socket *sock = ovpn_sock->sock;
+ struct ovpn_socket *old_data;
+ int ret;
+
+ /* make sure no pre-existing encapsulation handler exists */
+ rcu_read_lock();
+ old_data = rcu_dereference_sk_user_data(sock->sk);
+ if (!old_data) {
+ /* socket is currently unused - we can take it */
+ rcu_read_unlock();
+ setup_udp_tunnel_sock(sock_net(sock->sk), sock, &cfg);
+ return 0;
+ }
+
+ /* socket is in use. We need to determine whether it's owned by this
+ * ovpn instance or by something else.
+ * In the former case, we can increase the refcounter and happily
+ * use it, because the same UDP socket is expected to be shared among
+ * different peers.
+ *
+ * Unlike TCP, a single UDP socket can be used to talk to many remote
+ * hosts, therefore openvpn instantiates only one for all its peers.
+ */
+ if ((READ_ONCE(udp_sk(sock->sk)->encap_type) == UDP_ENCAP_OVPNINUDP) &&
+ old_data->ovpn == ovpn) {
+ netdev_dbg(ovpn->dev,
+ "provided socket already owned by this interface\n");
+ ret = -EALREADY;
+ } else {
+ netdev_dbg(ovpn->dev,
+ "provided socket already taken by other user\n");
+ ret = -EBUSY;
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
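A hypothetical caller-side view of the attach contract, as read off the code above: 0 means the socket was fresh and is now driven by this instance, -EALREADY means the same ovpn instance already owns it (harmless, since one UDP socket is shared by all of an instance's peers), and -EBUSY means a foreign user holds it:

static int sketch_take_udp_socket(struct ovpn_socket *ovpn_sock,
				  struct ovpn_priv *ovpn)
{
	int ret = ovpn_udp_socket_attach(ovpn_sock, ovpn);

	if (ret == -EALREADY)
		return 0;	/* already attached to this instance: reuse */
	return ret;		/* 0 on fresh attach, -EBUSY if foreign owner */
}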
+
+/**
+ * ovpn_udp_socket_detach - clean udp-tunnel status for this socket
+ * @ovpn_sock: the socket to clean
+ */
+void ovpn_udp_socket_detach(struct ovpn_socket *ovpn_sock)
+{
+ struct udp_tunnel_sock_cfg cfg = { };
+
+ setup_udp_tunnel_sock(sock_net(ovpn_sock->sock->sk), ovpn_sock->sock,
+ &cfg);
+}
diff --git a/drivers/net/ovpn/udp.h b/drivers/net/ovpn/udp.h
new file mode 100644
index 000000000000..9994eb6e0428
--- /dev/null
+++ b/drivers/net/ovpn/udp.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* OpenVPN data channel offload
+ *
+ * Copyright (C) 2019-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_UDP_H_
+#define _NET_OVPN_UDP_H_
+
+#include <net/sock.h>
+
+struct ovpn_peer;
+struct ovpn_priv;
+struct socket;
+
+int ovpn_udp_socket_attach(struct ovpn_socket *ovpn_sock,
+ struct ovpn_priv *ovpn);
+void ovpn_udp_socket_detach(struct ovpn_socket *ovpn_sock);
+
+void ovpn_udp_send_skb(struct ovpn_peer *peer, struct socket *sock,
+ struct sk_buff *skb);
+
+#endif /* _NET_OVPN_UDP_H_ */
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index d29f9f7fd2e1..0b8cc325e2bf 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -5,7 +5,6 @@
config PHYLINK
tristate
- depends on NETDEVICES
select PHYLIB
select SWPHY
help
@@ -15,7 +14,6 @@ config PHYLINK
menuconfig PHYLIB
tristate "PHY Device support and infrastructure"
- depends on NETDEVICES
select MDIO_DEVICE
select MDIO_DEVRES
help
diff --git a/drivers/net/phy/air_en8811h.c b/drivers/net/phy/air_en8811h.c
index e9fd24cb7270..57fbd8df9438 100644
--- a/drivers/net/phy/air_en8811h.c
+++ b/drivers/net/phy/air_en8811h.c
@@ -11,6 +11,7 @@
* Copyright (C) 2023 Airoha Technology Corp.
*/
+#include <linux/clk-provider.h>
#include <linux/phy.h>
#include <linux/firmware.h>
#include <linux/property.h>
@@ -115,6 +116,11 @@
#define EN8811H_GPIO_OUTPUT 0xcf8b8
#define EN8811H_GPIO_OUTPUT_345 (BIT(3) | BIT(4) | BIT(5))
+#define EN8811H_HWTRAP1 0xcf914
+#define EN8811H_HWTRAP1_CKO BIT(12)
+#define EN8811H_CLK_CGM 0xcf958
+#define EN8811H_CLK_CGM_CKO BIT(26)
+
#define EN8811H_FW_CTRL_1 0x0f0018
#define EN8811H_FW_CTRL_1_START 0x0
#define EN8811H_FW_CTRL_1_FINISH 0x1
@@ -142,10 +148,15 @@ struct led {
unsigned long state;
};
+#define clk_hw_to_en8811h_priv(_hw) \
+ container_of(_hw, struct en8811h_priv, hw)
+
struct en8811h_priv {
- u32 firmware_version;
- bool mcu_needs_restart;
- struct led led[EN8811H_LED_COUNT];
+ u32 firmware_version;
+ bool mcu_needs_restart;
+ struct led led[EN8811H_LED_COUNT];
+ struct clk_hw hw;
+ struct phy_device *phydev;
};
enum {
@@ -806,6 +817,86 @@ static int en8811h_led_hw_is_supported(struct phy_device *phydev, u8 index,
return 0;
};
+static unsigned long en8811h_clk_recalc_rate(struct clk_hw *hw,
+ unsigned long parent)
+{
+ struct en8811h_priv *priv = clk_hw_to_en8811h_priv(hw);
+ struct phy_device *phydev = priv->phydev;
+ u32 pbus_value;
+ int ret;
+
+ ret = air_buckpbus_reg_read(phydev, EN8811H_HWTRAP1, &pbus_value);
+ if (ret < 0)
+ return ret;
+
+ return (pbus_value & EN8811H_HWTRAP1_CKO) ? 50000000 : 25000000;
+}
+
+static int en8811h_clk_enable(struct clk_hw *hw)
+{
+ struct en8811h_priv *priv = clk_hw_to_en8811h_priv(hw);
+ struct phy_device *phydev = priv->phydev;
+
+ return air_buckpbus_reg_modify(phydev, EN8811H_CLK_CGM,
+ EN8811H_CLK_CGM_CKO,
+ EN8811H_CLK_CGM_CKO);
+}
+
+static void en8811h_clk_disable(struct clk_hw *hw)
+{
+ struct en8811h_priv *priv = clk_hw_to_en8811h_priv(hw);
+ struct phy_device *phydev = priv->phydev;
+
+ air_buckpbus_reg_modify(phydev, EN8811H_CLK_CGM,
+ EN8811H_CLK_CGM_CKO, 0);
+}
+
+static int en8811h_clk_is_enabled(struct clk_hw *hw)
+{
+ struct en8811h_priv *priv = clk_hw_to_en8811h_priv(hw);
+ struct phy_device *phydev = priv->phydev;
+ u32 pbus_value;
+ int ret;
+
+ ret = air_buckpbus_reg_read(phydev, EN8811H_CLK_CGM, &pbus_value);
+ if (ret < 0)
+ return ret;
+
+ return (pbus_value & EN8811H_CLK_CGM_CKO);
+}
+
+static const struct clk_ops en8811h_clk_ops = {
+ .recalc_rate = en8811h_clk_recalc_rate,
+ .enable = en8811h_clk_enable,
+ .disable = en8811h_clk_disable,
+ .is_enabled = en8811h_clk_is_enabled,
+};
+
+static int en8811h_clk_provider_setup(struct device *dev, struct clk_hw *hw)
+{
+ struct clk_init_data init;
+ int ret;
+
+ if (!IS_ENABLED(CONFIG_COMMON_CLK))
+ return 0;
+
+ init.name = devm_kasprintf(dev, GFP_KERNEL, "%s-cko",
+ fwnode_get_name(dev_fwnode(dev)));
+ if (!init.name)
+ return -ENOMEM;
+
+ init.ops = &en8811h_clk_ops;
+ init.flags = 0;
+ init.num_parents = 0;
+ hw->init = &init;
+
+ ret = devm_clk_hw_register(dev, hw);
+ if (ret)
+ return ret;
+
+ return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw);
+}
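The provider registered above exposes one gateable clock whose rate is fixed by a hardware trap (50 MHz when EN8811H_HWTRAP1_CKO is set, 25 MHz otherwise). A hypothetical consumer whose node references the PHY as its clock parent would use it through the common clk API; the probe function and NULL clock id here are illustrative only:

#include <linux/clk.h>
#include <linux/device.h>

static int sketch_consumer_probe(struct device *dev)
{
	/* acquire and enable the PHY's CKO output (hypothetical binding) */
	struct clk *cko = devm_clk_get_enabled(dev, NULL);

	if (IS_ERR(cko))
		return PTR_ERR(cko);

	dev_info(dev, "CKO running at %lu Hz\n", clk_get_rate(cko));
	return 0;
}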
+
static int en8811h_probe(struct phy_device *phydev)
{
struct en8811h_priv *priv;
@@ -838,6 +929,12 @@ static int en8811h_probe(struct phy_device *phydev)
return ret;
}
+ priv->phydev = phydev;
+ /* Co-Clock Output */
+ ret = en8811h_clk_provider_setup(&phydev->mdio.dev, &priv->hw);
+ if (ret)
+ return ret;
+
/* Configure led gpio pins as output */
ret = air_buckpbus_reg_modify(phydev, EN8811H_GPIO_OUTPUT,
EN8811H_GPIO_OUTPUT_345,
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 85e231451093..daab555721df 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -478,13 +478,6 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
switch (rq->type) {
case PTP_CLK_REQ_EXTTS:
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
/* Reject requests to enable time stamping on both edges. */
if ((rq->extts.flags & PTP_STRICT_FLAGS) &&
(rq->extts.flags & PTP_ENABLE_FEATURE) &&
@@ -513,9 +506,6 @@ static int ptp_dp83640_enable(struct ptp_clock_info *ptp,
return 0;
case PTP_CLK_REQ_PEROUT:
- /* Reject requests with unsupported flags */
- if (rq->perout.flags)
- return -EOPNOTSUPP;
if (rq->perout.index >= N_PER_OUT)
return -EINVAL;
return periodic_output(clock, rq, on, rq->perout.index);
@@ -1002,6 +992,9 @@ static void dp83640_clock_init(struct dp83640_clock *clock, struct mii_bus *bus)
clock->caps.n_per_out = N_PER_OUT;
clock->caps.n_pins = DP83640_N_PINS;
clock->caps.pps = 0;
+ clock->caps.supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
clock->caps.adjfine = ptp_dp83640_adjfine;
clock->caps.adjtime = ptp_dp83640_adjtime;
clock->caps.gettime64 = ptp_dp83640_gettime;
diff --git a/drivers/net/phy/mediatek/Kconfig b/drivers/net/phy/mediatek/Kconfig
index 2a8ac5aed0f8..4308002bb82c 100644
--- a/drivers/net/phy/mediatek/Kconfig
+++ b/drivers/net/phy/mediatek/Kconfig
@@ -15,8 +15,9 @@ config MEDIATEK_GE_PHY
config MEDIATEK_GE_SOC_PHY
tristate "MediaTek SoC Ethernet PHYs"
- depends on (ARM64 && ARCH_MEDIATEK) || COMPILE_TEST
- depends on NVMEM_MTK_EFUSE
+ depends on ARM64 || COMPILE_TEST
+ depends on ARCH_AIROHA || (ARCH_MEDIATEK && NVMEM_MTK_EFUSE) || \
+ COMPILE_TEST
select MTK_NET_PHYLIB
help
Supports MediaTek SoC built-in Gigabit Ethernet PHYs.
diff --git a/drivers/net/phy/mediatek/mtk-ge-soc.c b/drivers/net/phy/mediatek/mtk-ge-soc.c
index 175cf5239bba..cd09684780a4 100644
--- a/drivers/net/phy/mediatek/mtk-ge-soc.c
+++ b/drivers/net/phy/mediatek/mtk-ge-soc.c
@@ -11,8 +11,11 @@
#include "../phylib.h"
#include "mtk.h"
+#define MTK_PHY_MAX_LEDS 2
+
#define MTK_GPHY_ID_MT7981 0x03a29461
#define MTK_GPHY_ID_MT7988 0x03a29481
+#define MTK_GPHY_ID_AN7581 0x03a294c1
#define MTK_EXT_PAGE_ACCESS 0x1f
#define MTK_PHY_PAGE_STANDARD 0x0000
@@ -1406,6 +1409,52 @@ static int mt7981_phy_probe(struct phy_device *phydev)
return mt798x_phy_calibration(phydev);
}
+static int an7581_phy_probe(struct phy_device *phydev)
+{
+ struct mtk_socphy_priv *priv;
+ struct pinctrl *pinctrl;
+
+ /* Toggle pinctrl to enable PHY LED */
+ pinctrl = devm_pinctrl_get_select(&phydev->mdio.dev, "gbe-led");
+ if (IS_ERR(pinctrl))
+ dev_err(&phydev->mdio.bus->dev,
+ "Failed to setup PHY LED pinctrl\n");
+
+ priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ phydev->priv = priv;
+
+ return 0;
+}
+
+static int an7581_phy_led_polarity_set(struct phy_device *phydev, int index,
+ unsigned long modes)
+{
+ u16 val = 0;
+ u32 mode;
+
+ if (index >= MTK_PHY_MAX_LEDS)
+ return -EINVAL;
+
+ for_each_set_bit(mode, &modes, __PHY_LED_MODES_NUM) {
+ switch (mode) {
+ case PHY_LED_ACTIVE_LOW:
+ val = MTK_PHY_LED_ON_POLARITY;
+ break;
+ case PHY_LED_ACTIVE_HIGH:
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND2, index ?
+ MTK_PHY_LED1_ON_CTRL : MTK_PHY_LED0_ON_CTRL,
+ MTK_PHY_LED_ON_POLARITY, val);
+}
+
static struct phy_driver mtk_socphy_driver[] = {
{
PHY_ID_MATCH_EXACT(MTK_GPHY_ID_MT7981),
@@ -1441,6 +1490,17 @@ static struct phy_driver mtk_socphy_driver[] = {
.led_hw_control_set = mt798x_phy_led_hw_control_set,
.led_hw_control_get = mt798x_phy_led_hw_control_get,
},
+ {
+ PHY_ID_MATCH_EXACT(MTK_GPHY_ID_AN7581),
+ .name = "Airoha AN7581 PHY",
+ .probe = an7581_phy_probe,
+ .led_blink_set = mt798x_phy_led_blink_set,
+ .led_brightness_set = mt798x_phy_led_brightness_set,
+ .led_hw_is_supported = mt798x_phy_led_hw_is_supported,
+ .led_hw_control_set = mt798x_phy_led_hw_control_set,
+ .led_hw_control_get = mt798x_phy_led_hw_control_get,
+ .led_polarity_set = an7581_phy_led_polarity_set,
+ },
};
module_phy_driver(mtk_socphy_driver);
@@ -1448,6 +1508,7 @@ module_phy_driver(mtk_socphy_driver);
static const struct mdio_device_id __maybe_unused mtk_socphy_tbl[] = {
{ PHY_ID_MATCH_EXACT(MTK_GPHY_ID_MT7981) },
{ PHY_ID_MATCH_EXACT(MTK_GPHY_ID_MT7988) },
+ { PHY_ID_MATCH_EXACT(MTK_GPHY_ID_AN7581) },
{ }
};
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 24882d30f685..71fb4410c31b 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -3236,10 +3236,6 @@ static int lan8814_ptp_perout(struct ptp_clock_info *ptpci,
int pulse_width;
int pin, event;
- /* Reject requests with unsupported flags */
- if (rq->perout.flags & ~PTP_PEROUT_DUTY_CYCLE)
- return -EOPNOTSUPP;
-
mutex_lock(&shared->shared_lock);
event = rq->perout.index;
pin = ptp_find_pin(shared->ptp_clock, PTP_PF_PEROUT, event);
@@ -3406,11 +3402,6 @@ static int lan8814_ptp_extts(struct ptp_clock_info *ptpci,
struct phy_device *phydev = shared->phydev;
int pin;
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_EXTTS_EDGES |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
pin = ptp_find_pin(shared->ptp_clock, PTP_PF_EXTTS,
rq->extts.index);
if (pin == -1 || pin != LAN8814_PTP_EXTTS_NUM)
@@ -3917,6 +3908,10 @@ static int lan8814_ptp_probe_once(struct phy_device *phydev)
shared->ptp_clock_info.n_ext_ts = LAN8814_PTP_EXTTS_NUM;
shared->ptp_clock_info.n_pins = LAN8814_PTP_GPIO_NUM;
shared->ptp_clock_info.pps = 0;
+ shared->ptp_clock_info.supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS;
+ shared->ptp_clock_info.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE;
shared->ptp_clock_info.pin_config = shared->pin_config;
shared->ptp_clock_info.n_per_out = LAN8814_PTP_PEROUT_NUM;
shared->ptp_clock_info.adjfine = lan8814_ptpci_adjfine;
@@ -5068,9 +5063,6 @@ static int lan8841_ptp_perout(struct ptp_clock_info *ptp,
int pin;
int ret;
- if (rq->perout.flags & ~PTP_PEROUT_DUTY_CYCLE)
- return -EOPNOTSUPP;
-
pin = ptp_find_pin(ptp_priv->ptp_clock, PTP_PF_PEROUT, rq->perout.index);
if (pin == -1 || pin >= LAN8841_PTP_GPIO_NUM)
return -EINVAL;
@@ -5314,6 +5306,7 @@ static struct ptp_clock_info lan8841_ptp_clock_info = {
.n_per_out = LAN8841_PTP_GPIO_NUM,
.n_ext_ts = LAN8841_PTP_GPIO_NUM,
.n_pins = LAN8841_PTP_GPIO_NUM,
+ .supported_perout_flags = PTP_PEROUT_DUTY_CYCLE,
};
#define LAN8841_OPERATION_MODE_STRAP_LOW_REGISTER 3
diff --git a/drivers/net/phy/microchip_rds_ptp.c b/drivers/net/phy/microchip_rds_ptp.c
index 3e6bf10cdeed..e6514ce04c29 100644
--- a/drivers/net/phy/microchip_rds_ptp.c
+++ b/drivers/net/phy/microchip_rds_ptp.c
@@ -224,10 +224,6 @@ static int mchp_rds_ptp_perout(struct ptp_clock_info *ptpci,
struct phy_device *phydev = clock->phydev;
int ret, event_pin, pulsewidth;
- /* Reject requests with unsupported flags */
- if (perout->flags & ~PTP_PEROUT_DUTY_CYCLE)
- return -EOPNOTSUPP;
-
event_pin = ptp_find_pin(clock->ptp_clock, PTP_PF_PEROUT,
perout->index);
if (event_pin != clock->event_pin)
@@ -1259,6 +1255,7 @@ struct mchp_rds_ptp_clock *mchp_rds_ptp_probe(struct phy_device *phydev, u8 mmd,
clock->caps.pps = 0;
clock->caps.n_pins = MCHP_RDS_PTP_N_PIN;
clock->caps.n_per_out = MCHP_RDS_PTP_N_PEROUT;
+ clock->caps.supported_perout_flags = PTP_PEROUT_DUTY_CYCLE;
clock->caps.pin_config = clock->pin_config;
clock->caps.adjfine = mchp_rds_ptp_ltc_adjfine;
clock->caps.adjtime = mchp_rds_ptp_ltc_adjtime;
diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 250a018d5546..f11dd32494c3 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -763,9 +763,6 @@ static int nxp_c45_perout_enable(struct nxp_c45_phy *priv,
struct phy_device *phydev = priv->phydev;
int pin;
- if (perout->flags & ~PTP_PEROUT_PHASE)
- return -EOPNOTSUPP;
-
pin = ptp_find_pin(priv->ptp_clock, PTP_PF_PEROUT, perout->index);
if (pin < 0)
return pin;
@@ -861,12 +858,6 @@ static int nxp_c45_extts_enable(struct nxp_c45_phy *priv,
const struct nxp_c45_phy_data *data = nxp_c45_get_data(priv->phydev);
int pin;
- if (extts->flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
/* Sampling on both edges is not supported */
if ((extts->flags & PTP_RISING_EDGE) &&
(extts->flags & PTP_FALLING_EDGE) &&
@@ -962,6 +953,10 @@ static int nxp_c45_init_ptp_clock(struct nxp_c45_phy *priv)
.n_pins = ARRAY_SIZE(nxp_c45_ptp_pins),
.n_ext_ts = 1,
.n_per_out = 1,
+ .supported_extts_flags = PTP_RISING_EDGE |
+ PTP_FALLING_EDGE |
+ PTP_STRICT_FLAGS,
+ .supported_perout_flags = PTP_PEROUT_PHASE,
};
priv->ptp_clock = ptp_clock_register(&priv->caps,
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index cc1bfd22fb81..cc6c209fe702 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3236,18 +3236,6 @@ struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode)
EXPORT_SYMBOL(fwnode_phy_find_device);
/**
- * device_phy_find_device - For the given device, get the phy_device
- * @dev: Pointer to the given device
- *
- * Refer return conditions of fwnode_phy_find_device().
- */
-struct phy_device *device_phy_find_device(struct device *dev)
-{
- return fwnode_phy_find_device(dev_fwnode(dev));
-}
-EXPORT_SYMBOL_GPL(device_phy_find_device);
-
-/**
* fwnode_get_phy_node - Get the phy_node using the named reference.
* @fwnode: Pointer to fwnode from which phy_node has to be obtained.
*
diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h
index 74162190bccc..8531b804021a 100644
--- a/drivers/net/usb/asix.h
+++ b/drivers/net/usb/asix.h
@@ -224,7 +224,6 @@ int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm);
u16 asix_read_medium_status(struct usbnet *dev, int in_pm);
int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm);
-void asix_adjust_link(struct net_device *netdev);
int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm);
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 72ffc89b477a..7fd763917ae2 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -414,28 +414,6 @@ int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm)
return ret;
}
-/* set MAC link settings according to information from phylib */
-void asix_adjust_link(struct net_device *netdev)
-{
- struct phy_device *phydev = netdev->phydev;
- struct usbnet *dev = netdev_priv(netdev);
- u16 mode = 0;
-
- if (phydev->link) {
- mode = AX88772_MEDIUM_DEFAULT;
-
- if (phydev->duplex == DUPLEX_HALF)
- mode &= ~AX_MEDIUM_FD;
-
- if (phydev->speed != SPEED_100)
- mode &= ~AX_MEDIUM_PS;
- }
-
- asix_write_medium_mode(dev, mode, 0);
- phy_print_status(phydev);
- usbnet_link_change(dev, phydev->link, 0);
-}
-
int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm)
{
int ret;
diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c
index da24941a6e44..9b0318fb50b5 100644
--- a/drivers/net/usb/asix_devices.c
+++ b/drivers/net/usb/asix_devices.c
@@ -752,7 +752,6 @@ static void ax88772_mac_link_down(struct phylink_config *config,
struct usbnet *dev = netdev_priv(to_net_dev(config->dev));
asix_write_medium_mode(dev, 0, 0);
- usbnet_link_change(dev, false, false);
}
static void ax88772_mac_link_up(struct phylink_config *config,
@@ -783,7 +782,6 @@ static void ax88772_mac_link_up(struct phylink_config *config,
m |= AX_MEDIUM_RFC;
asix_write_medium_mode(dev, m, 0);
- usbnet_link_change(dev, true, false);
}
static const struct phylink_mac_ops ax88772_phylink_mac_ops = {
@@ -1350,10 +1348,9 @@ static const struct driver_info ax88772_info = {
.description = "ASIX AX88772 USB 2.0 Ethernet",
.bind = ax88772_bind,
.unbind = ax88772_unbind,
- .status = asix_status,
.reset = ax88772_reset,
.stop = ax88772_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_MULTI_PACKET,
.rx_fixup = asix_rx_fixup_common,
.tx_fixup = asix_tx_fixup,
};
@@ -1362,11 +1359,9 @@ static const struct driver_info ax88772b_info = {
.description = "ASIX AX88772B USB 2.0 Ethernet",
.bind = ax88772_bind,
.unbind = ax88772_unbind,
- .status = asix_status,
.reset = ax88772_reset,
.stop = ax88772_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
- FLAG_MULTI_PACKET,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_MULTI_PACKET,
.rx_fixup = asix_rx_fixup_common,
.tx_fixup = asix_tx_fixup,
.data = FLAG_EEPROM_MAC,
@@ -1376,11 +1371,9 @@ static const struct driver_info lxausb_t1l_info = {
.description = "Linux Automation GmbH USB 10Base-T1L",
.bind = ax88772_bind,
.unbind = ax88772_unbind,
- .status = asix_status,
.reset = ax88772_reset,
.stop = ax88772_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
- FLAG_MULTI_PACKET,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_MULTI_PACKET,
.rx_fixup = asix_rx_fixup_common,
.tx_fixup = asix_tx_fixup,
.data = FLAG_EEPROM_MAC,
@@ -1412,10 +1405,8 @@ static const struct driver_info hg20f9_info = {
.description = "HG20F9 USB 2.0 Ethernet",
.bind = ax88772_bind,
.unbind = ax88772_unbind,
- .status = asix_status,
.reset = ax88772_reset,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR |
- FLAG_MULTI_PACKET,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_MULTI_PACKET,
.rx_fixup = asix_rx_fixup_common,
.tx_fixup = asix_tx_fixup,
.data = FLAG_EEPROM_MAC,
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 9ccc3f09f71b..56aee539c235 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -4966,19 +4966,15 @@ static void __net_exit vxlan_destroy_tunnels(struct vxlan_net *vn,
vxlan_dellink(vxlan->dev, dev_to_kill);
}
-static void __net_exit vxlan_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
+static void __net_exit vxlan_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list) {
- struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
- __unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
+ ASSERT_RTNL_NET(net);
- vxlan_destroy_tunnels(vn, dev_to_kill);
- }
+ __unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
+ vxlan_destroy_tunnels(vn, dev_to_kill);
}
static void __net_exit vxlan_exit_net(struct net *net)
@@ -4992,7 +4988,7 @@ static void __net_exit vxlan_exit_net(struct net *net)
static struct pernet_operations vxlan_net_ops = {
.init = vxlan_init_net,
- .exit_batch_rtnl = vxlan_exit_batch_rtnl,
+ .exit_rtnl = vxlan_exit_rtnl,
.exit = vxlan_exit_net,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
index 6e6e9f05509a..d0753776d339 100644
--- a/drivers/net/vxlan/vxlan_vnifilter.c
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -411,13 +411,12 @@ static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb
struct tunnel_msg *tmsg;
struct net_device *dev;
- if (cb->nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct tunnel_msg))) {
+ tmsg = nlmsg_payload(cb->nlh, sizeof(*tmsg));
+ if (!tmsg) {
NL_SET_ERR_MSG(cb->extack, "Invalid msg length");
return -EINVAL;
}
- tmsg = nlmsg_data(cb->nlh);
-
if (tmsg->flags & ~TUNNEL_MSG_VALID_USER_FLAGS) {
NL_SET_ERR_MSG(cb->extack, "Invalid tunnelmsg flags in ancillary header");
return -EINVAL;
diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 4380e6ddb849..4bf421765d03 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -162,6 +162,7 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd,
{
struct ptp_clock *ptp =
container_of(pccontext->clk, struct ptp_clock, clock);
+ unsigned int i, pin_index, supported_extts_flags;
struct ptp_sys_offset_extended *extoff = NULL;
struct ptp_sys_offset_precise precise_offset;
struct system_device_crosststamp xtstamp;
@@ -172,7 +173,6 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd,
struct ptp_clock_request req;
struct ptp_clock_caps caps;
struct ptp_clock_time *pct;
- unsigned int i, pin_index;
struct ptp_pin_desc pd;
struct timespec64 ts;
int enable, err = 0;
@@ -240,6 +240,18 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd,
err = -EINVAL;
break;
}
+ supported_extts_flags = ptp->info->supported_extts_flags;
+ /* The PTP_ENABLE_FEATURE flag is always supported. */
+ supported_extts_flags |= PTP_ENABLE_FEATURE;
+ /* If the driver does not support strictly checking flags, the
+ * PTP_RISING_EDGE and PTP_FALLING_EDGE flags are merely
+ * hints which are not enforced.
+ */
+ if (!(supported_extts_flags & PTP_STRICT_FLAGS))
+ supported_extts_flags |= PTP_EXTTS_EDGES;
+ /* Reject unsupported flags */
+ if (req.extts.flags & ~supported_extts_flags)
+ return -EOPNOTSUPP;
req.type = PTP_CLK_REQ_EXTTS;
enable = req.extts.flags & PTP_ENABLE_FEATURE ? 1 : 0;
if (mutex_lock_interruptible(&ptp->pincfg_mux))
@@ -312,6 +324,8 @@ long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd,
err = -EINVAL;
break;
}
+ if (req.perout.flags & ~ptp->info->supported_perout_flags)
+ return -EOPNOTSUPP;
req.type = PTP_CLK_REQ_PEROUT;
enable = req.perout.period.sec || req.perout.period.nsec;
if (mutex_lock_interruptible(&ptp->pincfg_mux))
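With the supported_extts_flags and supported_perout_flags fields, flag validation moves from each driver's ->enable() handler into the core ioctl path shown above, and drivers simply declare what they can do. An illustrative capability declaration (name and counts hypothetical; the flag values mirror those used by the drivers converted in this series):

#include <linux/ptp_clock_kernel.h>

static const struct ptp_clock_info sketch_phc_caps = {
	.owner			= THIS_MODULE,
	.name			= "sketch-phc",
	.n_ext_ts		= 1,
	.n_per_out		= 1,
	/* extts: rising edge only, with strict flag checking enforced */
	.supported_extts_flags	= PTP_RISING_EDGE | PTP_STRICT_FLAGS,
	/* perout: duty cycle adjustable, no phase/one-shot support */
	.supported_perout_flags	= PTP_PEROUT_DUTY_CYCLE,
};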
diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index fbb3fa8fc60b..b8d4df8c6da2 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -283,18 +283,6 @@ static int idtcm_extts_enable(struct idtcm_channel *channel,
idtcm = channel->idtcm;
old_mask = idtcm->extts_mask;
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
- /* Reject requests to enable time stamping on falling edge */
- if ((rq->extts.flags & PTP_ENABLE_FEATURE) &&
- (rq->extts.flags & PTP_FALLING_EDGE))
- return -EOPNOTSUPP;
-
if (index >= MAX_TOD)
return -EINVAL;
@@ -2043,6 +2031,7 @@ static const struct ptp_clock_info idtcm_caps = {
.n_per_out = 12,
.n_ext_ts = MAX_TOD,
.n_pins = MAX_REF_CLK,
+ .supported_extts_flags = PTP_RISING_EDGE | PTP_STRICT_FLAGS,
.adjphase = &idtcm_adjphase,
.getmaxphase = &idtcm_getmaxphase,
.adjfine = &idtcm_adjfine,
@@ -2060,6 +2049,7 @@ static const struct ptp_clock_info idtcm_caps_deprecated = {
.n_per_out = 12,
.n_ext_ts = MAX_TOD,
.n_pins = MAX_REF_CLK,
+ .supported_extts_flags = PTP_RISING_EDGE | PTP_STRICT_FLAGS,
.adjphase = &idtcm_adjphase,
.getmaxphase = &idtcm_getmaxphase,
.adjfine = &idtcm_adjfine,
diff --git a/drivers/ptp/ptp_fc3.c b/drivers/ptp/ptp_fc3.c
index cfced36c70bc..70002500170e 100644
--- a/drivers/ptp/ptp_fc3.c
+++ b/drivers/ptp/ptp_fc3.c
@@ -592,6 +592,7 @@ static const struct ptp_clock_info idtfc3_caps = {
.max_adj = MAX_FFO_PPB,
.n_per_out = 1,
.n_ext_ts = 1,
+ .supported_extts_flags = PTP_STRICT_FLAGS | PTP_EXT_OFFSET,
.adjphase = &idtfc3_adjphase,
.adjfine = &idtfc3_adjfine,
.adjtime = &idtfc3_adjtime,
diff --git a/drivers/ptp/ptp_idt82p33.c b/drivers/ptp/ptp_idt82p33.c
index b2fd94d4f863..f01c50dfa44e 100644
--- a/drivers/ptp/ptp_idt82p33.c
+++ b/drivers/ptp/ptp_idt82p33.c
@@ -246,18 +246,6 @@ static int idt82p33_extts_enable(struct idt82p33_channel *channel,
idt82p33 = channel->idt82p33;
old_mask = idt82p33->extts_mask;
- /* Reject requests with unsupported flags */
- if (rq->extts.flags & ~(PTP_ENABLE_FEATURE |
- PTP_RISING_EDGE |
- PTP_FALLING_EDGE |
- PTP_STRICT_FLAGS))
- return -EOPNOTSUPP;
-
- /* Reject requests to enable time stamping on falling edge */
- if ((rq->extts.flags & PTP_ENABLE_FEATURE) &&
- (rq->extts.flags & PTP_FALLING_EDGE))
- return -EOPNOTSUPP;
-
if (index >= MAX_PHC_PLL)
return -EINVAL;
@@ -1187,6 +1175,9 @@ static void idt82p33_caps_init(u32 index, struct ptp_clock_info *caps,
caps->pin_config = pin_cfg;
+ caps->supported_extts_flags = PTP_RISING_EDGE |
+ PTP_STRICT_FLAGS;
+
for (i = 0; i < max_pins; ++i) {
ppd = &pin_cfg[i];
diff --git a/fs/afs/Kconfig b/fs/afs/Kconfig
index fc8ba9142f2f..682bd8ec2c10 100644
--- a/fs/afs/Kconfig
+++ b/fs/afs/Kconfig
@@ -5,6 +5,7 @@ config AFS_FS
select AF_RXRPC
select DNS_RESOLVER
select NETFS_SUPPORT
+ select CRYPTO_KRB5
help
If you say Y here, you will get an experimental Andrew File System
driver. It currently only supports unsecured read-only AFS access.
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index 5efd7e13b304..b49b8fe682f3 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -8,6 +8,7 @@ kafs-y := \
addr_prefs.o \
callback.o \
cell.o \
+ cm_security.o \
cmservice.o \
dir.o \
dir_edit.o \
diff --git a/fs/afs/cm_security.c b/fs/afs/cm_security.c
new file mode 100644
index 000000000000..edcbd249d202
--- /dev/null
+++ b/fs/afs/cm_security.c
@@ -0,0 +1,340 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Cache manager security.
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/slab.h>
+#include <crypto/krb5.h>
+#include "internal.h"
+#include "afs_cm.h"
+#include "afs_fs.h"
+#include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
+
+#define RXGK_SERVER_ENC_TOKEN 1036U // 0x40c
+#define xdr_round_up(x) (round_up((x), sizeof(__be32)))
+#define xdr_len_object(x) (4 + round_up((x), sizeof(__be32)))
+
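A quick arithmetic check of the XDR sizing helpers just defined:

/* xdr_round_up(10) = 12: payload padded to a 32-bit boundary.
 * xdr_len_object(10) = 4 + 12 = 16: a 4-byte XDR length word plus the
 * padded payload, matching the opaque<> encoding used below.
 */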
+#ifdef CONFIG_RXGK
+static int afs_create_yfs_cm_token(struct sk_buff *challenge,
+ struct afs_server *server);
+#endif
+
+/*
+ * Respond to an RxGK challenge, adding appdata.
+ */
+static int afs_respond_to_challenge(struct sk_buff *challenge)
+{
+#ifdef CONFIG_RXGK
+ struct krb5_buffer appdata = {};
+ struct afs_server *server;
+#endif
+ struct rxrpc_peer *peer;
+ unsigned long peer_data;
+ u16 service_id;
+ u8 security_index;
+
+ rxrpc_kernel_query_challenge(challenge, &peer, &peer_data,
+ &service_id, &security_index);
+
+ _enter("%u,%u", service_id, security_index);
+
+ switch (service_id) {
+ /* We don't send CM_SERVICE RPCs, so don't expect a challenge
+ * therefrom.
+ */
+ case FS_SERVICE:
+ case VL_SERVICE:
+ case YFS_FS_SERVICE:
+ case YFS_VL_SERVICE:
+ break;
+ default:
+ pr_warn("Can't respond to unknown challenge %u:%u\n",
+ service_id, security_index);
+ return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO,
+ afs_abort_unsupported_sec_class);
+ }
+
+ switch (security_index) {
+#ifdef CONFIG_RXKAD
+ case RXRPC_SECURITY_RXKAD:
+ return rxkad_kernel_respond_to_challenge(challenge);
+#endif
+
+#ifdef CONFIG_RXGK
+ case RXRPC_SECURITY_RXGK:
+ return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+
+ case RXRPC_SECURITY_YFS_RXGK:
+ switch (service_id) {
+ case FS_SERVICE:
+ case YFS_FS_SERVICE:
+ server = (struct afs_server *)peer_data;
+ if (!server->cm_rxgk_appdata.data) {
+ mutex_lock(&server->cm_token_lock);
+ if (!server->cm_rxgk_appdata.data)
+ afs_create_yfs_cm_token(challenge, server);
+ mutex_unlock(&server->cm_token_lock);
+ }
+ if (server->cm_rxgk_appdata.data)
+ appdata = server->cm_rxgk_appdata;
+ break;
+ }
+ return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+#endif
+
+ default:
+ return rxrpc_kernel_reject_challenge(challenge, RX_USER_ABORT, -EPROTO,
+ afs_abort_unsupported_sec_class);
+ }
+}
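The YFS_RXGK branch above uses a double-checked pattern so that only one challenge responder builds the per-server token: test cm_rxgk_appdata.data outside the mutex for the common case, then retest under cm_token_lock before creating it. Reduced to its skeleton (names hypothetical):

static void sketch_create_once(struct mutex *lock, void **slot,
			       void *(*create)(void))
{
	if (!READ_ONCE(*slot)) {	/* unlocked fast path */
		mutex_lock(lock);
		if (!*slot)		/* recheck under the lock */
			*slot = create();
		mutex_unlock(lock);
	}
}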
+
+/*
+ * Process the OOB message queue, processing challenge packets.
+ */
+void afs_process_oob_queue(struct work_struct *work)
+{
+ struct afs_net *net = container_of(work, struct afs_net, rx_oob_work);
+ struct sk_buff *oob;
+ enum rxrpc_oob_type type;
+
+ while ((oob = rxrpc_kernel_dequeue_oob(net->socket, &type))) {
+ switch (type) {
+ case RXRPC_OOB_CHALLENGE:
+ afs_respond_to_challenge(oob);
+ break;
+ }
+ rxrpc_kernel_free_oob(oob);
+ }
+}
+
+#ifdef CONFIG_RXGK
+/*
+ * Create a securities keyring for the cache manager and attach a key to it for
+ * the RxGK tokens we want to use to secure the callback connection back from
+ * the fileserver.
+ */
+int afs_create_token_key(struct afs_net *net, struct socket *socket)
+{
+ const struct krb5_enctype *krb5;
+ struct key *ring;
+ key_ref_t key;
+ char K0[32], *desc;
+ int ret;
+
+ ring = keyring_alloc("kafs",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+ KEY_POS_SEARCH | KEY_POS_WRITE |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH,
+ KEY_ALLOC_NOT_IN_QUOTA,
+ NULL, NULL);
+ if (IS_ERR(ring))
+ return PTR_ERR(ring);
+
+ ret = rxrpc_sock_set_security_keyring(socket->sk, ring);
+ if (ret < 0)
+ goto out;
+
+ ret = -ENOPKG;
+ krb5 = crypto_krb5_find_enctype(KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96);
+ if (!krb5)
+ goto out;
+
+ if (WARN_ON_ONCE(krb5->key_len > sizeof(K0)))
+ goto out;
+
+ ret = -ENOMEM;
+ desc = kasprintf(GFP_KERNEL, "%u:%u:%u:%u",
+ YFS_CM_SERVICE, RXRPC_SECURITY_YFS_RXGK, 1, krb5->etype);
+ if (!desc)
+ goto out;
+
+ wait_for_random_bytes();
+ get_random_bytes(K0, krb5->key_len);
+
+ key = key_create(make_key_ref(ring, true),
+ "rxrpc_s", desc,
+ K0, krb5->key_len,
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_USR_VIEW,
+ KEY_ALLOC_NOT_IN_QUOTA);
+ kfree(desc);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto out;
+ }
+
+ net->fs_cm_token_key = key_ref_to_ptr(key);
+ ret = 0;
+out:
+ key_put(ring);
+ return ret;
+}
+
+/*
+ * Create a YFS RxGK GSS token to use as a ticket to the specified fileserver.
+ */
+static int afs_create_yfs_cm_token(struct sk_buff *challenge,
+ struct afs_server *server)
+{
+ const struct krb5_enctype *conn_krb5, *token_krb5;
+ const struct krb5_buffer *token_key;
+ struct crypto_aead *aead;
+ struct scatterlist sg;
+ struct afs_net *net = server->cell->net;
+ const struct key *key = net->fs_cm_token_key;
+ size_t keysize, uuidsize, authsize, toksize, encsize, contsize, adatasize, offset;
+ __be32 caps[1] = {
+ [0] = htonl(AFS_CAP_ERROR_TRANSLATION),
+ };
+ __be32 *xdr;
+ void *appdata, *K0, *encbase;
+ u32 enctype;
+ int ret;
+
+ if (!key)
+ return -ENOKEY;
+
+ /* Assume that the fileserver is happy to use the same encoding type
+ * as the one specified in the token obtained by the user.
+ */
+ enctype = rxgk_kernel_query_challenge(challenge);
+
+ conn_krb5 = crypto_krb5_find_enctype(enctype);
+ if (!conn_krb5)
+ return -ENOPKG;
+ token_krb5 = key->payload.data[0];
+ token_key = (const struct krb5_buffer *)&key->payload.data[2];
+
+ /* struct rxgk_key {
+ * afs_uint32 enctype;
+ * opaque key<>;
+ * };
+ */
+ keysize = 4 + xdr_len_object(conn_krb5->key_len);
+
+ /* struct RXGK_AuthName {
+ * afs_int32 kind;
+ * opaque data<AUTHDATAMAX>;
+ * opaque display<AUTHPRINTABLEMAX>;
+ * };
+ */
+ uuidsize = sizeof(server->uuid);
+ authsize = 4 + xdr_len_object(uuidsize) + xdr_len_object(0);
+
+ /* struct RXGK_Token {
+ * rxgk_key K0;
+ * RXGK_Level level;
+ * rxgkTime starttime;
+ * afs_int32 lifetime;
+ * afs_int32 bytelife;
+ * rxgkTime expirationtime;
+ * struct RXGK_AuthName identities<>;
+ * };
+ */
+ toksize = keysize + 8 + 4 + 4 + 8 + xdr_len_object(authsize);
+
+ offset = 0;
+ encsize = crypto_krb5_how_much_buffer(token_krb5, KRB5_ENCRYPT_MODE, toksize, &offset);
+
+ /* struct RXGK_TokenContainer {
+ * afs_int32 kvno;
+ * afs_int32 enctype;
+ * opaque encrypted_token<>;
+ * };
+ */
+ contsize = 4 + 4 + xdr_len_object(encsize);
+
+ /* struct YFSAppData {
+ * opr_uuid initiatorUuid;
+ * opr_uuid acceptorUuid;
+ * Capabilities caps;
+ * afs_int32 enctype;
+ * opaque callbackKey<>;
+ * opaque callbackToken<>;
+ * };
+ */
+ adatasize = 16 + 16 +
+ xdr_len_object(sizeof(caps)) +
+ 4 +
+ xdr_len_object(conn_krb5->key_len) +
+ xdr_len_object(contsize);
+
+ ret = -ENOMEM;
+ appdata = kzalloc(adatasize, GFP_KERNEL);
+ if (!appdata)
+ goto out;
+ xdr = appdata;
+
+ memcpy(xdr, &net->uuid, 16); /* appdata.initiatorUuid */
+ xdr += 16 / 4;
+ memcpy(xdr, &server->uuid, 16); /* appdata.acceptorUuid */
+ xdr += 16 / 4;
+ *xdr++ = htonl(ARRAY_SIZE(caps)); /* appdata.caps.len */
+ memcpy(xdr, &caps, sizeof(caps)); /* appdata.caps */
+ xdr += ARRAY_SIZE(caps);
+ *xdr++ = htonl(conn_krb5->etype); /* appdata.enctype */
+
+ *xdr++ = htonl(conn_krb5->key_len); /* appdata.callbackKey.len */
+ K0 = xdr;
+ get_random_bytes(K0, conn_krb5->key_len); /* appdata.callbackKey.data */
+ xdr += xdr_round_up(conn_krb5->key_len) / 4;
+
+ *xdr++ = htonl(contsize); /* appdata.callbackToken.len */
+ *xdr++ = htonl(1); /* cont.kvno */
+ *xdr++ = htonl(token_krb5->etype); /* cont.enctype */
+ *xdr++ = htonl(encsize); /* cont.encrypted_token.len */
+
+ encbase = xdr;
+ xdr += offset / 4;
+ *xdr++ = htonl(conn_krb5->etype); /* token.K0.enctype */
+ *xdr++ = htonl(conn_krb5->key_len); /* token.K0.key.len */
+ memcpy(xdr, K0, conn_krb5->key_len); /* token.K0.key.data */
+ xdr += xdr_round_up(conn_krb5->key_len) / 4;
+
+ *xdr++ = htonl(RXRPC_SECURITY_ENCRYPT); /* token.level */
+ *xdr++ = htonl(0); /* token.starttime */
+ *xdr++ = htonl(0); /* " */
+ *xdr++ = htonl(0); /* token.lifetime */
+ *xdr++ = htonl(0); /* token.bytelife */
+ *xdr++ = htonl(0); /* token.expirationtime */
+ *xdr++ = htonl(0); /* " */
+ *xdr++ = htonl(1); /* token.identities.count */
+ *xdr++ = htonl(0); /* token.identities[0].kind */
+ *xdr++ = htonl(uuidsize); /* token.identities[0].data.len */
+ memcpy(xdr, &server->uuid, uuidsize);
+ xdr += xdr_round_up(uuidsize) / 4;
+ *xdr++ = htonl(0); /* token.identities[0].display.len */
+
+ xdr = encbase + xdr_round_up(encsize);
+
+ if ((unsigned long)xdr - (unsigned long)appdata != adatasize)
+ pr_err("Appdata size incorrect %lx != %zx\n",
+ (unsigned long)xdr - (unsigned long)appdata, adatasize);
+
+ aead = crypto_krb5_prepare_encryption(token_krb5, token_key, RXGK_SERVER_ENC_TOKEN,
+ GFP_KERNEL);
+ if (IS_ERR(aead)) {
+ ret = PTR_ERR(aead);
+ goto out_token;
+ }
+
+ sg_init_one(&sg, encbase, encsize);
+ ret = crypto_krb5_encrypt(token_krb5, aead, &sg, 1, encsize, offset, toksize, false);
+ if (ret < 0)
+ goto out_aead;
+
+ server->cm_rxgk_appdata.len = adatasize;
+ server->cm_rxgk_appdata.data = appdata;
+ appdata = NULL;
+
+out_aead:
+ crypto_free_aead(aead);
+out_token:
+ kfree(appdata);
+out:
+ return ret;
+}
+#endif /* CONFIG_RXGK */
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 440b0e731093..1124ea4000cb 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -20,6 +20,7 @@
#include <linux/uuid.h>
#include <linux/mm_types.h>
#include <linux/dns_resolver.h>
+#include <crypto/krb5.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/sock.h>
@@ -176,8 +177,10 @@ struct afs_call {
bool intr; /* T if interruptible */
bool unmarshalling_error; /* T if an unmarshalling error occurred */
bool responded; /* Got a response from the call (may be abort) */
+ u8 security_ix; /* Security class */
u16 service_id; /* Actual service ID (after upgrade) */
unsigned int debug_id; /* Trace ID */
+ u32 enctype; /* Security encoding type */
u32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
union { /* place to extract temporary data */
@@ -281,6 +284,7 @@ struct afs_net {
struct socket *socket;
struct afs_call *spare_incoming_call;
struct work_struct charge_preallocation_work;
+ struct work_struct rx_oob_work;
struct mutex socket_mutex;
atomic_t nr_outstanding_calls;
atomic_t nr_superblocks;
@@ -305,6 +309,7 @@ struct afs_net {
struct list_head fs_probe_slow; /* List of afs_server to probe at 5m intervals */
struct hlist_head fs_proc; /* procfs servers list */
+ struct key *fs_cm_token_key; /* Key for creating CM tokens */
struct work_struct fs_prober;
struct timer_list fs_probe_timer;
atomic_t servers_outstanding;
@@ -540,6 +545,8 @@ struct afs_server {
struct list_head volumes; /* RCU list of afs_server_entry objects */
struct work_struct destroyer; /* Work item to try and destroy a server */
struct timer_list timer; /* Management timer */
+ struct mutex cm_token_lock; /* Lock governing creation of appdata */
+ struct krb5_buffer cm_rxgk_appdata; /* Appdata to be included in RESPONSE packet */
time64_t unuse_time; /* Time at which last unused */
unsigned long flags;
#define AFS_SERVER_FL_RESPONDING 0 /* The server is responding */
@@ -1059,6 +1066,19 @@ extern void __net_exit afs_cell_purge(struct afs_net *);
extern bool afs_cm_incoming_call(struct afs_call *);
/*
+ * cm_security.c
+ */
+void afs_process_oob_queue(struct work_struct *work);
+#ifdef CONFIG_RXGK
+int afs_create_token_key(struct afs_net *net, struct socket *socket);
+#else
+static inline int afs_create_token_key(struct afs_net *net, struct socket *socket)
+{
+ return 0;
+}
+#endif
+
+/*
* dir.c
*/
extern const struct file_operations afs_dir_file_operations;
diff --git a/fs/afs/main.c b/fs/afs/main.c
index c845c5daaeba..02475d415d88 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -73,6 +73,7 @@ static int __net_init afs_net_init(struct net *net_ns)
generate_random_uuid((unsigned char *)&net->uuid);
INIT_WORK(&net->charge_preallocation_work, afs_charge_preallocation);
+ INIT_WORK(&net->rx_oob_work, afs_process_oob_queue);
mutex_init(&net->socket_mutex);
net->cells = RB_ROOT;
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index b8180bf2281f..8f2b3a177690 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -8,6 +8,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/errno.h>
+#include <crypto/krb5.h>
#include "internal.h"
#include "afs_fs.h"
#include "protocol_uae.h"
@@ -103,6 +104,32 @@ int afs_abort_to_error(u32 abort_code)
case RXKADDATALEN: return -EKEYREJECTED;
case RXKADILLEGALLEVEL: return -EKEYREJECTED;
+ case RXGK_INCONSISTENCY: return -EPROTO;
+ case RXGK_PACKETSHORT: return -EPROTO;
+ case RXGK_BADCHALLENGE: return -EPROTO;
+ case RXGK_SEALEDINCON: return -EKEYREJECTED;
+ case RXGK_NOTAUTH: return -EKEYREJECTED;
+ case RXGK_EXPIRED: return -EKEYEXPIRED;
+ case RXGK_BADLEVEL: return -EKEYREJECTED;
+ case RXGK_BADKEYNO: return -EKEYREJECTED;
+ case RXGK_NOTRXGK: return -EKEYREJECTED;
+ case RXGK_UNSUPPORTED: return -EKEYREJECTED;
+ case RXGK_GSSERROR: return -EKEYREJECTED;
+#ifdef RXGK_BADETYPE
+ case RXGK_BADETYPE: return -ENOPKG;
+#endif
+#ifdef RXGK_BADTOKEN
+ case RXGK_BADTOKEN: return -EKEYREJECTED;
+#endif
+#ifdef RXGK_DATALEN
+ case RXGK_DATALEN: return -EPROTO;
+#endif
+#ifdef RXGK_BADQOP
+ case RXGK_BADQOP: return -EKEYREJECTED;
+#endif
+
+ case KRB5_PROG_KEYTYPE_NOSUPP: return -ENOPKG;
+
case RXGEN_OPCODE: return -ENOTSUPP;
default: return -EREMOTEIO;
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index d5e480a33859..c1cadf8fb346 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -24,8 +24,17 @@ static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned
static void afs_process_async_call(struct work_struct *);
static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long);
static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long);
+static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID);
+static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob);
static int afs_deliver_cm_op_id(struct afs_call *);
+static const struct rxrpc_kernel_ops afs_rxrpc_callback_ops = {
+ .notify_new_call = afs_rx_new_call,
+ .discard_new_call = afs_rx_discard_new_call,
+ .user_attach_call = afs_rx_attach,
+ .notify_oob = afs_rx_notify_oob,
+};
+
/* asynchronous incoming call initial processing */
static const struct afs_call_type afs_RXCMxxxx = {
.name = "CB.xxxx",
@@ -49,6 +58,7 @@ int afs_open_socket(struct afs_net *net)
goto error_1;
socket->sk->sk_allocation = GFP_NOFS;
+ socket->sk->sk_user_data = net;
/* bind the callback manager's address to make this a server socket */
memset(&srx, 0, sizeof(srx));
@@ -64,6 +74,14 @@ int afs_open_socket(struct afs_net *net)
if (ret < 0)
goto error_2;
+ ret = rxrpc_sock_set_manage_response(socket->sk, true);
+ if (ret < 0)
+ goto error_2;
+
+ ret = afs_create_token_key(net, socket);
+ if (ret < 0)
+ pr_err("Couldn't create RxGK CM key: %d\n", ret);
+
ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx));
if (ret == -EADDRINUSE) {
srx.transport.sin6.sin6_port = 0;
@@ -84,8 +102,7 @@ int afs_open_socket(struct afs_net *net)
* it sends back to us.
*/
- rxrpc_kernel_new_call_notification(socket, afs_rx_new_call,
- afs_rx_discard_new_call);
+ rxrpc_kernel_set_notifications(socket, &afs_rxrpc_callback_ops);
ret = kernel_listen(socket, INT_MAX);
if (ret < 0)
@@ -125,7 +142,9 @@ void afs_close_socket(struct afs_net *net)
kernel_sock_shutdown(net->socket, SHUT_RDWR);
flush_workqueue(afs_async_calls);
+ net->socket->sk->sk_user_data = NULL;
sock_release(net->socket);
+ key_put(net->fs_cm_token_key);
_debug("dework");
_leave("");
@@ -738,7 +757,6 @@ void afs_charge_preallocation(struct work_struct *work)
if (rxrpc_kernel_charge_accept(net->socket,
afs_wake_up_async_call,
- afs_rx_attach,
(unsigned long)call,
GFP_KERNEL,
call->debug_id) < 0)
@@ -800,10 +818,14 @@ static int afs_deliver_cm_op_id(struct afs_call *call)
if (!afs_cm_incoming_call(call))
return -ENOTSUPP;
+ call->security_ix = rxrpc_kernel_query_call_security(call->rxcall,
+ &call->service_id,
+ &call->enctype);
+
trace_afs_cb_call(call);
call->work.func = call->type->work;
- /* pass responsibility for the remainer of this message off to the
+ /* pass responsibility for the remainder of this message off to the
* cache manager op */
return call->type->deliver(call);
}
@@ -952,3 +974,13 @@ noinline int afs_protocol_error(struct afs_call *call,
call->unmarshalling_error = true;
return -EBADMSG;
}
+
+/*
+ * Wake up OOB notification processing.
+ */
+static void afs_rx_notify_oob(struct sock *sk, struct sk_buff *oob)
+{
+ struct afs_net *net = sk->sk_user_data;
+
+ schedule_work(&net->rx_oob_work);
+}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 8755f2703815..a97562f831eb 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -131,6 +131,7 @@ static struct afs_server *afs_alloc_server(struct afs_cell *cell, const uuid_t *
timer_setup(&server->timer, afs_server_timer, 0);
INIT_LIST_HEAD(&server->volumes);
init_waitqueue_head(&server->probe_wq);
+ mutex_init(&server->cm_token_lock);
INIT_LIST_HEAD(&server->probe_link);
INIT_HLIST_NODE(&server->proc_link);
spin_lock_init(&server->probe_lock);
@@ -396,6 +397,7 @@ static void afs_server_rcu(struct rcu_head *rcu)
afs_put_endpoint_state(rcu_access_pointer(server->endpoint_state),
afs_estate_trace_put_server);
afs_put_cell(server->cell, afs_cell_trace_put_server);
+ kfree(server->cm_rxgk_appdata.data);
kfree(server);
}
diff --git a/include/crypto/krb5.h b/include/crypto/krb5.h
index 62d998e62f47..71dd38f59be1 100644
--- a/include/crypto/krb5.h
+++ b/include/crypto/krb5.h
@@ -64,6 +64,11 @@ struct scatterlist;
#define KEY_USAGE_SEED_INTEGRITY (0x55)
/*
+ * Standard Kerberos error codes.
+ */
+#define KRB5_PROG_KEYTYPE_NOSUPP -1765328233
+
+/*
* Mode of operation.
*/
enum krb5_crypto_mode {
diff --git a/include/keys/rxrpc-type.h b/include/keys/rxrpc-type.h
index 333c0f49a9cd..0ddbe197a261 100644
--- a/include/keys/rxrpc-type.h
+++ b/include/keys/rxrpc-type.h
@@ -9,6 +9,7 @@
#define _KEYS_RXRPC_TYPE_H
#include <linux/key.h>
+#include <crypto/krb5.h>
/*
* key type for AF_RXRPC keys
@@ -32,6 +33,21 @@ struct rxkad_key {
};
/*
+ * RxRPC key for YFS-RxGK (type-6 security)
+ */
+struct rxgk_key {
+ s64 begintime; /* Time at which the ticket starts */
+ s64 endtime; /* Time at which the ticket ends */
+ u64 lifetime; /* Maximum lifespan of a connection (seconds) */
+ u64 bytelife; /* Maximum number of bytes on a connection */
+ unsigned int enctype; /* Encoding type */
+ s8 level; /* Negotiated security RXRPC_SECURITY_PLAIN/AUTH/ENCRYPT */
+ struct krb5_buffer key; /* Master key, K0 */
+ struct krb5_buffer ticket; /* Ticket to be passed to server */
+ u8 _key[]; /* Key storage */
+};
+
+/*
* list of tokens attached to an rxrpc key
*/
struct rxrpc_key_token {
@@ -40,6 +56,7 @@ struct rxrpc_key_token {
struct rxrpc_key_token *next; /* the next token in the list */
union {
struct rxkad_key *kad;
+ struct rxgk_key *rxgk;
};
};
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 325af611909f..0b61b8b996d4 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -2,79 +2,8 @@
#ifndef _LINUX_DCCP_H
#define _LINUX_DCCP_H
-
-#include <linux/in.h>
-#include <linux/interrupt.h>
-#include <linux/ktime.h>
-#include <linux/list.h>
-#include <linux/uio.h>
-#include <linux/workqueue.h>
-
-#include <net/inet_connection_sock.h>
-#include <net/inet_sock.h>
-#include <net/inet_timewait_sock.h>
-#include <net/tcp_states.h>
#include <uapi/linux/dccp.h>
-enum dccp_state {
- DCCP_OPEN = TCP_ESTABLISHED,
- DCCP_REQUESTING = TCP_SYN_SENT,
- DCCP_LISTEN = TCP_LISTEN,
- DCCP_RESPOND = TCP_SYN_RECV,
- /*
- * States involved in closing a DCCP connection:
- * 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq.
- *
- * 2) CLOSING can have three different meanings (RFC 4340, 8.3):
- * a. Client has performed active-close, has sent a Close to the server
- * from state OPEN or PARTOPEN, and is waiting for the final Reset
- * (in this case, SOCK_DONE == 1).
- * b. Client is asked to perform passive-close, by receiving a CloseReq
- * in (PART)OPEN state. It sends a Close and waits for final Reset
- * (in this case, SOCK_DONE == 0).
- * c. Server performs an active-close as in (a), keeps TIMEWAIT state.
- *
- * 3) The following intermediate states are employed to give passively
- * closing nodes a chance to process their unread data:
- * - PASSIVE_CLOSE (from OPEN => CLOSED) and
- * - PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above).
- */
- DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1,
- DCCP_PASSIVE_CLOSE = TCP_CLOSE_WAIT, /* any node receiving a Close */
- DCCP_CLOSING = TCP_CLOSING,
- DCCP_TIME_WAIT = TCP_TIME_WAIT,
- DCCP_CLOSED = TCP_CLOSE,
- DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV,
- DCCP_PARTOPEN = TCP_MAX_STATES,
- DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */
- DCCP_MAX_STATES
-};
-
-enum {
- DCCPF_OPEN = TCPF_ESTABLISHED,
- DCCPF_REQUESTING = TCPF_SYN_SENT,
- DCCPF_LISTEN = TCPF_LISTEN,
- DCCPF_RESPOND = TCPF_SYN_RECV,
- DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1,
- DCCPF_CLOSING = TCPF_CLOSING,
- DCCPF_TIME_WAIT = TCPF_TIME_WAIT,
- DCCPF_CLOSED = TCPF_CLOSE,
- DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV,
- DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN),
-};
-
-static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
-{
- return (struct dccp_hdr *)skb_transport_header(skb);
-}
-
-static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen)
-{
- skb_push(skb, headlen);
- skb_reset_transport_header(skb);
- return memset(skb_transport_header(skb), 0, headlen);
-}
-
static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh)
{
return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh));
@@ -85,12 +14,6 @@ static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh)
return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0);
}
-static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
-{
- const struct dccp_hdr *dh = dccp_hdr(skb);
- return __dccp_basic_hdr_len(dh);
-}
-
static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh)
{
__u64 seq_nr = ntohs(dh->dccph_seq);
@@ -103,222 +26,10 @@ static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh)
return seq_nr;
}
-static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
-{
- return (struct dccp_hdr_request *)(skb_transport_header(skb) +
- dccp_basic_hdr_len(skb));
-}
-
-static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
-{
- return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) +
- dccp_basic_hdr_len(skb));
-}
-
-static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
-{
- const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
- return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low);
-}
-
-static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
-{
- return (struct dccp_hdr_response *)(skb_transport_header(skb) +
- dccp_basic_hdr_len(skb));
-}
-
-static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
-{
- return (struct dccp_hdr_reset *)(skb_transport_header(skb) +
- dccp_basic_hdr_len(skb));
-}
-
static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh)
{
return __dccp_basic_hdr_len(dh) +
dccp_packet_hdr_len(dh->dccph_type);
}
-static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
-{
- return __dccp_hdr_len(dccp_hdr(skb));
-}
-
-/**
- * struct dccp_request_sock - represent DCCP-specific connection request
- * @dreq_inet_rsk: structure inherited from
- * @dreq_iss: initial sequence number, sent on the first Response (RFC 4340, 7.1)
- * @dreq_gss: greatest sequence number sent (for retransmitted Responses)
- * @dreq_isr: initial sequence number received in the first Request
- * @dreq_gsr: greatest sequence number received (for retransmitted Request(s))
- * @dreq_service: service code present on the Request (there is just one)
- * @dreq_featneg: feature negotiation options for this connection
- * The following two fields are analogous to the ones in dccp_sock:
- * @dreq_timestamp_echo: last received timestamp to echo (13.1)
- * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo
- */
-struct dccp_request_sock {
- struct inet_request_sock dreq_inet_rsk;
- __u64 dreq_iss;
- __u64 dreq_gss;
- __u64 dreq_isr;
- __u64 dreq_gsr;
- __be32 dreq_service;
- spinlock_t dreq_lock;
- struct list_head dreq_featneg;
- __u32 dreq_timestamp_echo;
- __u32 dreq_timestamp_time;
-};
-
-static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
-{
- return (struct dccp_request_sock *)req;
-}
-
-extern struct inet_timewait_death_row dccp_death_row;
-
-extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
- struct sk_buff *skb);
-
-struct dccp_options_received {
- u64 dccpor_ndp:48;
- u32 dccpor_timestamp;
- u32 dccpor_timestamp_echo;
- u32 dccpor_elapsed_time;
-};
-
-struct ccid;
-
-enum dccp_role {
- DCCP_ROLE_UNDEFINED,
- DCCP_ROLE_LISTEN,
- DCCP_ROLE_CLIENT,
- DCCP_ROLE_SERVER,
-};
-
-struct dccp_service_list {
- __u32 dccpsl_nr;
- __be32 dccpsl_list[];
-};
-
-#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
-#define DCCP_SERVICE_CODE_IS_ABSENT 0
-
-static inline bool dccp_list_has_service(const struct dccp_service_list *sl,
- const __be32 service)
-{
- if (likely(sl != NULL)) {
- u32 i = sl->dccpsl_nr;
- while (i--)
- if (sl->dccpsl_list[i] == service)
- return true;
- }
- return false;
-}
-
-struct dccp_ackvec;
-
-/**
- * struct dccp_sock - DCCP socket state
- *
- * @dccps_swl - sequence number window low
- * @dccps_swh - sequence number window high
- * @dccps_awl - acknowledgement number window low
- * @dccps_awh - acknowledgement number window high
- * @dccps_iss - initial sequence number sent
- * @dccps_isr - initial sequence number received
- * @dccps_osr - first OPEN sequence number received
- * @dccps_gss - greatest sequence number sent
- * @dccps_gsr - greatest valid sequence number received
- * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
- * @dccps_service - first (passive sock) or unique (active sock) service code
- * @dccps_service_list - second .. last service code on passive socket
- * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
- * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo
- * @dccps_l_ack_ratio - feature-local Ack Ratio
- * @dccps_r_ack_ratio - feature-remote Ack Ratio
- * @dccps_l_seq_win - local Sequence Window (influences ack number validity)
- * @dccps_r_seq_win - remote Sequence Window (influences seq number validity)
- * @dccps_pcslen - sender partial checksum coverage (via sockopt)
- * @dccps_pcrlen - receiver partial checksum coverage (via sockopt)
- * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2)
- * @dccps_ndp_count - number of Non Data Packets since last data packet
- * @dccps_mss_cache - current value of MSS (path MTU minus header sizes)
- * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4)
- * @dccps_featneg - tracks feature-negotiation state (mostly during handshake)
- * @dccps_hc_rx_ackvec - rx half connection ack vector
- * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection)
- * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection)
- * @dccps_options_received - parsed set of retrieved options
- * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy
- * @dccps_tx_qlen - maximum length of the TX queue
- * @dccps_role - role of this sock, one of %dccp_role
- * @dccps_hc_rx_insert_options - receiver wants to add options when acking
- * @dccps_hc_tx_insert_options - sender wants to add options when sending
- * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3)
- * @dccps_sync_scheduled - flag which signals "send out-of-band message soon"
- * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets
- * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing)
- * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs)
- */
-struct dccp_sock {
- /* inet_connection_sock has to be the first member of dccp_sock */
- struct inet_connection_sock dccps_inet_connection;
-#define dccps_syn_rtt dccps_inet_connection.icsk_ack.lrcvtime
- __u64 dccps_swl;
- __u64 dccps_swh;
- __u64 dccps_awl;
- __u64 dccps_awh;
- __u64 dccps_iss;
- __u64 dccps_isr;
- __u64 dccps_osr;
- __u64 dccps_gss;
- __u64 dccps_gsr;
- __u64 dccps_gar;
- __be32 dccps_service;
- __u32 dccps_mss_cache;
- struct dccp_service_list *dccps_service_list;
- __u32 dccps_timestamp_echo;
- __u32 dccps_timestamp_time;
- __u16 dccps_l_ack_ratio;
- __u16 dccps_r_ack_ratio;
- __u64 dccps_l_seq_win:48;
- __u64 dccps_r_seq_win:48;
- __u8 dccps_pcslen:4;
- __u8 dccps_pcrlen:4;
- __u8 dccps_send_ndp_count:1;
- __u64 dccps_ndp_count:48;
- unsigned long dccps_rate_last;
- struct list_head dccps_featneg;
- struct dccp_ackvec *dccps_hc_rx_ackvec;
- struct ccid *dccps_hc_rx_ccid;
- struct ccid *dccps_hc_tx_ccid;
- struct dccp_options_received dccps_options_received;
- __u8 dccps_qpolicy;
- __u32 dccps_tx_qlen;
- enum dccp_role dccps_role:2;
- __u8 dccps_hc_rx_insert_options:1;
- __u8 dccps_hc_tx_insert_options:1;
- __u8 dccps_server_timewait:1;
- __u8 dccps_sync_scheduled:1;
- struct tasklet_struct dccps_xmitlet;
- struct timer_list dccps_xmit_timer;
-};
-
-#define dccp_sk(ptr) container_of_const(ptr, struct dccp_sock, \
- dccps_inet_connection.icsk_inet.sk)
-
-static inline const char *dccp_role(const struct sock *sk)
-{
- switch (dccp_sk(sk)->dccps_role) {
- case DCCP_ROLE_UNDEFINED: return "undefined";
- case DCCP_ROLE_LISTEN: return "listen";
- case DCCP_ROLE_SERVER: return "server";
- case DCCP_ROLE_CLIENT: return "client";
- }
- return NULL;
-}
-
-extern void dccp_syn_ack_timeout(const struct request_sock *req);
-
#endif /* _LINUX_DCCP_H */
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 8210ece94fa6..013d25858642 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -926,10 +926,11 @@ struct kernel_ethtool_ts_info {
* @get_ts_info: Get the time stamping and PTP hardware clock capabilities.
* It may be called with RCU, or rtnl or reference on the device.
* Drivers supporting transmit time stamps in software should set this to
- * ethtool_op_get_ts_info(). Drivers must not zero statistics which they
- * don't report. The stats structure is initialized to ETHTOOL_STAT_NOT_SET
- * indicating driver does not report statistics.
- * @get_ts_stats: Query the device hardware timestamping statistics.
+ * ethtool_op_get_ts_info().
+ * @get_ts_stats: Query the device hardware timestamping statistics. Drivers
+ * must not zero statistics which they don't report. The stats structure
+ * is initialized to ETHTOOL_STAT_NOT_SET, indicating the driver does
+ * not report statistics.
* @get_module_info: Get the size and type of the eeprom contained within
* a plug-in module.
* @get_module_eeprom: Get the eeprom information from the plug-in module
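For illustration, the relocated rule implies a driver's get_ts_stats() should fill in only the counters its hardware maintains and leave the rest alone. A minimal sketch (hypothetical foo_* driver, not part of this patch):

static void foo_get_ts_stats(struct net_device *dev,
			     struct ethtool_ts_stats *ts_stats)
{
	struct foo_priv *priv = netdev_priv(dev);	/* hypothetical */

	/* Fill in only what the hardware counts; the core pre-initializes
	 * every field to ETHTOOL_STAT_NOT_SET, so untouched fields are
	 * reported as "not supported" rather than zero.
	 */
	ts_stats->pkts = priv->tx_tstamp_pkts;
	ts_stats->err = priv->tx_tstamp_errors;
	/* ->lost left untouched: this device does not track it. */
}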
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b7f13f087954..130d3c9d2ee4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4248,4 +4248,62 @@ int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
#define VM_SEALED_SYSMAP VM_NONE
#endif
+/*
+ * DMA mapping IDs for page_pool
+ *
+ * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
+ * stashes it in the upper bits of page->pp_magic. We always want to be able to
+ * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
+ * pages can have arbitrary kernel pointers stored in the same field as pp_magic
+ * (since it overlaps with page->lru.next), so we must ensure that we cannot
+ * mistake a valid kernel pointer with any of the values we write into this
+ * field.
+ *
+ * On architectures that set POISON_POINTER_DELTA, this is already ensured,
+ * since this value becomes part of PP_SIGNATURE; meaning we can just use the
+ * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
+ * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
+ * 0, we make sure that we leave the two topmost bits empty, as that guarantees
+ * we won't mistake a valid kernel pointer for a value we set, regardless of the
+ * VMSPLIT setting.
+ *
+ * Altogether, this means that the number of bits available is constrained by
+ * the size of an unsigned long (at the upper end, subtracting two bits per the
+ * above), and the definition of PP_SIGNATURE (with or without
+ * POISON_POINTER_DELTA).
+ */
+#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
+#if POISON_POINTER_DELTA > 0
+/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
+ * index so that it does not overlap with POISON_POINTER_DELTA when set
+ */
+#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
+#else
+/* Always leave out the topmost two; see above. */
+#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2)
+#endif
+
+#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
+ PP_DMA_INDEX_SHIFT)
+
+/* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
+ * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
+ * the head page of compound page and bit 1 for pfmemalloc page, as well as the
+ * bits used for the DMA index. page_is_pfmemalloc() is checked in
+ * __page_pool_put_page() to avoid recycling the pfmemalloc page.
+ */
+#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
+
+#ifdef CONFIG_PAGE_POOL
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+ return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
+}
+#else
+static inline bool page_pool_page_is_pp(struct page *page)
+{
+ return false;
+}
+#endif
+
#endif /* _LINUX_MM_H */
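To make the bit layout concrete, a worked example (an editorial sketch, assuming an x86_64-like configuration where POISON_POINTER_DELTA is 0xdead000000000000, so PP_SIGNATURE - POISON_POINTER_DELTA == 0x40):

/*
 * PP_DMA_INDEX_SHIFT = 1 + __fls(0x40) = 1 + 6 = 7
 * __ffs(0xdead000000000000) = 48
 * PP_DMA_INDEX_BITS  = MIN(32, 48 - 7) = 32
 * PP_DMA_INDEX_MASK  = GENMASK(38, 7)   -> bits 7..38 carry the DMA index
 * PP_MAGIC_MASK      = ~(GENMASK(38, 7) | 0x3UL)
 *
 * page->pp_magic thus keeps bits 0-1 for compound-head/pfmemalloc, bits
 * 7..38 for the DMA index, and every remaining bit must still equal the
 * corresponding bit of PP_SIGNATURE for page_pool_page_is_pp() to match.
 */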
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2d11d013cabe..e6036b82ef4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -688,6 +688,7 @@ struct netdev_queue {
/* Subordinate device that the queue has been assigned to */
struct net_device *sb_dev;
#ifdef CONFIG_XDP_SOCKETS
+ /* "ops protected", see comment about net_device::lock */
struct xsk_buff_pool *pool;
#endif
@@ -1945,13 +1946,11 @@ enum netdev_reg_state {
*
* @reg_state: Register/unregister state machine
* @dismantle: Device is going to be freed
- * @rtnl_link_state: This enum represents the phases of creating
- * a new link
- *
* @needs_free_netdev: Should unregister perform free_netdev?
* @priv_destructor: Called from unregister
* @npinfo: XXX: need comments on this one
* @nd_net: Network namespace this network device is inside
+ * (protected by @lock)
*
* @ml_priv: Mid-layer private
* @ml_priv_type: Mid-layer private type
@@ -2359,10 +2358,10 @@ struct net_device {
bool dismantle;
- enum {
- RTNL_LINK_INITIALIZED,
- RTNL_LINK_INITIALIZING,
- } rtnl_link_state:16;
+ /** @moving_ns: device is changing netns, protected by @lock */
+ bool moving_ns;
+ /** @rtnl_link_initializing: Device being created, suppress events */
+ bool rtnl_link_initializing;
bool needs_free_netdev;
void (*priv_destructor)(struct net_device *dev);
@@ -2521,7 +2520,7 @@ struct net_device {
* @net_shaper_hierarchy, @reg_state, @threaded
*
* Double protects:
- * @up
+ * @up, @moving_ns, @nd_net, @xdp_flags
*
* Double ops protects:
* @real_num_rx_queues, @real_num_tx_queues
@@ -4689,9 +4688,10 @@ static inline void __netif_tx_unlock_bh(struct netdev_queue *txq)
/*
* txq->trans_start can be read locklessly from dev_watchdog()
*/
-static inline void txq_trans_update(struct netdev_queue *txq)
+static inline void txq_trans_update(const struct net_device *dev,
+ struct netdev_queue *txq)
{
- if (txq->xmit_lock_owner != -1)
+ if (!dev->lltx)
WRITE_ONCE(txq->trans_start, jiffies);
}
@@ -5211,7 +5211,7 @@ static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_devi
rc = __netdev_start_xmit(ops, skb, dev, more);
if (rc == NETDEV_TX_OK)
- txq_trans_update(txq);
+ txq_trans_update(dev, txq);
return rc;
}
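As a sketch of the call site this changes (hypothetical foo_xmit(), not from the patch): the helper now keys off the device's lltx flag rather than the queue's xmit_lock_owner, so it also behaves correctly for LLTX drivers that never take the xmit lock.

static netdev_tx_t foo_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_queue *txq;

	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	/* ... hand the skb to hardware ... */

	txq_trans_update(dev, txq);	/* no-op when dev->lltx is set */
	return NETDEV_TX_OK;
}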
diff --git a/include/linux/phy.h b/include/linux/phy.h
index a2bfae80c449..fb755358d965 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1757,7 +1757,6 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
int fwnode_get_phy_id(struct fwnode_handle *fwnode, u32 *phy_id);
struct mdio_device *fwnode_mdio_find_device(struct fwnode_handle *fwnode);
struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode);
-struct phy_device *device_phy_find_device(struct device *dev);
struct fwnode_handle *fwnode_get_phy_node(const struct fwnode_handle *fwnode);
struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45);
int phy_device_register(struct phy_device *phy);
@@ -1779,11 +1778,6 @@ struct phy_device *fwnode_phy_find_device(struct fwnode_handle *phy_fwnode)
return NULL;
}
-static inline struct phy_device *device_phy_find_device(struct device *dev)
-{
- return NULL;
-}
-
static inline
struct fwnode_handle *fwnode_get_phy_node(struct fwnode_handle *fwnode)
{
diff --git a/include/linux/poison.h b/include/linux/poison.h
index 331a9a996fa8..8ca2235f78d5 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -70,6 +70,10 @@
#define KEY_DESTROY 0xbd
/********** net/core/page_pool.c **********/
+/*
+ * page_pool uses additional free bits within this value to store data, see the
+ * definition of PP_DMA_INDEX_MASK in mm.h
+ */
#define PP_SIGNATURE (0x40 + POISON_POINTER_DELTA)
/********** net/core/skbuff.c **********/
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 0d68d09bedd1..eced7e9bf69a 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -68,6 +68,22 @@ struct ptp_system_timestamp {
* @n_per_out: The number of programmable periodic signals.
* @n_pins: The number of programmable pins.
* @pps: Indicates whether the clock supports a PPS callback.
+ *
+ * @supported_perout_flags: The set of flags the driver supports for the
+ * PTP_PEROUT_REQUEST ioctl. The PTP core will
+ * reject a request with any flag not specified
+ * here.
+ *
+ * @supported_extts_flags: The set of flags the driver supports for the
+ * PTP_EXTTS_REQUEST ioctl. The PTP core will use
+ * this list to reject unsupported requests.
+ * PTP_ENABLE_FEATURE is assumed and does not need to
+ * be included. If PTP_STRICT_FLAGS is *not* set,
+ * then both PTP_RISING_EDGE and PTP_FALLING_EDGE
+ * will be assumed. Note that PTP_STRICT_FLAGS must
+ * be set if the driver wants to honor
+ * PTP_EXTTS_REQUEST2 and any future flags.
+ *
* @pin_config: Array of length 'n_pins'. If the number of
* programmable pins is nonzero, then drivers must
* allocate and initialize this array.
@@ -174,6 +190,8 @@ struct ptp_clock_info {
int n_per_out;
int n_pins;
int pps;
+ unsigned int supported_perout_flags;
+ unsigned int supported_extts_flags;
struct ptp_pin_desc *pin_config;
int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
int (*adjphase)(struct ptp_clock_info *ptp, s32 phase);
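For illustration, a hypothetical driver advertising its supported flag sets (foo_* names assumed; only a sketch of how the new fields slot into ptp_clock_info):

static const struct ptp_clock_info foo_ptp_info = {
	.owner			= THIS_MODULE,
	.name			= "foo_ptp",
	.n_ext_ts		= 1,
	.n_per_out		= 1,
	/* The core rejects any PTP_PEROUT_REQUEST flag not listed here. */
	.supported_perout_flags	= PTP_PEROUT_DUTY_CYCLE,
	/* Edge selection honored strictly, per PTP_EXTTS_REQUEST2. */
	.supported_extts_flags	= PTP_RISING_EDGE | PTP_FALLING_EDGE |
				  PTP_STRICT_FLAGS,
	.adjfine		= foo_adjfine,
	.enable			= foo_enable,
};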
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index b974a277975a..beb084ee4f4d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4105,8 +4105,7 @@ static inline void skb_frag_list_init(struct sk_buff *skb)
int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue,
int *err, long *timeo_p,
const struct sk_buff *skb);
-struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
- struct sk_buff_head *queue,
+struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue,
unsigned int flags,
int *off, int *err,
struct sk_buff **last);
@@ -4146,6 +4145,8 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
unsigned int flags);
int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
int len);
+int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb,
+ int offset, int len, int flags);
int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
diff --git a/include/linux/socket.h b/include/linux/socket.h
index c3322eb3d686..3b262487ec06 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr
return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
}
-static inline size_t msg_data_left(struct msghdr *msg)
+static inline size_t msg_data_left(const struct msghdr *msg)
{
return iov_iter_count(&msg->msg_iter);
}
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index c4ec8bb8144e..8aed09d65b4a 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -276,7 +276,6 @@ struct plat_stmmacenet_data {
int mac_port_sel_speed;
int has_xgmac;
u8 vlan_fail_q;
- unsigned long eee_usecs_rate;
struct pci_dev *pdev;
int int_snapshot_num;
int msi_mac_vec;
diff --git a/include/linux/tfrc.h b/include/linux/tfrc.h
deleted file mode 100644
index a5acc768085d..000000000000
--- a/include/linux/tfrc.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _LINUX_TFRC_H_
-#define _LINUX_TFRC_H_
-/*
- * TFRC - Data Structures for the TCP-Friendly Rate Control congestion
- * control mechanism as specified in RFC 3448.
- *
- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005 Ian McDonald <iam4@cs.waikato.ac.nz>
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- */
-#include <linux/types.h>
-
-/** tfrc_rx_info - TFRC Receiver Data Structure
- *
- * @tfrcrx_x_recv: receiver estimate of sending rate (3.2.2)
- * @tfrcrx_rtt: round-trip-time (communicated by sender)
- * @tfrcrx_p: current estimate of loss event rate (3.2.2)
- */
-struct tfrc_rx_info {
- __u32 tfrcrx_x_recv;
- __u32 tfrcrx_rtt;
- __u32 tfrcrx_p;
-};
-
-/** tfrc_tx_info - TFRC Sender Data Structure
- *
- * @tfrctx_x: computed transmit rate (4.3 (4))
- * @tfrctx_x_recv: receiver estimate of send rate (4.3)
- * @tfrctx_x_calc: return value of throughput equation (3.1)
- * @tfrctx_rtt: (moving average) estimate of RTT (4.3)
- * @tfrctx_p: current loss event rate (5.4)
- * @tfrctx_rto: estimate of RTO, equals 4*RTT (4.3)
- * @tfrctx_ipi: inter-packet interval (4.6)
- *
- * Note: X and X_recv are both maintained in units of 64 * bytes/second. This
- * enables a finer resolution of sending rates and avoids problems with
- * integer arithmetic; u32 is not sufficient as scaling consumes 6 bits.
- */
-struct tfrc_tx_info {
- __u64 tfrctx_x;
- __u64 tfrctx_x_recv;
- __u32 tfrctx_x_calc;
- __u32 tfrctx_rtt;
- __u32 tfrctx_p;
- __u32 tfrctx_rto;
- __u32 tfrctx_ipi;
-};
-
-#endif /* _LINUX_TFRC_H_ */
diff --git a/include/linux/udp.h b/include/linux/udp.h
index 0807e21cfec9..895240177f4f 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -101,6 +101,13 @@ struct udp_sock {
/* Cache friendly copy of sk->sk_peek_off >= 0 */
bool peeking_with_offset;
+
+ /*
+ * Accounting for the tunnel GRO fastpath.
+ * Not guarded by any config option, as it uses space already
+ * available in the last UDP socket cacheline.
+ */
+ struct hlist_node tunnel_list;
};
#define udp_test_bit(nr, sk) \
@@ -219,4 +226,13 @@ static inline void udp_allow_gso(struct sock *sk)
#define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)
+static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6)
+{
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+ return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk);
+#else
+ return NULL;
+#endif
+}
+
#endif /* _LINUX_UDP_H */
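A short sketch of the intended lookup pattern (hypothetical helper; the GRO path already runs under RCU protection, which the rcu_dereference() inside udp_tunnel_sk() relies on):

static bool foo_has_tunnel_sk(const struct sk_buff *skb, bool is_ipv6)
{
	struct sock *sk = udp_tunnel_sk(dev_net(skb->dev), is_ipv6);

	/* Non-NULL only when a single tunnel socket owns the GRO
	 * fastpath for this netns and address family.
	 */
	return sk != NULL;
}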
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index cf793d18e5df..f15341594cc8 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -16,6 +16,7 @@ struct sock;
struct socket;
struct rxrpc_call;
struct rxrpc_peer;
+struct krb5_buffer;
enum rxrpc_abort_reason;
enum rxrpc_interruptibility {
@@ -24,23 +25,33 @@ enum rxrpc_interruptibility {
RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */
};
+enum rxrpc_oob_type {
+ RXRPC_OOB_CHALLENGE, /* Security challenge for a connection */
+};
+
/*
* Debug ID counter for tracing.
*/
extern atomic_t rxrpc_debug_id;
+/*
+ * Operations table for rxrpc to call out to a kernel application (e.g. kAFS).
+ */
+struct rxrpc_kernel_ops {
+ void (*notify_new_call)(struct sock *sk, struct rxrpc_call *call,
+ unsigned long user_call_ID);
+ void (*discard_new_call)(struct rxrpc_call *call, unsigned long user_call_ID);
+ void (*user_attach_call)(struct rxrpc_call *call, unsigned long user_call_ID);
+ void (*notify_oob)(struct sock *sk, struct sk_buff *oob);
+};
+
typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *,
unsigned long);
typedef void (*rxrpc_notify_end_tx_t)(struct sock *, struct rxrpc_call *,
unsigned long);
-typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *,
- unsigned long);
-typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long);
-typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long);
-void rxrpc_kernel_new_call_notification(struct socket *,
- rxrpc_notify_new_call_t,
- rxrpc_discard_new_call_t);
+void rxrpc_kernel_set_notifications(struct socket *sock,
+ const struct rxrpc_kernel_ops *app_ops);
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct rxrpc_peer *peer,
struct key *key,
@@ -72,9 +83,9 @@ const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer);
unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data);
unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer);
unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *);
-int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
- rxrpc_user_attach_call_t, unsigned long, gfp_t,
- unsigned int);
+int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx,
+ unsigned long user_call_ID, gfp_t gfp,
+ unsigned int debug_id);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
@@ -83,5 +94,25 @@ void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
int rxrpc_sock_set_security_keyring(struct sock *, struct key *);
+int rxrpc_sock_set_manage_response(struct sock *sk, bool set);
+
+enum rxrpc_oob_type rxrpc_kernel_query_oob(struct sk_buff *oob,
+ struct rxrpc_peer **_peer,
+ unsigned long *_peer_appdata);
+struct sk_buff *rxrpc_kernel_dequeue_oob(struct socket *sock,
+ enum rxrpc_oob_type *_type);
+void rxrpc_kernel_free_oob(struct sk_buff *oob);
+void rxrpc_kernel_query_challenge(struct sk_buff *challenge,
+ struct rxrpc_peer **_peer,
+ unsigned long *_peer_appdata,
+ u16 *_service_id, u8 *_security_index);
+int rxrpc_kernel_reject_challenge(struct sk_buff *challenge, u32 abort_code,
+ int error, enum rxrpc_abort_reason why);
+int rxkad_kernel_respond_to_challenge(struct sk_buff *challenge);
+u32 rxgk_kernel_query_challenge(struct sk_buff *challenge);
+int rxgk_kernel_respond_to_challenge(struct sk_buff *challenge,
+ struct krb5_buffer *appdata);
+u8 rxrpc_kernel_query_call_security(struct rxrpc_call *call,
+ u16 *_service_id, u32 *_enctype);
#endif /* _NET_RXRPC_H */
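For illustration, a kernel application now registers all of its callbacks in one go instead of via the three separate typedefs this replaces (hypothetical foo_* handlers):

static void foo_notify_new_call(struct sock *sk, struct rxrpc_call *call,
				unsigned long user_call_ID)
{
	/* queue the incoming call for processing */
}

static void foo_discard_new_call(struct rxrpc_call *call,
				 unsigned long user_call_ID)
{
	/* drop any state attached to user_call_ID */
}

static const struct rxrpc_kernel_ops foo_rxrpc_ops = {
	.notify_new_call	= foo_notify_new_call,
	.discard_new_call	= foo_discard_new_call,
};

	/* during setup, instead of rxrpc_kernel_new_call_notification(): */
	rxrpc_kernel_set_notifications(sock, &foo_rxrpc_ops);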
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index e4fdc6b54cef..bea77934a235 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -40,6 +40,7 @@
FN(TCP_OFOMERGE) \
FN(TCP_RFC7323_PAWS) \
FN(TCP_RFC7323_PAWS_ACK) \
+ FN(TCP_RFC7323_TW_PAWS) \
FN(TCP_RFC7323_TSECR) \
FN(TCP_LISTEN_OVERFLOW) \
FN(TCP_OLD_SEQUENCE) \
@@ -284,6 +285,12 @@ enum skb_drop_reason {
*/
SKB_DROP_REASON_TCP_RFC7323_PAWS_ACK,
/**
+ * @SKB_DROP_REASON_TCP_RFC7323_TW_PAWS: PAWS check, socket is in
+ * TIME_WAIT state.
+ * Corresponds to LINUX_MIB_PAWS_TW_REJECTED.
+ */
+ SKB_DROP_REASON_TCP_RFC7323_TW_PAWS,
+ /**
* @SKB_DROP_REASON_TCP_RFC7323_TSECR: PAWS check, invalid TSEcr.
* Corresponds to LINUX_MIB_TSECRREJECTED.
*/
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 949641e92539..4564b5d348b1 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -175,14 +175,9 @@ struct inet_hashinfo {
bool pernet;
} ____cacheline_aligned_in_smp;
-static inline struct inet_hashinfo *tcp_or_dccp_get_hashinfo(const struct sock *sk)
+static inline struct inet_hashinfo *tcp_get_hashinfo(const struct sock *sk)
{
-#if IS_ENABLED(CONFIG_IP_DCCP)
- return sk->sk_prot->h.hashinfo ? :
- sock_net(sk)->ipv4.tcp_death_row.hashinfo;
-#else
return sock_net(sk)->ipv4.tcp_death_row.hashinfo;
-#endif
}
static inline struct inet_listen_hashbucket *
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index a36a335cef9f..0c3d571a04a1 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -377,10 +377,9 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev);
int ip_tunnel_get_iflink(const struct net_device *dev);
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);
-
-void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
- struct rtnl_link_ops *ops,
- struct list_head *dev_to_kill);
+void ip_tunnel_delete_net(struct net *net, unsigned int id,
+ struct rtnl_link_ops *ops,
+ struct list_head *dev_to_kill);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index bfbad695951c..f7263fe2a2e4 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -101,18 +101,9 @@ struct mptcp_out_options {
#define MPTCP_SCHED_MAX 128
#define MPTCP_SCHED_BUF_MAX (MPTCP_SCHED_NAME_MAX * MPTCP_SCHED_MAX)
-#define MPTCP_SUBFLOWS_MAX 8
-
-struct mptcp_sched_data {
- u8 subflows;
- struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
-};
-
struct mptcp_sched_ops {
- int (*get_send)(struct mptcp_sock *msk,
- struct mptcp_sched_data *data);
- int (*get_retrans)(struct mptcp_sock *msk,
- struct mptcp_sched_data *data);
+ int (*get_send)(struct mptcp_sock *msk);
+ int (*get_retrans)(struct mptcp_sock *msk);
char name[MPTCP_SCHED_NAME_MAX];
struct module *owner;
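With mptcp_sched_data gone, a scheduler walks the subflows itself and marks its picks. A minimal sketch, assuming the existing mptcp_for_each_subflow() and mptcp_subflow_set_scheduled() helpers (hypothetical "foo" scheduler):

static int foo_get_send(struct mptcp_sock *msk)
{
	struct mptcp_subflow_context *subflow;

	/* Trivial policy: schedule the first subflow we find. */
	mptcp_for_each_subflow(msk, subflow) {
		mptcp_subflow_set_scheduled(subflow, true);
		return 0;
	}
	return -EINVAL;
}

static struct mptcp_sched_ops foo_sched = {
	.get_send	= foo_get_send,
	.get_retrans	= foo_get_send,
	.name		= "foo",
	.owner		= THIS_MODULE,
};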
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index bd57d8fb54f1..025a7574b275 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -475,8 +475,8 @@ struct pernet_operations {
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
/* Following method is called with RTNL held. */
- void (*exit_batch_rtnl)(struct list_head *net_exit_list,
- struct list_head *dev_kill_list);
+ void (*exit_rtnl)(struct net *net,
+ struct list_head *dev_kill_list);
unsigned int * const id;
const size_t size;
};
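A sketch of a converted user (hypothetical foo_* names): where exit_batch_rtnl received the whole exit list, exit_rtnl is invoked once per namespace with RTNL already held:

static void foo_exit_rtnl(struct net *net, struct list_head *dev_kill_list)
{
	struct net_device *dev, *next;

	for_each_netdev_safe(net, dev, next)
		if (dev->rtnl_link_ops == &foo_link_ops)	/* hypothetical */
			unregister_netdevice_queue(dev, dev_kill_list);
}

static struct pernet_operations foo_net_ops = {
	.exit_rtnl = foo_exit_rtnl,
};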
diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h
index c316b551df8d..5706835a660c 100644
--- a/include/net/netdev_lock.h
+++ b/include/net/netdev_lock.h
@@ -64,6 +64,22 @@ netdev_ops_assert_locked_or_invisible(const struct net_device *dev)
netdev_ops_assert_locked(dev);
}
+static inline void netdev_lock_ops_compat(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+ else
+ rtnl_lock();
+}
+
+static inline void netdev_unlock_ops_compat(struct net_device *dev)
+{
+ if (netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+ else
+ rtnl_unlock();
+}
+
static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
const struct lockdep_map *b)
{
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
index 825141d675e5..ea709b59d827 100644
--- a/include/net/netdev_queues.h
+++ b/include/net/netdev_queues.h
@@ -85,9 +85,11 @@ struct netdev_queue_stats_tx {
* for some of the events is not maintained, and reliable "total" cannot
* be provided).
*
+ * Ops are called under the instance lock if netdev_need_ops_lock()
+ * returns true, otherwise under rtnl_lock.
* Device drivers can assume that when collecting total device stats,
* the @get_base_stats and subsequent per-queue calls are performed
- * "atomically" (without releasing the rtnl_lock).
+ * "atomically" (without releasing the relevant lock).
*
* Device drivers are encouraged to reset the per-queue statistics when
* number of queues change. This is because the primary use case for
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index b2238b551dce..8cdcd138b33f 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -20,12 +20,12 @@ struct netdev_rx_queue {
struct net_device *dev;
netdevice_tracker dev_tracker;
+ /* All fields below are "ops protected",
+ * see comment about net_device::lock
+ */
#ifdef CONFIG_XDP_SOCKETS
struct xsk_buff_pool *pool;
#endif
- /* NAPI instance for the queue
- * "ops protected", see comment about net_device::lock
- */
struct napi_struct *napi;
struct pp_memory_provider_params mp_params;
} ____cacheline_aligned_in_smp;
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 29e0db940382..82e07e272290 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -612,6 +612,22 @@ static inline int nlmsg_len(const struct nlmsghdr *nlh)
}
/**
+ * nlmsg_payload - message payload, if it is at least @len bytes long
+ * @nlh: netlink message header
+ * @len: expected minimum payload length
+ *
+ * Returns: The netlink message payload/data if the length is sufficient,
+ * otherwise NULL.
+ */
+static inline void *nlmsg_payload(const struct nlmsghdr *nlh, size_t len)
+{
+ if (nlh->nlmsg_len < nlmsg_msg_size(len))
+ return NULL;
+
+ return nlmsg_data(nlh);
+}
+
+/**
* nlmsg_attrdata - head of attributes data
* @nlh: netlink message header
* @hdrlen: length of family specific header
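Typical use of the new helper, replacing an open-coded nlmsg_msg_size() comparison (illustrative only):

	struct ifinfomsg *ifm;

	ifm = nlmsg_payload(nlh, sizeof(*ifm));
	if (!ifm)
		return -EINVAL;	/* message too short for the fixed header */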
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 650b2dc9199f..6373e3f17da8 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -47,6 +47,11 @@ struct sysctl_fib_multipath_hash_seed {
};
#endif
+struct udp_tunnel_gro {
+ struct sock __rcu *sk;
+ struct hlist_head list;
+};
+
struct netns_ipv4 {
/* Cacheline organization can be found documented in
* Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
@@ -85,6 +90,11 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
struct udp_table *udp_table;
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+ /* Not in a pernet subsys because it needs to be available at GRO time */
+ struct udp_tunnel_gro udp_tunnel_gro[2];
+#endif
+
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
struct ctl_table_header *frags_hdr;
@@ -277,4 +287,5 @@ struct netns_ipv4 {
struct hlist_head *inet_addr_lst;
struct delayed_work addr_chk_work;
};
+
#endif
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 582a3d00cbe2..93f2c31baf9b 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -395,6 +395,12 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
page_pool_put_full_page(pool, page, true);
}
+static inline void page_pool_recycle_direct_netmem(struct page_pool *pool,
+ netmem_ref netmem)
+{
+ page_pool_put_full_netmem(pool, netmem, true);
+}
+
#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA \
(sizeof(dma_addr_t) > sizeof(unsigned long))
@@ -492,4 +498,9 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid)
page_pool_update_nid(pool, new_nid);
}
+static inline bool page_pool_is_unreadable(struct page_pool *pool)
+{
+ return !!pool->mp_ops;
+}
+
#endif /* _NET_PAGE_POOL_HELPERS_H */
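A sketch of the intended check (illustrative): consumers that require CPU-readable payloads can bail out when the pool is backed by a memory provider:

	if (page_pool_is_unreadable(pool)) {
		/* Buffers may be unreadable (e.g. device memory); do not
		 * map or touch payload bytes on the CPU.
		 */
		return -EOPNOTSUPP;
	}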
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 36eb57d73abc..431b593de709 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -6,6 +6,7 @@
#include <linux/dma-direction.h>
#include <linux/ptr_ring.h>
#include <linux/types.h>
+#include <linux/xarray.h>
#include <net/netmem.h>
#define PP_FLAG_DMA_MAP BIT(0) /* Should page_pool do the DMA
@@ -33,6 +34,9 @@
#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)
+/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */
+#define PP_DMA_INDEX_LIMIT XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)
+
/*
* Fast allocation side cache array/stack
*
@@ -221,6 +225,8 @@ struct page_pool {
void *mp_priv;
const struct memory_provider_ops *mp_ops;
+ struct xarray dma_mapped;
+
#ifdef CONFIG_PAGE_POOL_STATS
/* recycle stats are per-cpu to avoid locking */
struct page_pool_recycle_stats __percpu *recycle_stats;
diff --git a/include/net/rps.h b/include/net/rps.h
index e358e9711f27..507f4aa5d39b 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -57,9 +57,10 @@ struct rps_dev_flow_table {
* meaning we use 32-6=26 bits for the hash.
*/
struct rps_sock_flow_table {
- u32 mask;
+ struct rcu_head rcu;
+ u32 mask;
- u32 ents[] ____cacheline_aligned_in_smp;
+ u32 ents[] ____cacheline_aligned_in_smp;
};
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))
diff --git a/include/net/rstreason.h b/include/net/rstreason.h
index 69cb2e52b7da..979ac87b5d99 100644
--- a/include/net/rstreason.h
+++ b/include/net/rstreason.h
@@ -36,7 +36,7 @@
/**
* enum sk_rst_reason - the reasons of socket reset
*
- * The reasons of sk reset, which are used in DCCP/TCP/MPTCP protocols.
+ * The reasons of sk reset, which are used in TCP/MPTCP protocols.
*
* There are three parts in order:
* 1) skb drop reasons: relying on drop reasons for such as passive reset
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index 21e7fa2a1813..cddebafb9f77 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -16,9 +16,5 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport);
u32 secure_tcpv6_ts_off(const struct net *net,
const __be32 *saddr, const __be32 *daddr);
-u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport);
-u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
- __be16 sport, __be16 dport);
#endif /* _NET_SECURE_SEQ */
diff --git a/include/net/sock.h b/include/net/sock.h
index 694f954258d4..bb4d6189292f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1781,7 +1781,6 @@ void sk_free(struct sock *sk);
void sk_net_refcnt_upgrade(struct sock *sk);
void sk_destruct(struct sock *sk);
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
-void sk_free_unlock_clone(struct sock *sk);
struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
gfp_t priority);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4450c384ef17..5078ad868fee 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -427,7 +427,8 @@ enum tcp_tw_status {
enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
const struct tcphdr *th,
- u32 *tw_isn);
+ u32 *tw_isn,
+ enum skb_drop_reason *drop_reason);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
bool *lost_race, enum skb_drop_reason *drop_reason);
diff --git a/include/net/udp.h b/include/net/udp.h
index 6e89520e100d..a772510b2aa5 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -290,6 +290,7 @@ static inline void udp_lib_init_sock(struct sock *sk)
struct udp_sock *up = udp_sk(sk);
skb_queue_head_init(&up->reader_queue);
+ INIT_HLIST_NODE(&up->tunnel_list);
up->forward_threshold = sk->sk_rcvbuf >> 2;
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
}
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index a93dc51f6323..2df3b8344eb5 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -191,6 +191,21 @@ static inline int udp_tunnel_handle_offloads(struct sk_buff *skb, bool udp_csum)
}
#endif
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add);
+void udp_tunnel_update_gro_rcv(struct sock *sk, bool add);
+#else
+static inline void udp_tunnel_update_gro_lookup(struct net *net,
+ struct sock *sk, bool add) {}
+static inline void udp_tunnel_update_gro_rcv(struct sock *sk, bool add) {}
+#endif
+
+static inline void udp_tunnel_cleanup_gro(struct sock *sk)
+{
+ udp_tunnel_update_gro_rcv(sk, false);
+ udp_tunnel_update_gro_lookup(sock_net(sk), sk, false);
+}
+
static inline void udp_tunnel_encap_enable(struct sock *sk)
{
if (udp_test_and_set_bit(ENCAP_ENABLED, sk))
diff --git a/include/net/xdp.h b/include/net/xdp.h
index 48efacbaa35d..20e41b5ff319 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -616,6 +616,7 @@ struct xdp_metadata_ops {
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
void xdp_set_features_flag(struct net_device *dev, xdp_features_t val);
+void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val);
void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg);
void xdp_features_clear_redirect_target(struct net_device *dev);
#else
diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h
index 8857f5ea77d4..7f83d242c8e9 100644
--- a/include/trace/events/afs.h
+++ b/include/trace/events/afs.h
@@ -663,19 +663,26 @@ TRACE_EVENT(afs_cb_call,
__field(unsigned int, call)
__field(u32, op)
__field(u16, service_id)
+ __field(u8, security_ix)
+ __field(u32, enctype)
),
TP_fast_assign(
__entry->call = call->debug_id;
__entry->op = call->operation_ID;
__entry->service_id = call->service_id;
+ __entry->security_ix = call->security_ix;
+ __entry->enctype = call->enctype;
),
- TP_printk("c=%08x %s",
+ TP_printk("c=%08x %s sv=%u sx=%u en=%u",
__entry->call,
__entry->service_id == 2501 ?
__print_symbolic(__entry->op, yfs_cm_operations) :
- __print_symbolic(__entry->op, afs_cm_operations))
+ __print_symbolic(__entry->op, afs_cm_operations),
+ __entry->service_id,
+ __entry->security_ix,
+ __entry->enctype)
);
TRACE_EVENT(afs_call,
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index cad50d91077e..378d2dfc7392 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -25,6 +25,7 @@
EM(afs_abort_probeuuid_negative, "afs-probeuuid-neg") \
EM(afs_abort_send_data_error, "afs-send-data") \
EM(afs_abort_unmarshal_error, "afs-unmarshal") \
+ EM(afs_abort_unsupported_sec_class, "afs-unsup-sec-class") \
/* rxperf errors */ \
EM(rxperf_abort_general_error, "rxperf-error") \
EM(rxperf_abort_oom, "rxperf-oom") \
@@ -68,6 +69,38 @@
EM(rxkad_abort_resp_tkt_sname, "rxkad-resp-tk-sname") \
EM(rxkad_abort_resp_unknown_tkt, "rxkad-resp-unknown-tkt") \
EM(rxkad_abort_resp_version, "rxkad-resp-version") \
+ /* RxGK security errors */ \
+ EM(rxgk_abort_1_verify_mic_eproto, "rxgk1-vfy-mic-eproto") \
+ EM(rxgk_abort_2_decrypt_eproto, "rxgk2-dec-eproto") \
+ EM(rxgk_abort_2_short_data, "rxgk2-short-data") \
+ EM(rxgk_abort_2_short_encdata, "rxgk2-short-encdata") \
+ EM(rxgk_abort_2_short_header, "rxgk2-short-hdr") \
+ EM(rxgk_abort_bad_key_number, "rxgk-bad-key-num") \
+ EM(rxgk_abort_chall_key_expired, "rxgk-chall-key-exp") \
+ EM(rxgk_abort_chall_no_key, "rxgk-chall-nokey") \
+ EM(rxgk_abort_chall_short, "rxgk-chall-short") \
+ EM(rxgk_abort_resp_auth_dec, "rxgk-resp-auth-dec") \
+ EM(rxgk_abort_resp_bad_callid, "rxgk-resp-bad-callid") \
+ EM(rxgk_abort_resp_bad_nonce, "rxgk-resp-bad-nonce") \
+ EM(rxgk_abort_resp_bad_param, "rxgk-resp-bad-param") \
+ EM(rxgk_abort_resp_call_ctr, "rxgk-resp-call-ctr") \
+ EM(rxgk_abort_resp_call_state, "rxgk-resp-call-state") \
+ EM(rxgk_abort_resp_internal_error, "rxgk-resp-int-error") \
+ EM(rxgk_abort_resp_nopkg, "rxgk-resp-nopkg") \
+ EM(rxgk_abort_resp_short_applen, "rxgk-resp-short-applen") \
+ EM(rxgk_abort_resp_short_auth, "rxgk-resp-short-auth") \
+ EM(rxgk_abort_resp_short_call_list, "rxgk-resp-short-callls") \
+ EM(rxgk_abort_resp_short_packet, "rxgk-resp-short-packet") \
+ EM(rxgk_abort_resp_short_yfs_klen, "rxgk-resp-short-yfs-klen") \
+ EM(rxgk_abort_resp_short_yfs_key, "rxgk-resp-short-yfs-key") \
+ EM(rxgk_abort_resp_short_yfs_tkt, "rxgk-resp-short-yfs-tkt") \
+ EM(rxgk_abort_resp_tok_dec, "rxgk-resp-tok-dec") \
+ EM(rxgk_abort_resp_tok_internal_error, "rxgk-resp-tok-int-err") \
+ EM(rxgk_abort_resp_tok_keyerr, "rxgk-resp-tok-keyerr") \
+ EM(rxgk_abort_resp_tok_nokey, "rxgk-resp-tok-nokey") \
+ EM(rxgk_abort_resp_tok_nopkg, "rxgk-resp-tok-nopkg") \
+ EM(rxgk_abort_resp_tok_short, "rxgk-resp-tok-short") \
+ EM(rxgk_abort_resp_xdr_align, "rxgk-resp-xdr-align") \
/* rxrpc errors */ \
EM(rxrpc_abort_call_improper_term, "call-improper-term") \
EM(rxrpc_abort_call_reset, "call-reset") \
@@ -77,6 +110,7 @@
EM(rxrpc_abort_call_timeout, "call-timeout") \
EM(rxrpc_abort_no_service_key, "no-serv-key") \
EM(rxrpc_abort_nomem, "nomem") \
+ EM(rxrpc_abort_response_sendmsg, "resp-sendmsg") \
EM(rxrpc_abort_service_not_offered, "serv-not-offered") \
EM(rxrpc_abort_shut_down, "shut-down") \
EM(rxrpc_abort_unsupported_security, "unsup-sec") \
@@ -133,24 +167,33 @@
EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \
EM(rxrpc_skb_get_conn_work, "GET conn-work") \
EM(rxrpc_skb_get_local_work, "GET locl-work") \
+ EM(rxrpc_skb_get_post_oob, "GET post-oob ") \
EM(rxrpc_skb_get_reject_work, "GET rej-work ") \
EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \
EM(rxrpc_skb_get_to_recvmsg_oos, "GET to-recv-o") \
EM(rxrpc_skb_new_encap_rcv, "NEW encap-rcv") \
EM(rxrpc_skb_new_error_report, "NEW error-rpt") \
EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \
+ EM(rxrpc_skb_new_response_rxgk, "NEW resp-rxgk") \
+ EM(rxrpc_skb_new_response_rxkad, "NEW resp-rxkd") \
EM(rxrpc_skb_new_unshared, "NEW unshared ") \
EM(rxrpc_skb_put_call_rx, "PUT call-rx ") \
+ EM(rxrpc_skb_put_challenge, "PUT challenge") \
EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \
EM(rxrpc_skb_put_conn_work, "PUT conn-work") \
EM(rxrpc_skb_put_error_report, "PUT error-rep") \
EM(rxrpc_skb_put_input, "PUT input ") \
EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \
+ EM(rxrpc_skb_put_oob, "PUT oob ") \
EM(rxrpc_skb_put_purge, "PUT purge ") \
+ EM(rxrpc_skb_put_purge_oob, "PUT purge-oob") \
+ EM(rxrpc_skb_put_response, "PUT response ") \
EM(rxrpc_skb_put_rotate, "PUT rotate ") \
EM(rxrpc_skb_put_unknown, "PUT unknown ") \
EM(rxrpc_skb_see_conn_work, "SEE conn-work") \
+ EM(rxrpc_skb_see_oob_challenge, "SEE oob-chall") \
EM(rxrpc_skb_see_recvmsg, "SEE recvmsg ") \
+ EM(rxrpc_skb_see_recvmsg_oob, "SEE recvm-oob") \
EM(rxrpc_skb_see_reject, "SEE reject ") \
EM(rxrpc_skb_see_rotate, "SEE rotate ") \
E_(rxrpc_skb_see_version, "SEE version ")
@@ -216,9 +259,11 @@
EM(rxrpc_conn_free, "FREE ") \
EM(rxrpc_conn_get_activate_call, "GET act-call") \
EM(rxrpc_conn_get_call_input, "GET inp-call") \
+ EM(rxrpc_conn_get_challenge_input, "GET inp-chal") \
EM(rxrpc_conn_get_conn_input, "GET inp-conn") \
EM(rxrpc_conn_get_idle, "GET idle ") \
EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \
+ EM(rxrpc_conn_get_poke_response, "GET response") \
EM(rxrpc_conn_get_poke_secured, "GET secured ") \
EM(rxrpc_conn_get_poke_timer, "GET poke ") \
EM(rxrpc_conn_get_service_conn, "GET svc-conn") \
@@ -226,10 +271,12 @@
EM(rxrpc_conn_new_service, "NEW service ") \
EM(rxrpc_conn_put_call, "PUT call ") \
EM(rxrpc_conn_put_call_input, "PUT inp-call") \
+ EM(rxrpc_conn_put_challenge_input, "PUT inp-chal") \
EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \
EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \
EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \
EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \
+ EM(rxrpc_conn_put_oob, "PUT oob ") \
EM(rxrpc_conn_put_poke, "PUT poke ") \
EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \
EM(rxrpc_conn_put_unbundle, "PUT unbundle") \
@@ -331,6 +378,7 @@
EM(rxrpc_recvmsg_full, "FULL") \
EM(rxrpc_recvmsg_hole, "HOLE") \
EM(rxrpc_recvmsg_next, "NEXT") \
+ EM(rxrpc_recvmsg_oobq, "OOBQ") \
EM(rxrpc_recvmsg_requeue, "REQU") \
EM(rxrpc_recvmsg_return, "RETN") \
EM(rxrpc_recvmsg_terminal, "TERM") \
@@ -455,8 +503,9 @@
EM(rxrpc_tx_point_call_final_resend, "CallFinalResend") \
EM(rxrpc_tx_point_conn_abort, "ConnAbort") \
EM(rxrpc_tx_point_reject, "Reject") \
+ EM(rxrpc_tx_point_rxgk_challenge, "RxGKChall") \
EM(rxrpc_tx_point_rxkad_challenge, "RxkadChall") \
- EM(rxrpc_tx_point_rxkad_response, "RxkadResp") \
+ EM(rxrpc_tx_point_response, "Response") \
EM(rxrpc_tx_point_version_keepalive, "VerKeepalive") \
E_(rxrpc_tx_point_version_reply, "VerReply")
@@ -473,6 +522,7 @@
#define rxrpc_txbuf_traces \
EM(rxrpc_txbuf_alloc_data, "ALLOC DATA ") \
+ EM(rxrpc_txbuf_alloc_response, "ALLOC RESP ") \
EM(rxrpc_txbuf_free, "FREE ") \
EM(rxrpc_txbuf_get_buffer, "GET BUFFER ") \
EM(rxrpc_txbuf_get_trans, "GET TRANS ") \
@@ -480,6 +530,7 @@
EM(rxrpc_txbuf_put_cleaned, "PUT CLEANED") \
EM(rxrpc_txbuf_put_nomem, "PUT NOMEM ") \
EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \
+ EM(rxrpc_txbuf_put_response_tx, "PUT RESP TX") \
EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \
EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \
EM(rxrpc_txbuf_see_lost, "SEE LOST ") \
@@ -1150,6 +1201,39 @@ TRACE_EVENT(rxrpc_rx_conn_abort,
__entry->abort_code)
);
+TRACE_EVENT(rxrpc_tx_challenge,
+ TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial,
+ u32 version, u32 nonce),
+
+ TP_ARGS(conn, serial, version, nonce),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, conn)
+ __field(rxrpc_serial_t, serial)
+ __field(u32, version)
+ __field(u32, nonce)
+ __field(u16, service_id)
+ __field(u8, security_ix)
+ ),
+
+ TP_fast_assign(
+ __entry->conn = conn->debug_id;
+ __entry->serial = serial;
+ __entry->version = version;
+ __entry->nonce = nonce;
+ __entry->service_id = conn->service_id;
+ __entry->security_ix = conn->security_ix;
+ ),
+
+ TP_printk("C=%08x CHALLENGE r=%08x sv=%u+%u v=%x n=%x",
+ __entry->conn,
+ __entry->serial,
+ __entry->service_id,
+ __entry->security_ix,
+ __entry->version,
+ __entry->nonce)
+ );
+
TRACE_EVENT(rxrpc_rx_challenge,
TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial,
u32 version, u32 nonce, u32 min_level),
@@ -1162,6 +1246,8 @@ TRACE_EVENT(rxrpc_rx_challenge,
__field(u32, version)
__field(u32, nonce)
__field(u32, min_level)
+ __field(u16, service_id)
+ __field(u8, security_ix)
),
TP_fast_assign(
@@ -1170,16 +1256,60 @@ TRACE_EVENT(rxrpc_rx_challenge,
__entry->version = version;
__entry->nonce = nonce;
__entry->min_level = min_level;
+ __entry->service_id = conn->service_id;
+ __entry->security_ix = conn->security_ix;
),
- TP_printk("C=%08x CHALLENGE %08x v=%x n=%x ml=%x",
+ TP_printk("C=%08x CHALLENGE r=%08x sv=%u+%u v=%x n=%x ml=%x",
__entry->conn,
__entry->serial,
+ __entry->service_id,
+ __entry->security_ix,
__entry->version,
__entry->nonce,
__entry->min_level)
);
+TRACE_EVENT(rxrpc_tx_response,
+ TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial,
+ struct rxrpc_skb_priv *rsp),
+
+ TP_ARGS(conn, serial, rsp),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, conn)
+ __field(rxrpc_serial_t, serial)
+ __field(rxrpc_serial_t, challenge)
+ __field(u32, version)
+ __field(u32, kvno)
+ __field(u16, ticket_len)
+ __field(u16, appdata_len)
+ __field(u16, service_id)
+ __field(u8, security_ix)
+ ),
+
+ TP_fast_assign(
+ __entry->conn = conn->debug_id;
+ __entry->serial = serial;
+ __entry->challenge = rsp->resp.challenge_serial;
+ __entry->version = rsp->resp.version;
+ __entry->kvno = rsp->resp.kvno;
+ __entry->ticket_len = rsp->resp.ticket_len;
+ __entry->service_id = conn->service_id;
+ __entry->security_ix = conn->security_ix;
+ ),
+
+ TP_printk("C=%08x RESPONSE r=%08x cr=%08x sv=%u+%u v=%x kv=%x tl=%u",
+ __entry->conn,
+ __entry->serial,
+ __entry->challenge,
+ __entry->service_id,
+ __entry->security_ix,
+ __entry->version,
+ __entry->kvno,
+ __entry->ticket_len)
+ );
+
TRACE_EVENT(rxrpc_rx_response,
TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial,
u32 version, u32 kvno, u32 ticket_len),
@@ -1192,6 +1322,7 @@ TRACE_EVENT(rxrpc_rx_response,
__field(u32, version)
__field(u32, kvno)
__field(u32, ticket_len)
+ __field(u8, security_ix)
),
TP_fast_assign(
@@ -1200,11 +1331,13 @@ TRACE_EVENT(rxrpc_rx_response,
__entry->version = version;
__entry->kvno = kvno;
__entry->ticket_len = ticket_len;
+ __entry->security_ix = conn->security_ix;
),
- TP_printk("C=%08x RESPONSE %08x v=%x kvno=%x tl=%x",
+ TP_printk("C=%08x RESPONSE r=%08x sx=%u v=%x kvno=%x tl=%x",
__entry->conn,
__entry->serial,
+ __entry->security_ix,
__entry->version,
__entry->kvno,
__entry->ticket_len)
@@ -2668,6 +2801,30 @@ TRACE_EVENT(rxrpc_rack_timer,
ktime_to_us(__entry->delay))
);
+TRACE_EVENT(rxrpc_rxgk_rekey,
+ TP_PROTO(struct rxrpc_connection *conn,
+ unsigned int current_key, unsigned int requested_key),
+
+ TP_ARGS(conn, current_key, requested_key),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, conn)
+ __field(unsigned int, current_key)
+ __field(unsigned int, requested_key)
+ ),
+
+ TP_fast_assign(
+ __entry->conn = conn->debug_id;
+ __entry->current_key = current_key;
+ __entry->requested_key = requested_key;
+ ),
+
+ TP_printk("C=%08x cur=%x req=%x",
+ __entry->conn,
+ __entry->current_key,
+ __entry->requested_key)
+ );
+
#undef EM
#undef E_
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 3836de435d9d..b5310439536e 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -19,7 +19,6 @@
/* The protocol traced by inet_sock_set_state */
#define inet_protocol_names \
EM(IPPROTO_TCP) \
- EM(IPPROTO_DCCP) \
EM(IPPROTO_SCTP) \
EMe(IPPROTO_MPTCP)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 5d331383047b..de214f1dea58 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -21,7 +21,6 @@ TRACE_DEFINE_ENUM(SOCK_DGRAM);
TRACE_DEFINE_ENUM(SOCK_RAW);
TRACE_DEFINE_ENUM(SOCK_RDM);
TRACE_DEFINE_ENUM(SOCK_SEQPACKET);
-TRACE_DEFINE_ENUM(SOCK_DCCP);
TRACE_DEFINE_ENUM(SOCK_PACKET);
#define show_socket_type(type) \
@@ -31,7 +30,6 @@ TRACE_DEFINE_ENUM(SOCK_PACKET);
{ SOCK_RAW, "RAW" }, \
{ SOCK_RDM, "RDM" }, \
{ SOCK_SEQPACKET, "SEQPACKET" }, \
- { SOCK_DCCP, "DCCP" }, \
{ SOCK_PACKET, "PACKET" })
/* This list is known to be incomplete, add new enums as needed. */
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 1a40c41ff8c3..75d3d53a3832 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -259,6 +259,30 @@ TRACE_EVENT(tcp_retransmit_synack,
__entry->saddr_v6, __entry->daddr_v6)
);
+TRACE_EVENT(tcp_sendmsg_locked,
+ TP_PROTO(const struct sock *sk, const struct msghdr *msg,
+ const struct sk_buff *skb, int size_goal),
+
+ TP_ARGS(sk, msg, skb, size_goal),
+
+ TP_STRUCT__entry(
+ __field(const void *, skb_addr)
+ __field(int, skb_len)
+ __field(int, msg_left)
+ __field(int, size_goal)
+ ),
+
+ TP_fast_assign(
+ __entry->skb_addr = skb;
+ __entry->skb_len = skb ? skb->len : 0;
+ __entry->msg_left = msg_data_left(msg);
+ __entry->size_goal = size_goal;
+ ),
+
+ TP_printk("skb_addr %p skb_len %d msg_left %d size_goal %d",
+ __entry->skb_addr, __entry->skb_len, __entry->msg_left,
+ __entry->size_goal));
+
DECLARE_TRACE(tcp_cwnd_reduction_tp,
TP_PROTO(const struct sock *sk, int newly_acked_sacked,
int newly_lost, int flag),
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index a5b743a2f775..73876c0e2bba 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -699,10 +699,11 @@ struct br_mdb_entry {
#define MDB_TEMPORARY 0
#define MDB_PERMANENT 1
__u8 state;
-#define MDB_FLAGS_OFFLOAD (1 << 0)
-#define MDB_FLAGS_FAST_LEAVE (1 << 1)
-#define MDB_FLAGS_STAR_EXCL (1 << 2)
-#define MDB_FLAGS_BLOCKED (1 << 3)
+#define MDB_FLAGS_OFFLOAD (1 << 0)
+#define MDB_FLAGS_FAST_LEAVE (1 << 1)
+#define MDB_FLAGS_STAR_EXCL (1 << 2)
+#define MDB_FLAGS_BLOCKED (1 << 3)
+#define MDB_FLAGS_OFFLOAD_FAILED (1 << 4)
__u8 flags;
__u16 vid;
struct {
@@ -830,6 +831,7 @@ enum br_boolopt_id {
BR_BOOLOPT_NO_LL_LEARN,
BR_BOOLOPT_MCAST_VLAN_SNOOPING,
BR_BOOLOPT_MST_ENABLE,
+ BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION,
BR_BOOLOPT_MAX
};
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 318386cc5b0d..3ad2d5d98034 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1986,4 +1986,19 @@ enum {
#define IFLA_DSA_MAX (__IFLA_DSA_MAX - 1)
+/* OVPN section */
+
+enum ovpn_mode {
+ OVPN_MODE_P2P,
+ OVPN_MODE_MP,
+};
+
+enum {
+ IFLA_OVPN_UNSPEC,
+ IFLA_OVPN_MODE,
+ __IFLA_OVPN_MAX,
+};
+
+#define IFLA_OVPN_MAX (__IFLA_OVPN_MAX - 1)
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/include/uapi/linux/ovpn.h b/include/uapi/linux/ovpn.h
new file mode 100644
index 000000000000..680d1522dc87
--- /dev/null
+++ b/include/uapi/linux/ovpn.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/ovpn.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_OVPN_H
+#define _UAPI_LINUX_OVPN_H
+
+#define OVPN_FAMILY_NAME "ovpn"
+#define OVPN_FAMILY_VERSION 1
+
+#define OVPN_NONCE_TAIL_SIZE 8
+
+enum ovpn_cipher_alg {
+ OVPN_CIPHER_ALG_NONE,
+ OVPN_CIPHER_ALG_AES_GCM,
+ OVPN_CIPHER_ALG_CHACHA20_POLY1305,
+};
+
+enum ovpn_del_peer_reason {
+ OVPN_DEL_PEER_REASON_TEARDOWN,
+ OVPN_DEL_PEER_REASON_USERSPACE,
+ OVPN_DEL_PEER_REASON_EXPIRED,
+ OVPN_DEL_PEER_REASON_TRANSPORT_ERROR,
+ OVPN_DEL_PEER_REASON_TRANSPORT_DISCONNECT,
+};
+
+enum ovpn_key_slot {
+ OVPN_KEY_SLOT_PRIMARY,
+ OVPN_KEY_SLOT_SECONDARY,
+};
+
+enum {
+ OVPN_A_PEER_ID = 1,
+ OVPN_A_PEER_REMOTE_IPV4,
+ OVPN_A_PEER_REMOTE_IPV6,
+ OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+ OVPN_A_PEER_REMOTE_PORT,
+ OVPN_A_PEER_SOCKET,
+ OVPN_A_PEER_SOCKET_NETNSID,
+ OVPN_A_PEER_VPN_IPV4,
+ OVPN_A_PEER_VPN_IPV6,
+ OVPN_A_PEER_LOCAL_IPV4,
+ OVPN_A_PEER_LOCAL_IPV6,
+ OVPN_A_PEER_LOCAL_PORT,
+ OVPN_A_PEER_KEEPALIVE_INTERVAL,
+ OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+ OVPN_A_PEER_DEL_REASON,
+ OVPN_A_PEER_VPN_RX_BYTES,
+ OVPN_A_PEER_VPN_TX_BYTES,
+ OVPN_A_PEER_VPN_RX_PACKETS,
+ OVPN_A_PEER_VPN_TX_PACKETS,
+ OVPN_A_PEER_LINK_RX_BYTES,
+ OVPN_A_PEER_LINK_TX_BYTES,
+ OVPN_A_PEER_LINK_RX_PACKETS,
+ OVPN_A_PEER_LINK_TX_PACKETS,
+
+ __OVPN_A_PEER_MAX,
+ OVPN_A_PEER_MAX = (__OVPN_A_PEER_MAX - 1)
+};
+
+enum {
+ OVPN_A_KEYCONF_PEER_ID = 1,
+ OVPN_A_KEYCONF_SLOT,
+ OVPN_A_KEYCONF_KEY_ID,
+ OVPN_A_KEYCONF_CIPHER_ALG,
+ OVPN_A_KEYCONF_ENCRYPT_DIR,
+ OVPN_A_KEYCONF_DECRYPT_DIR,
+
+ __OVPN_A_KEYCONF_MAX,
+ OVPN_A_KEYCONF_MAX = (__OVPN_A_KEYCONF_MAX - 1)
+};
+
+enum {
+ OVPN_A_KEYDIR_CIPHER_KEY = 1,
+ OVPN_A_KEYDIR_NONCE_TAIL,
+
+ __OVPN_A_KEYDIR_MAX,
+ OVPN_A_KEYDIR_MAX = (__OVPN_A_KEYDIR_MAX - 1)
+};
+
+enum {
+ OVPN_A_IFINDEX = 1,
+ OVPN_A_PEER,
+ OVPN_A_KEYCONF,
+
+ __OVPN_A_MAX,
+ OVPN_A_MAX = (__OVPN_A_MAX - 1)
+};
+
+enum {
+ OVPN_CMD_PEER_NEW = 1,
+ OVPN_CMD_PEER_SET,
+ OVPN_CMD_PEER_GET,
+ OVPN_CMD_PEER_DEL,
+ OVPN_CMD_PEER_DEL_NTF,
+ OVPN_CMD_KEY_NEW,
+ OVPN_CMD_KEY_GET,
+ OVPN_CMD_KEY_SWAP,
+ OVPN_CMD_KEY_SWAP_NTF,
+ OVPN_CMD_KEY_DEL,
+
+ __OVPN_CMD_MAX,
+ OVPN_CMD_MAX = (__OVPN_CMD_MAX - 1)
+};
+
+#define OVPN_MCGRP_PEERS "peers"
+
+#endif /* _UAPI_LINUX_OVPN_H */
diff --git a/include/uapi/linux/rxrpc.h b/include/uapi/linux/rxrpc.h
index 8f8dc7a937a4..d9735abd4c79 100644
--- a/include/uapi/linux/rxrpc.h
+++ b/include/uapi/linux/rxrpc.h
@@ -36,26 +36,33 @@ struct sockaddr_rxrpc {
#define RXRPC_MIN_SECURITY_LEVEL 4 /* minimum security level */
#define RXRPC_UPGRADEABLE_SERVICE 5 /* Upgrade service[0] -> service[1] */
#define RXRPC_SUPPORTED_CMSG 6 /* Get highest supported control message type */
+#define RXRPC_MANAGE_RESPONSE 7 /* [clnt] Want to manage RESPONSE packets */
/*
* RxRPC control messages
* - If neither abort nor accept is specified, the message is a data message.
* - terminal messages mean that a user call ID tag can be recycled
+ * - C/S/- indicate whether these are applicable to client, server or both
* - s/r/- indicate whether these are applicable to sendmsg() and/or recvmsg()
*/
enum rxrpc_cmsg_type {
- RXRPC_USER_CALL_ID = 1, /* sr: user call ID specifier */
- RXRPC_ABORT = 2, /* sr: abort request / notification [terminal] */
- RXRPC_ACK = 3, /* -r: [Service] RPC op final ACK received [terminal] */
- RXRPC_NET_ERROR = 5, /* -r: network error received [terminal] */
- RXRPC_BUSY = 6, /* -r: server busy received [terminal] */
- RXRPC_LOCAL_ERROR = 7, /* -r: local error generated [terminal] */
- RXRPC_NEW_CALL = 8, /* -r: [Service] new incoming call notification */
- RXRPC_EXCLUSIVE_CALL = 10, /* s-: Call should be on exclusive connection */
- RXRPC_UPGRADE_SERVICE = 11, /* s-: Request service upgrade for client call */
- RXRPC_TX_LENGTH = 12, /* s-: Total length of Tx data */
- RXRPC_SET_CALL_TIMEOUT = 13, /* s-: Set one or more call timeouts */
- RXRPC_CHARGE_ACCEPT = 14, /* s-: Charge the accept pool with a user call ID */
+ RXRPC_USER_CALL_ID = 1, /* -sr: User call ID specifier */
+ RXRPC_ABORT = 2, /* -sr: Abort request / notification [terminal] */
+ RXRPC_ACK = 3, /* S-r: RPC op final ACK received [terminal] */
+ RXRPC_NET_ERROR = 5, /* --r: Network error received [terminal] */
+ RXRPC_BUSY = 6, /* C-r: Server busy received [terminal] */
+ RXRPC_LOCAL_ERROR = 7, /* --r: Local error generated [terminal] */
+ RXRPC_NEW_CALL = 8, /* S-r: New incoming call notification */
+ RXRPC_EXCLUSIVE_CALL = 10, /* Cs-: Call should be on exclusive connection */
+ RXRPC_UPGRADE_SERVICE = 11, /* Cs-: Request service upgrade for client call */
+ RXRPC_TX_LENGTH = 12, /* -s-: Total length of Tx data */
+ RXRPC_SET_CALL_TIMEOUT = 13, /* -s-: Set one or more call timeouts */
+ RXRPC_CHARGE_ACCEPT = 14, /* Ss-: Charge the accept pool with a user call ID */
+ RXRPC_OOB_ID = 15, /* -sr: OOB message ID */
+ RXRPC_CHALLENGED = 16, /* C-r: Info on a received CHALLENGE */
+ RXRPC_RESPOND = 17, /* Cs-: Respond to a challenge */
+ RXRPC_RESPONDED = 18, /* S-r: Data received in RESPONSE */
+ RXRPC_RESP_RXGK_APPDATA = 19, /* Cs-: RESPONSE: RxGK app data to include */
RXRPC__SUPPORTED
};
@@ -73,6 +80,7 @@ enum rxrpc_cmsg_type {
#define RXRPC_SECURITY_RXKAD 2 /* kaserver or kerberos 4 */
#define RXRPC_SECURITY_RXGK 4 /* gssapi-based */
#define RXRPC_SECURITY_RXK5 5 /* kerberos 5 */
+#define RXRPC_SECURITY_YFS_RXGK 6 /* YFS gssapi-based */
/*
* RxRPC-level abort codes
@@ -118,4 +126,49 @@ enum rxrpc_cmsg_type {
#define RXKADDATALEN 19270411 /* user data too long */
#define RXKADILLEGALLEVEL 19270412 /* caller not authorised to use encrypted conns */
+/*
+ * RxGK GSSAPI security abort codes.
+ */
+#if 0 /* Original standard abort codes (used by OpenAFS) */
+#define RXGK_INCONSISTENCY 1233242880 /* Security module structure inconsistent */
+#define RXGK_PACKETSHORT 1233242881 /* Packet too short for security challenge */
+#define RXGK_BADCHALLENGE 1233242882 /* Invalid security challenge */
+#define RXGK_BADETYPE 1233242883 /* Invalid or impermissible encryption type */
+#define RXGK_BADLEVEL 1233242884 /* Invalid or impermissible security level */
+#define RXGK_BADKEYNO 1233242885 /* Key version number not found */
+#define RXGK_EXPIRED 1233242886 /* Token has expired */
+#define RXGK_NOTAUTH 1233242887 /* Caller not authorized */
+#define RXGK_BAD_TOKEN 1233242888 /* Security object was passed a bad token */
+#define RXGK_SEALED_INCON 1233242889 /* Sealed data inconsistent */
+#define RXGK_DATA_LEN 1233242890 /* User data too long */
+#define RXGK_BAD_QOP 1233242891 /* Inadequate quality of protection available */
+#else /* Revised standard abort codes (used by YFS) */
+#define RXGK_INCONSISTENCY 1233242880 /* Security module structure inconsistent */
+#define RXGK_PACKETSHORT 1233242881 /* Packet too short for security challenge */
+#define RXGK_BADCHALLENGE 1233242882 /* Security challenge/response failed */
+#define RXGK_SEALEDINCON 1233242883 /* Sealed data is inconsistent */
+#define RXGK_NOTAUTH 1233242884 /* Caller not authorised */
+#define RXGK_EXPIRED 1233242885 /* Authentication expired */
+#define RXGK_BADLEVEL 1233242886 /* Unsupported or not permitted security level */
+#define RXGK_BADKEYNO 1233242887 /* Bad transport key number */
+#define RXGK_NOTRXGK 1233242888 /* Security layer is not rxgk */
+#define RXGK_UNSUPPORTED 1233242889 /* Endpoint does not support rxgk */
+#define RXGK_GSSERROR 1233242890 /* GSSAPI mechanism error */
+#endif
+
+/*
+ * Challenge information in the RXRPC_CHALLENGED control message.
+ */
+struct rxrpc_challenge {
+ __u16 service_id; /* The service ID of the connection (may be upgraded) */
+ __u8 security_index; /* The security index of the connection */
+ __u8 pad; /* Round out to a multiple of 4 bytes. */
+ /* ... The security class gets to append extra information ... */
+};
+
+struct rxgk_challenge {
+ struct rxrpc_challenge base;
+ __u32 enctype; /* Krb5 encoding type */
+};
+
#endif /* _UAPI_LINUX_RXRPC_H */
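The new RXRPC_MANAGE_RESPONSE option and the RXRPC_CHALLENGED/RXRPC_RESPOND messages let a client drive security negotiation from userspace: a received CHALLENGE is reported as ancillary data beginning with struct rxrpc_challenge, which the security class may extend (struct rxgk_challenge appends the krb5 enctype). A minimal sketch of spotting the notification, assuming an AF_RXRPC client socket with RXRPC_MANAGE_RESPONSE enabled; building the RXRPC_RESPOND reply is omitted:

/* Hedged sketch: locating RXRPC_CHALLENGED in recvmsg() cmsg data. */
#include <sys/socket.h>
#include <linux/rxrpc.h>

static void scan_for_challenge(struct msghdr *msg)
{
	struct cmsghdr *c;

	for (c = CMSG_FIRSTHDR(msg); c; c = CMSG_NXTHDR(msg, c)) {
		if (c->cmsg_level != SOL_RXRPC ||
		    c->cmsg_type != RXRPC_CHALLENGED)
			continue;

		if (((struct rxrpc_challenge *)CMSG_DATA(c))->security_index ==
		    RXRPC_SECURITY_YFS_RXGK) {
			struct rxgk_challenge *rc = (void *)CMSG_DATA(c);

			(void)rc;	/* rc->enctype selects the krb5 encoding type */
		}
	}
}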
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index ec47f9b68a1b..1d234d7e1892 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -188,6 +188,7 @@ enum
LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */
LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */
LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */
+ LINUX_MIB_PAWS_TW_REJECTED, /* PAWSTimewait */
LINUX_MIB_DELAYEDACKS, /* DelayedACKs */
LINUX_MIB_DELAYEDACKLOCKED, /* DelayedACKLocked */
LINUX_MIB_DELAYEDACKLOST, /* DelayedACKLost */
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index d85d671deed3..edca3e430305 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -43,5 +43,6 @@ struct udphdr {
#define UDP_ENCAP_GTP1U 5 /* 3GPP TS 29.060 */
#define UDP_ENCAP_RXRPC 6
#define TCP_ENCAP_ESPINTCP 7 /* Yikes, this is really xfrm encap types. */
+#define UDP_ENCAP_OVPNINUDP 8 /* OpenVPN traffic */
#endif /* _UAPI_LINUX_UDP_H */
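UDP_ENCAP_OVPNINUDP reserves an encapsulation type for OpenVPN data-channel traffic carried in UDP. An in-kernel encap user typically claims such a type by configuring the tunnel socket, roughly as below; ovpn_sock and ovpn_udp_encap_recv are placeholder names, not necessarily the ovpn module's actual wiring:

/* Hedged sketch: attaching an encap_rcv handler for the new type. */
struct udp_tunnel_sock_cfg cfg = {
	.sk_user_data	= ovpn_sock,
	.encap_type	= UDP_ENCAP_OVPNINUDP,
	.encap_rcv	= ovpn_udp_encap_recv,
};

setup_udp_tunnel_sock(net, sock, &cfg);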
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 16ba36f34dfa..24a26b4bb0b8 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6541,6 +6541,7 @@ static const struct bpf_raw_tp_null_args raw_tp_null_args[] = {
{ "xprt_put_cong", 0x10 },
/* tcp */
{ "tcp_send_reset", 0x11 },
+ { "tcp_sendmsg_locked", 0x100 },
/* tegra_apb_dma */
{ "tegra_dma_tx_status", 0x100 },
/* timer_migration */
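The raw_tp_null_args mask gives each tracepoint argument one nibble, with bit 0x1 meaning the argument may be NULL (btf.c also uses bit 0x2 to mark IS_ERR pointers). So 0x11 on tcp_send_reset marks arguments 0 and 1, and the new 0x100 entry marks argument 2 of tcp_sendmsg_locked. A one-line restatement of the decoding:

/* Hedged restatement of the nibble-per-argument convention above. */
static bool raw_tp_arg_may_be_null(u64 mask, int arg)
{
	return mask & (0x1ULL << (arg * 4));
}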
diff --git a/kernel/configs/debug.config b/kernel/configs/debug.config
index 8aafd050b754..e81327d2cd63 100644
--- a/kernel/configs/debug.config
+++ b/kernel/configs/debug.config
@@ -112,3 +112,8 @@ CONFIG_BRANCH_PROFILE_NONE=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_FTRACE=y
CONFIG_FUNCTION_TRACER=y
+#
+# Preemption
+#
+CONFIG_DEBUG_PREEMPT=y
+CONFIG_PREEMPT=y
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1715e34b91af..244111c88183 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -897,9 +897,7 @@ static inline bool page_expected_state(struct page *page,
#ifdef CONFIG_MEMCG
page->memcg_data |
#endif
-#ifdef CONFIG_PAGE_POOL
- ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) |
-#endif
+ page_pool_page_is_pp(page) |
(page->flags & check_flags)))
return false;
@@ -926,10 +924,8 @@ static const char *page_bad_reason(struct page *page, unsigned long flags)
if (unlikely(page->memcg_data))
bad_reason = "page still charged to cgroup";
#endif
-#ifdef CONFIG_PAGE_POOL
- if (unlikely((page->pp_magic & ~0x3UL) == PP_SIGNATURE))
+ if (unlikely(page_pool_page_is_pp(page)))
bad_reason = "page_pool leak";
-#endif
return bad_reason;
}
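The two open-coded `(page->pp_magic & ~0x3UL) == PP_SIGNATURE` tests collapse into page_pool_page_is_pp(), so page_alloc.c no longer hard-codes the pp_magic layout — necessary now that the upper bits carry a DMA index (see the netmem_priv.h and page_pool.c hunks below). A sketch of the helper's likely shape, hedged since its definition lands outside this excerpt:

/* Hedged sketch; the real helper is defined alongside PP_MAGIC_MASK,
 * which must exclude the bits now reused for the DMA index.
 */
#ifdef CONFIG_PAGE_POOL
static inline bool page_pool_page_is_pp(struct page *page)
{
	return (page->pp_magic & PP_MAGIC_MASK) == PP_SIGNATURE;
}
#else
static inline bool page_pool_page_is_pp(struct page *page)
{
	return false;
}
#endif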
diff --git a/net/Kconfig b/net/Kconfig
index c3fca69a7c83..202cc595e5e6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -245,7 +245,6 @@ source "net/bridge/netfilter/Kconfig"
endif
-source "net/dccp/Kconfig"
source "net/sctp/Kconfig"
source "net/rds/Kconfig"
source "net/tipc/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 60ed5190eda8..aac960c41db6 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -42,7 +42,6 @@ obj-$(CONFIG_PHONET) += phonet/
ifneq ($(CONFIG_VLAN_8021Q),)
obj-y += 8021q/
endif
-obj-$(CONFIG_IP_DCCP) += dccp/
obj-$(CONFIG_IP_SCTP) += sctp/
obj-$(CONFIG_RDS) += rds/
obj-$(CONFIG_WIRELESS) += wireless/
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 183fcb362f9e..0adeafe11a36 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -284,6 +284,9 @@ int br_boolopt_toggle(struct net_bridge *br, enum br_boolopt_id opt, bool on,
case BR_BOOLOPT_MST_ENABLE:
err = br_mst_set_enabled(br, on, extack);
break;
+ case BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION:
+ br_opt_toggle(br, BROPT_MDB_OFFLOAD_FAIL_NOTIFICATION, on);
+ break;
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
@@ -302,6 +305,8 @@ int br_boolopt_get(const struct net_bridge *br, enum br_boolopt_id opt)
return br_opt_get(br, BROPT_MCAST_VLAN_SNOOPING_ENABLED);
case BR_BOOLOPT_MST_ENABLE:
return br_opt_get(br, BROPT_MST_ENABLED);
+ case BR_BOOLOPT_MDB_OFFLOAD_FAIL_NOTIFICATION:
+ return br_opt_get(br, BROPT_MDB_OFFLOAD_FAIL_NOTIFICATION);
default:
/* shouldn't be called with unsupported options */
WARN_ON(1);
@@ -363,21 +368,20 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}
-static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
+static void __net_exit br_net_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
struct net_device *dev;
- struct net *net;
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list)
- for_each_netdev(net, dev)
- if (netif_is_bridge_master(dev))
- br_dev_delete(dev, dev_to_kill);
+ ASSERT_RTNL_NET(net);
+
+ for_each_netdev(net, dev)
+ if (netif_is_bridge_master(dev))
+ br_dev_delete(dev, dev_to_kill);
}
static struct pernet_operations br_net_ops = {
- .exit_batch_rtnl = br_net_exit_batch_rtnl,
+ .exit_rtnl = br_net_exit_rtnl,
};
static const struct stp_proto br_stp_proto = {
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index 115a23054a58..1e2b51769eec 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -160,6 +160,9 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
if (br_opt_get(br, BROPT_NEIGH_SUPPRESS_ENABLED)) {
if (br_is_neigh_suppress_enabled(p, vid))
return;
+ if (is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ parp->ar_op == htons(ARPOP_REQUEST))
+ return;
if (parp->ar_op != htons(ARPOP_RREQUEST) &&
parp->ar_op != htons(ARPOP_RREPLY) &&
(ipv4_is_zeronet(sip) || sip == tip)) {
@@ -410,6 +413,10 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
if (br_is_neigh_suppress_enabled(p, vid))
return;
+ if (is_unicast_ether_addr(eth_hdr(skb)->h_dest) &&
+ msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
+ return;
+
if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
!msg->icmph.icmp6_solicited) {
/* prevent flooding to neigh suppress ports */
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 232133a0fd21..5f6ac9bf1527 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -189,7 +189,8 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) &&
br_multicast_querier_exists(brmctx, eth_hdr(skb), mdst)) {
if ((mdst && mdst->host_joined) ||
- br_multicast_is_router(brmctx, skb)) {
+ br_multicast_is_router(brmctx, skb) ||
+ br->dev->flags & IFF_ALLMULTI) {
local_rcv = true;
DEV_STATS_INC(br->dev, multicast);
}
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 722203b98ff7..400eb872b403 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -144,6 +144,8 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags)
e->flags |= MDB_FLAGS_STAR_EXCL;
if (flags & MDB_PG_FLAGS_BLOCKED)
e->flags |= MDB_FLAGS_BLOCKED;
+ if (flags & MDB_PG_FLAGS_OFFLOAD_FAILED)
+ e->flags |= MDB_FLAGS_OFFLOAD_FAILED;
}
static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip,
@@ -517,16 +519,17 @@ static size_t rtnl_mdb_nlmsg_size(const struct net_bridge_port_group *pg)
rtnl_mdb_nlmsg_pg_size(pg);
}
-void br_mdb_notify(struct net_device *dev,
- struct net_bridge_mdb_entry *mp,
- struct net_bridge_port_group *pg,
- int type)
+static void __br_mdb_notify(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type, bool notify_switchdev)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
int err = -ENOBUFS;
- br_switchdev_mdb_notify(dev, mp, pg, type);
+ if (notify_switchdev)
+ br_switchdev_mdb_notify(dev, mp, pg, type);
skb = nlmsg_new(rtnl_mdb_nlmsg_size(pg), GFP_ATOMIC);
if (!skb)
@@ -544,6 +547,21 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_MDB, err);
}
+void br_mdb_notify(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg,
+ int type)
+{
+ __br_mdb_notify(dev, mp, pg, type, true);
+}
+
+void br_mdb_flag_change_notify(struct net_device *dev,
+ struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg)
+{
+ __br_mdb_notify(dev, mp, pg, RTM_NEWMDB, false);
+}
+
static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
struct net_device *dev,
int ifindex, u16 vid, u32 pid,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d5b3c5936a79..71f351a6ce1b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -306,11 +306,12 @@ struct net_bridge_fdb_flush_desc {
u16 vlan_id;
};
-#define MDB_PG_FLAGS_PERMANENT BIT(0)
-#define MDB_PG_FLAGS_OFFLOAD BIT(1)
-#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
-#define MDB_PG_FLAGS_STAR_EXCL BIT(3)
-#define MDB_PG_FLAGS_BLOCKED BIT(4)
+#define MDB_PG_FLAGS_PERMANENT BIT(0)
+#define MDB_PG_FLAGS_OFFLOAD BIT(1)
+#define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
+#define MDB_PG_FLAGS_STAR_EXCL BIT(3)
+#define MDB_PG_FLAGS_BLOCKED BIT(4)
+#define MDB_PG_FLAGS_OFFLOAD_FAILED BIT(5)
#define PG_SRC_ENT_LIMIT 32
@@ -483,6 +484,7 @@ enum net_bridge_opts {
BROPT_VLAN_BRIDGE_BINDING,
BROPT_MCAST_VLAN_SNOOPING_ENABLED,
BROPT_MST_ENABLED,
+ BROPT_MDB_OFFLOAD_FAIL_NOTIFICATION,
};
struct net_bridge {
@@ -1002,6 +1004,8 @@ int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
struct net_bridge_port_group *pg, int type);
+void br_mdb_flag_change_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
+ struct net_bridge_port_group *pg);
void br_rtr_notify(struct net_device *dev, struct net_bridge_mcast_port *pmctx,
int type);
void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
@@ -1342,6 +1346,22 @@ br_multicast_ctx_matches_vlan_snooping(const struct net_bridge_mcast *brmctx)
return !!(vlan_snooping_enabled == br_multicast_ctx_is_vlan(brmctx));
}
+
+static inline void
+br_multicast_set_pg_offload_flags(struct net_bridge_port_group *p,
+ bool offloaded)
+{
+ p->flags &= ~(MDB_PG_FLAGS_OFFLOAD | MDB_PG_FLAGS_OFFLOAD_FAILED);
+ p->flags |= (offloaded ? MDB_PG_FLAGS_OFFLOAD :
+ MDB_PG_FLAGS_OFFLOAD_FAILED);
+}
+
+static inline bool
+br_mdb_should_notify(const struct net_bridge *br, u8 changed_flags)
+{
+ return br_opt_get(br, BROPT_MDB_OFFLOAD_FAIL_NOTIFICATION) &&
+ (changed_flags & MDB_PG_FLAGS_OFFLOAD_FAILED);
+}
#else
static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx,
struct net_bridge_mcast_port **pmctx,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 7b41ee8740cb..95d7355a0407 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -504,9 +504,10 @@ static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *pri
struct net_bridge_mdb_entry *mp;
struct net_bridge_port *port = data->port;
struct net_bridge *br = port->br;
+ u8 old_flags;
- if (err)
- goto err;
+ if (err == -EOPNOTSUPP)
+ goto out_free;
spin_lock_bh(&br->multicast_lock);
mp = br_mdb_ip_get(br, &data->ip);
@@ -516,11 +517,15 @@ static void br_switchdev_mdb_complete(struct net_device *dev, int err, void *pri
pp = &p->next) {
if (p->key.port != port)
continue;
- p->flags |= MDB_PG_FLAGS_OFFLOAD;
+
+ old_flags = p->flags;
+ br_multicast_set_pg_offload_flags(p, !err);
+ if (br_mdb_should_notify(br, old_flags ^ p->flags))
+ br_mdb_flag_change_notify(br->dev, mp, p);
}
out:
spin_unlock_bh(&br->multicast_lock);
-err:
+out_free:
kfree(priv);
}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f0693707aece..f0634f0cb834 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -163,8 +163,7 @@ done:
return skb;
}
-struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
- struct sk_buff_head *queue,
+struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue,
unsigned int flags,
int *off, int *err,
struct sk_buff **last)
@@ -261,7 +260,7 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk,
* However, this function was correct in any case. 8)
*/
spin_lock_irqsave(&queue->lock, cpu_flags);
- skb = __skb_try_recv_from_queue(sk, queue, flags, off, &error,
+ skb = __skb_try_recv_from_queue(queue, flags, off, &error,
last);
spin_unlock_irqrestore(&queue->lock, cpu_flags);
if (error)
diff --git a/net/core/dev.c b/net/core/dev.c
index 1be7cb73a602..d1a8cad0c99c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -828,7 +828,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
if (!dev)
return NULL;
@@ -1039,10 +1039,11 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
* This helper is intended for locking net_device after it has been looked up
* using a lockless lookup helper. Lock prevents the instance from going away.
*/
-struct net_device *__netdev_put_lock(struct net_device *dev)
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net)
{
netdev_lock(dev);
- if (dev->reg_state > NETREG_REGISTERED) {
+ if (dev->reg_state > NETREG_REGISTERED ||
+ dev->moving_ns || !net_eq(dev_net(dev), net)) {
netdev_unlock(dev);
dev_put(dev);
return NULL;
@@ -1051,6 +1052,20 @@ struct net_device *__netdev_put_lock(struct net_device *dev)
return dev;
}
+static struct net_device *
+__netdev_put_lock_ops_compat(struct net_device *dev, struct net *net)
+{
+ netdev_lock_ops_compat(dev);
+ if (dev->reg_state > NETREG_REGISTERED ||
+ dev->moving_ns || !net_eq(dev_net(dev), net)) {
+ netdev_unlock_ops_compat(dev);
+ dev_put(dev);
+ return NULL;
+ }
+ dev_put(dev);
+ return dev;
+}
+
/**
* netdev_get_by_index_lock() - find a device by its ifindex
* @net: the applicable net namespace
@@ -1070,7 +1085,19 @@ struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
if (!dev)
return NULL;
- return __netdev_put_lock(dev);
+ return __netdev_put_lock(dev, net);
+}
+
+struct net_device *
+netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev)
+ return NULL;
+
+ return __netdev_put_lock_ops_compat(dev, net);
}
struct net_device *
@@ -1090,7 +1117,32 @@ netdev_xa_find_lock(struct net *net, struct net_device *dev,
dev_hold(dev);
rcu_read_unlock();
- dev = __netdev_put_lock(dev);
+ dev = __netdev_put_lock(dev, net);
+ if (dev)
+ return dev;
+
+ (*index)++;
+ } while (true);
+}
+
+struct net_device *
+netdev_xa_find_lock_ops_compat(struct net *net, struct net_device *dev,
+ unsigned long *index)
+{
+ if (dev)
+ netdev_unlock_ops_compat(dev);
+
+ do {
+ rcu_read_lock();
+ dev = xa_find(&net->dev_by_index, index, ULONG_MAX, XA_PRESENT);
+ if (!dev) {
+ rcu_read_unlock();
+ return NULL;
+ }
+ dev_hold(dev);
+ rcu_read_unlock();
+
+ dev = __netdev_put_lock_ops_compat(dev, net);
if (dev)
return dev;
@@ -4946,7 +4998,8 @@ static void rps_trigger_softirq(void *data)
struct softnet_data *sd = data;
____napi_schedule(sd, &sd->backlog);
- sd->received_rps++;
+ /* Pairs with READ_ONCE() in softnet_seq_show() */
+ WRITE_ONCE(sd->received_rps, sd->received_rps + 1);
}
#endif /* CONFIG_RPS */
@@ -5031,7 +5084,7 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
rcu_read_lock();
fl = rcu_dereference(sd->flow_limit);
if (fl) {
- new_flow = skb_get_hash(skb) & (fl->num_buckets - 1);
+ new_flow = hash_32(skb_get_hash(skb), fl->log_buckets);
old_flow = fl->history[fl->history_head];
fl->history[fl->history_head] = new_flow;
@@ -5042,7 +5095,8 @@ static bool skb_flow_limit(struct sk_buff *skb, unsigned int qlen)
fl->buckets[old_flow]--;
if (++fl->buckets[new_flow] > (FLOW_LIMIT_HISTORY >> 1)) {
- fl->count++;
+ /* Pairs with READ_ONCE() in softnet_seq_show() */
+ WRITE_ONCE(fl->count, fl->count + 1);
rcu_read_unlock();
return true;
}
@@ -7515,7 +7569,8 @@ start:
*/
if (unlikely(budget <= 0 ||
time_after_eq(jiffies, time_limit))) {
- sd->time_squeeze++;
+ /* Pairs with READ_ONCE() in softnet_seq_show() */
+ WRITE_ONCE(sd->time_squeeze, sd->time_squeeze + 1);
break;
}
}
@@ -11057,8 +11112,7 @@ int register_netdevice(struct net_device *dev)
* Prevent userspace races by waiting until the network
* device is fully setup before sending notifications.
*/
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing))
rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL);
out:
@@ -11981,8 +12035,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
*/
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
- if (!dev->rtnl_link_ops ||
- dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
+ if (!(dev->rtnl_link_ops && dev->rtnl_link_initializing))
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
GFP_KERNEL, NULL, 0,
portid, nlh);
@@ -12156,7 +12209,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
netif_close(dev);
/* And unlink it from device chain */
unlist_netdevice(dev);
- netdev_unlock_ops(dev);
+
+ if (!netdev_need_ops_lock(dev))
+ netdev_lock(dev);
+ dev->moving_ns = true;
+ netdev_unlock(dev);
synchronize_net();
@@ -12192,7 +12249,9 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
move_netdevice_notifiers_dev_net(dev, net);
/* Actually switch the network namespace */
+ netdev_lock(dev);
dev_net_set(dev, net);
+ netdev_unlock(dev);
dev->ifindex = new_ifindex;
if (new_name[0]) {
@@ -12218,7 +12277,11 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net);
WARN_ON(err);
- netdev_lock_ops(dev);
+ netdev_lock(dev);
+ dev->moving_ns = false;
+ if (!netdev_need_ops_lock(dev))
+ netdev_unlock(dev);
+
/* Add the device back in the hashes */
list_netdevice(dev);
/* Notify protocols, that a new device appeared. */
diff --git a/net/core/dev.h b/net/core/dev.h
index 7ee203395d8e..e93f36b7ddf3 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -15,8 +15,9 @@ struct cpumask;
/* Random bits of netdevice that don't need to be exposed */
#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
struct sd_flow_limit {
- u64 count;
- unsigned int num_buckets;
+ struct rcu_head rcu;
+ unsigned int count;
+ u8 log_buckets;
unsigned int history_head;
u16 history[FLOW_LIMIT_HISTORY];
u8 buckets[];
@@ -29,7 +30,7 @@ netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
struct net_device *dev_get_by_napi_id(unsigned int napi_id);
struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
-struct net_device *__netdev_put_lock(struct net_device *dev);
+struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net);
struct net_device *
netdev_xa_find_lock(struct net *net, struct net_device *dev,
unsigned long *index);
@@ -41,6 +42,21 @@ DEFINE_FREE(netdev_unlock, struct net_device *, if (_T) netdev_unlock(_T));
(var_name = netdev_xa_find_lock(net, var_name, &ifindex)); \
ifindex++)
+struct net_device *
+netdev_get_by_index_lock_ops_compat(struct net *net, int ifindex);
+struct net_device *
+netdev_xa_find_lock_ops_compat(struct net *net, struct net_device *dev,
+ unsigned long *index);
+
+DEFINE_FREE(netdev_unlock_ops_compat, struct net_device *,
+ if (_T) netdev_unlock_ops_compat(_T));
+
+#define for_each_netdev_lock_ops_compat_scoped(net, var_name, ifindex) \
+ for (struct net_device *var_name __free(netdev_unlock_ops_compat) = NULL; \
+ (var_name = netdev_xa_find_lock_ops_compat(net, var_name, \
+ &ifindex)); \
+ ifindex++)
+
#ifdef CONFIG_PROC_FS
int __init dev_proc_init(void);
#else
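Storing log_buckets instead of num_buckets pairs with the skb_flow_limit() hunk above: hash_32() multiplies by a 32-bit golden-ratio constant and keeps the top log_buckets bits, so every bit of skb_get_hash() influences the bucket choice, whereas the old `hash & (num_buckets - 1)` used only the low bits. Illustration for a 128-bucket table (log_buckets = 7):

/* Hedged illustration of the bucket-selection change. */
#include <linux/hash.h>

static u32 old_bucket(u32 hash) { return hash & (128 - 1); } /* low 7 bits only */
static u32 new_bucket(u32 hash) { return hash_32(hash, 7); } /* top 7 bits of hash * GOLDEN_RATIO_32 */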
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 7af302080a66..8ca634964e36 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -874,13 +874,14 @@ int fib_newrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack, bool rtnl_held)
{
struct fib_rule *rule = NULL, *r, *last = NULL;
- struct fib_rule_hdr *frh = nlmsg_data(nlh);
int err = -EINVAL, unresolved = 0;
struct fib_rules_ops *ops = NULL;
struct nlattr *tb[FRA_MAX + 1];
bool user_priority = false;
+ struct fib_rule_hdr *frh;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+ frh = nlmsg_payload(nlh, sizeof(*frh));
+ if (!frh) {
NL_SET_ERR_MSG(extack, "Invalid msg length");
goto errout;
}
@@ -1002,13 +1003,14 @@ int fib_delrule(struct net *net, struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack, bool rtnl_held)
{
struct fib_rule *rule = NULL, *nlrule = NULL;
- struct fib_rule_hdr *frh = nlmsg_data(nlh);
struct fib_rules_ops *ops = NULL;
struct nlattr *tb[FRA_MAX+1];
bool user_priority = false;
+ struct fib_rule_hdr *frh;
int err = -EINVAL;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+ frh = nlmsg_payload(nlh, sizeof(*frh));
+ if (!frh) {
NL_SET_ERR_MSG(extack, "Invalid msg length");
goto errout;
}
@@ -1260,12 +1262,12 @@ static int fib_valid_dumprule_req(const struct nlmsghdr *nlh,
{
struct fib_rule_hdr *frh;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+ frh = nlmsg_payload(nlh, sizeof(*frh));
+ if (!frh) {
NL_SET_ERR_MSG(extack, "Invalid header for fib rule dump request");
return -EINVAL;
}
- frh = nlmsg_data(nlh);
if (frh->dst_len || frh->src_len || frh->tos || frh->table ||
frh->res1 || frh->res2 || frh->action || frh->flags) {
NL_SET_ERR_MSG(extack,
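This is one of several conversions in the series (see also neighbour.c and rtnetlink.c below) replacing the open-coded length check plus nlmsg_data() with nlmsg_payload(), which folds both steps into one call returning NULL when the message is too short. A sketch of the contract the callers now rely on, mirroring the checks it replaces:

/* Hedged sketch of nlmsg_payload() (see include/net/netlink.h). */
static inline void *nlmsg_payload(const struct nlmsghdr *nlh, size_t len)
{
	if (nlh->nlmsg_len < nlmsg_msg_size(len))
		return NULL;

	return nlmsg_data(nlh);
}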
diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c
index 941e26c1343d..6fade574bc2a 100644
--- a/net/core/lock_debug.c
+++ b/net/core/lock_debug.c
@@ -18,9 +18,10 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event,
/* Keep enum and don't add default to trigger -Werror=switch */
switch (cmd) {
+ case NETDEV_CHANGE:
case NETDEV_REGISTER:
case NETDEV_UP:
- case NETDEV_CHANGE:
+ case NETDEV_XDP_FEAT_CHANGE:
netdev_ops_assert_locked(dev);
fallthrough;
case NETDEV_DOWN:
@@ -58,7 +59,6 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event,
case NETDEV_OFFLOAD_XSTATS_DISABLE:
case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
- case NETDEV_XDP_FEAT_CHANGE:
ASSERT_RTNL();
break;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a07249b59ae1..65cf582b5dac 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2430,12 +2430,12 @@ static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
{
struct ndtmsg *ndtm;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
+ ndtm = nlmsg_payload(nlh, sizeof(*ndtm));
+ if (!ndtm) {
NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
return -EINVAL;
}
- ndtm = nlmsg_data(nlh);
if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
return -EINVAL;
@@ -2855,12 +2855,12 @@ static int neigh_valid_get_req(const struct nlmsghdr *nlh,
struct ndmsg *ndm;
int err, i;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ ndm = nlmsg_payload(nlh, sizeof(*ndm));
+ if (!ndm) {
NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
return -EINVAL;
}
- ndm = nlmsg_data(nlh);
if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
ndm->ndm_type) {
NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 3e92bf0f9060..4f0f0709a1cb 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -132,8 +132,9 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
rcu_read_lock();
fl = rcu_dereference(sd->flow_limit);
+ /* Pairs with WRITE_ONCE() in skb_flow_limit() */
if (fl)
- flow_limit_count = fl->count;
+ flow_limit_count = READ_ONCE(fl->count);
rcu_read_unlock();
#endif
@@ -144,11 +145,11 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
seq_printf(seq,
"%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x\n",
- sd->processed, atomic_read(&sd->dropped),
- sd->time_squeeze, 0,
+ READ_ONCE(sd->processed), atomic_read(&sd->dropped),
+ READ_ONCE(sd->time_squeeze), 0,
0, 0, 0, 0, /* was fastroute */
0, /* was cpu_collision */
- sd->received_rps, flow_limit_count,
+ READ_ONCE(sd->received_rps), flow_limit_count,
input_qlen + process_qlen, (int)seq->index,
input_qlen, process_qlen);
return 0;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index b0dfdf791ece..0a2b24af4028 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -163,16 +163,45 @@ static void ops_pre_exit_list(const struct pernet_operations *ops,
}
}
+static void ops_exit_rtnl_list(const struct list_head *ops_list,
+ const struct pernet_operations *ops,
+ struct list_head *net_exit_list)
+{
+ const struct pernet_operations *saved_ops = ops;
+ LIST_HEAD(dev_kill_list);
+ struct net *net;
+
+ rtnl_lock();
+
+ list_for_each_entry(net, net_exit_list, exit_list) {
+ __rtnl_net_lock(net);
+
+ ops = saved_ops;
+ list_for_each_entry_continue_reverse(ops, ops_list, list) {
+ if (ops->exit_rtnl)
+ ops->exit_rtnl(net, &dev_kill_list);
+ }
+
+ __rtnl_net_unlock(net);
+ }
+
+ unregister_netdevice_many(&dev_kill_list);
+
+ rtnl_unlock();
+}
+
static void ops_exit_list(const struct pernet_operations *ops,
struct list_head *net_exit_list)
{
- struct net *net;
if (ops->exit) {
+ struct net *net;
+
list_for_each_entry(net, net_exit_list, exit_list) {
ops->exit(net);
cond_resched();
}
}
+
if (ops->exit_batch)
ops->exit_batch(net_exit_list);
}
@@ -188,6 +217,54 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
+static void ops_undo_list(const struct list_head *ops_list,
+ const struct pernet_operations *ops,
+ struct list_head *net_exit_list,
+ bool expedite_rcu, bool hold_rtnl)
+{
+ const struct pernet_operations *saved_ops;
+
+ if (!ops)
+ ops = list_entry(ops_list, typeof(*ops), list);
+
+ saved_ops = ops;
+
+ list_for_each_entry_continue_reverse(ops, ops_list, list)
+ ops_pre_exit_list(ops, net_exit_list);
+
+ /* Another CPU might be rcu-iterating the list, wait for it.
+ * This needs to be before calling the exit() notifiers, so the
+ * rcu_barrier() after ops_undo_list() isn't sufficient alone.
+ * Also the pre_exit() and exit() methods need this barrier.
+ */
+ if (expedite_rcu)
+ synchronize_rcu_expedited();
+ else
+ synchronize_rcu();
+
+ if (hold_rtnl)
+ ops_exit_rtnl_list(ops_list, saved_ops, net_exit_list);
+
+ ops = saved_ops;
+ list_for_each_entry_continue_reverse(ops, ops_list, list)
+ ops_exit_list(ops, net_exit_list);
+
+ ops = saved_ops;
+ list_for_each_entry_continue_reverse(ops, ops_list, list)
+ ops_free_list(ops, net_exit_list);
+}
+
+static void ops_undo_single(struct pernet_operations *ops,
+ struct list_head *net_exit_list)
+{
+ bool hold_rtnl = !!ops->exit_rtnl;
+ LIST_HEAD(ops_list);
+
+ list_add(&ops->list, &ops_list);
+ ops_undo_list(&ops_list, NULL, net_exit_list, false, hold_rtnl);
+ list_del(&ops->list);
+}
+
/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
@@ -351,9 +428,8 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
static __net_init int setup_net(struct net *net)
{
/* Must be called with pernet_ops_rwsem held */
- const struct pernet_operations *ops, *saved_ops;
+ const struct pernet_operations *ops;
LIST_HEAD(net_exit_list);
- LIST_HEAD(dev_kill_list);
int error = 0;
preempt_disable();
@@ -376,29 +452,7 @@ out_undo:
* for the pernet modules whose init functions did not fail.
*/
list_add(&net->exit_list, &net_exit_list);
- saved_ops = ops;
- list_for_each_entry_continue_reverse(ops, &pernet_list, list)
- ops_pre_exit_list(ops, &net_exit_list);
-
- synchronize_rcu();
-
- ops = saved_ops;
- rtnl_lock();
- list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
- if (ops->exit_batch_rtnl)
- ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
- }
- unregister_netdevice_many(&dev_kill_list);
- rtnl_unlock();
-
- ops = saved_ops;
- list_for_each_entry_continue_reverse(ops, &pernet_list, list)
- ops_exit_list(ops, &net_exit_list);
-
- ops = saved_ops;
- list_for_each_entry_continue_reverse(ops, &pernet_list, list)
- ops_free_list(ops, &net_exit_list);
-
+ ops_undo_list(&pernet_list, ops, &net_exit_list, false, true);
rcu_barrier();
goto out;
}
@@ -594,11 +648,9 @@ struct task_struct *cleanup_net_task;
static void cleanup_net(struct work_struct *work)
{
- const struct pernet_operations *ops;
- struct net *net, *tmp, *last;
struct llist_node *net_kill_list;
+ struct net *net, *tmp, *last;
LIST_HEAD(net_exit_list);
- LIST_HEAD(dev_kill_list);
cleanup_net_task = current;
@@ -629,33 +681,7 @@ static void cleanup_net(struct work_struct *work)
list_add_tail(&net->exit_list, &net_exit_list);
}
- /* Run all of the network namespace pre_exit methods */
- list_for_each_entry_reverse(ops, &pernet_list, list)
- ops_pre_exit_list(ops, &net_exit_list);
-
- /*
- * Another CPU might be rcu-iterating the list, wait for it.
- * This needs to be before calling the exit() notifiers, so
- * the rcu_barrier() below isn't sufficient alone.
- * Also the pre_exit() and exit() methods need this barrier.
- */
- synchronize_rcu_expedited();
-
- rtnl_lock();
- list_for_each_entry_reverse(ops, &pernet_list, list) {
- if (ops->exit_batch_rtnl)
- ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
- }
- unregister_netdevice_many(&dev_kill_list);
- rtnl_unlock();
-
- /* Run all of the network namespace exit methods */
- list_for_each_entry_reverse(ops, &pernet_list, list)
- ops_exit_list(ops, &net_exit_list);
-
- /* Free the net generic variables */
- list_for_each_entry_reverse(ops, &pernet_list, list)
- ops_free_list(ops, &net_exit_list);
+ ops_undo_list(&pernet_list, NULL, &net_exit_list, true, true);
up_read(&pernet_ops_rwsem);
@@ -1239,31 +1265,13 @@ void __init net_ns_init(void)
rtnl_register_many(net_ns_rtnl_msg_handlers);
}
-static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
-{
- ops_pre_exit_list(ops, net_exit_list);
- synchronize_rcu();
-
- if (ops->exit_batch_rtnl) {
- LIST_HEAD(dev_kill_list);
-
- rtnl_lock();
- ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
- unregister_netdevice_many(&dev_kill_list);
- rtnl_unlock();
- }
- ops_exit_list(ops, net_exit_list);
-
- ops_free_list(ops, net_exit_list);
-}
-
#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
{
+ LIST_HEAD(net_exit_list);
struct net *net;
int error;
- LIST_HEAD(net_exit_list);
list_add_tail(&ops->list, list);
if (ops->init || ops->id) {
@@ -1282,21 +1290,21 @@ static int __register_pernet_operations(struct list_head *list,
out_undo:
/* If I have an error cleanup all namespaces I initialized */
list_del(&ops->list);
- free_exit_list(ops, &net_exit_list);
+ ops_undo_single(ops, &net_exit_list);
return error;
}
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
- struct net *net;
LIST_HEAD(net_exit_list);
+ struct net *net;
- list_del(&ops->list);
/* See comment in __register_pernet_operations() */
for_each_net(net)
list_add_tail(&net->exit_list, &net_exit_list);
- free_exit_list(ops, &net_exit_list);
+ list_del(&ops->list);
+ ops_undo_single(ops, &net_exit_list);
}
#else
@@ -1304,22 +1312,23 @@ static void __unregister_pernet_operations(struct pernet_operations *ops)
static int __register_pernet_operations(struct list_head *list,
struct pernet_operations *ops)
{
- if (!init_net_initialized) {
- list_add_tail(&ops->list, list);
+ list_add_tail(&ops->list, list);
+
+ if (!init_net_initialized)
return 0;
- }
return ops_init(ops, &init_net);
}
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
- if (!init_net_initialized) {
- list_del(&ops->list);
- } else {
+ list_del(&ops->list);
+
+ if (init_net_initialized) {
LIST_HEAD(net_exit_list);
+
list_add(&init_net.exit_list, &net_exit_list);
- free_exit_list(ops, &net_exit_list);
+ ops_undo_single(ops, &net_exit_list);
}
}
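With the pre_exit/exit_rtnl/exit/exit_batch teardown sequence centralized in ops_undo_list(), subsystems that only need to kill their devices can use the new per-netns .exit_rtnl hook, as the bridge conversion earlier in this patch does. A minimal sketch of a user, with foo_* as placeholder names:

/* Hedged sketch: exit_rtnl runs under rtnl_lock (and the per-netns
 * lock); devices queued on dev_kill_list are unregistered in one
 * batch by ops_exit_rtnl_list() afterwards.
 */
static void __net_exit foo_exit_rtnl(struct net *net,
				     struct list_head *dev_kill_list)
{
	struct net_device *dev;

	for_each_netdev(net, dev)
		if (dev->rtnl_link_ops == &foo_link_ops)
			unregister_netdevice_queue(dev, dev_kill_list);
}

static struct pernet_operations foo_net_ops = {
	.exit_rtnl = foo_exit_rtnl,
};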
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index 5d7af50fe702..b64c614a00c4 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -38,6 +38,8 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
u64 xdp_rx_meta = 0;
void *hdr;
+ netdev_assert_locked(netdev); /* note: rtnl_lock may not be held! */
+
hdr = genlmsg_iput(rsp, info);
if (!hdr)
return -EMSGSIZE;
@@ -122,15 +124,14 @@ int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
if (!rsp)
return -ENOMEM;
- rtnl_lock();
-
- netdev = __dev_get_by_index(genl_info_net(info), ifindex);
- if (netdev)
- err = netdev_nl_dev_fill(netdev, rsp, info);
- else
+ netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
+ if (!netdev) {
err = -ENODEV;
+ goto err_free_msg;
+ }
- rtnl_unlock();
+ err = netdev_nl_dev_fill(netdev, rsp, info);
+ netdev_unlock(netdev);
if (err)
goto err_free_msg;
@@ -146,18 +147,15 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
struct net *net = sock_net(skb->sk);
- struct net_device *netdev;
- int err = 0;
+ int err;
- rtnl_lock();
- for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
if (err < 0)
- break;
+ return err;
}
- rtnl_unlock();
- return err;
+ return 0;
}
static int
@@ -481,18 +479,15 @@ int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
if (!rsp)
return -ENOMEM;
- rtnl_lock();
-
- netdev = netdev_get_by_index_lock(genl_info_net(info), ifindex);
+ netdev = netdev_get_by_index_lock_ops_compat(genl_info_net(info),
+ ifindex);
if (netdev) {
err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
- netdev_unlock(netdev);
+ netdev_unlock_ops_compat(netdev);
} else {
err = -ENODEV;
}
- rtnl_unlock();
-
if (err)
goto err_free_msg;
@@ -541,17 +536,17 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
- rtnl_lock();
if (ifindex) {
- netdev = netdev_get_by_index_lock(net, ifindex);
+ netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
if (netdev) {
err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
- netdev_unlock(netdev);
+ netdev_unlock_ops_compat(netdev);
} else {
err = -ENODEV;
}
} else {
- for_each_netdev_lock_scoped(net, netdev, ctx->ifindex) {
+ for_each_netdev_lock_ops_compat_scoped(net, netdev,
+ ctx->ifindex) {
err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
if (err < 0)
break;
@@ -559,7 +554,6 @@ int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
ctx->txq_idx = 0;
}
}
- rtnl_unlock();
return err;
}
@@ -801,26 +795,31 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
if (info->attrs[NETDEV_A_QSTATS_IFINDEX])
ifindex = nla_get_u32(info->attrs[NETDEV_A_QSTATS_IFINDEX]);
- rtnl_lock();
if (ifindex) {
- netdev = __dev_get_by_index(net, ifindex);
- if (netdev && netdev->stat_ops) {
- err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
- info, ctx);
- } else {
+ netdev = netdev_get_by_index_lock_ops_compat(net, ifindex);
+ if (!netdev) {
NL_SET_BAD_ATTR(info->extack,
info->attrs[NETDEV_A_QSTATS_IFINDEX]);
- err = netdev ? -EOPNOTSUPP : -ENODEV;
+ return -ENODEV;
}
- } else {
- for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ if (netdev->stat_ops) {
err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
info, ctx);
- if (err < 0)
- break;
+ } else {
+ NL_SET_BAD_ATTR(info->extack,
+ info->attrs[NETDEV_A_QSTATS_IFINDEX]);
+ err = -EOPNOTSUPP;
}
+ netdev_unlock_ops_compat(netdev);
+ return err;
+ }
+
+ for_each_netdev_lock_ops_compat_scoped(net, netdev, ctx->ifindex) {
+ err = netdev_nl_qstats_get_dump_one(netdev, scope, skb,
+ info, ctx);
+ if (err < 0)
+ break;
}
- rtnl_unlock();
return err;
}
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
index 7eadb8393e00..cd95394399b4 100644
--- a/net/core/netmem_priv.h
+++ b/net/core/netmem_priv.h
@@ -5,7 +5,7 @@
static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
{
- return __netmem_clear_lsb(netmem)->pp_magic;
+ return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
}
static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
@@ -15,9 +15,16 @@ static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
static inline void netmem_clear_pp_magic(netmem_ref netmem)
{
+ WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK);
+
__netmem_clear_lsb(netmem)->pp_magic = 0;
}
+static inline bool netmem_is_pp(netmem_ref netmem)
+{
+ return (netmem_get_pp_magic(netmem) & PP_MAGIC_MASK) == PP_SIGNATURE;
+}
+
static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
{
__netmem_clear_lsb(netmem)->pp = pool;
@@ -28,4 +35,28 @@ static inline void netmem_set_dma_addr(netmem_ref netmem,
{
__netmem_clear_lsb(netmem)->dma_addr = dma_addr;
}
+
+static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
+{
+ unsigned long magic;
+
+ if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+ return 0;
+
+ magic = __netmem_clear_lsb(netmem)->pp_magic;
+
+ return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
+}
+
+static inline void netmem_set_dma_index(netmem_ref netmem,
+ unsigned long id)
+{
+ unsigned long magic;
+
+ if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
+ return;
+
+ magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT);
+ __netmem_clear_lsb(netmem)->pp_magic = magic;
+}
#endif
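pp_magic now multiplexes the page_pool signature with a small allocation id: the id lives in the PP_DMA_INDEX_MASK bits, which is why netmem_get_pp_magic() masks them off and netmem_clear_pp_magic() warns if an id is still present. Roughly, with the shift/mask values assumed to come from this series' mm.h changes:

/* Hedged illustration of the packing used above. */
static unsigned long pack_pp_magic(unsigned long id)
{
	/* signature in the low bits, xa_alloc id shifted above them */
	return PP_SIGNATURE | (id << PP_DMA_INDEX_SHIFT);
}

/* A signature test must therefore ignore the id bits:
 * (magic & PP_MAGIC_MASK) == PP_SIGNATURE
 */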
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 7745ad924ae2..2b7684865941 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -276,8 +276,7 @@ static int page_pool_init(struct page_pool *pool,
/* Driver calling page_pool_create() also call page_pool_destroy() */
refcount_set(&pool->user_cnt, 1);
- if (pool->dma_map)
- get_device(pool->p.dev);
+ xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1);
if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
netdev_assert_locked(pool->slow.netdev);
@@ -320,9 +319,7 @@ free_ptr_ring:
static void page_pool_uninit(struct page_pool *pool)
{
ptr_ring_cleanup(&pool->ring, NULL);
-
- if (pool->dma_map)
- put_device(pool->p.dev);
+ xa_destroy(&pool->dma_mapped);
#ifdef CONFIG_PAGE_POOL_STATS
if (!pool->system)
@@ -463,13 +460,21 @@ page_pool_dma_sync_for_device(const struct page_pool *pool,
netmem_ref netmem,
u32 dma_sync_size)
{
- if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
- __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
+ if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) {
+ rcu_read_lock();
+ /* re-check under rcu_read_lock() to sync with page_pool_scrub() */
+ if (pool->dma_sync)
+ __page_pool_dma_sync_for_device(pool, netmem,
+ dma_sync_size);
+ rcu_read_unlock();
+ }
}
-static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
+static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp)
{
dma_addr_t dma;
+ int err;
+ u32 id;
/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
* since dma_addr_t can be either 32 or 64 bits and does not always fit
@@ -483,15 +488,30 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
if (dma_mapping_error(pool->p.dev, dma))
return false;
- if (page_pool_set_dma_addr_netmem(netmem, dma))
+ if (page_pool_set_dma_addr_netmem(netmem, dma)) {
+ WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
goto unmap_failed;
+ }
+
+ if (in_softirq())
+ err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
+ PP_DMA_INDEX_LIMIT, gfp);
+ else
+ err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
+ PP_DMA_INDEX_LIMIT, gfp);
+ if (err) {
+ WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@");
+ goto unset_failed;
+ }
+ netmem_set_dma_index(netmem, id);
page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
return true;
+unset_failed:
+ page_pool_set_dma_addr_netmem(netmem, 0);
unmap_failed:
- WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
dma_unmap_page_attrs(pool->p.dev, dma,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
@@ -508,7 +528,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
if (unlikely(!page))
return NULL;
- if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) {
+ if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) {
put_page(page);
return NULL;
}
@@ -554,7 +574,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
*/
for (i = 0; i < nr_pages; i++) {
netmem = pool->alloc.cache[i];
- if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) {
+ if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) {
put_page(netmem_to_page(netmem));
continue;
}
@@ -656,6 +676,8 @@ void page_pool_clear_pp_info(netmem_ref netmem)
static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
netmem_ref netmem)
{
+ struct page *old, *page = netmem_to_page(netmem);
+ unsigned long id;
dma_addr_t dma;
if (!pool->dma_map)
@@ -664,6 +686,17 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
*/
return;
+ id = netmem_get_dma_index(netmem);
+ if (!id)
+ return;
+
+ if (in_softirq())
+ old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
+ else
+ old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
+ if (old != page)
+ return;
+
dma = page_pool_get_dma_addr_netmem(netmem);
/* When page is unmapped, it cannot be returned to our pool */
@@ -671,6 +704,7 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
page_pool_set_dma_addr_netmem(netmem, 0);
+ netmem_set_dma_index(netmem, 0);
}
/* Disconnects a page (from a page_pool). API users can have a need
@@ -1080,8 +1114,29 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
static void page_pool_scrub(struct page_pool *pool)
{
+ unsigned long id;
+ void *ptr;
+
page_pool_empty_alloc_cache_once(pool);
- pool->destroy_cnt++;
+ if (!pool->destroy_cnt++ && pool->dma_map) {
+ if (pool->dma_sync) {
+ /* Disable page_pool_dma_sync_for_device() */
+ pool->dma_sync = false;
+
+ /* Make sure all concurrent returns that may see the old
+ * value of dma_sync (and thus perform a sync) have
+ * finished before doing the unmapping below. Skip the
+ * wait if the device doesn't actually need syncing, or
+ * if there are no outstanding mapped pages.
+ */
+ if (dma_dev_need_sync(pool->p.dev) &&
+ !xa_empty(&pool->dma_mapped))
+ synchronize_net();
+ }
+
+ xa_for_each(&pool->dma_mapped, id, ptr)
+ __page_pool_release_page_dma(pool, page_to_netmem(ptr));
+ }
/* No more consumers should exist, but producers could still
* be in-flight.
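The xarray gives the pool a live record of every DMA-mapped page, so page_pool_scrub() can unmap stragglers before the pool (and potentially the device) goes away. The xa_alloc()/xa_cmpxchg() pairing is what makes the release path race-free; XA_FLAGS_ALLOC1 guarantees ids start at 1, so id 0 can mean "not tracked". Sketched:

/* Hedged sketch of the tracking protocol used above. */
u32 id;

/* map: remember the page under a fresh id */
if (!xa_alloc(&pool->dma_mapped, &id, page, PP_DMA_INDEX_LIMIT, gfp))
	netmem_set_dma_index(netmem, id);

/* unmap: only the caller that wins the cmpxchg may dma_unmap, so a
 * page return racing with page_pool_scrub() cannot unmap twice
 */
if (xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0) == page)
	/* we own the unmap */;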
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fe7fdefab994..0ebe5461d4d9 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -177,7 +177,7 @@
#define MAX_IMIX_ENTRIES 20
#define IMIX_PRECISION 100 /* Precision of IMIX distribution */
-#define func_enter() pr_debug("entering %s\n", __func__);
+#define func_enter() pr_debug("entering %s\n", __func__)
#define PKT_FLAGS \
pf(IPV6) /* Interface in IPV6 Mode */ \
@@ -227,12 +227,12 @@ static char *pkt_flag_names[] = {
/* Xmit modes */
#define M_START_XMIT 0 /* Default normal TX */
-#define M_NETIF_RECEIVE 1 /* Inject packets into stack */
+#define M_NETIF_RECEIVE 1 /* Inject packets into stack */
#define M_QUEUE_XMIT 2 /* Inject packet into qdisc */
/* If lock -- protects updating of if_list */
-#define if_lock(t) mutex_lock(&(t->if_lock));
-#define if_unlock(t) mutex_unlock(&(t->if_lock));
+#define if_lock(t) mutex_lock(&(t->if_lock))
+#define if_unlock(t) mutex_unlock(&(t->if_lock))
/* Used to help with determining the pkts on receive */
#define PKTGEN_MAGIC 0xbe9be955
@@ -283,7 +283,8 @@ struct pktgen_dev {
int pkt_overhead; /* overhead for MPLS, VLANs, IPSEC etc */
int nfrags;
int removal_mark; /* non-zero => the device is marked for
- * removal by worker thread */
+ * removal by worker thread
+ */
struct page *page;
u64 delay; /* nano-seconds */
@@ -346,10 +347,12 @@ struct pktgen_dev {
__u16 udp_dst_max; /* exclusive, dest UDP port */
/* DSCP + ECN */
- __u8 tos; /* six MSB of (former) IPv4 TOS
- are for dscp codepoint */
- __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6
- (see RFC 3260, sec. 4) */
+ __u8 tos; /* six MSB of (former) IPv4 TOS
+ * are for dscp codepoint
+ */
+ __u8 traffic_class; /* ditto for the (former) Traffic Class in IPv6
+ * (see RFC 3260, sec. 4)
+ */
/* IMIX */
unsigned int n_imix_entries;
@@ -389,12 +392,12 @@ struct pktgen_dev {
__u8 hh[14];
/* = {
- 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB,
-
- We fill in SRC address later
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x08, 0x00
- };
+ * 0x00, 0x80, 0xC8, 0x79, 0xB3, 0xCB,
+ *
+ * We fill in SRC address later
+ * 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ * 0x08, 0x00
+ * };
*/
__u16 pad; /* pad out the hh struct to an even 16 bytes */
@@ -458,7 +461,8 @@ struct pktgen_thread {
char result[512];
/* Field for thread to receive "posted" events terminate,
- stop ifs etc. */
+ * stop ifs etc.
+ */
u32 control;
int cpu;
@@ -472,8 +476,7 @@ struct pktgen_thread {
#define FIND 0
static const char version[] =
- "Packet Generator for packet performance testing. "
- "Version: " VERSION "\n";
+ "Packet Generator for packet performance testing. Version: " VERSION "\n";
static int pktgen_remove_device(struct pktgen_thread *t, struct pktgen_dev *i);
static int pktgen_add_device(struct pktgen_thread *t, const char *ifname);
@@ -624,8 +627,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_printf(seq, "%pM\n", pkt_dev->dst_mac);
seq_printf(seq,
- " udp_src_min: %d udp_src_max: %d"
- " udp_dst_min: %d udp_dst_max: %d\n",
+ " udp_src_min: %d udp_src_max: %d udp_dst_min: %d udp_dst_max: %d\n",
pkt_dev->udp_src_min, pkt_dev->udp_src_max,
pkt_dev->udp_dst_min, pkt_dev->udp_dst_max);
@@ -754,6 +756,7 @@ static ssize_t hex32_arg(const char __user *user_buffer, size_t maxlen,
for (; i < maxlen; i++) {
int value;
char c;
+
if (get_user(c, &user_buffer[i]))
return -EFAULT;
value = hex_to_bin(c);
@@ -773,6 +776,7 @@ static ssize_t count_trail_chars(const char __user *user_buffer, size_t maxlen)
for (i = 0; i < maxlen; i++) {
char c;
+
if (get_user(c, &user_buffer[i]))
return -EFAULT;
switch (c) {
@@ -799,6 +803,7 @@ static ssize_t num_arg(const char __user *user_buffer, size_t maxlen,
for (i = 0; i < maxlen; i++) {
char c;
+
if (get_user(c, &user_buffer[i]))
return -EFAULT;
if ((c >= '0') && (c <= '9')) {
@@ -816,6 +821,7 @@ static ssize_t strn_len(const char __user *user_buffer, size_t maxlen)
for (i = 0; i < maxlen; i++) {
char c;
+
if (get_user(c, &user_buffer[i]))
return -EFAULT;
switch (c) {
@@ -974,8 +980,8 @@ static __u32 pktgen_read_flag(const char *f, bool *disable)
}
static ssize_t pktgen_if_write(struct file *file,
- const char __user * user_buffer, size_t count,
- loff_t * offset)
+ const char __user *user_buffer, size_t count,
+ loff_t *offset)
{
struct seq_file *seq = file->private_data;
struct pktgen_dev *pkt_dev = seq->private;
@@ -1307,9 +1313,9 @@ static ssize_t pktgen_if_write(struct file *file,
put_page(pkt_dev->page);
pkt_dev->page = NULL;
}
- }
- else
+ } else {
sprintf(pg_result, "ERROR: node not possible");
+ }
return count;
}
if (!strcmp(name, "xmit_mode")) {
@@ -1413,8 +1419,7 @@ static ssize_t pktgen_if_write(struct file *file,
return -EFAULT;
buf[len] = 0;
if (strcmp(buf, pkt_dev->dst_min) != 0) {
- memset(pkt_dev->dst_min, 0, sizeof(pkt_dev->dst_min));
- strcpy(pkt_dev->dst_min, buf);
+ strscpy_pad(pkt_dev->dst_min, buf);
pkt_dev->daddr_min = in_aton(pkt_dev->dst_min);
pkt_dev->cur_daddr = pkt_dev->daddr_min;
}
@@ -1434,8 +1439,7 @@ static ssize_t pktgen_if_write(struct file *file,
return -EFAULT;
buf[len] = 0;
if (strcmp(buf, pkt_dev->dst_max) != 0) {
- memset(pkt_dev->dst_max, 0, sizeof(pkt_dev->dst_max));
- strcpy(pkt_dev->dst_max, buf);
+ strscpy_pad(pkt_dev->dst_max, buf);
pkt_dev->daddr_max = in_aton(pkt_dev->dst_max);
pkt_dev->cur_daddr = pkt_dev->daddr_max;
}
@@ -1544,8 +1548,7 @@ static ssize_t pktgen_if_write(struct file *file,
return -EFAULT;
buf[len] = 0;
if (strcmp(buf, pkt_dev->src_min) != 0) {
- memset(pkt_dev->src_min, 0, sizeof(pkt_dev->src_min));
- strcpy(pkt_dev->src_min, buf);
+ strscpy_pad(pkt_dev->src_min, buf);
pkt_dev->saddr_min = in_aton(pkt_dev->src_min);
pkt_dev->cur_saddr = pkt_dev->saddr_min;
}
@@ -1565,8 +1568,7 @@ static ssize_t pktgen_if_write(struct file *file,
return -EFAULT;
buf[len] = 0;
if (strcmp(buf, pkt_dev->src_max) != 0) {
- memset(pkt_dev->src_max, 0, sizeof(pkt_dev->src_max));
- strcpy(pkt_dev->src_max, buf);
+ strscpy_pad(pkt_dev->src_max, buf);
pkt_dev->saddr_max = in_aton(pkt_dev->src_max);
pkt_dev->cur_saddr = pkt_dev->saddr_max;
}
@@ -1909,8 +1911,8 @@ static int pktgen_thread_show(struct seq_file *seq, void *v)
}
static ssize_t pktgen_thread_write(struct file *file,
- const char __user * user_buffer,
- size_t count, loff_t * offset)
+ const char __user *user_buffer,
+ size_t count, loff_t *offset)
{
struct seq_file *seq = file->private_data;
struct pktgen_thread *t = seq->private;
@@ -1962,6 +1964,7 @@ static ssize_t pktgen_thread_write(struct file *file,
if (!strcmp(name, "add_device")) {
char f[32];
+
memset(f, 0, 32);
max = min(sizeof(f) - 1, count - i);
len = strn_len(&user_buffer[i], max);
@@ -2397,13 +2400,14 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
/* If there was already an IPSEC SA, we keep it as is, else
* we go look for it ...
-*/
+ */
#define DUMMY_MARK 0
static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow)
{
#ifdef CONFIG_XFRM
struct xfrm_state *x = pkt_dev->flows[flow].x;
struct pktgen_net *pn = net_generic(dev_net(pkt_dev->odev), pg_net_id);
+
if (!x) {
if (pkt_dev->spi) {
@@ -2436,6 +2440,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) {
__u16 t;
+
if (pkt_dev->flags & F_QUEUE_MAP_RND) {
t = get_random_u32_inclusive(pkt_dev->queue_map_min,
pkt_dev->queue_map_max);
@@ -2517,6 +2522,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->flags & F_MPLS_RND) {
unsigned int i;
+
for (i = 0; i < pkt_dev->nr_labels; i++)
if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
@@ -2561,6 +2567,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
imx = ntohl(pkt_dev->saddr_max);
if (imn < imx) {
__u32 t;
+
if (pkt_dev->flags & F_IPSRC_RND)
t = get_random_u32_inclusive(imn, imx - 1);
else {
@@ -2581,6 +2588,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (imn < imx) {
__u32 t;
__be32 s;
+
if (pkt_dev->flags & F_IPDST_RND) {
do {
@@ -2628,6 +2636,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
__u32 t;
+
if (pkt_dev->flags & F_TXSIZE_RND) {
t = get_random_u32_inclusive(pkt_dev->min_pkt_size,
pkt_dev->max_pkt_size - 1);
@@ -2694,7 +2703,8 @@ static int pktgen_output_ipsec(struct sk_buff *skb, struct pktgen_dev *pkt_dev)
if (!x)
return 0;
 /* XXX: we don't support tunnel mode for now until
- * we resolve the dst issue */
+ * we resolve the dst issue
+ */
if ((x->props.mode != XFRM_MODE_TRANSPORT) && (pkt_dev->spi == 0))
return 0;
@@ -2729,8 +2739,10 @@ static void free_SAs(struct pktgen_dev *pkt_dev)
if (pkt_dev->cflows) {
/* let go of the SAs if we have them */
int i;
+
for (i = 0; i < pkt_dev->cflows; i++) {
struct xfrm_state *x = pkt_dev->flows[i].x;
+
if (x) {
xfrm_state_put(x);
pkt_dev->flows[i].x = NULL;
@@ -2745,6 +2757,7 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
if (pkt_dev->flags & F_IPSEC) {
struct xfrm_state *x = pkt_dev->flows[pkt_dev->curfl].x;
int nhead = 0;
+
if (x) {
struct ethhdr *eth;
struct iphdr *iph;
@@ -2788,6 +2801,7 @@ err:
static void mpls_push(__be32 *mpls, struct pktgen_dev *pkt_dev)
{
unsigned int i;
+
for (i = 0; i < pkt_dev->nr_labels; i++)
*mpls++ = pkt_dev->labels[i] & ~MPLS_STACK_BOTTOM;
@@ -2900,7 +2914,7 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
skb->dev = dev;
}
} else {
- skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
+ skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
}
/* the caller pre-fetches from skb->data and reserves for the mac hdr */
@@ -2981,7 +2995,7 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
skb->priority = pkt_dev->skb_priority;
memcpy(eth, pkt_dev->hh, 12);
- *(__be16 *) & eth[12] = protocol;
+ *(__be16 *)&eth[12] = protocol;
/* Eth + IPh + UDPh + mpls */
datalen = pkt_dev->cur_pkt_size - 14 - 20 - 8 -
@@ -3210,11 +3224,11 @@ static void pktgen_run(struct pktgen_thread *t)
set_pkt_overhead(pkt_dev);
- strcpy(pkt_dev->result, "Starting");
+ strscpy(pkt_dev->result, "Starting");
pkt_dev->running = 1; /* Cranke yeself! */
started++;
} else
- strcpy(pkt_dev->result, "Error starting");
+ strscpy(pkt_dev->result, "Error starting");
}
rcu_read_unlock();
if (started)
@@ -3473,6 +3487,7 @@ static void pktgen_rem_thread(struct pktgen_thread *t)
static void pktgen_resched(struct pktgen_dev *pkt_dev)
{
ktime_t idle_start = ktime_get();
+
schedule();
pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start));
}
@@ -3788,7 +3803,8 @@ static int add_dev_to_thread(struct pktgen_thread *t,
* userspace on another CPU than the kthread. The if_lock()
* is used here to sync with concurrent instances of
* _rem_dev_from_if_list() invoked via kthread, which is also
- * updating the if_list */
+ * updating the if_list
+ */
if_lock(t);
if (pkt_dev->pg_thread) {
@@ -3826,7 +3842,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
if (!pkt_dev)
return -ENOMEM;
- strcpy(pkt_dev->odevname, ifname);
+ strscpy(pkt_dev->odevname, ifname);
pkt_dev->flows = vzalloc_node(array_size(MAX_CFLOWS,
sizeof(struct flow_state)),
node);
@@ -3983,7 +3999,8 @@ static int pktgen_remove_device(struct pktgen_thread *t,
/* Remove proc before if_list entry, because add_device uses
* list to determine if interface already exist, avoid race
- * with proc_create_data() */
+ * with proc_create_data()
+ */
proc_remove(pkt_dev->entry);
/* And update the thread if_list */
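
The strscpy() conversions in the pktgen hunks above are more than style: strcpy() has no bound on the destination, while strscpy() is limited by the destination size and always NUL-terminates. A minimal sketch of the contract, with an illustrative buffer that is not from the patch:

	char result[16];	/* illustrative; pkt_dev->result is larger */
	ssize_t n;

	/* Two-argument form, as used in the hunks above: the bound is
	 * inferred from sizeof(result), so the destination must be a
	 * real array, not a pointer.
	 */
	n = strscpy(result, "Starting");	/* returns 8, bytes copied */

	n = strscpy(result, "a status string longer than the buffer");
	if (n == -E2BIG)
		;	/* truncated to 15 chars + NUL; still a valid string */
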
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c5a7f41982a5..ea2e15de8d99 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3580,7 +3580,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
u32 portid, const struct nlmsghdr *nlh)
{
- unsigned int old_flags;
+ unsigned int old_flags, changed;
int err;
old_flags = dev->flags;
@@ -3591,12 +3591,13 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
return err;
}
- if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
- __dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags), portid, nlh);
- } else {
- dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
- __dev_notify_flags(dev, old_flags, ~0U, portid, nlh);
+ changed = old_flags ^ dev->flags;
+ if (dev->rtnl_link_initializing) {
+ dev->rtnl_link_initializing = false;
+ changed = ~0U;
}
+
+ __dev_notify_flags(dev, old_flags, changed, portid, nlh);
return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);
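
The rtnl_configure_link() rewrite above folds the two notification paths into one by computing a changed-flags mask first. A worked example of the mask, with illustrative flag values:

	/* XOR yields exactly the bits that flipped. Bringing a link up:
	 *
	 *	old_flags  = IFF_BROADCAST | IFF_MULTICAST
	 *	dev->flags = IFF_BROADCAST | IFF_MULTICAST | IFF_UP
	 *	changed    = old_flags ^ dev->flags  ==  IFF_UP
	 *
	 * For a link that is still initializing no state was ever
	 * reported, so the mask widens to ~0U and userspace sees every
	 * flag as changed exactly once.
	 */
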
@@ -3654,7 +3655,7 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
dev_net_set(dev, net);
dev->rtnl_link_ops = ops;
- dev->rtnl_link_state = RTNL_LINK_INITIALIZING;
+ dev->rtnl_link_initializing = true;
if (tb[IFLA_MTU]) {
u32 mtu = nla_get_u32(tb[IFLA_MTU]);
@@ -4883,12 +4884,12 @@ static int valid_fdb_dump_strict(const struct nlmsghdr *nlh,
struct ndmsg *ndm;
int err, i;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
+ ndm = nlmsg_payload(nlh, sizeof(*ndm));
+ if (!ndm) {
NL_SET_ERR_MSG(extack, "Invalid header for fdb dump request");
return -EINVAL;
}
- ndm = nlmsg_data(nlh);
if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
ndm->ndm_flags || ndm->ndm_type) {
NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request");
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 568779d5a0ef..9a3965680451 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -156,45 +156,3 @@ u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
-
-#if IS_ENABLED(CONFIG_IP_DCCP)
-u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
- __be16 sport, __be16 dport)
-{
- u64 seq;
- net_secret_init();
- seq = siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u32)sport << 16 | (__force u32)dport,
- &net_secret);
- seq += ktime_get_real_ns();
- seq &= (1ull << 48) - 1;
- return seq;
-}
-EXPORT_SYMBOL(secure_dccp_sequence_number);
-
-#if IS_ENABLED(CONFIG_IPV6)
-u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
- __be16 sport, __be16 dport)
-{
- const struct {
- struct in6_addr saddr;
- struct in6_addr daddr;
- __be16 sport;
- __be16 dport;
- } __aligned(SIPHASH_ALIGNMENT) combined = {
- .saddr = *(struct in6_addr *)saddr,
- .daddr = *(struct in6_addr *)daddr,
- .sport = sport,
- .dport = dport
- };
- u64 seq;
- net_secret_init();
- seq = siphash(&combined, offsetofend(typeof(combined), dport),
- &net_secret);
- seq += ktime_get_real_ns();
- seq &= (1ull << 48) - 1;
- return seq;
-}
-EXPORT_SYMBOL(secure_dccpv6_sequence_number);
-#endif
-#endif
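
The removed secure_seq helpers all follow the same construction, worth keeping in mind while reading the rest of the DCCP removal: a keyed SipHash over the connection 4-tuple, offset by the clock and masked to DCCP's 48-bit sequence space. A condensed sketch using the names from the removed IPv4 helper:

	u64 isn = siphash_3u32((__force u32)saddr, (__force u32)daddr,
			       (__force u32)sport << 16 | (__force u32)dport,
			       &net_secret);
	isn += ktime_get_real_ns();	/* per-connection time offset */
	isn &= (1ULL << 48) - 1;	/* DCCP sequence numbers are 48 bits */
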
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6cbf77bc61fc..d73ad79fe739 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -893,11 +893,6 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
-static bool is_pp_netmem(netmem_ref netmem)
-{
- return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE;
-}
-
int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb,
unsigned int headroom)
{
@@ -995,14 +990,7 @@ bool napi_pp_put_page(netmem_ref netmem)
{
netmem = netmem_compound_head(netmem);
- /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation
- * in order to preserve any existing bits, such as bit 0 for the
- * head page of compound page and bit 1 for pfmemalloc page, so
- * mask those bits for freeing side when doing below checking,
- * and page_is_pfmemalloc() is checked in __page_pool_put_page()
- * to avoid recycling the pfmemalloc page.
- */
- if (unlikely(!is_pp_netmem(netmem)))
+ if (unlikely(!netmem_is_pp(netmem)))
return false;
page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false);
@@ -1042,7 +1030,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb)
for (i = 0; i < shinfo->nr_frags; i++) {
head_netmem = netmem_compound_head(shinfo->frags[i].netmem);
- if (likely(is_pp_netmem(head_netmem)))
+ if (likely(netmem_is_pp(head_netmem)))
page_pool_ref_netmem(head_netmem);
else
page_ref_inc(netmem_to_page(head_netmem));
@@ -3239,7 +3227,7 @@ static int sendmsg_unlocked(struct sock *sk, struct msghdr *msg)
typedef int (*sendmsg_func)(struct sock *sk, struct msghdr *msg);
static int __skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset,
- int len, sendmsg_func sendmsg)
+ int len, sendmsg_func sendmsg, int flags)
{
unsigned int orig_len = len;
struct sk_buff *head = skb;
@@ -3257,7 +3245,7 @@ do_frag_list:
kv.iov_base = skb->data + offset;
kv.iov_len = slen;
memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
+ msg.msg_flags = MSG_DONTWAIT | flags;
iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &kv, 1, slen);
ret = INDIRECT_CALL_2(sendmsg, sendmsg_locked,
@@ -3294,7 +3282,8 @@ do_frag_list:
while (slen) {
struct bio_vec bvec;
struct msghdr msg = {
- .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT,
+ .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT |
+ flags,
};
bvec_set_page(&bvec, skb_frag_page(frag), slen,
@@ -3340,14 +3329,21 @@ error:
int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
int len)
{
- return __skb_send_sock(sk, skb, offset, len, sendmsg_locked);
+ return __skb_send_sock(sk, skb, offset, len, sendmsg_locked, 0);
}
EXPORT_SYMBOL_GPL(skb_send_sock_locked);
+int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb,
+ int offset, int len, int flags)
+{
+ return __skb_send_sock(sk, skb, offset, len, sendmsg_locked, flags);
+}
+EXPORT_SYMBOL_GPL(skb_send_sock_locked_with_flags);
+
/* Send skb data on a socket. Socket must be unlocked. */
int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len)
{
- return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked);
+ return __skb_send_sock(sk, skb, offset, len, sendmsg_unlocked, 0);
}
/**
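
The new skb_send_sock_locked_with_flags() simply threads caller-supplied flags into msg_flags alongside MSG_DONTWAIT for every fragment of the skb. A hypothetical caller (not from this diff) that wants the transport to coalesce a follow-up send might use it like this:

	lock_sock(sk);
	/* MSG_MORE is ORed into msg_flags for each fragment sent */
	ret = skb_send_sock_locked_with_flags(sk, skb, 0, skb->len, MSG_MORE);
	release_sock(sk);
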
diff --git a/net/core/sock.c b/net/core/sock.c
index e54449c9ab0b..b64df2463300 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2494,17 +2494,14 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
*/
if (!is_charged)
RCU_INIT_POINTER(newsk->sk_filter, NULL);
- sk_free_unlock_clone(newsk);
- newsk = NULL;
- goto out;
+
+ goto free;
}
+
RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
- if (bpf_sk_storage_clone(sk, newsk)) {
- sk_free_unlock_clone(newsk);
- newsk = NULL;
- goto out;
- }
+ if (bpf_sk_storage_clone(sk, newsk))
+ goto free;
/* Clear sk_user_data if parent had the pointer tagged
* as not suitable for copying when cloning.
@@ -2534,18 +2531,17 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
net_enable_timestamp();
out:
return newsk;
-}
-EXPORT_SYMBOL_GPL(sk_clone_lock);
-
-void sk_free_unlock_clone(struct sock *sk)
-{
+free:
/* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- sk->sk_destruct = NULL;
- bh_unlock_sock(sk);
- sk_free(sk);
+ * destructor and make plain sk_free()
+ */
+ newsk->sk_destruct = NULL;
+ bh_unlock_sock(newsk);
+ sk_free(newsk);
+ newsk = NULL;
+ goto out;
}
-EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
+EXPORT_SYMBOL_GPL(sk_clone_lock);
static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
{
@@ -4004,7 +4000,7 @@ static int assign_proto_idx(struct proto *prot)
{
prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
- if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
+ if (unlikely(prot->inuse_idx == PROTO_INUSE_NR)) {
pr_err("PROTO_INUSE_NR exhausted\n");
return -ENOSPC;
}
@@ -4015,7 +4011,7 @@ static int assign_proto_idx(struct proto *prot)
static void release_proto_idx(struct proto *prot)
{
- if (prot->inuse_idx != PROTO_INUSE_NR - 1)
+ if (prot->inuse_idx != PROTO_INUSE_NR)
clear_bit(prot->inuse_idx, proto_inuse_idx);
}
#else
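
The assign_proto_idx() fix above corrects an off-by-one in the exhaustion test: find_first_zero_bit() returns the bitmap size, not size - 1, when no zero bit exists. A standalone illustration of that contract:

	DECLARE_BITMAP(map, 4);
	unsigned long idx;

	bitmap_fill(map, 4);			/* all four bits set */
	idx = find_first_zero_bit(map, 4);	/* == 4: nothing free */

	bitmap_zero(map, 4);
	set_bit(0, map);
	idx = find_first_zero_bit(map, 4);	/* == 1: a usable index */
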
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index a08eed9b9142..b23594c767f2 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -264,8 +264,6 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
switch (nlh->nlmsg_type) {
case TCPDIAG_GETSOCK:
- case DCCPDIAG_GETSOCK:
-
if (!rcu_access_pointer(inet_rcv_compat))
sock_load_diag_module(AF_INET, 0);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index c7769ee0d9c5..5dbb2c6f371d 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -201,7 +201,7 @@ static int rps_sock_flow_sysctl(const struct ctl_table *table, int write,
if (orig_sock_table) {
static_branch_dec(&rps_needed);
static_branch_dec(&rfs_needed);
- kvfree_rcu_mightsleep(orig_sock_table);
+ kvfree_rcu(orig_sock_table, rcu);
}
}
}
@@ -239,7 +239,7 @@ static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
lockdep_is_held(&flow_limit_update_mutex));
if (cur && !cpumask_test_cpu(i, mask)) {
RCU_INIT_POINTER(sd->flow_limit, NULL);
- kfree_rcu_mightsleep(cur);
+ kfree_rcu(cur, rcu);
} else if (!cur && cpumask_test_cpu(i, mask)) {
cur = kzalloc_node(len, GFP_KERNEL,
cpu_to_node(i));
@@ -248,7 +248,7 @@ static int flow_limit_cpu_sysctl(const struct ctl_table *table, int write,
ret = -ENOMEM;
goto write_unlock;
}
- cur->num_buckets = netdev_flow_limit_table_len;
+ cur->log_buckets = ilog2(netdev_flow_limit_table_len);
rcu_assign_pointer(sd->flow_limit, cur);
}
}
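
Storing ilog2() of the table length, as the last hunk does, is enough to recover both the bucket count and a cheap modulo, provided the length is a power of two. An illustrative sketch (the surrounding flow-limit lookup code is outside this excerpt, and hash is a stand-in for the per-flow hash):

	unsigned int log_buckets = ilog2(netdev_flow_limit_table_len);
	unsigned int num_buckets = 1U << log_buckets;	/* count recovered */
	u32 hash = 0;					/* illustrative input */
	u32 bucket = hash & (num_buckets - 1);		/* cheap modulo */
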
diff --git a/net/core/xdp.c b/net/core/xdp.c
index f86eedad586a..a014df88620c 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -17,6 +17,7 @@
#include <net/page_pool/helpers.h>
#include <net/hotdata.h>
+#include <net/netdev_lock.h>
#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
@@ -437,8 +438,8 @@ void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type,
netmem = netmem_compound_head(netmem);
if (napi_direct && xdp_return_frame_no_direct())
napi_direct = false;
- /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE)
- * as mem->type knows this a page_pool page
+ /* No need to check netmem_is_pp() as mem->type knows this is a
+ * page_pool page
*/
page_pool_put_full_netmem(netmem_get_pp(netmem), netmem,
napi_direct);
@@ -991,17 +992,26 @@ static int __init xdp_metadata_init(void)
}
late_initcall(xdp_metadata_init);
-void xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
+void xdp_set_features_flag_locked(struct net_device *dev, xdp_features_t val)
{
val &= NETDEV_XDP_ACT_MASK;
if (dev->xdp_features == val)
return;
+ netdev_assert_locked_or_invisible(dev);
dev->xdp_features = val;
if (dev->reg_state == NETREG_REGISTERED)
call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
}
+EXPORT_SYMBOL_GPL(xdp_set_features_flag_locked);
+
+void xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
+{
+ netdev_lock(dev);
+ xdp_set_features_flag_locked(dev, val);
+ netdev_unlock(dev);
+}
EXPORT_SYMBOL_GPL(xdp_set_features_flag);
void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
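
The xdp.c change splits the feature-flag setter into a locked worker plus a wrapper that takes the lock itself, a common kernel pattern. A hypothetical driver picks the variant matching its context:

	/* Plain probe/setup context: the wrapper takes netdev_lock(). */
	xdp_set_features_flag(dev, NETDEV_XDP_ACT_BASIC |
				   NETDEV_XDP_ACT_REDIRECT);

	/* Already under netdev_lock(dev), e.g. in an instance-locked
	 * callback: use the _locked variant to avoid self-deadlock.
	 */
	xdp_set_features_flag_locked(dev, NETDEV_XDP_ACT_BASIC);
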
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
deleted file mode 100644
index 0c7d2f66ba27..000000000000
--- a/net/dccp/Kconfig
+++ /dev/null
@@ -1,46 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-menuconfig IP_DCCP
- tristate "The DCCP Protocol"
- depends on INET
- help
- Datagram Congestion Control Protocol (RFC 4340)
-
- From https://www.ietf.org/rfc/rfc4340.txt:
-
- The Datagram Congestion Control Protocol (DCCP) is a transport
- protocol that implements bidirectional, unicast connections of
- congestion-controlled, unreliable datagrams. It should be suitable
- for use by applications such as streaming media, Internet telephony,
- and on-line games.
-
- To compile this protocol support as a module, choose M here: the
- module will be called dccp.
-
- If in doubt, say N.
-
-if IP_DCCP
-
-config INET_DCCP_DIAG
- depends on INET_DIAG
- def_tristate y if (IP_DCCP = y && INET_DIAG = y)
- def_tristate m
-
-source "net/dccp/ccids/Kconfig"
-
-menu "DCCP Kernel Hacking"
- depends on DEBUG_KERNEL=y
-
-config IP_DCCP_DEBUG
- bool "DCCP debug messages"
- help
- Only use this if you're hacking DCCP.
-
- When compiling DCCP as a module, this debugging output can be toggled
- by setting the parameter dccp_debug of the `dccp' module to 0 or 1.
-
- Just say N.
-
-
-endmenu
-
-endif # IP_DCCP
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
deleted file mode 100644
index 5b4ff37bc806..000000000000
--- a/net/dccp/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o
-
-dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \
- qpolicy.o
-#
-# CCID algorithms to be used by dccp.ko
-#
-# CCID-2 is default (RFC 4340, p. 77) and has Ack Vectors as dependency
-dccp-y += ccids/ccid2.o ackvec.o
-dccp-$(CONFIG_IP_DCCP_CCID3) += ccids/ccid3.o
-dccp-$(CONFIG_IP_DCCP_TFRC_LIB) += ccids/lib/tfrc.o \
- ccids/lib/tfrc_equation.o \
- ccids/lib/packet_history.o \
- ccids/lib/loss_interval.o
-
-dccp_ipv4-y := ipv4.o
-
-# build dccp_ipv6 as module whenever either IPv6 or DCCP is a module
-obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
-dccp_ipv6-y := ipv6.o
-
-obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
-
-dccp-$(CONFIG_SYSCTL) += sysctl.o
-
-dccp_diag-y := diag.o
-
-# build with local directory for trace.h
-CFLAGS_proto.o := -I$(src)
diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
deleted file mode 100644
index 1cba001bb4c8..000000000000
--- a/net/dccp/ackvec.c
+++ /dev/null
@@ -1,403 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/dccp/ackvec.c
- *
- * An implementation of Ack Vectors for the DCCP protocol
- * Copyright (c) 2007 University of Aberdeen, Scotland, UK
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- */
-#include "dccp.h"
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-
-static struct kmem_cache *dccp_ackvec_slab;
-static struct kmem_cache *dccp_ackvec_record_slab;
-
-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
-{
- struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
-
- if (av != NULL) {
- av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
- INIT_LIST_HEAD(&av->av_records);
- }
- return av;
-}
-
-static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
-{
- struct dccp_ackvec_record *cur, *next;
-
- list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
- kmem_cache_free(dccp_ackvec_record_slab, cur);
- INIT_LIST_HEAD(&av->av_records);
-}
-
-void dccp_ackvec_free(struct dccp_ackvec *av)
-{
- if (likely(av != NULL)) {
- dccp_ackvec_purge_records(av);
- kmem_cache_free(dccp_ackvec_slab, av);
- }
-}
-
-/**
- * dccp_ackvec_update_records - Record information about sent Ack Vectors
- * @av: Ack Vector records to update
- * @seqno: Sequence number of the packet carrying the Ack Vector just sent
- * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
- */
-int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
-{
- struct dccp_ackvec_record *avr;
-
- avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
- if (avr == NULL)
- return -ENOBUFS;
-
- avr->avr_ack_seqno = seqno;
- avr->avr_ack_ptr = av->av_buf_head;
- avr->avr_ack_ackno = av->av_buf_ackno;
- avr->avr_ack_nonce = nonce_sum;
- avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
- /*
- * When the buffer overflows, we keep no more than one record. This is
- * the simplest way of disambiguating sender-Acks dating from before the
- * overflow from sender-Acks which refer to after the overflow; a simple
- * solution is preferable here since we are handling an exception.
- */
- if (av->av_overflow)
- dccp_ackvec_purge_records(av);
- /*
- * Since GSS is incremented for each packet, the list is automatically
- * arranged in descending order of @ack_seqno.
- */
- list_add(&avr->avr_node, &av->av_records);
-
- dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
- (unsigned long long)avr->avr_ack_seqno,
- (unsigned long long)avr->avr_ack_ackno,
- avr->avr_ack_runlen);
- return 0;
-}
-
-static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
- const u64 ackno)
-{
- struct dccp_ackvec_record *avr;
- /*
- * Exploit that records are inserted in descending order of sequence
- * number, start with the oldest record first. If @ackno is `before'
- * the earliest ack_ackno, the packet is too old to be considered.
- */
- list_for_each_entry_reverse(avr, av_list, avr_node) {
- if (avr->avr_ack_seqno == ackno)
- return avr;
- if (before48(ackno, avr->avr_ack_seqno))
- break;
- }
- return NULL;
-}
-
-/*
- * Buffer index and length computation using modulo-buffersize arithmetic.
- * Note that, as pointers move from right to left, head is `before' tail.
- */
-static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
-{
- return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
-}
-
-static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
-{
- return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
-}
-
-u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
-{
- if (unlikely(av->av_overflow))
- return DCCPAV_MAX_ACKVEC_LEN;
- return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
-}
-
-/**
- * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1
- * @av: non-empty buffer to update
- * @distance: negative or zero distance of @seqno from buf_ackno downward
- * @seqno: the (old) sequence number whose record is to be updated
- * @state: state in which packet carrying @seqno was received
- */
-static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
- u64 seqno, enum dccp_ackvec_states state)
-{
- u16 ptr = av->av_buf_head;
-
- BUG_ON(distance > 0);
- if (unlikely(dccp_ackvec_is_empty(av)))
- return;
-
- do {
- u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
-
- if (distance + runlen >= 0) {
- /*
- * Only update the state if packet has not been received
- * yet. This is OK as per the second table in RFC 4340,
- * 11.4.1; i.e. here we are using the following table:
- * RECEIVED
- * 0 1 3
- * S +---+---+---+
- * T 0 | 0 | 0 | 0 |
- * O +---+---+---+
- * R 1 | 1 | 1 | 1 |
- * E +---+---+---+
- * D 3 | 0 | 1 | 3 |
- * +---+---+---+
- * The "Not Received" state was set by reserve_seats().
- */
- if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
- av->av_buf[ptr] = state;
- else
- dccp_pr_debug("Not changing %llu state to %u\n",
- (unsigned long long)seqno, state);
- break;
- }
-
- distance += runlen + 1;
- ptr = __ackvec_idx_add(ptr, 1);
-
- } while (ptr != av->av_buf_tail);
-}
-
-/* Mark @num entries after buf_head as "Not yet received". */
-static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
-{
- u16 start = __ackvec_idx_add(av->av_buf_head, 1),
- len = DCCPAV_MAX_ACKVEC_LEN - start;
-
- /* check for buffer wrap-around */
- if (num > len) {
- memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
- start = 0;
- num -= len;
- }
- if (num)
- memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
-}
-
-/**
- * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer
- * @av: container of buffer to update (can be empty or non-empty)
- * @num_packets: number of packets to register (must be >= 1)
- * @seqno: sequence number of the first packet in @num_packets
- * @state: state in which packet carrying @seqno was received
- */
-static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
- u64 seqno, enum dccp_ackvec_states state)
-{
- u32 num_cells = num_packets;
-
- if (num_packets > DCCPAV_BURST_THRESH) {
- u32 lost_packets = num_packets - 1;
-
- DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
- /*
- * We received 1 packet and have a loss of size "num_packets-1"
- * which we squeeze into num_cells-1 rather than reserving an
- * entire byte for each lost packet.
- * The reason is that the vector grows in O(burst_length); when
- * it grows too large there will no room left for the payload.
- * This is a trade-off: if a few packets out of the burst show
- * up later, their state will not be changed; it is simply too
- * costly to reshuffle/reallocate/copy the buffer each time.
- * Should such problems persist, we will need to switch to a
- * different underlying data structure.
- */
- for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
- u8 len = min_t(u32, lost_packets, DCCPAV_MAX_RUNLEN);
-
- av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
- av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
-
- lost_packets -= len;
- }
- }
-
- if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
- DCCP_CRIT("Ack Vector buffer overflow: dropping old entries");
- av->av_overflow = true;
- }
-
- av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
- if (av->av_overflow)
- av->av_buf_tail = av->av_buf_head;
-
- av->av_buf[av->av_buf_head] = state;
- av->av_buf_ackno = seqno;
-
- if (num_packets > 1)
- dccp_ackvec_reserve_seats(av, num_packets - 1);
-}
-
-/**
- * dccp_ackvec_input - Register incoming packet in the buffer
- * @av: Ack Vector to register packet to
- * @skb: Packet to register
- */
-void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
-{
- u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
- enum dccp_ackvec_states state = DCCPAV_RECEIVED;
-
- if (dccp_ackvec_is_empty(av)) {
- dccp_ackvec_add_new(av, 1, seqno, state);
- av->av_tail_ackno = seqno;
-
- } else {
- s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
- u8 *current_head = av->av_buf + av->av_buf_head;
-
- if (num_packets == 1 &&
- dccp_ackvec_state(current_head) == state &&
- dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
-
- *current_head += 1;
- av->av_buf_ackno = seqno;
-
- } else if (num_packets > 0) {
- dccp_ackvec_add_new(av, num_packets, seqno, state);
- } else {
- dccp_ackvec_update_old(av, num_packets, seqno, state);
- }
- }
-}
-
-/**
- * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
- * @av: Ack Vector record to clean
- * @ackno: last Ack Vector which has been acknowledged
- *
- * This routine is called when the peer acknowledges the receipt of Ack Vectors
- * up to and including @ackno. While based on section A.3 of RFC 4340, here
- * are additional precautions to prevent corrupted buffer state. In particular,
- * we use tail_ackno to identify outdated records; it always marks the earliest
- * packet of group (2) in 11.4.2.
- */
-void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
-{
- struct dccp_ackvec_record *avr, *next;
- u8 runlen_now, eff_runlen;
- s64 delta;
-
- avr = dccp_ackvec_lookup(&av->av_records, ackno);
- if (avr == NULL)
- return;
- /*
- * Deal with outdated acknowledgments: this arises when e.g. there are
- * several old records and the acks from the peer come in slowly. In
- * that case we may still have records that pre-date tail_ackno.
- */
- delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
- if (delta < 0)
- goto free_records;
- /*
- * Deal with overlapping Ack Vectors: don't subtract more than the
- * number of packets between tail_ackno and ack_ackno.
- */
- eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
-
- runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
- /*
- * The run length of Ack Vector cells does not decrease over time. If
- * the run length is the same as at the time the Ack Vector was sent, we
- * free the ack_ptr cell. That cell can however not be freed if the run
- * length has increased: in this case we need to move the tail pointer
- * backwards (towards higher indices), to its next-oldest neighbour.
- */
- if (runlen_now > eff_runlen) {
-
- av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
- av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
-
- /* This move may not have cleared the overflow flag. */
- if (av->av_overflow)
- av->av_overflow = (av->av_buf_head == av->av_buf_tail);
- } else {
- av->av_buf_tail = avr->avr_ack_ptr;
- /*
- * We have made sure that avr points to a valid cell within the
- * buffer. This cell is either older than head, or equals head
- * (empty buffer): in both cases we no longer have any overflow.
- */
- av->av_overflow = 0;
- }
-
- /*
- * The peer has acknowledged up to and including ack_ackno. Hence the
- * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
- */
- av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
-
-free_records:
- list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
- list_del(&avr->avr_node);
- kmem_cache_free(dccp_ackvec_record_slab, avr);
- }
-}
-
-/*
- * Routines to keep track of Ack Vectors received in an skb
- */
-int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
-{
- struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
-
- if (new == NULL)
- return -ENOBUFS;
- new->vec = vec;
- new->len = len;
- new->nonce = nonce;
-
- list_add_tail(&new->node, head);
- return 0;
-}
-EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
-
-void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
-{
- struct dccp_ackvec_parsed *cur, *next;
-
- list_for_each_entry_safe(cur, next, parsed_chunks, node)
- kfree(cur);
- INIT_LIST_HEAD(parsed_chunks);
-}
-EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
-
-int __init dccp_ackvec_init(void)
-{
- dccp_ackvec_slab = KMEM_CACHE(dccp_ackvec, SLAB_HWCACHE_ALIGN);
- if (dccp_ackvec_slab == NULL)
- goto out_err;
-
- dccp_ackvec_record_slab = KMEM_CACHE(dccp_ackvec_record, SLAB_HWCACHE_ALIGN);
- if (dccp_ackvec_record_slab == NULL)
- goto out_destroy_slab;
-
- return 0;
-
-out_destroy_slab:
- kmem_cache_destroy(dccp_ackvec_slab);
- dccp_ackvec_slab = NULL;
-out_err:
- DCCP_CRIT("Unable to create Ack Vector slab cache");
- return -ENOBUFS;
-}
-
-void dccp_ackvec_exit(void)
-{
- kmem_cache_destroy(dccp_ackvec_slab);
- dccp_ackvec_slab = NULL;
- kmem_cache_destroy(dccp_ackvec_record_slab);
- dccp_ackvec_record_slab = NULL;
-}
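
Before moving on to the header, the index helpers deleted above deserve a worked example, since the Ack Vector buffer grows right-to-left and all arithmetic is modulo the buffer size. Using an illustrative length of 8 in place of DCCPAV_MAX_ACKVEC_LEN:

	/* idx_add(6, 3) == (6 + 3) % 8 == 1      (wraps past the end)
	 * idx_sub(1, 3) == (1 + 8 - 3) % 8 == 6  (subtraction rewritten
	 *                                         as addition, avoiding
	 *                                         negative intermediates)
	 *
	 * With head == 6 and tail == 1, the live portion spans
	 * idx_sub(tail, head) == idx_sub(1, 6) == 3 cells, matching
	 * dccp_ackvec_buflen() above.
	 */
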
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
deleted file mode 100644
index d2c4220fb377..000000000000
--- a/net/dccp/ackvec.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _ACKVEC_H
-#define _ACKVEC_H
-/*
- * net/dccp/ackvec.h
- *
- * An implementation of Ack Vectors for the DCCP protocol
- * Copyright (c) 2007 University of Aberdeen, Scotland, UK
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@mandriva.com>
- */
-
-#include <linux/dccp.h>
-#include <linux/compiler.h>
-#include <linux/list.h>
-#include <linux/types.h>
-
-/*
- * Ack Vector buffer space is static, in multiples of %DCCP_SINGLE_OPT_MAXLEN,
- * the maximum size of a single Ack Vector. Setting %DCCPAV_NUM_ACKVECS to 1
- * will be sufficient for most cases of low Ack Ratios, using a value of 2 gives
- * more headroom if Ack Ratio is higher or when the sender acknowledges slowly.
- * The maximum value is bounded by the u16 types for indices and functions.
- */
-#define DCCPAV_NUM_ACKVECS 2
-#define DCCPAV_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * DCCPAV_NUM_ACKVECS)
-
-/* Estimated minimum average Ack Vector length - used for updating MPS */
-#define DCCPAV_MIN_OPTLEN 16
-
-/* Threshold for coping with large bursts of losses */
-#define DCCPAV_BURST_THRESH (DCCPAV_MAX_ACKVEC_LEN / 8)
-
-enum dccp_ackvec_states {
- DCCPAV_RECEIVED = 0x00,
- DCCPAV_ECN_MARKED = 0x40,
- DCCPAV_RESERVED = 0x80,
- DCCPAV_NOT_RECEIVED = 0xC0
-};
-#define DCCPAV_MAX_RUNLEN 0x3F
-
-static inline u8 dccp_ackvec_runlen(const u8 *cell)
-{
- return *cell & DCCPAV_MAX_RUNLEN;
-}
-
-static inline u8 dccp_ackvec_state(const u8 *cell)
-{
- return *cell & ~DCCPAV_MAX_RUNLEN;
-}
-
-/**
- * struct dccp_ackvec - Ack Vector main data structure
- *
- * This implements a fixed-size circular buffer within an array and is largely
- * based on Appendix A of RFC 4340.
- *
- * @av_buf: circular buffer storage area
- * @av_buf_head: head index; begin of live portion in @av_buf
- * @av_buf_tail: tail index; first index _after_ the live portion in @av_buf
- * @av_buf_ackno: highest seqno of acknowledgeable packet recorded in @av_buf
- * @av_tail_ackno: lowest seqno of acknowledgeable packet recorded in @av_buf
- * @av_buf_nonce: ECN nonce sums, each covering subsequent segments of up to
- * %DCCP_SINGLE_OPT_MAXLEN cells in the live portion of @av_buf
- * @av_overflow: if 1 then buf_head == buf_tail indicates buffer wraparound
- * @av_records: list of %dccp_ackvec_record (Ack Vectors sent previously)
- */
-struct dccp_ackvec {
- u8 av_buf[DCCPAV_MAX_ACKVEC_LEN];
- u16 av_buf_head;
- u16 av_buf_tail;
- u64 av_buf_ackno:48;
- u64 av_tail_ackno:48;
- bool av_buf_nonce[DCCPAV_NUM_ACKVECS];
- u8 av_overflow:1;
- struct list_head av_records;
-};
-
-/**
- * struct dccp_ackvec_record - Records information about sent Ack Vectors
- *
- * These list entries define the additional information which the HC-Receiver
- * keeps about recently-sent Ack Vectors; again refer to RFC 4340, Appendix A.
- *
- * @avr_node: the list node in @av_records
- * @avr_ack_seqno: sequence number of the packet the Ack Vector was sent on
- * @avr_ack_ackno: the Ack number that this record/Ack Vector refers to
- * @avr_ack_ptr: pointer into @av_buf where this record starts
- * @avr_ack_runlen: run length of @avr_ack_ptr at the time of sending
- * @avr_ack_nonce: the sum of @av_buf_nonce's at the time this record was sent
- *
- * The list as a whole is sorted in descending order by @avr_ack_seqno.
- */
-struct dccp_ackvec_record {
- struct list_head avr_node;
- u64 avr_ack_seqno:48;
- u64 avr_ack_ackno:48;
- u16 avr_ack_ptr;
- u8 avr_ack_runlen;
- u8 avr_ack_nonce:1;
-};
-
-int dccp_ackvec_init(void);
-void dccp_ackvec_exit(void);
-
-struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority);
-void dccp_ackvec_free(struct dccp_ackvec *av);
-
-void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb);
-int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seq, u8 sum);
-void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno);
-u16 dccp_ackvec_buflen(const struct dccp_ackvec *av);
-
-static inline bool dccp_ackvec_is_empty(const struct dccp_ackvec *av)
-{
- return av->av_overflow == 0 && av->av_buf_head == av->av_buf_tail;
-}
-
-/**
- * struct dccp_ackvec_parsed - Record offsets of Ack Vectors in skb
- * @vec: start of vector (offset into skb)
- * @len: length of @vec
- * @nonce: whether @vec had an ECN nonce of 0 or 1
- * @node: FIFO - arranged in descending order of ack_ackno
- *
- * This structure is used by CCIDs to access Ack Vectors in a received skb.
- */
-struct dccp_ackvec_parsed {
- u8 *vec,
- len,
- nonce:1;
- struct list_head node;
-};
-
-int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce);
-void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks);
-#endif /* _ACKVEC_H */
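
One detail of the deleted header worth spelling out: each Ack Vector cell is a single byte packing a 2-bit state above a 6-bit run length, so one byte can summarise up to 64 consecutive packets in the same state. A sketch using the deleted constants:

	u8 cell = DCCPAV_ECN_MARKED | 5;	/* state 0x40, run length 5 */

	u8 runlen = cell & DCCPAV_MAX_RUNLEN;	/* == 5: covers 5 + 1 packets */
	u8 state  = cell & ~DCCPAV_MAX_RUNLEN;	/* == DCCPAV_ECN_MARKED */
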
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
deleted file mode 100644
index 6beac5d348e2..000000000000
--- a/net/dccp/ccid.c
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/dccp/ccid.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- * CCID infrastructure
- */
-
-#include <linux/slab.h>
-
-#include "ccid.h"
-#include "ccids/lib/tfrc.h"
-
-static struct ccid_operations *ccids[] = {
- &ccid2_ops,
-#ifdef CONFIG_IP_DCCP_CCID3
- &ccid3_ops,
-#endif
-};
-
-static struct ccid_operations *ccid_by_number(const u8 id)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ccids); i++)
- if (ccids[i]->ccid_id == id)
- return ccids[i];
- return NULL;
-}
-
-/* check that up to @array_len members in @ccid_array are supported */
-bool ccid_support_check(u8 const *ccid_array, u8 array_len)
-{
- while (array_len > 0)
- if (ccid_by_number(ccid_array[--array_len]) == NULL)
- return false;
- return true;
-}
-
-/**
- * ccid_get_builtin_ccids - Populate a list of built-in CCIDs
- * @ccid_array: pointer to copy into
- * @array_len: value to return length into
- *
- * This function allocates memory - caller must see that it is freed after use.
- */
-int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len)
-{
- *ccid_array = kmalloc(ARRAY_SIZE(ccids), gfp_any());
- if (*ccid_array == NULL)
- return -ENOBUFS;
-
- for (*array_len = 0; *array_len < ARRAY_SIZE(ccids); *array_len += 1)
- (*ccid_array)[*array_len] = ccids[*array_len]->ccid_id;
- return 0;
-}
-
-int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
- char __user *optval, int __user *optlen)
-{
- u8 *ccid_array, array_len;
- int err = 0;
-
- if (ccid_get_builtin_ccids(&ccid_array, &array_len))
- return -ENOBUFS;
-
- if (put_user(array_len, optlen))
- err = -EFAULT;
- else if (len > 0 && copy_to_user(optval, ccid_array,
- len > array_len ? array_len : len))
- err = -EFAULT;
-
- kfree(ccid_array);
- return err;
-}
-
-static __printf(3, 4) struct kmem_cache *ccid_kmem_cache_create(int obj_size, char *slab_name_fmt, const char *fmt,...)
-{
- struct kmem_cache *slab;
- va_list args;
-
- va_start(args, fmt);
- vsnprintf(slab_name_fmt, CCID_SLAB_NAME_LENGTH, fmt, args);
- va_end(args);
-
- slab = kmem_cache_create(slab_name_fmt, sizeof(struct ccid) + obj_size, 0,
- SLAB_HWCACHE_ALIGN, NULL);
- return slab;
-}
-
-static void ccid_kmem_cache_destroy(struct kmem_cache *slab)
-{
- kmem_cache_destroy(slab);
-}
-
-static int __init ccid_activate(struct ccid_operations *ccid_ops)
-{
- int err = -ENOBUFS;
-
- ccid_ops->ccid_hc_rx_slab =
- ccid_kmem_cache_create(ccid_ops->ccid_hc_rx_obj_size,
- ccid_ops->ccid_hc_rx_slab_name,
- "ccid%u_hc_rx_sock",
- ccid_ops->ccid_id);
- if (ccid_ops->ccid_hc_rx_slab == NULL)
- goto out;
-
- ccid_ops->ccid_hc_tx_slab =
- ccid_kmem_cache_create(ccid_ops->ccid_hc_tx_obj_size,
- ccid_ops->ccid_hc_tx_slab_name,
- "ccid%u_hc_tx_sock",
- ccid_ops->ccid_id);
- if (ccid_ops->ccid_hc_tx_slab == NULL)
- goto out_free_rx_slab;
-
- pr_info("DCCP: Activated CCID %d (%s)\n",
- ccid_ops->ccid_id, ccid_ops->ccid_name);
- err = 0;
-out:
- return err;
-out_free_rx_slab:
- ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
- ccid_ops->ccid_hc_rx_slab = NULL;
- goto out;
-}
-
-static void ccid_deactivate(struct ccid_operations *ccid_ops)
-{
- ccid_kmem_cache_destroy(ccid_ops->ccid_hc_tx_slab);
- ccid_ops->ccid_hc_tx_slab = NULL;
- ccid_kmem_cache_destroy(ccid_ops->ccid_hc_rx_slab);
- ccid_ops->ccid_hc_rx_slab = NULL;
-
- pr_info("DCCP: Deactivated CCID %d (%s)\n",
- ccid_ops->ccid_id, ccid_ops->ccid_name);
-}
-
-struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx)
-{
- struct ccid_operations *ccid_ops = ccid_by_number(id);
- struct ccid *ccid = NULL;
-
- if (ccid_ops == NULL)
- goto out;
-
- ccid = kmem_cache_alloc(rx ? ccid_ops->ccid_hc_rx_slab :
- ccid_ops->ccid_hc_tx_slab, gfp_any());
- if (ccid == NULL)
- goto out;
- ccid->ccid_ops = ccid_ops;
- if (rx) {
- memset(ccid + 1, 0, ccid_ops->ccid_hc_rx_obj_size);
- if (ccid->ccid_ops->ccid_hc_rx_init != NULL &&
- ccid->ccid_ops->ccid_hc_rx_init(ccid, sk) != 0)
- goto out_free_ccid;
- } else {
- memset(ccid + 1, 0, ccid_ops->ccid_hc_tx_obj_size);
- if (ccid->ccid_ops->ccid_hc_tx_init != NULL &&
- ccid->ccid_ops->ccid_hc_tx_init(ccid, sk) != 0)
- goto out_free_ccid;
- }
-out:
- return ccid;
-out_free_ccid:
- kmem_cache_free(rx ? ccid_ops->ccid_hc_rx_slab :
- ccid_ops->ccid_hc_tx_slab, ccid);
- ccid = NULL;
- goto out;
-}
-
-void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk)
-{
- if (ccid != NULL) {
- if (ccid->ccid_ops->ccid_hc_rx_exit != NULL)
- ccid->ccid_ops->ccid_hc_rx_exit(sk);
- kmem_cache_free(ccid->ccid_ops->ccid_hc_rx_slab, ccid);
- }
-}
-
-void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk)
-{
- if (ccid != NULL) {
- if (ccid->ccid_ops->ccid_hc_tx_exit != NULL)
- ccid->ccid_ops->ccid_hc_tx_exit(sk);
- kmem_cache_free(ccid->ccid_ops->ccid_hc_tx_slab, ccid);
- }
-}
-
-int __init ccid_initialize_builtins(void)
-{
- int i, err = tfrc_lib_init();
-
- if (err)
- return err;
-
- for (i = 0; i < ARRAY_SIZE(ccids); i++) {
- err = ccid_activate(ccids[i]);
- if (err)
- goto unwind_registrations;
- }
- return 0;
-
-unwind_registrations:
- while(--i >= 0)
- ccid_deactivate(ccids[i]);
- tfrc_lib_exit();
- return err;
-}
-
-void ccid_cleanup_builtins(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(ccids); i++)
- ccid_deactivate(ccids[i]);
- tfrc_lib_exit();
-}
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
deleted file mode 100644
index 105f3734dadb..000000000000
--- a/net/dccp/ccid.h
+++ /dev/null
@@ -1,262 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _CCID_H
-#define _CCID_H
-/*
- * net/dccp/ccid.h
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- *
- * CCID infrastructure
- */
-
-#include <net/sock.h>
-#include <linux/compiler.h>
-#include <linux/dccp.h>
-#include <linux/list.h>
-#include <linux/module.h>
-
-/* maximum value for a CCID (RFC 4340, 19.5) */
-#define CCID_MAX 255
-#define CCID_SLAB_NAME_LENGTH 32
-
-struct tcp_info;
-
-/**
- * struct ccid_operations - Interface to Congestion-Control Infrastructure
- *
- * @ccid_id: numerical CCID ID (up to %CCID_MAX, cf. table 5 in RFC 4340, 10.)
- * @ccid_ccmps: the CCMPS including network/transport headers (0 when disabled)
- * @ccid_name: alphabetical identifier string for @ccid_id
- * @ccid_hc_{r,t}x_slab: memory pool for the receiver/sender half-connection
- * @ccid_hc_{r,t}x_obj_size: size of the receiver/sender half-connection socket
- *
- * @ccid_hc_{r,t}x_init: CCID-specific initialisation routine (before startup)
- * @ccid_hc_{r,t}x_exit: CCID-specific cleanup routine (before destruction)
- * @ccid_hc_rx_packet_recv: implements the HC-receiver side
- * @ccid_hc_{r,t}x_parse_options: parsing routine for CCID/HC-specific options
- * @ccid_hc_{r,t}x_insert_options: insert routine for CCID/HC-specific options
- * @ccid_hc_tx_packet_recv: implements feedback processing for the HC-sender
- * @ccid_hc_tx_send_packet: implements the sending part of the HC-sender
- * @ccid_hc_tx_packet_sent: does accounting for packets in flight by HC-sender
- * @ccid_hc_{r,t}x_get_info: INET_DIAG information for HC-receiver/sender
- * @ccid_hc_{r,t}x_getsockopt: socket options specific to HC-receiver/sender
- */
-struct ccid_operations {
- unsigned char ccid_id;
- __u32 ccid_ccmps;
- const char *ccid_name;
- struct kmem_cache *ccid_hc_rx_slab,
- *ccid_hc_tx_slab;
- char ccid_hc_rx_slab_name[CCID_SLAB_NAME_LENGTH];
- char ccid_hc_tx_slab_name[CCID_SLAB_NAME_LENGTH];
- __u32 ccid_hc_rx_obj_size,
- ccid_hc_tx_obj_size;
- /* Interface Routines */
- int (*ccid_hc_rx_init)(struct ccid *ccid, struct sock *sk);
- int (*ccid_hc_tx_init)(struct ccid *ccid, struct sock *sk);
- void (*ccid_hc_rx_exit)(struct sock *sk);
- void (*ccid_hc_tx_exit)(struct sock *sk);
- void (*ccid_hc_rx_packet_recv)(struct sock *sk,
- struct sk_buff *skb);
- int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
- u8 opt, u8 *val, u8 len);
- int (*ccid_hc_rx_insert_options)(struct sock *sk,
- struct sk_buff *skb);
- void (*ccid_hc_tx_packet_recv)(struct sock *sk,
- struct sk_buff *skb);
- int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
- u8 opt, u8 *val, u8 len);
- int (*ccid_hc_tx_send_packet)(struct sock *sk,
- struct sk_buff *skb);
- void (*ccid_hc_tx_packet_sent)(struct sock *sk,
- unsigned int len);
- void (*ccid_hc_rx_get_info)(struct sock *sk,
- struct tcp_info *info);
- void (*ccid_hc_tx_get_info)(struct sock *sk,
- struct tcp_info *info);
- int (*ccid_hc_rx_getsockopt)(struct sock *sk,
- const int optname, int len,
- u32 __user *optval,
- int __user *optlen);
- int (*ccid_hc_tx_getsockopt)(struct sock *sk,
- const int optname, int len,
- u32 __user *optval,
- int __user *optlen);
-};
-
-extern struct ccid_operations ccid2_ops;
-#ifdef CONFIG_IP_DCCP_CCID3
-extern struct ccid_operations ccid3_ops;
-#endif
-
-int ccid_initialize_builtins(void);
-void ccid_cleanup_builtins(void);
-
-struct ccid {
- struct ccid_operations *ccid_ops;
- char ccid_priv[];
-};
-
-static inline void *ccid_priv(const struct ccid *ccid)
-{
- return (void *)ccid->ccid_priv;
-}
-
-bool ccid_support_check(u8 const *ccid_array, u8 array_len);
-int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len);
-int ccid_getsockopt_builtin_ccids(struct sock *sk, int len,
- char __user *, int __user *);
-
-struct ccid *ccid_new(const u8 id, struct sock *sk, bool rx);
-
-static inline int ccid_get_current_rx_ccid(struct dccp_sock *dp)
-{
- struct ccid *ccid = dp->dccps_hc_rx_ccid;
-
- if (ccid == NULL || ccid->ccid_ops == NULL)
- return -1;
- return ccid->ccid_ops->ccid_id;
-}
-
-static inline int ccid_get_current_tx_ccid(struct dccp_sock *dp)
-{
- struct ccid *ccid = dp->dccps_hc_tx_ccid;
-
- if (ccid == NULL || ccid->ccid_ops == NULL)
- return -1;
- return ccid->ccid_ops->ccid_id;
-}
-
-void ccid_hc_rx_delete(struct ccid *ccid, struct sock *sk);
-void ccid_hc_tx_delete(struct ccid *ccid, struct sock *sk);
-
-/*
- * Congestion control of queued data packets via CCID decision.
- *
- * The TX CCID performs its congestion-control by indicating whether and when a
- * queued packet may be sent, using the return code of ccid_hc_tx_send_packet().
- * The following modes are supported via the symbolic constants below:
- * - timer-based pacing (CCID returns a delay value in milliseconds);
- * - autonomous dequeueing (CCID internally schedules dccps_xmitlet).
- */
-
-enum ccid_dequeueing_decision {
- CCID_PACKET_SEND_AT_ONCE = 0x00000, /* "green light": no delay */
- CCID_PACKET_DELAY_MAX = 0x0FFFF, /* maximum delay in msecs */
- CCID_PACKET_DELAY = 0x10000, /* CCID msec-delay mode */
- CCID_PACKET_WILL_DEQUEUE_LATER = 0x20000, /* CCID autonomous mode */
- CCID_PACKET_ERR = 0xF0000, /* error condition */
-};
-
-static inline int ccid_packet_dequeue_eval(const int return_code)
-{
- if (return_code < 0)
- return CCID_PACKET_ERR;
- if (return_code == 0)
- return CCID_PACKET_SEND_AT_ONCE;
- if (return_code <= CCID_PACKET_DELAY_MAX)
- return CCID_PACKET_DELAY;
- return return_code;
-}
-
-static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb)
-{
- if (ccid->ccid_ops->ccid_hc_tx_send_packet != NULL)
- return ccid->ccid_ops->ccid_hc_tx_send_packet(sk, skb);
- return CCID_PACKET_SEND_AT_ONCE;
-}
-
-static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
- unsigned int len)
-{
- if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
- ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
-}
-
-static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb)
-{
- if (ccid->ccid_ops->ccid_hc_rx_packet_recv != NULL)
- ccid->ccid_ops->ccid_hc_rx_packet_recv(sk, skb);
-}
-
-static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb)
-{
- if (ccid->ccid_ops->ccid_hc_tx_packet_recv != NULL)
- ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
-}
-
-/**
- * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver
- * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
- * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
- * @val: value of @opt
- * @len: length of @val in bytes
- */
-static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
- u8 pkt, u8 opt, u8 *val, u8 len)
-{
- if (!ccid || !ccid->ccid_ops->ccid_hc_tx_parse_options)
- return 0;
- return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
-}
-
-/**
- * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender
- * Arguments are analogous to ccid_hc_tx_parse_options()
- */
-static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
- u8 pkt, u8 opt, u8 *val, u8 len)
-{
- if (!ccid || !ccid->ccid_ops->ccid_hc_rx_parse_options)
- return 0;
- return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
-}
-
-static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
- struct sk_buff *skb)
-{
- if (ccid->ccid_ops->ccid_hc_rx_insert_options != NULL)
- return ccid->ccid_ops->ccid_hc_rx_insert_options(sk, skb);
- return 0;
-}
-
-static inline void ccid_hc_rx_get_info(struct ccid *ccid, struct sock *sk,
- struct tcp_info *info)
-{
- if (ccid->ccid_ops->ccid_hc_rx_get_info != NULL)
- ccid->ccid_ops->ccid_hc_rx_get_info(sk, info);
-}
-
-static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk,
- struct tcp_info *info)
-{
- if (ccid->ccid_ops->ccid_hc_tx_get_info != NULL)
- ccid->ccid_ops->ccid_hc_tx_get_info(sk, info);
-}
-
-static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk,
- const int optname, int len,
- u32 __user *optval, int __user *optlen)
-{
- int rc = -ENOPROTOOPT;
- if (ccid != NULL && ccid->ccid_ops->ccid_hc_rx_getsockopt != NULL)
- rc = ccid->ccid_ops->ccid_hc_rx_getsockopt(sk, optname, len,
- optval, optlen);
- return rc;
-}
-
-static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk,
- const int optname, int len,
- u32 __user *optval, int __user *optlen)
-{
- int rc = -ENOPROTOOPT;
- if (ccid != NULL && ccid->ccid_ops->ccid_hc_tx_getsockopt != NULL)
- rc = ccid->ccid_ops->ccid_hc_tx_getsockopt(sk, optname, len,
- optval, optlen);
- return rc;
-}
-#endif /* _CCID_H */
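
The dequeueing decision codes in the deleted ccid.h map a CCID return value onto one of three sending modes; the mapping performed by ccid_packet_dequeue_eval() is compact enough to tabulate:

	/*	rc < 0			-> CCID_PACKET_ERR
	 *	rc == 0			-> CCID_PACKET_SEND_AT_ONCE
	 *	0 < rc <= 0xFFFF	-> CCID_PACKET_DELAY
	 *				   (rc msecs of timer-based pacing)
	 *	rc == CCID_PACKET_WILL_DEQUEUE_LATER
	 *				-> passed through: the CCID will
	 *				   reschedule dccps_xmitlet itself
	 */
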
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
deleted file mode 100644
index e3d388c33d25..000000000000
--- a/net/dccp/ccids/Kconfig
+++ /dev/null
@@ -1,55 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-menu "DCCP CCIDs Configuration"
-
-config IP_DCCP_CCID2_DEBUG
- bool "CCID-2 debugging messages"
- help
- Enable CCID-2 specific debugging messages.
-
- The debugging output can additionally be toggled by setting the
- ccid2_debug parameter to 0 or 1.
-
- If in doubt, say N.
-
-config IP_DCCP_CCID3
- bool "CCID-3 (TCP-Friendly)"
- default IP_DCCP = y || IP_DCCP = m
- help
- CCID-3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
- rate-controlled congestion control mechanism. TFRC is designed to
- be reasonably fair when competing for bandwidth with TCP-like flows,
- where a flow is "reasonably fair" if its sending rate is generally
- within a factor of two of the sending rate of a TCP flow under the
- same conditions. However, TFRC has a much lower variation of
- throughput over time compared with TCP, which makes CCID-3 more
- suitable than CCID-2 for applications such as streaming media where a
- relatively smooth sending rate is of importance.
-
- CCID-3 is further described in RFC 4342,
- https://www.ietf.org/rfc/rfc4342.txt
-
- The TFRC congestion control algorithms were initially described in
- RFC 5348.
-
- This text was extracted from RFC 4340 (sec. 10.2),
- https://www.ietf.org/rfc/rfc4340.txt
-
- If in doubt, say N.
-
-config IP_DCCP_CCID3_DEBUG
- bool "CCID-3 debugging messages"
- depends on IP_DCCP_CCID3
- help
- Enable CCID-3 specific debugging messages.
-
- The debugging output can additionally be toggled by setting the
- ccid3_debug parameter to 0 or 1.
-
- If in doubt, say N.
-
-config IP_DCCP_TFRC_LIB
- def_bool y if IP_DCCP_CCID3
-
-config IP_DCCP_TFRC_DEBUG
- def_bool y if IP_DCCP_CCID3_DEBUG
-endmenu
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
deleted file mode 100644
index d6b30700af67..000000000000
--- a/net/dccp/ccids/ccid2.c
+++ /dev/null
@@ -1,794 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
- *
- * Changes to meet Linux coding standards, and DCCP infrastructure fixes.
- *
- * Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-/*
- * This implementation should follow RFC 4341
- */
-#include <linux/slab.h>
-#include "../feat.h"
-#include "ccid2.h"
-
-
-#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-static bool ccid2_debug;
-#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
-#else
-#define ccid2_pr_debug(format, a...)
-#endif
-
-static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
-{
- struct ccid2_seq *seqp;
- int i;
-
- /* check if we have space to preserve the pointer to the buffer */
- if (hc->tx_seqbufc >= (sizeof(hc->tx_seqbuf) /
- sizeof(struct ccid2_seq *)))
- return -ENOMEM;
-
- /* allocate buffer and initialize linked list */
- seqp = kmalloc_array(CCID2_SEQBUF_LEN, sizeof(struct ccid2_seq),
- gfp_any());
- if (seqp == NULL)
- return -ENOMEM;
-
- for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) {
- seqp[i].ccid2s_next = &seqp[i + 1];
- seqp[i + 1].ccid2s_prev = &seqp[i];
- }
- seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp;
- seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
-
- /* This is the first allocation. Initiate the head and tail. */
- if (hc->tx_seqbufc == 0)
- hc->tx_seqh = hc->tx_seqt = seqp;
- else {
- /* link the existing list with the one we just created */
- hc->tx_seqh->ccid2s_next = seqp;
- seqp->ccid2s_prev = hc->tx_seqh;
-
- hc->tx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
- seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hc->tx_seqt;
- }
-
- /* store the original pointer to the buffer so we can free it */
- hc->tx_seqbuf[hc->tx_seqbufc] = seqp;
- hc->tx_seqbufc++;
-
- return 0;
-}
-
-static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
-{
- if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
- return CCID_PACKET_WILL_DEQUEUE_LATER;
- return CCID_PACKET_SEND_AT_ONCE;
-}
-
-static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
-{
- u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);
-
- /*
- * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
- * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
- * acceptable since this causes starvation/deadlock whenever cwnd < 2.
- * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled).
- */
- if (val == 0 || val > max_ratio) {
- DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
- val = max_ratio;
- }
- dccp_feat_signal_nn_change(sk, DCCPF_ACK_RATIO,
- min_t(u32, val, DCCPF_ACK_RATIO_MAX));
-}
-
-static void ccid2_check_l_ack_ratio(struct sock *sk)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- /*
- * After a loss, idle period, application limited period, or RTO we
- * need to check that the ack ratio is still less than the congestion
- * window. Otherwise, we will send an entire congestion window of
- * packets and get no response because we haven't sent ack ratio
- * packets yet.
- * If the ack ratio does need to be reduced, we reduce it to half of
- * the congestion window (or 1 if that's zero) instead of to the
- * congestion window. This prevents problems if one ack is lost.
- */
- if (dccp_feat_nn_get(sk, DCCPF_ACK_RATIO) > hc->tx_cwnd)
- ccid2_change_l_ack_ratio(sk, hc->tx_cwnd/2 ? : 1U);
-}
-
-static void ccid2_change_l_seq_window(struct sock *sk, u64 val)
-{
- dccp_feat_signal_nn_change(sk, DCCPF_SEQUENCE_WINDOW,
- clamp_val(val, DCCPF_SEQ_WMIN,
- DCCPF_SEQ_WMAX));
-}
-
-static void dccp_tasklet_schedule(struct sock *sk)
-{
- struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet;
-
- if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
- sock_hold(sk);
- __tasklet_schedule(t);
- }
-}
-
-static void ccid2_hc_tx_rto_expire(struct timer_list *t)
-{
- struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer);
- struct sock *sk = hc->sk;
- const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + HZ / 5);
- goto out;
- }
-
- ccid2_pr_debug("RTO_EXPIRE\n");
-
- if (sk->sk_state == DCCP_CLOSED)
- goto out;
-
- /* back-off timer */
- hc->tx_rto <<= 1;
- if (hc->tx_rto > DCCP_RTO_MAX)
- hc->tx_rto = DCCP_RTO_MAX;
-
- /* adjust pipe, cwnd etc */
- hc->tx_ssthresh = hc->tx_cwnd / 2;
- if (hc->tx_ssthresh < 2)
- hc->tx_ssthresh = 2;
- hc->tx_cwnd = 1;
- hc->tx_pipe = 0;
-
- /* clear state about stuff we sent */
- hc->tx_seqt = hc->tx_seqh;
- hc->tx_packets_acked = 0;
-
- /* clear ack ratio state. */
- hc->tx_rpseq = 0;
- hc->tx_rpdupack = -1;
- ccid2_change_l_ack_ratio(sk, 1);
-
- /* if we were blocked before, we may now send cwnd=1 packet */
- if (sender_was_blocked)
- dccp_tasklet_schedule(sk);
- /* restart backed-off timer */
- sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
-
-/*
- * Congestion window validation (RFC 2861).
- */
-static bool ccid2_do_cwv = true;
-module_param(ccid2_do_cwv, bool, 0644);
-MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation");
-
-/**
- * ccid2_update_used_window - Track how much of cwnd is actually used
- * @hc: socket to update window
- * @new_wnd: new window values to add into the filter
- *
- * This is done in addition to CWV. The sender needs to have an idea of how many
- * packets may be in flight, to set the local Sequence Window value accordingly
- * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the
- * maximum-used window. We use an EWMA low-pass filter to filter out noise.
- */
-static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd)
-{
- hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4;
-}
-
-/* This borrows the code of tcp_cwnd_application_limited() */
-static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- /* don't reduce cwnd below the initial window (IW) */
- u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache),
- win_used = max(hc->tx_cwnd_used, init_win);
-
- if (win_used < hc->tx_cwnd) {
- hc->tx_ssthresh = max(hc->tx_ssthresh,
- (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2));
- hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1;
- }
- hc->tx_cwnd_used = 0;
- hc->tx_cwnd_stamp = now;
-
- ccid2_check_l_ack_ratio(sk);
-}
-
-/* This borrows the code of tcp_cwnd_restart() */
-static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- u32 cwnd = hc->tx_cwnd, restart_cwnd,
- iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
- s32 delta = now - hc->tx_lsndtime;
-
- hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
-
- /* don't reduce cwnd below the initial window (IW) */
- restart_cwnd = min(cwnd, iwnd);
-
- while ((delta -= hc->tx_rto) >= 0 && cwnd > restart_cwnd)
- cwnd >>= 1;
- hc->tx_cwnd = max(cwnd, restart_cwnd);
- hc->tx_cwnd_stamp = now;
- hc->tx_cwnd_used = 0;
-
- ccid2_check_l_ack_ratio(sk);
-}
-
-static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- const u32 now = ccid2_jiffies32;
- struct ccid2_seq *next;
-
- /* slow-start after idle periods (RFC 2581, RFC 2861) */
- if (ccid2_do_cwv && !hc->tx_pipe &&
- (s32)(now - hc->tx_lsndtime) >= hc->tx_rto)
- ccid2_cwnd_restart(sk, now);
-
- hc->tx_lsndtime = now;
- hc->tx_pipe += 1;
-
- /* see whether cwnd was fully used (RFC 2861), update expected window */
- if (ccid2_cwnd_network_limited(hc)) {
- ccid2_update_used_window(hc, hc->tx_cwnd);
- hc->tx_cwnd_used = 0;
- hc->tx_cwnd_stamp = now;
- } else {
- if (hc->tx_pipe > hc->tx_cwnd_used)
- hc->tx_cwnd_used = hc->tx_pipe;
-
- ccid2_update_used_window(hc, hc->tx_cwnd_used);
-
- if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto)
- ccid2_cwnd_application_limited(sk, now);
- }
-
- hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
- hc->tx_seqh->ccid2s_acked = 0;
- hc->tx_seqh->ccid2s_sent = now;
-
- next = hc->tx_seqh->ccid2s_next;
- /* check if we need to alloc more space */
- if (next == hc->tx_seqt) {
- if (ccid2_hc_tx_alloc_seq(hc)) {
- DCCP_CRIT("packet history - out of memory!");
- /* FIXME: find a more graceful way to bail out */
- return;
- }
- next = hc->tx_seqh->ccid2s_next;
- BUG_ON(next == hc->tx_seqt);
- }
- hc->tx_seqh = next;
-
- ccid2_pr_debug("cwnd=%d pipe=%d\n", hc->tx_cwnd, hc->tx_pipe);
-
- /*
- * FIXME: The code below is broken and the variables have been removed
- * from the socket struct. The `ackloss' variable was always set to 0,
- * and with arsent there are several problems:
- * (i) it doesn't just count the number of Acks, but all sent packets;
- * (ii) it is expressed in # of packets, not # of windows, so the
- * comparison below uses the wrong formula: Appendix A of RFC 4341
- * comes up with the number K = cwnd / (R^2 - R) of consecutive windows
- * of data with no lost or marked Ack packets. If arsent were the # of
- * consecutive Acks received without loss, then Ack Ratio needs to be
- * decreased by 1 when
- * arsent >= K * cwnd / R = cwnd^2 / (R^3 - R^2)
- * where cwnd / R is the number of Acks received per window of data
- * (cf. RFC 4341, App. A). The problems are that
- * - arsent counts other packets as well;
- * - the comparison uses a formula different from RFC 4341;
- * - computing a cubic/quadratic equation each time is too complicated.
- * Hence a different algorithm is needed.
- */
-#if 0
- /* Ack Ratio. Need to maintain a concept of how many windows we sent */
- hc->tx_arsent++;
- /* We had an ack loss in this window... */
- if (hc->tx_ackloss) {
- if (hc->tx_arsent >= hc->tx_cwnd) {
- hc->tx_arsent = 0;
- hc->tx_ackloss = 0;
- }
- } else {
- /* No acks lost up to now... */
- /* decrease ack ratio if enough packets were sent */
- if (dp->dccps_l_ack_ratio > 1) {
- /* XXX don't calculate denominator each time */
- int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
- dp->dccps_l_ack_ratio;
-
- denom = hc->tx_cwnd * hc->tx_cwnd / denom;
-
- if (hc->tx_arsent >= denom) {
- ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
- hc->tx_arsent = 0;
- }
- } else {
- /* we can't increase ack ratio further [1] */
- hc->tx_arsent = 0; /* or maybe set it to cwnd*/
- }
- }
-#endif
-
- sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-
-#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
- do {
- struct ccid2_seq *seqp = hc->tx_seqt;
-
- while (seqp != hc->tx_seqh) {
- ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
- (unsigned long long)seqp->ccid2s_seq,
- seqp->ccid2s_acked, seqp->ccid2s_sent);
- seqp = seqp->ccid2s_next;
- }
- } while (0);
- ccid2_pr_debug("=========\n");
-#endif
-}
-
-/**
- * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
- * @sk: socket to perform estimator on
- * @mrtt: measured RTT
- *
- * This code is almost identical to TCP's tcp_rtt_estimator(), since
- * - it has a higher sampling frequency (recommended by RFC 1323),
- * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
- * - it is simple (cf. more complex proposals such as Eifel timer or research
- * which suggests that the gain should be set according to window size),
- * - in tests it was found to work well with CCID2 [gerrit].
- */
-static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- long m = mrtt ? : 1;
-
- if (hc->tx_srtt == 0) {
- /* First measurement m */
- hc->tx_srtt = m << 3;
- hc->tx_mdev = m << 1;
-
- hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
- hc->tx_rttvar = hc->tx_mdev_max;
-
- hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
- } else {
- /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
- m -= (hc->tx_srtt >> 3);
- hc->tx_srtt += m;
-
- /* Similarly, update scaled mdev with regard to |m| */
- if (m < 0) {
- m = -m;
- m -= (hc->tx_mdev >> 2);
- /*
- * This neutralises RTO increase when RTT < SRTT - mdev
- * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
- * in Linux TCP", USENIX 2002, pp. 49-62).
- */
- if (m > 0)
- m >>= 3;
- } else {
- m -= (hc->tx_mdev >> 2);
- }
- hc->tx_mdev += m;
-
- if (hc->tx_mdev > hc->tx_mdev_max) {
- hc->tx_mdev_max = hc->tx_mdev;
- if (hc->tx_mdev_max > hc->tx_rttvar)
- hc->tx_rttvar = hc->tx_mdev_max;
- }
-
- /*
- * Decay RTTVAR at most once per flight, exploiting that
- * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
- * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
- * GAR is a useful bound for FlightSize = pipe.
- * AWL is probably too low here, as it over-estimates pipe.
- */
- if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
- if (hc->tx_mdev_max < hc->tx_rttvar)
- hc->tx_rttvar -= (hc->tx_rttvar -
- hc->tx_mdev_max) >> 2;
- hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
- hc->tx_mdev_max = tcp_rto_min(sk);
- }
- }
-
- /*
- * Set RTO from SRTT and RTTVAR
- * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
- * This agrees with RFC 4341, 5:
- * "Because DCCP does not retransmit data, DCCP does not require
- * TCP's recommended minimum timeout of one second".
- */
- hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
-
- if (hc->tx_rto > DCCP_RTO_MAX)
- hc->tx_rto = DCCP_RTO_MAX;
-}
-
-static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
- unsigned int *maxincr)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- int r_seq_used = hc->tx_cwnd / dp->dccps_l_ack_ratio;
-
- if (hc->tx_cwnd < dp->dccps_l_seq_win &&
- r_seq_used < dp->dccps_r_seq_win) {
- if (hc->tx_cwnd < hc->tx_ssthresh) {
- if (*maxincr > 0 && ++hc->tx_packets_acked >= 2) {
- hc->tx_cwnd += 1;
- *maxincr -= 1;
- hc->tx_packets_acked = 0;
- }
- } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
- hc->tx_cwnd += 1;
- hc->tx_packets_acked = 0;
- }
- }
-
- /*
- * Adjust the local sequence window and the ack ratio to allow about
- * 5 times the number of packets in the network (RFC 4340 7.5.2)
- */
- if (r_seq_used * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_r_seq_win)
- ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio * 2);
- else if (r_seq_used * CCID2_WIN_CHANGE_FACTOR < dp->dccps_r_seq_win/2)
- ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio / 2 ? : 1U);
-
- if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR >= dp->dccps_l_seq_win)
- ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win * 2);
- else if (hc->tx_cwnd * CCID2_WIN_CHANGE_FACTOR < dp->dccps_l_seq_win/2)
- ccid2_change_l_seq_window(sk, dp->dccps_l_seq_win / 2);
-
- /*
- * FIXME: RTT is sampled several times per acknowledgment (for each
- * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
- * This causes the RTT to be over-estimated, since the older entries
- * in the Ack Vector have earlier sending times.
- * The cleanest solution is to not use the ccid2s_sent field at all
- * and instead use DCCP timestamps: requires changes in other places.
- */
- ccid2_rtt_estimator(sk, ccid2_jiffies32 - seqp->ccid2s_sent);
-}
-
-static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
- ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
- return;
- }
-
- hc->tx_last_cong = ccid2_jiffies32;
-
- hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
- hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
-
- ccid2_check_l_ack_ratio(sk);
-}
-
-static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
- u8 option, u8 *optval, u8 optlen)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
-
- switch (option) {
- case DCCPO_ACK_VECTOR_0:
- case DCCPO_ACK_VECTOR_1:
- return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
- option - DCCPO_ACK_VECTOR_0);
- }
- return 0;
-}
-
-static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
- struct dccp_ackvec_parsed *avp;
- u64 ackno, seqno;
- struct ccid2_seq *seqp;
- int done = 0;
- unsigned int maxincr = 0;
-
- /* check reverse path congestion */
- seqno = DCCP_SKB_CB(skb)->dccpd_seq;
-
- /* XXX this whole "algorithm" is broken. Need to fix it to keep track
- * of the seqnos of the dupacks so that rpseq and rpdupack are correct
- * -sorbo.
- */
- /* need to bootstrap */
- if (hc->tx_rpdupack == -1) {
- hc->tx_rpdupack = 0;
- hc->tx_rpseq = seqno;
- } else {
- /* check if packet is consecutive */
- if (dccp_delta_seqno(hc->tx_rpseq, seqno) == 1)
- hc->tx_rpseq = seqno;
- /* it's a later packet */
- else if (after48(seqno, hc->tx_rpseq)) {
- hc->tx_rpdupack++;
-
- /* check if we got enough dupacks */
- if (hc->tx_rpdupack >= NUMDUPACK) {
- hc->tx_rpdupack = -1; /* XXX lame */
- hc->tx_rpseq = 0;
-#ifdef __CCID2_COPES_GRACEFULLY_WITH_ACK_CONGESTION_CONTROL__
- /*
- * FIXME: Ack Congestion Control is broken; in
- * the current state instabilities occurred with
- * Ack Ratios greater than 1; causing hang-ups
- * and long RTO timeouts. This needs to be fixed
- * before opening up dynamic changes. -- gerrit
- */
- ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
-#endif
- }
- }
- }
-
- /* check forward path congestion */
- if (dccp_packet_without_ack(skb))
- return;
-
- /* still didn't send out new data packets */
- if (hc->tx_seqh == hc->tx_seqt)
- goto done;
-
- ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
- if (after48(ackno, hc->tx_high_ack))
- hc->tx_high_ack = ackno;
-
- seqp = hc->tx_seqt;
- while (before48(seqp->ccid2s_seq, ackno)) {
- seqp = seqp->ccid2s_next;
- if (seqp == hc->tx_seqh) {
- seqp = hc->tx_seqh->ccid2s_prev;
- break;
- }
- }
-
- /*
- * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
- * packets per acknowledgement. Rounding up ensures that cwnd still
- * advances when Ack Ratio is 1, and gives a slight edge otherwise.
- */
- if (hc->tx_cwnd < hc->tx_ssthresh)
- maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
-
- /* go through all ack vectors */
- list_for_each_entry(avp, &hc->tx_av_chunks, node) {
- /* go through this ack vector */
- for (; avp->len--; avp->vec++) {
- u64 ackno_end_rl = SUB48(ackno,
- dccp_ackvec_runlen(avp->vec));
-
- ccid2_pr_debug("ackvec %llu |%u,%u|\n",
- (unsigned long long)ackno,
- dccp_ackvec_state(avp->vec) >> 6,
- dccp_ackvec_runlen(avp->vec));
- /* if the seqno we are analyzing is larger than the
- * current ackno, then move towards the tail of our
- * seqnos.
- */
- while (after48(seqp->ccid2s_seq, ackno)) {
- if (seqp == hc->tx_seqt) {
- done = 1;
- break;
- }
- seqp = seqp->ccid2s_prev;
- }
- if (done)
- break;
-
- /* check all seqnos in the range of the vector
- * run length
- */
- while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
- const u8 state = dccp_ackvec_state(avp->vec);
-
- /* new packet received or marked */
- if (state != DCCPAV_NOT_RECEIVED &&
- !seqp->ccid2s_acked) {
- if (state == DCCPAV_ECN_MARKED)
- ccid2_congestion_event(sk,
- seqp);
- else
- ccid2_new_ack(sk, seqp,
- &maxincr);
-
- seqp->ccid2s_acked = 1;
- ccid2_pr_debug("Got ack for %llu\n",
- (unsigned long long)seqp->ccid2s_seq);
- hc->tx_pipe--;
- }
- if (seqp == hc->tx_seqt) {
- done = 1;
- break;
- }
- seqp = seqp->ccid2s_prev;
- }
- if (done)
- break;
-
- ackno = SUB48(ackno_end_rl, 1);
- }
- if (done)
- break;
- }
-
- /* The state about what is acked should be correct now
- * Check for NUMDUPACK
- */
- seqp = hc->tx_seqt;
- while (before48(seqp->ccid2s_seq, hc->tx_high_ack)) {
- seqp = seqp->ccid2s_next;
- if (seqp == hc->tx_seqh) {
- seqp = hc->tx_seqh->ccid2s_prev;
- break;
- }
- }
- done = 0;
- while (1) {
- if (seqp->ccid2s_acked) {
- done++;
- if (done == NUMDUPACK)
- break;
- }
- if (seqp == hc->tx_seqt)
- break;
- seqp = seqp->ccid2s_prev;
- }
-
- /* If there are at least 3 acknowledgements, anything unacknowledged
- * below the last sequence number is considered lost
- */
- if (done == NUMDUPACK) {
- struct ccid2_seq *last_acked = seqp;
-
- /* check for lost packets */
- while (1) {
- if (!seqp->ccid2s_acked) {
- ccid2_pr_debug("Packet lost: %llu\n",
- (unsigned long long)seqp->ccid2s_seq);
- /* XXX need to traverse from tail -> head in
- * order to detect multiple congestion events in
- * one ack vector.
- */
- ccid2_congestion_event(sk, seqp);
- hc->tx_pipe--;
- }
- if (seqp == hc->tx_seqt)
- break;
- seqp = seqp->ccid2s_prev;
- }
-
- hc->tx_seqt = last_acked;
- }
-
- /* trim acked packets in tail */
- while (hc->tx_seqt != hc->tx_seqh) {
- if (!hc->tx_seqt->ccid2s_acked)
- break;
-
- hc->tx_seqt = hc->tx_seqt->ccid2s_next;
- }
-
- /* restart RTO timer if not all outstanding data has been acked */
- if (hc->tx_pipe == 0)
- sk_stop_timer(sk, &hc->tx_rtotimer);
- else
- sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
-done:
- /* check if incoming Acks allow pending packets to be sent */
- if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
- dccp_tasklet_schedule(sk);
- dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
-}
-
-static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
-{
- struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
- struct dccp_sock *dp = dccp_sk(sk);
- u32 max_ratio;
-
- /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
- hc->tx_ssthresh = ~0U;
-
- /* Use larger initial windows (RFC 4341, section 5). */
- hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
- hc->tx_expected_wnd = hc->tx_cwnd;
-
- /* Make sure that Ack Ratio is enabled and within bounds. */
- max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
- if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
- dp->dccps_l_ack_ratio = max_ratio;
-
- /* XXX init ~ to window size... */
- if (ccid2_hc_tx_alloc_seq(hc))
- return -ENOMEM;
-
- hc->tx_rto = DCCP_TIMEOUT_INIT;
- hc->tx_rpdupack = -1;
- hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32;
- hc->tx_cwnd_used = 0;
- hc->sk = sk;
- timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0);
- INIT_LIST_HEAD(&hc->tx_av_chunks);
- return 0;
-}
-
-static void ccid2_hc_tx_exit(struct sock *sk)
-{
- struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
- int i;
-
- sk_stop_timer(sk, &hc->tx_rtotimer);
-
- for (i = 0; i < hc->tx_seqbufc; i++)
- kfree(hc->tx_seqbuf[i]);
- hc->tx_seqbufc = 0;
- dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
-}
-
-static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
-{
- struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk);
-
- if (!dccp_data_packet(skb))
- return;
-
- if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) {
- dccp_send_ack(sk);
- hc->rx_num_data_pkts = 0;
- }
-}
-
-struct ccid_operations ccid2_ops = {
- .ccid_id = DCCPC_CCID2,
- .ccid_name = "TCP-like",
- .ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
- .ccid_hc_tx_init = ccid2_hc_tx_init,
- .ccid_hc_tx_exit = ccid2_hc_tx_exit,
- .ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
- .ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
- .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
- .ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
- .ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
- .ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
-};
-
-#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
-module_param(ccid2_debug, bool, 0644);
-MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages");
-#endif
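The fixed-point arithmetic in ccid2_rtt_estimator() above is compact but easy to misread: SRTT is kept scaled by 2^3 and MDEV/RTTVAR by 2^2, so the gains 1/8 and 1/4 reduce to shifts, and adding the scaled RTTVAR to the unscaled SRTT implements RTO = SRTT + 4*RTTVAR. Below is a minimal userspace sketch of just that scaling; it deliberately omits the tcp_rto_min() floor, the once-per-flight RTTVAR decay and the negative-error neutralisation, so it illustrates the arithmetic only and is not the kernel code. The sample values are made up.

#include <stdio.h>

struct rtt_est {
	long srtt;	/* smoothed RTT, scaled by 2^3 */
	long mdev;	/* mean deviation, scaled by 2^2 */
	long rttvar;	/* running max of mdev, scaled by 2^2 */
	long rto;	/* (srtt >> 3) + rttvar == SRTT + 4 * RTTVAR */
};

static void rtt_sample(struct rtt_est *e, long m)
{
	if (m <= 0)
		m = 1;			/* avoid zero samples, as above */
	if (e->srtt == 0) {		/* first measurement */
		e->srtt = m << 3;
		e->mdev = m << 1;	/* initial MDEV = m/2, scaled by 4 */
		e->rttvar = e->mdev;
	} else {
		m -= (e->srtt >> 3);	/* error term */
		e->srtt += m;		/* srtt += 1/8 * error */
		if (m < 0)
			m = -m;
		m -= (e->mdev >> 2);
		e->mdev += m;		/* mdev += 1/4 * (|error| - mdev) */
		if (e->mdev > e->rttvar)
			e->rttvar = e->mdev;
	}
	e->rto = (e->srtt >> 3) + e->rttvar;
}

int main(void)
{
	struct rtt_est e = { 0, 0, 0, 0 };
	long samples[] = { 100, 120, 90, 300, 110 };	/* e.g. in jiffies */

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		rtt_sample(&e, samples[i]);
		printf("sample=%ld srtt=%ld rto=%ld\n",
		       samples[i], e.srtt >> 3, e.rto);
	}
	return 0;
}

With the first sample of 100 this prints srtt=100, rto=300: the RTO starts out as SRTT + 4*MDEV with MDEV = SRTT/2, exactly as in the first-measurement branch above.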
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
deleted file mode 100644
index 330c7b4ec001..000000000000
--- a/net/dccp/ccids/ccid2.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
- */
-#ifndef _DCCP_CCID2_H_
-#define _DCCP_CCID2_H_
-
-#include <linux/timer.h>
-#include <linux/types.h>
-#include "../ccid.h"
-#include "../dccp.h"
-
-/*
- * CCID-2 timestamping faces the same issues as TCP timestamping.
- * Hence we reuse/share as much of the code as possible.
- */
-#define ccid2_jiffies32 ((u32)jiffies)
-
-/* NUMDUPACK parameter from RFC 4341, p. 6 */
-#define NUMDUPACK 3
-
-struct ccid2_seq {
- u64 ccid2s_seq;
- u32 ccid2s_sent;
- int ccid2s_acked;
- struct ccid2_seq *ccid2s_prev;
- struct ccid2_seq *ccid2s_next;
-};
-
-#define CCID2_SEQBUF_LEN 1024
-#define CCID2_SEQBUF_MAX 128
-
-/*
- * Multiple of congestion window to keep the sequence window at
- * (RFC 4340 7.5.2)
- */
-#define CCID2_WIN_CHANGE_FACTOR 5
-
-/**
- * struct ccid2_hc_tx_sock - CCID2 TX half connection
- * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
- * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
- * @tx_srtt: smoothed RTT estimate, scaled by 2^3
- * @tx_mdev: smoothed RTT variation, scaled by 2^2
- * @tx_mdev_max: maximum of @mdev during one flight
- * @tx_rttvar: moving average/maximum of @mdev_max
- * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
- * @tx_rtt_seq: to decay RTTVAR at most once per flight
- * @tx_cwnd_used: actually used cwnd, W_used of RFC 2861
- * @tx_expected_wnd: moving average of @tx_cwnd_used
- * @tx_cwnd_stamp: to track idle periods in CWV
- * @tx_lsndtime: last time (in jiffies) a data packet was sent
- * @tx_rpseq: last consecutive seqno
- * @tx_rpdupack: dupacks since rpseq
- * @tx_av_chunks: list of Ack Vectors received on current skb
- */
-struct ccid2_hc_tx_sock {
- u32 tx_cwnd;
- u32 tx_ssthresh;
- u32 tx_pipe;
- u32 tx_packets_acked;
- struct ccid2_seq *tx_seqbuf[CCID2_SEQBUF_MAX];
- int tx_seqbufc;
- struct ccid2_seq *tx_seqh;
- struct ccid2_seq *tx_seqt;
-
- /* RTT measurement: variables/principles are the same as in TCP */
- u32 tx_srtt,
- tx_mdev,
- tx_mdev_max,
- tx_rttvar,
- tx_rto;
- u64 tx_rtt_seq:48;
- struct timer_list tx_rtotimer;
- struct sock *sk;
-
- /* Congestion Window validation (optional, RFC 2861) */
- u32 tx_cwnd_used,
- tx_expected_wnd,
- tx_cwnd_stamp,
- tx_lsndtime;
-
- u64 tx_rpseq;
- int tx_rpdupack;
- u32 tx_last_cong;
- u64 tx_high_ack;
- struct list_head tx_av_chunks;
-};
-
-static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hc)
-{
- return hc->tx_pipe >= hc->tx_cwnd;
-}
-
-/*
- * Convert RFC 3390 larger initial window into an equivalent number of packets.
- * This is based on the numbers specified in RFC 5681, 3.1.
- */
-static inline u32 rfc3390_bytes_to_packets(const u32 smss)
-{
- return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
-}
-
-/**
- * struct ccid2_hc_rx_sock - Receiving end of CCID-2 half-connection
- * @rx_num_data_pkts: number of data packets received since last feedback
- */
-struct ccid2_hc_rx_sock {
- u32 rx_num_data_pkts;
-};
-
-static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
-{
- return ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
-}
-
-static inline struct ccid2_hc_rx_sock *ccid2_hc_rx_sk(const struct sock *sk)
-{
- return ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
-}
-#endif /* _DCCP_CCID2_H_ */
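rfc3390_bytes_to_packets() above folds the RFC 5681, 3.1 initial-window rule (IW = 4 segments if SMSS <= 1095 bytes, 3 if 1095 < SMSS <= 2190, 2 otherwise) into a single conditional expression. A standalone check of the boundary values, as a userspace sketch with made-up MSS values:

#include <stdio.h>

/* same mapping as rfc3390_bytes_to_packets() above */
static unsigned int iw_packets(unsigned int smss)
{
	return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3);
}

int main(void)
{
	unsigned int mss[] = { 536, 1095, 1096, 1460, 2190, 2191, 9000 };

	for (unsigned int i = 0; i < sizeof(mss) / sizeof(mss[0]); i++)
		printf("SMSS=%u -> IW=%u packets\n", mss[i], iw_packets(mss[i]));
	return 0;	/* 536->4, 1460->3, 9000->2, etc. */
}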
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
deleted file mode 100644
index f349d16dd8f6..000000000000
--- a/net/dccp/ccids/ccid3.c
+++ /dev/null
@@ -1,866 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
- * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
- *
- * An implementation of the DCCP protocol
- *
- * This code has been developed by the University of Waikato WAND
- * research group. For further information please see https://www.wand.net.nz/
- *
- * This code also uses code from Lulea University, rereleased as GPL by its
- * authors:
- * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- *
- * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
- * and to make it work as a loadable module in the DCCP stack written by
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
- *
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-#include "../dccp.h"
-#include "ccid3.h"
-
-#include <linux/unaligned.h>
-
-#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-static bool ccid3_debug;
-#define ccid3_pr_debug(format, a...) DCCP_PR_DEBUG(ccid3_debug, format, ##a)
-#else
-#define ccid3_pr_debug(format, a...)
-#endif
-
-/*
- * Transmitter Half-Connection Routines
- */
-#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
-{
- static const char *const ccid3_state_names[] = {
- [TFRC_SSTATE_NO_SENT] = "NO_SENT",
- [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
- [TFRC_SSTATE_FBACK] = "FBACK",
- };
-
- return ccid3_state_names[state];
-}
-#endif
-
-static void ccid3_hc_tx_set_state(struct sock *sk,
- enum ccid3_hc_tx_states state)
-{
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- enum ccid3_hc_tx_states oldstate = hc->tx_state;
-
- ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
- dccp_role(sk), sk, ccid3_tx_state_name(oldstate),
- ccid3_tx_state_name(state));
- WARN_ON(state == oldstate);
- hc->tx_state = state;
-}
-
-/*
- * Compute the initial sending rate X_init in the manner of RFC 3390:
- *
- * X_init = min(4 * s, max(2 * s, 4380 bytes)) / RTT
- *
- * Note that RFC 3390 uses MSS, RFC 4342 refers to RFC 3390, and rfc3448bis
- * (rev-02) clarifies the use of RFC 3390 with regard to the above formula.
- * For consistency with other parts of the code, X_init is scaled by 2^6.
- */
-static inline u64 rfc3390_initial_rate(struct sock *sk)
-{
- const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- const __u32 w_init = clamp_t(__u32, 4380U, 2 * hc->tx_s, 4 * hc->tx_s);
-
- return scaled_div(w_init << 6, hc->tx_rtt);
-}
-
-/**
- * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
- * @hc: socket to have the send interval updated
- *
- * This respects the granularity of X_inst (64 * bytes/second).
- */
-static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
-{
- hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
-
- DCCP_BUG_ON(hc->tx_t_ipi == 0);
- ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
- hc->tx_s, (unsigned int)(hc->tx_x >> 6));
-}
-
-static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
-{
- u32 delta = ktime_us_delta(now, hc->tx_t_last_win_count);
-
- return delta / hc->tx_rtt;
-}
-
-/**
- * ccid3_hc_tx_update_x - Update allowed sending rate X
- * @sk: socket to be updated
- * @stamp: most recent time if available - can be left NULL.
- *
- * This function tracks draft rfc3448bis, check there for latest details.
- *
- * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
- * fine-grained resolution of sending rates. This requires scaling by 2^6
- * throughout the code. Only X_calc is unscaled (in bytes/second).
- *
- */
-static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
-{
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- __u64 min_rate = 2 * hc->tx_x_recv;
- const __u64 old_x = hc->tx_x;
- ktime_t now = stamp ? *stamp : ktime_get_real();
-
- /*
- * Handle IDLE periods: do not reduce below RFC3390 initial sending rate
- * when idling [RFC 4342, 5.1]. The definition of idling is taken from
- * rfc3448bis: a sender is idle if it has sent nothing for at least two RTTs.
- * For consistency with X and X_recv, min_rate is also scaled by 2^6.
- */
- if (ccid3_hc_tx_idle_rtt(hc, now) >= 2) {
- min_rate = rfc3390_initial_rate(sk);
- min_rate = max(min_rate, 2 * hc->tx_x_recv);
- }
-
- if (hc->tx_p > 0) {
-
- hc->tx_x = min(((__u64)hc->tx_x_calc) << 6, min_rate);
- hc->tx_x = max(hc->tx_x, (((__u64)hc->tx_s) << 6) / TFRC_T_MBI);
-
- } else if (ktime_us_delta(now, hc->tx_t_ld) - (s64)hc->tx_rtt >= 0) {
-
- hc->tx_x = min(2 * hc->tx_x, min_rate);
- hc->tx_x = max(hc->tx_x,
- scaled_div(((__u64)hc->tx_s) << 6, hc->tx_rtt));
- hc->tx_t_ld = now;
- }
-
- if (hc->tx_x != old_x) {
- ccid3_pr_debug("X_prev=%u, X_now=%u, X_calc=%u, "
- "X_recv=%u\n", (unsigned int)(old_x >> 6),
- (unsigned int)(hc->tx_x >> 6), hc->tx_x_calc,
- (unsigned int)(hc->tx_x_recv >> 6));
-
- ccid3_update_send_interval(hc);
- }
-}
-
-/**
- * ccid3_hc_tx_update_s - Track the mean packet size `s'
- * @hc: socket to be updated
- * @len: DCCP packet payload size in bytes
- *
- * cf. RFC 4342, 5.3 and RFC 3448, 4.1
- */
-static inline void ccid3_hc_tx_update_s(struct ccid3_hc_tx_sock *hc, int len)
-{
- const u16 old_s = hc->tx_s;
-
- hc->tx_s = tfrc_ewma(hc->tx_s, len, 9);
-
- if (hc->tx_s != old_s)
- ccid3_update_send_interval(hc);
-}
-
-/*
- * Update Window Counter using the algorithm from [RFC 4342, 8.1].
- * As elsewhere, RTT > 0 is assumed by using dccp_sample_rtt().
- */
-static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc,
- ktime_t now)
-{
- u32 delta = ktime_us_delta(now, hc->tx_t_last_win_count),
- quarter_rtts = (4 * delta) / hc->tx_rtt;
-
- if (quarter_rtts > 0) {
- hc->tx_t_last_win_count = now;
- hc->tx_last_win_count += min(quarter_rtts, 5U);
- hc->tx_last_win_count &= 0xF; /* mod 16 */
- }
-}
-
-static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t)
-{
- struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer);
- struct sock *sk = hc->sk;
- unsigned long t_nfb = USEC_PER_SEC / 5;
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- /* Try again later. */
- /* XXX: set some sensible MIB */
- goto restart_timer;
- }
-
- ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
- ccid3_tx_state_name(hc->tx_state));
-
- /* Ignore and do not restart after leaving the established state */
- if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
- goto out;
-
- /* Reset feedback state to "no feedback received" */
- if (hc->tx_state == TFRC_SSTATE_FBACK)
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-
- /*
- * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
- * RTO is 0 if and only if no feedback has been received yet.
- */
- if (hc->tx_t_rto == 0 || hc->tx_p == 0) {
-
- /* halve send rate directly */
- hc->tx_x = max(hc->tx_x / 2,
- (((__u64)hc->tx_s) << 6) / TFRC_T_MBI);
- ccid3_update_send_interval(hc);
- } else {
- /*
- * Modify the cached value of X_recv
- *
- * If (X_calc > 2 * X_recv)
- * X_recv = max(X_recv / 2, s / (2 * t_mbi));
- * Else
- * X_recv = X_calc / 4;
- *
- * Note that X_recv is scaled by 2^6 while X_calc is not
- */
- if (hc->tx_x_calc > (hc->tx_x_recv >> 5))
- hc->tx_x_recv =
- max(hc->tx_x_recv / 2,
- (((__u64)hc->tx_s) << 6) / (2*TFRC_T_MBI));
- else {
- hc->tx_x_recv = hc->tx_x_calc;
- hc->tx_x_recv <<= 4;
- }
- ccid3_hc_tx_update_x(sk, NULL);
- }
- ccid3_pr_debug("Reduced X to %llu/64 bytes/sec\n",
- (unsigned long long)hc->tx_x);
-
- /*
- * Set new timeout for the nofeedback timer.
- * See comments in packet_recv() regarding the value of t_RTO.
- */
- if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */
- t_nfb = TFRC_INITIAL_TIMEOUT;
- else
- t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
-
-restart_timer:
- sk_reset_timer(sk, &hc->tx_no_feedback_timer,
- jiffies + usecs_to_jiffies(t_nfb));
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
-
-/**
- * ccid3_hc_tx_send_packet - Delay-based dequeueing of TX packets
- * @sk: socket to send packet from
- * @skb: next packet candidate to send on @sk
- *
- * This function uses the convention of ccid_packet_dequeue_eval() and
- * returns a millisecond-delay value between 0 and t_mbi = 64000 msec.
- */
-static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- ktime_t now = ktime_get_real();
- s64 delay;
-
- /*
- * This function is called only for Data and DataAck packets. Sending
- * zero-sized Data(Ack)s is theoretically possible, but for congestion
- * control this case is pathological - ignore it.
- */
- if (unlikely(skb->len == 0))
- return -EBADMSG;
-
- if (hc->tx_state == TFRC_SSTATE_NO_SENT) {
- sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies +
- usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
- hc->tx_last_win_count = 0;
- hc->tx_t_last_win_count = now;
-
- /* Set t_0 for initial packet */
- hc->tx_t_nom = now;
-
- hc->tx_s = skb->len;
-
- /*
- * Use initial RTT sample when available: recommended by erratum
- * to RFC 4342. This implements the initialisation procedure of
- * draft rfc3448bis, section 4.2. Remember, X is scaled by 2^6.
- */
- if (dp->dccps_syn_rtt) {
- ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
- hc->tx_rtt = dp->dccps_syn_rtt;
- hc->tx_x = rfc3390_initial_rate(sk);
- hc->tx_t_ld = now;
- } else {
- /*
- * Sender does not have RTT sample:
- * - set fallback RTT (RFC 4340, 3.4) since an RTT value
- * is needed in several parts (e.g. window counter);
- * - set sending rate X_pps = 1pps as per RFC 3448, 4.2.
- */
- hc->tx_rtt = DCCP_FALLBACK_RTT;
- hc->tx_x = hc->tx_s;
- hc->tx_x <<= 6;
- }
- ccid3_update_send_interval(hc);
-
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
-
- } else {
- delay = ktime_us_delta(hc->tx_t_nom, now);
- ccid3_pr_debug("delay=%ld\n", (long)delay);
- /*
- * Scheduling of packet transmissions (RFC 5348, 8.3)
- *
- * if (t_now > t_nom - delta)
- * // send the packet now
- * else
- * // send the packet in (t_nom - t_now) milliseconds.
- */
- if (delay >= TFRC_T_DELTA)
- return (u32)delay / USEC_PER_MSEC;
-
- ccid3_hc_tx_update_win_count(hc, now);
- }
-
- /* prepare to send now (add options etc.) */
- dp->dccps_hc_tx_insert_options = 1;
- DCCP_SKB_CB(skb)->dccpd_ccval = hc->tx_last_win_count;
-
- /* set the nominal send time for the next following packet */
- hc->tx_t_nom = ktime_add_us(hc->tx_t_nom, hc->tx_t_ipi);
- return CCID_PACKET_SEND_AT_ONCE;
-}
-
-static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
-{
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
-
- ccid3_hc_tx_update_s(hc, len);
-
- if (tfrc_tx_hist_add(&hc->tx_hist, dccp_sk(sk)->dccps_gss))
- DCCP_CRIT("packet history - out of memory!");
-}
-
-static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
-{
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- struct tfrc_tx_hist_entry *acked;
- ktime_t now;
- unsigned long t_nfb;
- u32 r_sample;
-
- /* we are only interested in ACKs */
- if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
- DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
- return;
- /*
- * Locate the acknowledged packet in the TX history.
- *
- * Returning "entry not found" here can for instance happen when
- * - the host has not sent out anything (e.g. a passive server),
- * - the Ack is outdated (packet with higher Ack number was received),
- * - it is a bogus Ack (for a packet not sent on this connection).
- */
- acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb));
- if (acked == NULL)
- return;
- /* For the sake of RTT sampling, ignore/remove all older entries */
- tfrc_tx_hist_purge(&acked->next);
-
- /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
- now = ktime_get_real();
- r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
- hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9);
-
- /*
- * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
- */
- if (hc->tx_state == TFRC_SSTATE_NO_FBACK) {
- ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
-
- if (hc->tx_t_rto == 0) {
- /*
- * Initial feedback packet: Larger Initial Windows (4.2)
- */
- hc->tx_x = rfc3390_initial_rate(sk);
- hc->tx_t_ld = now;
-
- ccid3_update_send_interval(hc);
-
- goto done_computing_x;
- } else if (hc->tx_p == 0) {
- /*
- * First feedback after nofeedback timer expiry (4.3)
- */
- goto done_computing_x;
- }
- }
-
- /* Update sending rate (step 4 of [RFC 3448, 4.3]) */
- if (hc->tx_p > 0)
- hc->tx_x_calc = tfrc_calc_x(hc->tx_s, hc->tx_rtt, hc->tx_p);
- ccid3_hc_tx_update_x(sk, &now);
-
-done_computing_x:
- ccid3_pr_debug("%s(%p), RTT=%uus (sample=%uus), s=%u, "
- "p=%u, X_calc=%u, X_recv=%u, X=%u\n",
- dccp_role(sk), sk, hc->tx_rtt, r_sample,
- hc->tx_s, hc->tx_p, hc->tx_x_calc,
- (unsigned int)(hc->tx_x_recv >> 6),
- (unsigned int)(hc->tx_x >> 6));
-
- /* unschedule no feedback timer */
- sk_stop_timer(sk, &hc->tx_no_feedback_timer);
-
- /*
- * As we have calculated new values for t_ipi, delta and t_nom, it is
- * possible that we can now send a packet, so wake up dccp_wait_for_ccid
- */
- sk->sk_write_space(sk);
-
- /*
- * Update timeout interval for the nofeedback timer. In order to control
- * rate halving on networks with very low RTTs (<= 1 ms), use the
- * per-route tunable RTAX_RTO_MIN value as the lower bound.
- */
- hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
- USEC_PER_SEC/HZ * tcp_rto_min(sk));
- /*
- * Schedule no feedback timer to expire in
- * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
- */
- t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
-
- ccid3_pr_debug("%s(%p), Scheduled no feedback timer to "
- "expire in %lu jiffies (%luus)\n",
- dccp_role(sk), sk, usecs_to_jiffies(t_nfb), t_nfb);
-
- sk_reset_timer(sk, &hc->tx_no_feedback_timer,
- jiffies + usecs_to_jiffies(t_nfb));
-}
-
-static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
- u8 option, u8 *optval, u8 optlen)
-{
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- __be32 opt_val;
-
- switch (option) {
- case TFRC_OPT_RECEIVE_RATE:
- case TFRC_OPT_LOSS_EVENT_RATE:
- /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
- if (packet_type == DCCP_PKT_DATA)
- break;
- if (unlikely(optlen != 4)) {
- DCCP_WARN("%s(%p), invalid len %d for %u\n",
- dccp_role(sk), sk, optlen, option);
- return -EINVAL;
- }
- opt_val = ntohl(get_unaligned((__be32 *)optval));
-
- if (option == TFRC_OPT_RECEIVE_RATE) {
- /* Receive Rate is kept in units of 64 bytes/second */
- hc->tx_x_recv = opt_val;
- hc->tx_x_recv <<= 6;
-
- ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
- dccp_role(sk), sk, opt_val);
- } else {
- /* Update the fixpoint Loss Event Rate fraction */
- hc->tx_p = tfrc_invert_loss_event_rate(opt_val);
-
- ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
- dccp_role(sk), sk, opt_val);
- }
- }
- return 0;
-}
-
-static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
-{
- struct ccid3_hc_tx_sock *hc = ccid_priv(ccid);
-
- hc->tx_state = TFRC_SSTATE_NO_SENT;
- hc->tx_hist = NULL;
- hc->sk = sk;
- timer_setup(&hc->tx_no_feedback_timer,
- ccid3_hc_tx_no_feedback_timer, 0);
- return 0;
-}
-
-static void ccid3_hc_tx_exit(struct sock *sk)
-{
- struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
-
- sk_stop_timer(sk, &hc->tx_no_feedback_timer);
- tfrc_tx_hist_purge(&hc->tx_hist);
-}
-
-static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
-{
- info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
- info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
-}
-
-static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
- u32 __user *optval, int __user *optlen)
-{
- const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
- struct tfrc_tx_info tfrc;
- const void *val;
-
- switch (optname) {
- case DCCP_SOCKOPT_CCID_TX_INFO:
- if (len < sizeof(tfrc))
- return -EINVAL;
- memset(&tfrc, 0, sizeof(tfrc));
- tfrc.tfrctx_x = hc->tx_x;
- tfrc.tfrctx_x_recv = hc->tx_x_recv;
- tfrc.tfrctx_x_calc = hc->tx_x_calc;
- tfrc.tfrctx_rtt = hc->tx_rtt;
- tfrc.tfrctx_p = hc->tx_p;
- tfrc.tfrctx_rto = hc->tx_t_rto;
- tfrc.tfrctx_ipi = hc->tx_t_ipi;
- len = sizeof(tfrc);
- val = &tfrc;
- break;
- default:
- return -ENOPROTOOPT;
- }
-
- if (put_user(len, optlen) || copy_to_user(optval, val, len))
- return -EFAULT;
-
- return 0;
-}
-
-/*
- * Receiver Half-Connection Routines
- */
-
-/* CCID3 feedback types */
-enum ccid3_fback_type {
- CCID3_FBACK_NONE = 0,
- CCID3_FBACK_INITIAL,
- CCID3_FBACK_PERIODIC,
- CCID3_FBACK_PARAM_CHANGE
-};
-
-#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
-{
- static const char *const ccid3_rx_state_names[] = {
- [TFRC_RSTATE_NO_DATA] = "NO_DATA",
- [TFRC_RSTATE_DATA] = "DATA",
- };
-
- return ccid3_rx_state_names[state];
-}
-#endif
-
-static void ccid3_hc_rx_set_state(struct sock *sk,
- enum ccid3_hc_rx_states state)
-{
- struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
- enum ccid3_hc_rx_states oldstate = hc->rx_state;
-
- ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
- dccp_role(sk), sk, ccid3_rx_state_name(oldstate),
- ccid3_rx_state_name(state));
- WARN_ON(state == oldstate);
- hc->rx_state = state;
-}
-
-static void ccid3_hc_rx_send_feedback(struct sock *sk,
- const struct sk_buff *skb,
- enum ccid3_fback_type fbtype)
-{
- struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- ktime_t now = ktime_get();
- s64 delta = 0;
-
- switch (fbtype) {
- case CCID3_FBACK_INITIAL:
- hc->rx_x_recv = 0;
- hc->rx_pinv = ~0U; /* see RFC 4342, 8.5 */
- break;
- case CCID3_FBACK_PARAM_CHANGE:
- /*
- * When parameters change (new loss or p > p_prev), we do not
- * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
- * need to reuse the previous value of X_recv. However, when
- * X_recv was 0 (due to early loss), this would kill X down to
- * s/t_mbi (i.e. one packet in 64 seconds).
- * To avoid such drastic reduction, we approximate X_recv as
- * the number of bytes since last feedback.
- * This is a safe fallback, since X is bounded above by X_calc.
- */
- if (hc->rx_x_recv > 0)
- break;
- fallthrough;
- case CCID3_FBACK_PERIODIC:
- delta = ktime_us_delta(now, hc->rx_tstamp_last_feedback);
- if (delta <= 0)
- delta = 1;
- hc->rx_x_recv = scaled_div32(hc->rx_bytes_recv, delta);
- break;
- default:
- return;
- }
-
- ccid3_pr_debug("Interval %lldusec, X_recv=%u, 1/p=%u\n", delta,
- hc->rx_x_recv, hc->rx_pinv);
-
- hc->rx_tstamp_last_feedback = now;
- hc->rx_last_counter = dccp_hdr(skb)->dccph_ccval;
- hc->rx_bytes_recv = 0;
-
- dp->dccps_hc_rx_insert_options = 1;
- dccp_send_ack(sk);
-}
-
-static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
-{
- const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
- __be32 x_recv, pinv;
-
- if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
- return 0;
-
- if (dccp_packet_without_ack(skb))
- return 0;
-
- x_recv = htonl(hc->rx_x_recv);
- pinv = htonl(hc->rx_pinv);
-
- if (dccp_insert_option(skb, TFRC_OPT_LOSS_EVENT_RATE,
- &pinv, sizeof(pinv)) ||
- dccp_insert_option(skb, TFRC_OPT_RECEIVE_RATE,
- &x_recv, sizeof(x_recv)))
- return -1;
-
- return 0;
-}
-
-/**
- * ccid3_first_li - Implements [RFC 5348, 6.3.1]
- * @sk: socket to calculate loss interval for
- *
- * Determine the length of the first loss interval via inverse lookup.
- * Assume that X_recv can be computed by the throughput equation
- * X_recv = s / (R * fval)
- * Find some p such that f(p) = fval; return 1/p (scaled).
- */
-static u32 ccid3_first_li(struct sock *sk)
-{
- struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
- u32 x_recv, p;
- s64 delta;
- u64 fval;
-
- if (hc->rx_rtt == 0) {
- DCCP_WARN("No RTT estimate available, using fallback RTT\n");
- hc->rx_rtt = DCCP_FALLBACK_RTT;
- }
-
- delta = ktime_us_delta(ktime_get(), hc->rx_tstamp_last_feedback);
- if (delta <= 0)
- delta = 1;
- x_recv = scaled_div32(hc->rx_bytes_recv, delta);
- if (x_recv == 0) { /* would also trigger divide-by-zero */
- DCCP_WARN("X_recv==0\n");
- if (hc->rx_x_recv == 0) {
- DCCP_BUG("stored value of X_recv is zero");
- return ~0U;
- }
- x_recv = hc->rx_x_recv;
- }
-
- fval = scaled_div(hc->rx_s, hc->rx_rtt);
- fval = scaled_div32(fval, x_recv);
- p = tfrc_calc_x_reverse_lookup(fval);
-
- ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
- "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
-
- return p == 0 ? ~0U : scaled_div(1, p);
-}
-
-static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
-{
- struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
- enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
- const u64 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
- const bool is_data_packet = dccp_data_packet(skb);
-
- if (unlikely(hc->rx_state == TFRC_RSTATE_NO_DATA)) {
- if (is_data_packet) {
- const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
- do_feedback = CCID3_FBACK_INITIAL;
- ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
- hc->rx_s = payload;
- /*
- * Not necessary to update rx_bytes_recv here,
- * since X_recv = 0 for the first feedback packet (cf.
- * RFC 3448, 6.3) -- gerrit
- */
- }
- goto update_records;
- }
-
- if (tfrc_rx_hist_duplicate(&hc->rx_hist, skb))
- return; /* done receiving */
-
- if (is_data_packet) {
- const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
- /*
- * Update moving-average of s and the sum of received payload bytes
- */
- hc->rx_s = tfrc_ewma(hc->rx_s, payload, 9);
- hc->rx_bytes_recv += payload;
- }
-
- /*
- * Perform loss detection and handle pending losses
- */
- if (tfrc_rx_handle_loss(&hc->rx_hist, &hc->rx_li_hist,
- skb, ndp, ccid3_first_li, sk)) {
- do_feedback = CCID3_FBACK_PARAM_CHANGE;
- goto done_receiving;
- }
-
- if (tfrc_rx_hist_loss_pending(&hc->rx_hist))
- return; /* done receiving */
-
- /*
- * Handle data packets: RTT sampling and monitoring p
- */
- if (unlikely(!is_data_packet))
- goto update_records;
-
- if (!tfrc_lh_is_initialised(&hc->rx_li_hist)) {
- const u32 sample = tfrc_rx_hist_sample_rtt(&hc->rx_hist, skb);
- /*
- * Empty loss history: no loss so far, hence p stays 0.
- * Sample RTT values, since an RTT estimate is required for the
- * computation of p when the first loss occurs; RFC 3448, 6.3.1.
- */
- if (sample != 0)
- hc->rx_rtt = tfrc_ewma(hc->rx_rtt, sample, 9);
-
- } else if (tfrc_lh_update_i_mean(&hc->rx_li_hist, skb)) {
- /*
- * Step (3) of [RFC 3448, 6.1]: Recompute I_mean and, if I_mean
- * has decreased (resp. p has increased), send feedback now.
- */
- do_feedback = CCID3_FBACK_PARAM_CHANGE;
- }
-
- /*
- * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
- */
- if (SUB16(dccp_hdr(skb)->dccph_ccval, hc->rx_last_counter) > 3)
- do_feedback = CCID3_FBACK_PERIODIC;
-
-update_records:
- tfrc_rx_hist_add_packet(&hc->rx_hist, skb, ndp);
-
-done_receiving:
- if (do_feedback)
- ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
-}
-
-static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
-{
- struct ccid3_hc_rx_sock *hc = ccid_priv(ccid);
-
- hc->rx_state = TFRC_RSTATE_NO_DATA;
- tfrc_lh_init(&hc->rx_li_hist);
- return tfrc_rx_hist_alloc(&hc->rx_hist);
-}
-
-static void ccid3_hc_rx_exit(struct sock *sk)
-{
- struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
-
- tfrc_rx_hist_purge(&hc->rx_hist);
- tfrc_lh_cleanup(&hc->rx_li_hist);
-}
-
-static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
-{
- info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state;
- info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
- info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt;
-}
-
-static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
- u32 __user *optval, int __user *optlen)
-{
- const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
- struct tfrc_rx_info rx_info;
- const void *val;
-
- switch (optname) {
- case DCCP_SOCKOPT_CCID_RX_INFO:
- if (len < sizeof(rx_info))
- return -EINVAL;
- rx_info.tfrcrx_x_recv = hc->rx_x_recv;
- rx_info.tfrcrx_rtt = hc->rx_rtt;
- rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv);
- len = sizeof(rx_info);
- val = &rx_info;
- break;
- default:
- return -ENOPROTOOPT;
- }
-
- if (put_user(len, optlen) || copy_to_user(optval, val, len))
- return -EFAULT;
-
- return 0;
-}
-
-struct ccid_operations ccid3_ops = {
- .ccid_id = DCCPC_CCID3,
- .ccid_name = "TCP-Friendly Rate Control",
- .ccid_hc_tx_obj_size = sizeof(struct ccid3_hc_tx_sock),
- .ccid_hc_tx_init = ccid3_hc_tx_init,
- .ccid_hc_tx_exit = ccid3_hc_tx_exit,
- .ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
- .ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
- .ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
- .ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
- .ccid_hc_rx_obj_size = sizeof(struct ccid3_hc_rx_sock),
- .ccid_hc_rx_init = ccid3_hc_rx_init,
- .ccid_hc_rx_exit = ccid3_hc_rx_exit,
- .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
- .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
- .ccid_hc_rx_get_info = ccid3_hc_rx_get_info,
- .ccid_hc_tx_get_info = ccid3_hc_tx_get_info,
- .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt,
- .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt,
-};
-
-#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
-module_param(ccid3_debug, bool, 0644);
-MODULE_PARM_DESC(ccid3_debug, "Enable CCID-3 debug messages");
-#endif
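ccid3_update_send_interval() above computes the inter-packet interval t_ipi = s / X_inst, where X is stored in units of 64 bytes/second. The sketch below assumes scaled_div32(a, b) behaves like a * USEC_PER_SEC / b, which is how the TFRC library helper is used here; under that assumption the << 6 on s cancels the 2^6 scaling of X, and t_ipi comes out in microseconds. A userspace illustration with made-up numbers, not the kernel code:

#include <stdio.h>
#include <stdint.h>

#define USEC_PER_SEC 1000000ULL

/* assumed semantics of the TFRC helper: (a / b) scaled to microseconds */
static uint32_t scaled_div32(uint64_t a, uint64_t b)
{
	return (uint32_t)(a * USEC_PER_SEC / b);	/* ignores overflow */
}

int main(void)
{
	uint32_t s = 1460;			/* mean packet size, bytes */
	uint64_t x = (uint64_t)100000 << 6;	/* X = 100 kB/s, scaled by 2^6 */
	uint32_t t_ipi = scaled_div32((uint64_t)s << 6, x);

	/* 1460 bytes at 100 kB/s -> one packet every 14600 us */
	printf("t_ipi = %u us\n", (unsigned int)t_ipi);
	return 0;
}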
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
deleted file mode 100644
index 02e0fc9f6334..000000000000
--- a/net/dccp/ccids/ccid3.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
- *
- * An implementation of the DCCP protocol
- *
- * This code has been developed by the University of Waikato WAND
- * research group. For further information please see https://www.wand.net.nz/
- * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
- *
- * This code also uses code from Lulea University, rereleased as GPL by its
- * authors:
- * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- *
- * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
- * and to make it work as a loadable module in the DCCP stack written by
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
- *
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-#ifndef _DCCP_CCID3_H_
-#define _DCCP_CCID3_H_
-
-#include <linux/ktime.h>
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/tfrc.h>
-#include "lib/tfrc.h"
-#include "../ccid.h"
-
-/* Two seconds as per RFC 5348, 4.2 */
-#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
-
-/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
-#define TFRC_T_MBI 64
-
-/*
- * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are
- * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
- * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
- * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
- * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
- */
-#if (HZ >= 500)
-# define TFRC_T_DELTA USEC_PER_MSEC
-#else
-# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
-#endif
-
-enum ccid3_options {
- TFRC_OPT_LOSS_EVENT_RATE = 192,
- TFRC_OPT_LOSS_INTERVALS = 193,
- TFRC_OPT_RECEIVE_RATE = 194,
-};
-
-/* TFRC sender states */
-enum ccid3_hc_tx_states {
- TFRC_SSTATE_NO_SENT = 1,
- TFRC_SSTATE_NO_FBACK,
- TFRC_SSTATE_FBACK,
-};
-
-/**
- * struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
- * @tx_x: Current sending rate in 64 * bytes per second
- * @tx_x_recv: Receive rate in 64 * bytes per second
- * @tx_x_calc: Calculated rate in bytes per second
- * @tx_rtt: Estimate of current round trip time in usecs
- * @tx_p: Current loss event rate (0-1) scaled by 1000000
- * @tx_s: Packet size in bytes
- * @tx_t_rto: Nofeedback Timer setting in usecs
- * @tx_t_ipi: Interpacket (send) interval (RFC 3448, 4.6) in usecs
- * @tx_state: Sender state, one of %ccid3_hc_tx_states
- * @tx_last_win_count: Last window counter sent
- * @tx_t_last_win_count: Timestamp of earliest packet
- * with last_win_count value sent
- * @tx_no_feedback_timer: Handle to no feedback timer
- * @tx_t_ld: Time last doubled during slow start
- * @tx_t_nom: Nominal send time of next packet
- * @tx_hist: Packet history
- */
-struct ccid3_hc_tx_sock {
- u64 tx_x;
- u64 tx_x_recv;
- u32 tx_x_calc;
- u32 tx_rtt;
- u32 tx_p;
- u32 tx_t_rto;
- u32 tx_t_ipi;
- u16 tx_s;
- enum ccid3_hc_tx_states tx_state:8;
- u8 tx_last_win_count;
- ktime_t tx_t_last_win_count;
- struct timer_list tx_no_feedback_timer;
- struct sock *sk;
- ktime_t tx_t_ld;
- ktime_t tx_t_nom;
- struct tfrc_tx_hist_entry *tx_hist;
-};
-
-static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
-{
- struct ccid3_hc_tx_sock *hctx = ccid_priv(dccp_sk(sk)->dccps_hc_tx_ccid);
- BUG_ON(hctx == NULL);
- return hctx;
-}
-
-/* TFRC receiver states */
-enum ccid3_hc_rx_states {
- TFRC_RSTATE_NO_DATA = 1,
- TFRC_RSTATE_DATA,
-};
-
-/**
- * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
- * @rx_last_counter: Tracks window counter (RFC 4342, 8.1)
- * @rx_state: Receiver state, one of %ccid3_hc_rx_states
- * @rx_bytes_recv: Total sum of DCCP payload bytes
- * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3)
- * @rx_rtt: Receiver estimate of RTT
- * @rx_tstamp_last_feedback: Time at which last feedback was sent
- * @rx_hist: Packet history (loss detection + RTT sampling)
- * @rx_li_hist: Loss Interval database
- * @rx_s: Received packet size in bytes
- * @rx_pinv: Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
- */
-struct ccid3_hc_rx_sock {
- u8 rx_last_counter:4;
- enum ccid3_hc_rx_states rx_state:8;
- u32 rx_bytes_recv;
- u32 rx_x_recv;
- u32 rx_rtt;
- ktime_t rx_tstamp_last_feedback;
- struct tfrc_rx_hist rx_hist;
- struct tfrc_loss_hist rx_li_hist;
- u16 rx_s;
-#define rx_pinv rx_li_hist.i_mean
-};
-
-static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
-{
- struct ccid3_hc_rx_sock *hcrx = ccid_priv(dccp_sk(sk)->dccps_hc_rx_ccid);
- BUG_ON(hcrx == NULL);
- return hcrx;
-}
-
-#endif /* _DCCP_CCID3_H_ */
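The window counter (CCVal) carried in the DCCP header is updated by ccid3_hc_tx_update_win_count() in ccid3.c above: it advances by one per quarter-RTT since the last update, by at most 5 per packet, modulo 16 (RFC 4342, 8.1). A userspace sketch of that arithmetic with made-up send times:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t rtt_us = 100000;	/* assume a 100 ms RTT */
	uint8_t ccval = 0;		/* 4-bit window counter */
	uint64_t t_last = 0;		/* time of last counter update */
	uint64_t sends[] = { 30000, 60000, 200000, 1000000 };	/* in us */

	for (unsigned int i = 0; i < sizeof(sends) / sizeof(sends[0]); i++) {
		uint32_t delta = (uint32_t)(sends[i] - t_last);
		uint32_t quarter_rtts = (4 * delta) / rtt_us;

		if (quarter_rtts > 0) {		/* at least 1/4 RTT elapsed */
			t_last = sends[i];
			ccval = (ccval + (quarter_rtts < 5 ? quarter_rtts : 5)) & 0xF;
		}
		printf("t=%llu us -> ccval=%u\n",
		       (unsigned long long)sends[i], ccval);
	}
	return 0;	/* prints ccval = 1, 2, 7, 12 */
}

The cap of 5 and the mod-16 wrap are what make SUB16(ccval, rx_last_counter) > 3 in the receiver (see ccid3_hc_rx_packet_recv() above) a usable "roughly one RTT has passed" test.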
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
deleted file mode 100644
index da95319842bb..000000000000
--- a/net/dccp/ccids/lib/loss_interval.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
- * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-#include <net/sock.h>
-#include "tfrc.h"
-
-static struct kmem_cache *tfrc_lh_slab __read_mostly;
-/* Loss Interval weights from [RFC 3448, 5.4], scaled by 10 */
-static const int tfrc_lh_weights[NINTERVAL] = { 10, 10, 10, 10, 8, 6, 4, 2 };
-
-/* implements LIFO semantics on the array */
-static inline u8 LIH_INDEX(const u8 ctr)
-{
- return LIH_SIZE - 1 - (ctr % LIH_SIZE);
-}
-
-/* the `counter' index always points at the next entry to be populated */
-static inline struct tfrc_loss_interval *tfrc_lh_peek(struct tfrc_loss_hist *lh)
-{
- return lh->counter ? lh->ring[LIH_INDEX(lh->counter - 1)] : NULL;
-}
-
-/* given i with 0 <= i <= k, return I_i as per the rfc3448bis notation */
-static inline u32 tfrc_lh_get_interval(struct tfrc_loss_hist *lh, const u8 i)
-{
- BUG_ON(i >= lh->counter);
- return lh->ring[LIH_INDEX(lh->counter - i - 1)]->li_length;
-}
-
-/*
- * On-demand allocation and de-allocation of entries
- */
-static struct tfrc_loss_interval *tfrc_lh_demand_next(struct tfrc_loss_hist *lh)
-{
- if (lh->ring[LIH_INDEX(lh->counter)] == NULL)
- lh->ring[LIH_INDEX(lh->counter)] = kmem_cache_alloc(tfrc_lh_slab,
- GFP_ATOMIC);
- return lh->ring[LIH_INDEX(lh->counter)];
-}
-
-void tfrc_lh_cleanup(struct tfrc_loss_hist *lh)
-{
- if (!tfrc_lh_is_initialised(lh))
- return;
-
- for (lh->counter = 0; lh->counter < LIH_SIZE; lh->counter++)
- if (lh->ring[LIH_INDEX(lh->counter)] != NULL) {
- kmem_cache_free(tfrc_lh_slab,
- lh->ring[LIH_INDEX(lh->counter)]);
- lh->ring[LIH_INDEX(lh->counter)] = NULL;
- }
-}
-
-static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
-{
- u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0;
- int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */
-
- if (k <= 0)
- return;
-
- for (i = 0; i <= k; i++) {
- i_i = tfrc_lh_get_interval(lh, i);
-
- if (i < k) {
- i_tot0 += i_i * tfrc_lh_weights[i];
- w_tot += tfrc_lh_weights[i];
- }
- if (i > 0)
- i_tot1 += i_i * tfrc_lh_weights[i-1];
- }
-
- lh->i_mean = max(i_tot0, i_tot1) / w_tot;
-}
-
-/**
- * tfrc_lh_update_i_mean - Update the `open' loss interval I_0
- * @lh: histogram to update
- * @skb: received socket triggering loss interval update
- *
- * For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev
- */
-u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
-{
- struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
- u32 old_i_mean = lh->i_mean;
- s64 len;
-
- if (cur == NULL) /* not initialised */
- return 0;
-
- len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
-
- if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */
- return 0;
-
- if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
- /*
- * Implements RFC 4342, 10.2:
- * If a packet S (skb) exists whose seqno comes `after' the one
- * starting the current loss interval (cur) and if the modulo-16
- * distance from C(cur) to C(S) is greater than 4, consider all
- * subsequent packets as belonging to a new loss interval. This
- * test is necessary since CCVal may wrap between intervals.
- */
- cur->li_is_closed = 1;
-
- if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */
- return 0;
-
- cur->li_length = len;
- tfrc_lh_calc_i_mean(lh);
-
- return lh->i_mean < old_i_mean;
-}
-
-/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
-static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
- struct tfrc_rx_hist_entry *new_loss)
-{
- return dccp_delta_seqno(cur->li_seqno, new_loss->tfrchrx_seqno) > 0 &&
- (cur->li_is_closed || SUB16(new_loss->tfrchrx_ccval, cur->li_ccval) > 4);
-}
-
-/**
- * tfrc_lh_interval_add - Insert new record into the Loss Interval database
- * @lh: Loss Interval database
- * @rh: Receive history containing a fresh loss event
- * @calc_first_li: Caller-dependent routine to compute length of first interval
- * @sk: Used by @calc_first_li in caller-specific way (subtyping)
- *
- * Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
- */
-int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
- u32 (*calc_first_li)(struct sock *), struct sock *sk)
-{
- struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
-
- if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
- return 0;
-
- new = tfrc_lh_demand_next(lh);
- if (unlikely(new == NULL)) {
- DCCP_CRIT("Cannot allocate/add loss record.");
- return 0;
- }
-
- new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
- new->li_ccval = tfrc_rx_hist_loss_prev(rh)->tfrchrx_ccval;
- new->li_is_closed = 0;
-
- if (++lh->counter == 1)
- lh->i_mean = new->li_length = (*calc_first_li)(sk);
- else {
- cur->li_length = dccp_delta_seqno(cur->li_seqno, new->li_seqno);
- new->li_length = dccp_delta_seqno(new->li_seqno,
- tfrc_rx_hist_last_rcv(rh)->tfrchrx_seqno) + 1;
- if (lh->counter > (2*LIH_SIZE))
- lh->counter -= LIH_SIZE;
-
- tfrc_lh_calc_i_mean(lh);
- }
- return 1;
-}
-
-int __init tfrc_li_init(void)
-{
- tfrc_lh_slab = kmem_cache_create("tfrc_li_hist",
- sizeof(struct tfrc_loss_interval), 0,
- SLAB_HWCACHE_ALIGN, NULL);
- return tfrc_lh_slab == NULL ? -ENOBUFS : 0;
-}
-
-void tfrc_li_exit(void)
-{
- if (tfrc_lh_slab != NULL) {
- kmem_cache_destroy(tfrc_lh_slab);
- tfrc_lh_slab = NULL;
- }
-}
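tfrc_lh_calc_i_mean() above implements the weighted average of RFC 3448, 5.4: it computes two weighted sums over the most recent loss intervals, one including the open interval I_0 and one excluding it, and takes the larger, so that a long loss-free open interval can only raise I_mean (and thus lower p). A worked userspace sketch with made-up interval lengths:

#include <stdio.h>
#include <stdint.h>

/* the RFC 3448, 5.4 weights {1, 1, 1, 1, 0.8, 0.6, 0.4, 0.2}, scaled by 10 */
static const int w[8] = { 10, 10, 10, 10, 8, 6, 4, 2 };

int main(void)
{
	/* newest first; intervals[0] is the open interval I_0 */
	uint32_t intervals[] = { 50, 200, 180, 220, 190 };
	int k = sizeof(intervals) / sizeof(intervals[0]) - 1;
	uint32_t i_tot0 = 0, i_tot1 = 0, w_tot = 0;

	for (int i = 0; i <= k; i++) {
		if (i < k) {			/* sum including I_0..I_{k-1} */
			i_tot0 += intervals[i] * w[i];
			w_tot += w[i];
		}
		if (i > 0)			/* sum excluding I_0 */
			i_tot1 += intervals[i] * w[i - 1];
	}
	/* here I_0 = 50 is short, so the history-only sum wins: 7900/40 = 197 */
	printf("I_mean = %u\n",
	       (unsigned int)((i_tot0 > i_tot1 ? i_tot0 : i_tot1) / w_tot));
	return 0;
}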
diff --git a/net/dccp/ccids/lib/loss_interval.h b/net/dccp/ccids/lib/loss_interval.h
deleted file mode 100644
index c3d95f85e43b..000000000000
--- a/net/dccp/ccids/lib/loss_interval.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _DCCP_LI_HIST_
-#define _DCCP_LI_HIST_
-/*
- * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
- * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-#include <linux/ktime.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-
-/*
- * Number of loss intervals (RFC 4342, 8.6.1). The history size is one more than
- * NINTERVAL, since the `open' interval I_0 is always stored as the first entry.
- */
-#define NINTERVAL 8
-#define LIH_SIZE (NINTERVAL + 1)
-
-/**
- * tfrc_loss_interval - Loss history record for TFRC-based protocols
- * @li_seqno: Highest received seqno before the start of loss
- * @li_ccval: The CCVal belonging to @li_seqno
- * @li_is_closed: Whether @li_seqno is older than 1 RTT
- * @li_length: Loss interval sequence length
- */
-struct tfrc_loss_interval {
- u64 li_seqno:48,
- li_ccval:4,
- li_is_closed:1;
- u32 li_length;
-};
-
-/**
- * tfrc_loss_hist - Loss record database
- * @ring: Circular queue managed in LIFO manner
- * @counter: Current count of entries (can be more than %LIH_SIZE)
- * @i_mean: Current Average Loss Interval [RFC 3448, 5.4]
- */
-struct tfrc_loss_hist {
- struct tfrc_loss_interval *ring[LIH_SIZE];
- u8 counter;
- u32 i_mean;
-};
-
-static inline void tfrc_lh_init(struct tfrc_loss_hist *lh)
-{
- memset(lh, 0, sizeof(struct tfrc_loss_hist));
-}
-
-static inline u8 tfrc_lh_is_initialised(struct tfrc_loss_hist *lh)
-{
- return lh->counter > 0;
-}
-
-static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
-{
- return min(lh->counter, (u8)LIH_SIZE);
-}
-
-struct tfrc_rx_hist;
-
-int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
- u32 (*first_li)(struct sock *), struct sock *);
-u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
-void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
-
-#endif /* _DCCP_LI_HIST_ */
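The loss-interval ring in loss_interval.c above never moves entries: LIH_INDEX() maps the monotonically increasing counter onto the 9-slot array so that the entry for the most recent interval is always at LIH_INDEX(counter - 1), giving LIFO semantics for free. A quick userspace illustration of the mapping:

#include <stdio.h>

#define LIH_SIZE 9	/* NINTERVAL + 1, as above */

static unsigned int lih_index(unsigned int ctr)
{
	return LIH_SIZE - 1 - (ctr % LIH_SIZE);
}

int main(void)
{
	/* slot 8 is filled first, then 7, 6, ... wrapping back to 8 */
	for (unsigned int ctr = 0; ctr < 12; ctr++)
		printf("counter=%2u -> slot %u\n", ctr, lih_index(ctr));
	return 0;
}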
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
deleted file mode 100644
index 0cdda3c66fb5..000000000000
--- a/net/dccp/ccids/lib/packet_history.c
+++ /dev/null
@@ -1,439 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
- * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
- *
- * An implementation of the DCCP protocol
- *
- * This code has been developed by the University of Waikato WAND
- * research group. For further information please see https://www.wand.net.nz/
- * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
- *
- * This code also uses code from Lulea University, re-released as GPL by its
- * authors:
- * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- *
- * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
- * and to make it work as a loadable module in the DCCP stack written by
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
- *
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#include <linux/string.h>
-#include <linux/slab.h>
-#include "packet_history.h"
-#include "../../dccp.h"
-
-/*
- * Transmitter History Routines
- */
-static struct kmem_cache *tfrc_tx_hist_slab;
-
-int __init tfrc_tx_packet_history_init(void)
-{
- tfrc_tx_hist_slab = kmem_cache_create("tfrc_tx_hist",
- sizeof(struct tfrc_tx_hist_entry),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- return tfrc_tx_hist_slab == NULL ? -ENOBUFS : 0;
-}
-
-void tfrc_tx_packet_history_exit(void)
-{
- if (tfrc_tx_hist_slab != NULL) {
- kmem_cache_destroy(tfrc_tx_hist_slab);
- tfrc_tx_hist_slab = NULL;
- }
-}
-
-int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
-{
- struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
-
- if (entry == NULL)
- return -ENOBUFS;
- entry->seqno = seqno;
- entry->stamp = ktime_get_real();
- entry->next = *headp;
- *headp = entry;
- return 0;
-}
-
-void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
-{
- struct tfrc_tx_hist_entry *head = *headp;
-
- while (head != NULL) {
- struct tfrc_tx_hist_entry *next = head->next;
-
- kmem_cache_free(tfrc_tx_hist_slab, head);
- head = next;
- }
-
- *headp = NULL;
-}
-
-/*
- * Receiver History Routines
- */
-static struct kmem_cache *tfrc_rx_hist_slab;
-
-int __init tfrc_rx_packet_history_init(void)
-{
- tfrc_rx_hist_slab = kmem_cache_create("tfrc_rxh_cache",
- sizeof(struct tfrc_rx_hist_entry),
- 0, SLAB_HWCACHE_ALIGN, NULL);
- return tfrc_rx_hist_slab == NULL ? -ENOBUFS : 0;
-}
-
-void tfrc_rx_packet_history_exit(void)
-{
- if (tfrc_rx_hist_slab != NULL) {
- kmem_cache_destroy(tfrc_rx_hist_slab);
- tfrc_rx_hist_slab = NULL;
- }
-}
-
-static inline void tfrc_rx_hist_entry_from_skb(struct tfrc_rx_hist_entry *entry,
- const struct sk_buff *skb,
- const u64 ndp)
-{
- const struct dccp_hdr *dh = dccp_hdr(skb);
-
- entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
- entry->tfrchrx_ccval = dh->dccph_ccval;
- entry->tfrchrx_type = dh->dccph_type;
- entry->tfrchrx_ndp = ndp;
- entry->tfrchrx_tstamp = ktime_get_real();
-}
-
-void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
- const struct sk_buff *skb,
- const u64 ndp)
-{
- struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
-
- tfrc_rx_hist_entry_from_skb(entry, skb, ndp);
-}
-
-/* has the packet contained in skb been seen before? */
-int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
-{
- const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
- int i;
-
- if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
- return 1;
-
- for (i = 1; i <= h->loss_count; i++)
- if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
- return 1;
-
- return 0;
-}
-
-static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
-{
- const u8 idx_a = tfrc_rx_hist_index(h, a),
- idx_b = tfrc_rx_hist_index(h, b);
-
- swap(h->ring[idx_a], h->ring[idx_b]);
-}
-
-/*
- * Private helper functions for loss detection.
- *
- * In the descriptions, `Si' refers to the sequence number of entry number i,
- * whose NDP count is `Ni' (lower case is used for variables).
- * Note: All __xxx_loss functions expect that a test against duplicates has been
- * performed already: the seqno of the skb must not be less than the seqno
- * of loss_prev; and it must not equal that of any valid history entry.
- */
-static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
-{
- u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
- s1 = DCCP_SKB_CB(skb)->dccpd_seq;
-
- if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */
- h->loss_count = 1;
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
- }
-}
-
-static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
-{
- u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
- s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
- s2 = DCCP_SKB_CB(skb)->dccpd_seq;
-
- if (likely(dccp_delta_seqno(s1, s2) > 0)) { /* S1 < S2 */
- h->loss_count = 2;
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n2);
- return;
- }
-
- /* S0 < S2 < S1 */
-
- if (dccp_loss_free(s0, s2, n2)) {
- u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
-
- if (dccp_loss_free(s2, s1, n1)) {
- /* hole is filled: S0, S2, and S1 are consecutive */
- h->loss_count = 0;
- h->loss_start = tfrc_rx_hist_index(h, 1);
- } else
- /* gap between S2 and S1: just update loss_prev */
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
-
- } else { /* gap between S0 and S2 */
- /*
- * Reorder history to insert S2 between S0 and S1
- */
- tfrc_rx_hist_swap(h, 0, 3);
- h->loss_start = tfrc_rx_hist_index(h, 3);
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n2);
- h->loss_count = 2;
- }
-}
-
-/* return 1 if a new loss event has been identified */
-static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
-{
- u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
- s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
- s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
- s3 = DCCP_SKB_CB(skb)->dccpd_seq;
-
- if (likely(dccp_delta_seqno(s2, s3) > 0)) { /* S2 < S3 */
- h->loss_count = 3;
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 3), skb, n3);
- return 1;
- }
-
- /* S3 < S2 */
-
- if (dccp_delta_seqno(s1, s3) > 0) { /* S1 < S3 < S2 */
- /*
- * Reorder history to insert S3 between S1 and S2
- */
- tfrc_rx_hist_swap(h, 2, 3);
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 2), skb, n3);
- h->loss_count = 3;
- return 1;
- }
-
- /* S0 < S3 < S1 */
-
- if (dccp_loss_free(s0, s3, n3)) {
- u64 n1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_ndp;
-
- if (dccp_loss_free(s3, s1, n1)) {
- /* hole between S0 and S1 filled by S3 */
- u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp;
-
- if (dccp_loss_free(s1, s2, n2)) {
- /* entire hole filled by S0, S3, S1, S2 */
- h->loss_start = tfrc_rx_hist_index(h, 2);
- h->loss_count = 0;
- } else {
- /* gap remains between S1 and S2 */
- h->loss_start = tfrc_rx_hist_index(h, 1);
- h->loss_count = 1;
- }
-
- } else /* gap exists between S3 and S1, loss_count stays at 2 */
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n3);
-
- return 0;
- }
-
- /*
- * The remaining case: S0 < S3 < S1 < S2; gap between S0 and S3
- * Reorder history to insert S3 between S0 and S1.
- */
- tfrc_rx_hist_swap(h, 0, 3);
- h->loss_start = tfrc_rx_hist_index(h, 3);
- tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n3);
- h->loss_count = 3;
-
- return 1;
-}
-
-/* recycle RX history records to continue loss detection if necessary */
-static void __three_after_loss(struct tfrc_rx_hist *h)
-{
- /*
- * At this stage we know already that there is a gap between S0 and S1
- * (since S0 was the highest sequence number received before detecting
- * the loss). To recycle the loss record, it is thus only necessary to
- * check for other possible gaps between S1/S2 and between S2/S3.
- */
- u64 s1 = tfrc_rx_hist_entry(h, 1)->tfrchrx_seqno,
- s2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_seqno,
- s3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_seqno;
- u64 n2 = tfrc_rx_hist_entry(h, 2)->tfrchrx_ndp,
- n3 = tfrc_rx_hist_entry(h, 3)->tfrchrx_ndp;
-
- if (dccp_loss_free(s1, s2, n2)) {
-
- if (dccp_loss_free(s2, s3, n3)) {
- /* no gap between S2 and S3: entire hole is filled */
- h->loss_start = tfrc_rx_hist_index(h, 3);
- h->loss_count = 0;
- } else {
- /* gap between S2 and S3 */
- h->loss_start = tfrc_rx_hist_index(h, 2);
- h->loss_count = 1;
- }
-
- } else { /* gap between S1 and S2 */
- h->loss_start = tfrc_rx_hist_index(h, 1);
- h->loss_count = 2;
- }
-}
-
-/**
- * tfrc_rx_handle_loss - Loss detection and further processing
- * @h: The non-empty RX history object
- * @lh: Loss Intervals database to update
- * @skb: Currently received packet
- * @ndp: The NDP count belonging to @skb
- * @calc_first_li: Caller-dependent computation of first loss interval in @lh
- * @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
- *
- * Chooses action according to pending loss, updates LI database when a new
- * loss was detected, and does required post-processing. Returns 1 when caller
- * should send feedback, 0 otherwise.
- * Since it also takes care of reordering during loss detection and updates the
- * records accordingly, the caller should not perform any more RX history
- * operations when loss_count is greater than 0 after calling this function.
- */
-int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
- struct tfrc_loss_hist *lh,
- struct sk_buff *skb, const u64 ndp,
- u32 (*calc_first_li)(struct sock *), struct sock *sk)
-{
- int is_new_loss = 0;
-
- if (h->loss_count == 0) {
- __do_track_loss(h, skb, ndp);
- } else if (h->loss_count == 1) {
- __one_after_loss(h, skb, ndp);
- } else if (h->loss_count != 2) {
- DCCP_BUG("invalid loss_count %d", h->loss_count);
- } else if (__two_after_loss(h, skb, ndp)) {
- /*
- * Update Loss Interval database and recycle RX records
- */
- is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
- __three_after_loss(h);
- }
- return is_new_loss;
-}
-
-int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
-{
- int i;
-
- for (i = 0; i <= TFRC_NDUPACK; i++) {
- h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
- if (h->ring[i] == NULL)
- goto out_free;
- }
-
- h->loss_count = h->loss_start = 0;
- return 0;
-
-out_free:
- while (i-- != 0) {
- kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
- h->ring[i] = NULL;
- }
- return -ENOBUFS;
-}
-
-void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
-{
- int i;
-
- for (i = 0; i <= TFRC_NDUPACK; ++i)
- if (h->ring[i] != NULL) {
- kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
- h->ring[i] = NULL;
- }
-}
-
-/**
- * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
- * @h: The non-empty RX history object
- */
-static inline struct tfrc_rx_hist_entry *
- tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
-{
- return h->ring[0];
-}
-
-/**
- * tfrc_rx_hist_rtt_prev_s - previously suitable (wrt rtt_last_s) RTT-sampling entry
- * @h: The non-empty RX history object
- */
-static inline struct tfrc_rx_hist_entry *
- tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
-{
- return h->ring[h->rtt_sample_prev];
-}
-
-/**
- * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
- * @h: The non-empty RX history object
- * @skb: packet containing timestamp.
- *
- * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
- * to compute a sample with given data - calling function should check this.
- */
-u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
-{
- u32 sample = 0,
- delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
-
- if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
- if (h->rtt_sample_prev == 2) { /* previous candidate stored */
- sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
- if (sample)
- sample = 4 / sample *
- ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
- else /*
- * FIXME: This condition is in principle not
- * possible but occurs when CCID is used for
- * two-way data traffic. I have tried to trace
- * it, but the cause does not seem to be here.
- */
- DCCP_BUG("please report to dccp@vger.kernel.org"
- " => prev = %u, last = %u",
- tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
- tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
- } else if (delta_v < 1) {
- h->rtt_sample_prev = 1;
- goto keep_ref_for_next_time;
- }
-
- } else if (delta_v == 4) /* optimal match */
- sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
- else { /* suboptimal match */
- h->rtt_sample_prev = 2;
- goto keep_ref_for_next_time;
- }
-
- if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
- DCCP_WARN("RTT sample %u too large, using max\n", sample);
- sample = DCCP_SANE_RTT_MAX;
- }
-
- h->rtt_sample_prev = 0; /* use current entry as next reference */
-keep_ref_for_next_time:
-
- return sample;
-}
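
The arithmetic at the heart of tfrc_rx_hist_sample_rtt() above: per RFC 4342, 8.1 the sender advances the 4-bit CCVal window counter once per quarter-RTT, so a mod-16 CCVal delta d in 1..4 means d quarter-RTTs separate the two receive timestamps, and RTT ~= elapsed * 4 / d. A user-space sketch with a hypothetical rtt_from_ccval(); note the kernel's `4 / delta * elapsed` rounds the factor first, which is coarser for delta == 3:

#include <stdint.h>
#include <stdio.h>

#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)   /* mod-16 delta */

static uint32_t rtt_from_ccval(uint8_t ccval_new, uint8_t ccval_ref,
                               uint32_t elapsed_us)
{
        uint32_t d = SUB16(ccval_new, ccval_ref);

        if (d < 1 || d > 4)             /* unsuitable delta: no sample */
                return 0;
        return (uint32_t)((uint64_t)elapsed_us * 4 / d);
}

int main(void)
{
        /* CCVal wrapped from 14 to 2: delta = 4, one full RTT elapsed */
        printf("%u us\n", rtt_from_ccval(2, 14, 120000));       /* 120000 */
        /* delta = 2: two quarter-RTTs, so scale the elapsed time by 2 */
        printf("%u us\n", rtt_from_ccval(1, 15, 60000));        /* 120000 */
        return 0;
}
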
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
deleted file mode 100644
index 159cc9326eab..000000000000
--- a/net/dccp/ccids/lib/packet_history.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Packet RX/TX history data structures and routines for TFRC-based protocols.
- *
- * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
- * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
- *
- * This code has been developed by the University of Waikato WAND
- * research group. For further information please see https://www.wand.net.nz/
- * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz
- *
- * This code also uses code from Lulea University, re-released as GPL by its
- * authors:
- * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- *
- * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
- * and to make it work as a loadable module in the DCCP stack written by
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
- *
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#ifndef _DCCP_PKT_HIST_
-#define _DCCP_PKT_HIST_
-
-#include <linux/list.h>
-#include <linux/slab.h>
-#include "tfrc.h"
-
-/**
- * tfrc_tx_hist_entry - Simple singly-linked TX history list
- * @next: next oldest entry (LIFO order)
- * @seqno: sequence number of this entry
- * @stamp: send time of packet with sequence number @seqno
- */
-struct tfrc_tx_hist_entry {
- struct tfrc_tx_hist_entry *next;
- u64 seqno;
- ktime_t stamp;
-};
-
-static inline struct tfrc_tx_hist_entry *
- tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
-{
- while (head != NULL && head->seqno != seqno)
- head = head->next;
- return head;
-}
-
-int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
-void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
-
-/* Subtraction a-b modulo-16, respects circular wrap-around */
-#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
-
-/* Number of packets to wait after a missing packet (RFC 4342, 6.1) */
-#define TFRC_NDUPACK 3
-
-/**
- * tfrc_rx_hist_entry - Store information about a single received packet
- * @tfrchrx_seqno: DCCP packet sequence number
- * @tfrchrx_ccval: window counter value of packet (RFC 4342, 8.1)
- * @tfrchrx_ndp: the NDP count (if any) of the packet
- * @tfrchrx_tstamp: actual receive time of packet
- */
-struct tfrc_rx_hist_entry {
- u64 tfrchrx_seqno:48,
- tfrchrx_ccval:4,
- tfrchrx_type:4;
- u64 tfrchrx_ndp:48;
- ktime_t tfrchrx_tstamp;
-};
-
-/**
- * tfrc_rx_hist - RX history structure for TFRC-based protocols
- * @ring: Packet history for RTT sampling and loss detection
- * @loss_count: Number of entries in circular history
- * @loss_start: Movable index (for loss detection)
- * @rtt_sample_prev: Used during RTT sampling, points to candidate entry
- */
-struct tfrc_rx_hist {
- struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
- u8 loss_count:2,
- loss_start:2;
-#define rtt_sample_prev loss_start
-};
-
-/**
- * tfrc_rx_hist_index - index to reach n-th entry after loss_start
- */
-static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
-{
- return (h->loss_start + n) & TFRC_NDUPACK;
-}
-
-/**
- * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
- */
-static inline struct tfrc_rx_hist_entry *
- tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
-{
- return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
-}
-
-/**
- * tfrc_rx_hist_entry - return the n-th history entry after loss_start
- */
-static inline struct tfrc_rx_hist_entry *
- tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
-{
- return h->ring[tfrc_rx_hist_index(h, n)];
-}
-
-/**
- * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
- */
-static inline struct tfrc_rx_hist_entry *
- tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
-{
- return h->ring[h->loss_start];
-}
-
-/* indicate whether previously a packet was detected missing */
-static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
-{
- return h->loss_count > 0;
-}
-
-void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb,
- const u64 ndp);
-
-int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
-
-struct tfrc_loss_hist;
-int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh,
- struct sk_buff *skb, const u64 ndp,
- u32 (*first_li)(struct sock *sk), struct sock *sk);
-u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb);
-int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
-void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
-
-#endif /* _DCCP_PKT_HIST_ */
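
tfrc_rx_hist_index() above relies on the ring size being a power of two: with TFRC_NDUPACK + 1 = 4 entries, masking with TFRC_NDUPACK (binary 11) is equivalent to modulo 4, so loss_start can advance around the ring without any entries being copied. A stand-alone check, not kernel code:

#include <assert.h>

#define TFRC_NDUPACK 3  /* ring of TFRC_NDUPACK + 1 == 4 entries */

int main(void)
{
        unsigned char loss_start = 2, n;
        unsigned char expect[4] = { 2, 3, 0, 1 };

        for (n = 0; n < 4; n++) /* walk the ring starting at loss_start */
                assert(((loss_start + n) & TFRC_NDUPACK) == expect[n]);
        return 0;
}
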
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
deleted file mode 100644
index d7f265e1f50c..000000000000
--- a/net/dccp/ccids/lib/tfrc.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * TFRC library initialisation
- *
- * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
- * Copyright (c) 2007 Arnaldo Carvalho de Melo <acme@redhat.com>
- */
-#include <linux/moduleparam.h>
-#include "tfrc.h"
-
-#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-bool tfrc_debug;
-module_param(tfrc_debug, bool, 0644);
-MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages");
-#endif
-
-int __init tfrc_lib_init(void)
-{
- int rc = tfrc_li_init();
-
- if (rc)
- goto out;
-
- rc = tfrc_tx_packet_history_init();
- if (rc)
- goto out_free_loss_intervals;
-
- rc = tfrc_rx_packet_history_init();
- if (rc)
- goto out_free_tx_history;
- return 0;
-
-out_free_tx_history:
- tfrc_tx_packet_history_exit();
-out_free_loss_intervals:
- tfrc_li_exit();
-out:
- return rc;
-}
-
-void tfrc_lib_exit(void)
-{
- tfrc_rx_packet_history_exit();
- tfrc_tx_packet_history_exit();
- tfrc_li_exit();
-}
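
tfrc_lib_init() above is a textbook instance of the kernel's goto-unwind idiom: each stage that came up successfully gets a label that tears it down, and a failure at stage N jumps into the labels undoing stages N-1..1 in reverse order. A generic user-space sketch with made-up stage functions:

#include <stdio.h>

static int  stage_a_init(void) { puts("A up");    return 0;  }
static void stage_a_exit(void) { puts("A down");             }
static int  stage_b_init(void) { puts("B fails"); return -1; }

static int setup(void)
{
        int rc = stage_a_init();

        if (rc)
                goto out;
        rc = stage_b_init();
        if (rc)
                goto out_a;     /* unwind already-initialised stages */
        return 0;
out_a:
        stage_a_exit();
out:
        return rc;
}

int main(void) { return setup() ? 1 : 0; }
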
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
deleted file mode 100644
index 0a63e8750cc5..000000000000
--- a/net/dccp/ccids/lib/tfrc.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _TFRC_H_
-#define _TFRC_H_
-/*
- * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
- * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- */
-#include <linux/types.h>
-#include <linux/math64.h>
-#include "../../dccp.h"
-
-/* internal includes that this library exports: */
-#include "loss_interval.h"
-#include "packet_history.h"
-
-#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-extern bool tfrc_debug;
-#define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a)
-#else
-#define tfrc_pr_debug(format, a...)
-#endif
-
-/* integer-arithmetic divisions of type (a * 1000000)/b */
-static inline u64 scaled_div(u64 a, u64 b)
-{
- BUG_ON(b == 0);
- return div64_u64(a * 1000000, b);
-}
-
-static inline u32 scaled_div32(u64 a, u64 b)
-{
- u64 result = scaled_div(a, b);
-
- if (result > UINT_MAX) {
- DCCP_CRIT("Overflow: %llu/%llu > UINT_MAX",
- (unsigned long long)a, (unsigned long long)b);
- return UINT_MAX;
- }
- return result;
-}
-
-/**
- * tfrc_ewma - Exponentially weighted moving average
- * @weight: Weight to be used as damping factor, in units of 1/10
- */
-static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
-{
- return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
-}
-
-u32 tfrc_calc_x(u16 s, u32 R, u32 p);
-u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
-u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
-
-int tfrc_tx_packet_history_init(void);
-void tfrc_tx_packet_history_exit(void);
-int tfrc_rx_packet_history_init(void);
-void tfrc_rx_packet_history_exit(void);
-
-int tfrc_li_init(void);
-void tfrc_li_exit(void);
-
-#ifdef CONFIG_IP_DCCP_TFRC_LIB
-int tfrc_lib_init(void);
-void tfrc_lib_exit(void);
-#else
-#define tfrc_lib_init() (0)
-#define tfrc_lib_exit()
-#endif
-#endif /* _TFRC_H_ */
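
tfrc_ewma() above keeps a damped average in pure integer arithmetic: @weight is in tenths, so weight = 9 retains 90% of the old average per sample, and the very first sample seeds the average directly instead of ramping up from zero. A user-space run with made-up RTT samples:

#include <stdint.h>
#include <stdio.h>

static uint32_t ewma(uint32_t avg, uint32_t newval, uint8_t weight)
{
        return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
}

int main(void)
{
        uint32_t avg = 0, samples[4] = { 100, 120, 80, 300 }, i;

        for (i = 0; i < 4; i++) {
                avg = ewma(avg, samples[i], 9);
                printf("sample %u -> avg %u\n", samples[i], avg);
        }
        return 0;       /* avg: 100, 102, 99, 119 -- outliers damped */
}
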
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
deleted file mode 100644
index 92a8c6bea316..000000000000
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ /dev/null
@@ -1,702 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
- * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
- */
-
-#include <linux/module.h>
-#include "../../dccp.h"
-#include "tfrc.h"
-
-#define TFRC_CALC_X_ARRSIZE 500
-#define TFRC_CALC_X_SPLIT 50000 /* 0.05 * 1000000, details below */
-#define TFRC_SMALLEST_P (TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE)
-
-/*
- TFRC TCP Reno Throughput Equation Lookup Table for f(p)
-
- The following two-column lookup table implements a part of the TCP throughput
- equation from [RFC 3448, sec. 3.1]:
-
- s
- X_calc = --------------------------------------------------------------
- R * sqrt(2*b*p/3) + (3 * t_RTO * sqrt(3*b*p/8) * (p + 32*p^3))
-
- Where:
- X is the transmit rate in bytes/second
- s is the packet size in bytes
- R is the round trip time in seconds
- p is the loss event rate, between 0 and 1.0, of the number of loss
- events as a fraction of the number of packets transmitted
- t_RTO is the TCP retransmission timeout value in seconds
- b is the number of packets acknowledged by a single TCP ACK
-
- We can assume that b = 1 and t_RTO is 4 * R. The equation now becomes:
-
- s
- X_calc = -------------------------------------------------------
- R * sqrt(p*2/3) + (12 * R * sqrt(p*3/8) * (p + 32*p^3))
-
- which we can break down into:
-
- s
- X_calc = ---------
- R * f(p)
-
- where f(p) is given for 0 < p <= 1 by:
-
- f(p) = sqrt(2*p/3) + 12 * sqrt(3*p/8) * (p + 32*p^3)
-
- Since this is kernel code, floating-point arithmetic is avoided in favour of
- integer arithmetic. This means that nearly all fractional parameters are
- scaled by 1000000:
- * the parameters p and R
- * the return result f(p)
- The lookup table therefore actually tabulates the following function g(q):
-
- g(q) = 1000000 * f(q/1000000)
-
- Hence, when p <= 1, q must be less than or equal to 1000000. To achieve finer
- granularity for the practically more relevant case of small values of p (up to
- 5%), the second column is used; the first one ranges up to 100%. This split
- corresponds to the value of q = TFRC_CALC_X_SPLIT. At the same time this also
- determines the smallest resolution possible with this lookup table:
-
- TFRC_SMALLEST_P = TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE
-
- The entire table is generated by:
- for(i=0; i < TFRC_CALC_X_ARRSIZE; i++) {
- lookup[i][0] = g((i+1) * 1000000/TFRC_CALC_X_ARRSIZE);
- lookup[i][1] = g((i+1) * TFRC_CALC_X_SPLIT/TFRC_CALC_X_ARRSIZE);
- }
-
- With the given configuration, we have, with M = TFRC_CALC_X_ARRSIZE-1,
- lookup[0][0] = g(1000000/(M+1)) = 1000000 * f(0.2%)
- lookup[M][0] = g(1000000) = 1000000 * f(100%)
- lookup[0][1] = g(TFRC_SMALLEST_P) = 1000000 * f(0.01%)
- lookup[M][1] = g(TFRC_CALC_X_SPLIT) = 1000000 * f(5%)
-
- In summary, the two columns represent f(p) for the following ranges:
- * The first column is for 0.002 <= p <= 1.0
- * The second column is for 0.0001 <= p <= 0.05
- Where the columns overlap, the second (finer-grained) is given preference,
- i.e. the first column is used only for p >= 0.05.
- */
-static const u32 tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE][2] = {
- { 37172, 8172 },
- { 53499, 11567 },
- { 66664, 14180 },
- { 78298, 16388 },
- { 89021, 18339 },
- { 99147, 20108 },
- { 108858, 21738 },
- { 118273, 23260 },
- { 127474, 24693 },
- { 136520, 26052 },
- { 145456, 27348 },
- { 154316, 28589 },
- { 163130, 29783 },
- { 171919, 30935 },
- { 180704, 32049 },
- { 189502, 33130 },
- { 198328, 34180 },
- { 207194, 35202 },
- { 216114, 36198 },
- { 225097, 37172 },
- { 234153, 38123 },
- { 243294, 39055 },
- { 252527, 39968 },
- { 261861, 40864 },
- { 271305, 41743 },
- { 280866, 42607 },
- { 290553, 43457 },
- { 300372, 44293 },
- { 310333, 45117 },
- { 320441, 45929 },
- { 330705, 46729 },
- { 341131, 47518 },
- { 351728, 48297 },
- { 362501, 49066 },
- { 373460, 49826 },
- { 384609, 50577 },
- { 395958, 51320 },
- { 407513, 52054 },
- { 419281, 52780 },
- { 431270, 53499 },
- { 443487, 54211 },
- { 455940, 54916 },
- { 468635, 55614 },
- { 481581, 56306 },
- { 494785, 56991 },
- { 508254, 57671 },
- { 521996, 58345 },
- { 536019, 59014 },
- { 550331, 59677 },
- { 564939, 60335 },
- { 579851, 60988 },
- { 595075, 61636 },
- { 610619, 62279 },
- { 626491, 62918 },
- { 642700, 63553 },
- { 659253, 64183 },
- { 676158, 64809 },
- { 693424, 65431 },
- { 711060, 66050 },
- { 729073, 66664 },
- { 747472, 67275 },
- { 766266, 67882 },
- { 785464, 68486 },
- { 805073, 69087 },
- { 825103, 69684 },
- { 845562, 70278 },
- { 866460, 70868 },
- { 887805, 71456 },
- { 909606, 72041 },
- { 931873, 72623 },
- { 954614, 73202 },
- { 977839, 73778 },
- { 1001557, 74352 },
- { 1025777, 74923 },
- { 1050508, 75492 },
- { 1075761, 76058 },
- { 1101544, 76621 },
- { 1127867, 77183 },
- { 1154739, 77741 },
- { 1182172, 78298 },
- { 1210173, 78852 },
- { 1238753, 79405 },
- { 1267922, 79955 },
- { 1297689, 80503 },
- { 1328066, 81049 },
- { 1359060, 81593 },
- { 1390684, 82135 },
- { 1422947, 82675 },
- { 1455859, 83213 },
- { 1489430, 83750 },
- { 1523671, 84284 },
- { 1558593, 84817 },
- { 1594205, 85348 },
- { 1630518, 85878 },
- { 1667543, 86406 },
- { 1705290, 86932 },
- { 1743770, 87457 },
- { 1782994, 87980 },
- { 1822973, 88501 },
- { 1863717, 89021 },
- { 1905237, 89540 },
- { 1947545, 90057 },
- { 1990650, 90573 },
- { 2034566, 91087 },
- { 2079301, 91600 },
- { 2124869, 92111 },
- { 2171279, 92622 },
- { 2218543, 93131 },
- { 2266673, 93639 },
- { 2315680, 94145 },
- { 2365575, 94650 },
- { 2416371, 95154 },
- { 2468077, 95657 },
- { 2520707, 96159 },
- { 2574271, 96660 },
- { 2628782, 97159 },
- { 2684250, 97658 },
- { 2740689, 98155 },
- { 2798110, 98651 },
- { 2856524, 99147 },
- { 2915944, 99641 },
- { 2976382, 100134 },
- { 3037850, 100626 },
- { 3100360, 101117 },
- { 3163924, 101608 },
- { 3228554, 102097 },
- { 3294263, 102586 },
- { 3361063, 103073 },
- { 3428966, 103560 },
- { 3497984, 104045 },
- { 3568131, 104530 },
- { 3639419, 105014 },
- { 3711860, 105498 },
- { 3785467, 105980 },
- { 3860253, 106462 },
- { 3936229, 106942 },
- { 4013410, 107422 },
- { 4091808, 107902 },
- { 4171435, 108380 },
- { 4252306, 108858 },
- { 4334431, 109335 },
- { 4417825, 109811 },
- { 4502501, 110287 },
- { 4588472, 110762 },
- { 4675750, 111236 },
- { 4764349, 111709 },
- { 4854283, 112182 },
- { 4945564, 112654 },
- { 5038206, 113126 },
- { 5132223, 113597 },
- { 5227627, 114067 },
- { 5324432, 114537 },
- { 5422652, 115006 },
- { 5522299, 115474 },
- { 5623389, 115942 },
- { 5725934, 116409 },
- { 5829948, 116876 },
- { 5935446, 117342 },
- { 6042439, 117808 },
- { 6150943, 118273 },
- { 6260972, 118738 },
- { 6372538, 119202 },
- { 6485657, 119665 },
- { 6600342, 120128 },
- { 6716607, 120591 },
- { 6834467, 121053 },
- { 6953935, 121514 },
- { 7075025, 121976 },
- { 7197752, 122436 },
- { 7322131, 122896 },
- { 7448175, 123356 },
- { 7575898, 123815 },
- { 7705316, 124274 },
- { 7836442, 124733 },
- { 7969291, 125191 },
- { 8103877, 125648 },
- { 8240216, 126105 },
- { 8378321, 126562 },
- { 8518208, 127018 },
- { 8659890, 127474 },
- { 8803384, 127930 },
- { 8948702, 128385 },
- { 9095861, 128840 },
- { 9244875, 129294 },
- { 9395760, 129748 },
- { 9548529, 130202 },
- { 9703198, 130655 },
- { 9859782, 131108 },
- { 10018296, 131561 },
- { 10178755, 132014 },
- { 10341174, 132466 },
- { 10505569, 132917 },
- { 10671954, 133369 },
- { 10840345, 133820 },
- { 11010757, 134271 },
- { 11183206, 134721 },
- { 11357706, 135171 },
- { 11534274, 135621 },
- { 11712924, 136071 },
- { 11893673, 136520 },
- { 12076536, 136969 },
- { 12261527, 137418 },
- { 12448664, 137867 },
- { 12637961, 138315 },
- { 12829435, 138763 },
- { 13023101, 139211 },
- { 13218974, 139658 },
- { 13417071, 140106 },
- { 13617407, 140553 },
- { 13819999, 140999 },
- { 14024862, 141446 },
- { 14232012, 141892 },
- { 14441465, 142339 },
- { 14653238, 142785 },
- { 14867346, 143230 },
- { 15083805, 143676 },
- { 15302632, 144121 },
- { 15523842, 144566 },
- { 15747453, 145011 },
- { 15973479, 145456 },
- { 16201939, 145900 },
- { 16432847, 146345 },
- { 16666221, 146789 },
- { 16902076, 147233 },
- { 17140429, 147677 },
- { 17381297, 148121 },
- { 17624696, 148564 },
- { 17870643, 149007 },
- { 18119154, 149451 },
- { 18370247, 149894 },
- { 18623936, 150336 },
- { 18880241, 150779 },
- { 19139176, 151222 },
- { 19400759, 151664 },
- { 19665007, 152107 },
- { 19931936, 152549 },
- { 20201564, 152991 },
- { 20473907, 153433 },
- { 20748982, 153875 },
- { 21026807, 154316 },
- { 21307399, 154758 },
- { 21590773, 155199 },
- { 21876949, 155641 },
- { 22165941, 156082 },
- { 22457769, 156523 },
- { 22752449, 156964 },
- { 23049999, 157405 },
- { 23350435, 157846 },
- { 23653774, 158287 },
- { 23960036, 158727 },
- { 24269236, 159168 },
- { 24581392, 159608 },
- { 24896521, 160049 },
- { 25214642, 160489 },
- { 25535772, 160929 },
- { 25859927, 161370 },
- { 26187127, 161810 },
- { 26517388, 162250 },
- { 26850728, 162690 },
- { 27187165, 163130 },
- { 27526716, 163569 },
- { 27869400, 164009 },
- { 28215234, 164449 },
- { 28564236, 164889 },
- { 28916423, 165328 },
- { 29271815, 165768 },
- { 29630428, 166208 },
- { 29992281, 166647 },
- { 30357392, 167087 },
- { 30725779, 167526 },
- { 31097459, 167965 },
- { 31472452, 168405 },
- { 31850774, 168844 },
- { 32232445, 169283 },
- { 32617482, 169723 },
- { 33005904, 170162 },
- { 33397730, 170601 },
- { 33792976, 171041 },
- { 34191663, 171480 },
- { 34593807, 171919 },
- { 34999428, 172358 },
- { 35408544, 172797 },
- { 35821174, 173237 },
- { 36237335, 173676 },
- { 36657047, 174115 },
- { 37080329, 174554 },
- { 37507197, 174993 },
- { 37937673, 175433 },
- { 38371773, 175872 },
- { 38809517, 176311 },
- { 39250924, 176750 },
- { 39696012, 177190 },
- { 40144800, 177629 },
- { 40597308, 178068 },
- { 41053553, 178507 },
- { 41513554, 178947 },
- { 41977332, 179386 },
- { 42444904, 179825 },
- { 42916290, 180265 },
- { 43391509, 180704 },
- { 43870579, 181144 },
- { 44353520, 181583 },
- { 44840352, 182023 },
- { 45331092, 182462 },
- { 45825761, 182902 },
- { 46324378, 183342 },
- { 46826961, 183781 },
- { 47333531, 184221 },
- { 47844106, 184661 },
- { 48358706, 185101 },
- { 48877350, 185541 },
- { 49400058, 185981 },
- { 49926849, 186421 },
- { 50457743, 186861 },
- { 50992759, 187301 },
- { 51531916, 187741 },
- { 52075235, 188181 },
- { 52622735, 188622 },
- { 53174435, 189062 },
- { 53730355, 189502 },
- { 54290515, 189943 },
- { 54854935, 190383 },
- { 55423634, 190824 },
- { 55996633, 191265 },
- { 56573950, 191706 },
- { 57155606, 192146 },
- { 57741621, 192587 },
- { 58332014, 193028 },
- { 58926806, 193470 },
- { 59526017, 193911 },
- { 60129666, 194352 },
- { 60737774, 194793 },
- { 61350361, 195235 },
- { 61967446, 195677 },
- { 62589050, 196118 },
- { 63215194, 196560 },
- { 63845897, 197002 },
- { 64481179, 197444 },
- { 65121061, 197886 },
- { 65765563, 198328 },
- { 66414705, 198770 },
- { 67068508, 199213 },
- { 67726992, 199655 },
- { 68390177, 200098 },
- { 69058085, 200540 },
- { 69730735, 200983 },
- { 70408147, 201426 },
- { 71090343, 201869 },
- { 71777343, 202312 },
- { 72469168, 202755 },
- { 73165837, 203199 },
- { 73867373, 203642 },
- { 74573795, 204086 },
- { 75285124, 204529 },
- { 76001380, 204973 },
- { 76722586, 205417 },
- { 77448761, 205861 },
- { 78179926, 206306 },
- { 78916102, 206750 },
- { 79657310, 207194 },
- { 80403571, 207639 },
- { 81154906, 208084 },
- { 81911335, 208529 },
- { 82672880, 208974 },
- { 83439562, 209419 },
- { 84211402, 209864 },
- { 84988421, 210309 },
- { 85770640, 210755 },
- { 86558080, 211201 },
- { 87350762, 211647 },
- { 88148708, 212093 },
- { 88951938, 212539 },
- { 89760475, 212985 },
- { 90574339, 213432 },
- { 91393551, 213878 },
- { 92218133, 214325 },
- { 93048107, 214772 },
- { 93883493, 215219 },
- { 94724314, 215666 },
- { 95570590, 216114 },
- { 96422343, 216561 },
- { 97279594, 217009 },
- { 98142366, 217457 },
- { 99010679, 217905 },
- { 99884556, 218353 },
- { 100764018, 218801 },
- { 101649086, 219250 },
- { 102539782, 219698 },
- { 103436128, 220147 },
- { 104338146, 220596 },
- { 105245857, 221046 },
- { 106159284, 221495 },
- { 107078448, 221945 },
- { 108003370, 222394 },
- { 108934074, 222844 },
- { 109870580, 223294 },
- { 110812910, 223745 },
- { 111761087, 224195 },
- { 112715133, 224646 },
- { 113675069, 225097 },
- { 114640918, 225548 },
- { 115612702, 225999 },
- { 116590442, 226450 },
- { 117574162, 226902 },
- { 118563882, 227353 },
- { 119559626, 227805 },
- { 120561415, 228258 },
- { 121569272, 228710 },
- { 122583219, 229162 },
- { 123603278, 229615 },
- { 124629471, 230068 },
- { 125661822, 230521 },
- { 126700352, 230974 },
- { 127745083, 231428 },
- { 128796039, 231882 },
- { 129853241, 232336 },
- { 130916713, 232790 },
- { 131986475, 233244 },
- { 133062553, 233699 },
- { 134144966, 234153 },
- { 135233739, 234608 },
- { 136328894, 235064 },
- { 137430453, 235519 },
- { 138538440, 235975 },
- { 139652876, 236430 },
- { 140773786, 236886 },
- { 141901190, 237343 },
- { 143035113, 237799 },
- { 144175576, 238256 },
- { 145322604, 238713 },
- { 146476218, 239170 },
- { 147636442, 239627 },
- { 148803298, 240085 },
- { 149976809, 240542 },
- { 151156999, 241000 },
- { 152343890, 241459 },
- { 153537506, 241917 },
- { 154737869, 242376 },
- { 155945002, 242835 },
- { 157158929, 243294 },
- { 158379673, 243753 },
- { 159607257, 244213 },
- { 160841704, 244673 },
- { 162083037, 245133 },
- { 163331279, 245593 },
- { 164586455, 246054 },
- { 165848586, 246514 },
- { 167117696, 246975 },
- { 168393810, 247437 },
- { 169676949, 247898 },
- { 170967138, 248360 },
- { 172264399, 248822 },
- { 173568757, 249284 },
- { 174880235, 249747 },
- { 176198856, 250209 },
- { 177524643, 250672 },
- { 178857621, 251136 },
- { 180197813, 251599 },
- { 181545242, 252063 },
- { 182899933, 252527 },
- { 184261908, 252991 },
- { 185631191, 253456 },
- { 187007807, 253920 },
- { 188391778, 254385 },
- { 189783129, 254851 },
- { 191181884, 255316 },
- { 192588065, 255782 },
- { 194001698, 256248 },
- { 195422805, 256714 },
- { 196851411, 257181 },
- { 198287540, 257648 },
- { 199731215, 258115 },
- { 201182461, 258582 },
- { 202641302, 259050 },
- { 204107760, 259518 },
- { 205581862, 259986 },
- { 207063630, 260454 },
- { 208553088, 260923 },
- { 210050262, 261392 },
- { 211555174, 261861 },
- { 213067849, 262331 },
- { 214588312, 262800 },
- { 216116586, 263270 },
- { 217652696, 263741 },
- { 219196666, 264211 },
- { 220748520, 264682 },
- { 222308282, 265153 },
- { 223875978, 265625 },
- { 225451630, 266097 },
- { 227035265, 266569 },
- { 228626905, 267041 },
- { 230226576, 267514 },
- { 231834302, 267986 },
- { 233450107, 268460 },
- { 235074016, 268933 },
- { 236706054, 269407 },
- { 238346244, 269881 },
- { 239994613, 270355 },
- { 241651183, 270830 },
- { 243315981, 271305 }
-};
-
-/* return smallest index i such that fval <= lookup[i][small] */
-static inline u32 tfrc_binsearch(u32 fval, u8 small)
-{
- u32 try, low = 0, high = TFRC_CALC_X_ARRSIZE - 1;
-
- while (low < high) {
- try = (low + high) / 2;
- if (fval <= tfrc_calc_x_lookup[try][small])
- high = try;
- else
- low = try + 1;
- }
- return high;
-}
-
-/**
- * tfrc_calc_x - Calculate the send rate as per section 3.1 of RFC3448
- * @s: packet size in bytes
- * @R: RTT scaled by 1000000 (i.e., microseconds)
- * @p: loss ratio estimate scaled by 1000000
- *
- * Returns X_calc in bytes per second (not scaled).
- */
-u32 tfrc_calc_x(u16 s, u32 R, u32 p)
-{
- u16 index;
- u32 f;
- u64 result;
-
- /* check against invalid parameters and divide-by-zero */
- BUG_ON(p > 1000000); /* p must not exceed 100% */
- BUG_ON(p == 0); /* f(0) = 0, divide by zero */
- if (R == 0) { /* possible divide by zero */
- DCCP_CRIT("WARNING: RTT is 0, returning maximum X_calc.");
- return ~0U;
- }
-
- if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */
- if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */
- DCCP_WARN("Value of p (%d) below resolution. "
- "Substituting %d\n", p, TFRC_SMALLEST_P);
- index = 0;
- } else /* 0.0001 <= p <= 0.05 */
- index = p/TFRC_SMALLEST_P - 1;
-
- f = tfrc_calc_x_lookup[index][1];
-
- } else { /* 0.05 < p <= 1.00 */
- index = p/(1000000/TFRC_CALC_X_ARRSIZE) - 1;
-
- f = tfrc_calc_x_lookup[index][0];
- }
-
- /*
- * Compute X = s/(R*f(p)) in bytes per second.
- * Since f(p) and R are both scaled by 1000000, we need to multiply by
- * 1000000^2. To avoid overflow, the result is computed in two stages.
- * This works under almost all reasonable operational conditions, for a
- * wide range of parameters. Yet, should some strange combination of
- * parameters result in overflow, the use of scaled_div32 will catch
- * this and return UINT_MAX - which is a logically adequate consequence.
- */
- result = scaled_div(s, R);
- return scaled_div32(result, f);
-}
-
-/**
- * tfrc_calc_x_reverse_lookup - try to find p given f(p)
- * @fvalue: function value to match, scaled by 1000000
- *
- * Returns closest match for p, also scaled by 1000000
- */
-u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
-{
- int index;
-
- if (fvalue == 0) /* f(p) = 0 whenever p = 0 */
- return 0;
-
- /* Error cases. */
- if (fvalue < tfrc_calc_x_lookup[0][1]) {
- DCCP_WARN("fvalue %u smaller than resolution\n", fvalue);
- return TFRC_SMALLEST_P;
- }
- if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) {
- DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue);
- return 1000000;
- }
-
- if (fvalue <= tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][1]) {
- index = tfrc_binsearch(fvalue, 1);
- return (index + 1) * TFRC_CALC_X_SPLIT / TFRC_CALC_X_ARRSIZE;
- }
-
- /* else ... it must be in the coarse-grained column */
- index = tfrc_binsearch(fvalue, 0);
- return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
-}
-
-/**
- * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
- * @loss_event_rate: loss event rate to invert
- * When @loss_event_rate is large, there is a chance that p is truncated to 0.
- * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
- */
-u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
-{
- if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
- return 0;
- if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
- return 1000000;
- return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
-}
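
For spot-checking the table above, here is a floating-point, user-space reference of the tabulated function, exactly as derived in the comment block: f(p) = sqrt(2p/3) + 12*sqrt(3p/8)*(p + 32p^3) and g(q) = 10^6 * f(q/10^6). Build with "cc tfrc_fp.c -lm"; output agrees with the table entries up to the generator's rounding. Not kernel code:

#include <math.h>
#include <stdio.h>

static double f(double p)
{
        return sqrt(2.0 * p / 3.0) +
               12.0 * sqrt(3.0 * p / 8.0) * (p + 32.0 * p * p * p);
}

int main(void)
{
        /* g(100) = 10^6 * f(0.0001): first fine-grained entry, ~8172 */
        printf("g(100)     = %.0f\n", 1e6 * f(100.0 / 1e6));
        /* g(10^6) = 10^6 * f(1.0): last coarse entry, ~243315981 */
        printf("g(1000000) = %.0f\n", 1e6 * f(1.0));
        return 0;
}
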
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
deleted file mode 100644
index 1f748ed1279d..000000000000
--- a/net/dccp/dccp.h
+++ /dev/null
@@ -1,483 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _DCCP_H
-#define _DCCP_H
-/*
- * net/dccp/dccp.h
- *
- * An implementation of the DCCP protocol
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- * Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
- */
-
-#include <linux/dccp.h>
-#include <linux/ktime.h>
-#include <net/snmp.h>
-#include <net/sock.h>
-#include <net/tcp.h>
-#include "ackvec.h"
-
-/*
- * DCCP - specific warning and debugging macros.
- */
-#define DCCP_WARN(fmt, ...) \
- net_warn_ratelimited("%s: " fmt, __func__, ##__VA_ARGS__)
-#define DCCP_CRIT(fmt, a...) printk(KERN_CRIT fmt " at %s:%d/%s()\n", ##a, \
- __FILE__, __LINE__, __func__)
-#define DCCP_BUG(a...) do { DCCP_CRIT("BUG: " a); dump_stack(); } while(0)
-#define DCCP_BUG_ON(cond) do { if (unlikely((cond) != 0)) \
- DCCP_BUG("\"%s\" holds (exception!)", \
- __stringify(cond)); \
- } while (0)
-
-#define DCCP_PRINTK(enable, fmt, args...) do { if (enable) \
- printk(fmt, ##args); \
- } while(0)
-#define DCCP_PR_DEBUG(enable, fmt, a...) DCCP_PRINTK(enable, KERN_DEBUG \
- "%s: " fmt, __func__, ##a)
-
-#ifdef CONFIG_IP_DCCP_DEBUG
-extern bool dccp_debug;
-#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
-#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
-#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
-#else
-#define dccp_pr_debug(format, a...) do {} while (0)
-#define dccp_pr_debug_cat(format, a...) do {} while (0)
-#define dccp_debug(format, a...) do {} while (0)
-#endif
-
-extern struct inet_hashinfo dccp_hashinfo;
-
-DECLARE_PER_CPU(unsigned int, dccp_orphan_count);
-
-void dccp_time_wait(struct sock *sk, int state, int timeo);
-
-/*
- * Set safe upper bounds for header and option length. Since Data Offset is 8
- * bits (RFC 4340, sec. 5.1), the total header length can never be more than
- * 4 * 255 = 1020 bytes. The largest possible header length is 28 bytes (X=1):
- * - DCCP-Response with ACK Subheader and 4 bytes of Service code OR
- * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
- * Hence a safe upper bound for the maximum option length is 1020-28 = 992
- */
-#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t))
-#define DCCP_MAX_PACKET_HDR 28
-#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
-#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
-
-/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */
-#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t))
-
-#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
- * state, about 60 seconds */
-
-/* RFC 1122, 4.2.3.1 initial RTO value */
-#define DCCP_TIMEOUT_INIT ((unsigned int)(3 * HZ))
-
-/*
- * The maximum back-off value for retransmissions. This is needed for
- * - retransmitting client-Requests (sec. 8.1.1),
- * - retransmitting Close/CloseReq when closing (sec. 8.3),
- * - feature-negotiation retransmission (sec. 6.6.3),
- * - Acks in client-PARTOPEN state (sec. 8.1.5).
- */
-#define DCCP_RTO_MAX ((unsigned int)(64 * HZ))
-
-/*
- * RTT sampling: sanity bounds and fallback RTT value from RFC 4340, section 3.4
- */
-#define DCCP_SANE_RTT_MIN 100
-#define DCCP_FALLBACK_RTT (USEC_PER_SEC / 5)
-#define DCCP_SANE_RTT_MAX (3 * USEC_PER_SEC)
-
-/* sysctl variables for DCCP */
-extern int sysctl_dccp_request_retries;
-extern int sysctl_dccp_retries1;
-extern int sysctl_dccp_retries2;
-extern int sysctl_dccp_tx_qlen;
-extern int sysctl_dccp_sync_ratelimit;
-
-/*
- * 48-bit sequence number arithmetic (signed and unsigned)
- */
-#define INT48_MIN 0x800000000000LL /* 2^47 */
-#define UINT48_MAX 0xFFFFFFFFFFFFLL /* 2^48 - 1 */
-#define COMPLEMENT48(x) (0x1000000000000LL - (x)) /* 2^48 - x */
-#define TO_SIGNED48(x) (((x) < INT48_MIN)? (x) : -COMPLEMENT48( (x)))
-#define TO_UNSIGNED48(x) (((x) >= 0)? (x) : COMPLEMENT48(-(x)))
-#define ADD48(a, b) (((a) + (b)) & UINT48_MAX)
-#define SUB48(a, b) ADD48((a), COMPLEMENT48(b))
-
-static inline void dccp_inc_seqno(u64 *seqno)
-{
- *seqno = ADD48(*seqno, 1);
-}
-
-/* signed mod-2^48 distance: pos. if seqno1 < seqno2, neg. if seqno1 > seqno2 */
-static inline s64 dccp_delta_seqno(const u64 seqno1, const u64 seqno2)
-{
- u64 delta = SUB48(seqno2, seqno1);
-
- return TO_SIGNED48(delta);
-}
-
-/* is seq1 < seq2 ? */
-static inline int before48(const u64 seq1, const u64 seq2)
-{
- return (s64)((seq2 << 16) - (seq1 << 16)) > 0;
-}
-
-/* is seq1 > seq2 ? */
-#define after48(seq1, seq2) before48(seq2, seq1)
-
-/* is seq2 <= seq1 <= seq3 ? */
-static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
-{
- return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
-}
-
-/**
- * dccp_loss_count - Approximate the number of lost data packets in a burst loss
- * @s1: last known sequence number before the loss ('hole')
- * @s2: first sequence number seen after the 'hole'
- * @ndp: NDP count on packet with sequence number @s2
- */
-static inline u64 dccp_loss_count(const u64 s1, const u64 s2, const u64 ndp)
-{
- s64 delta = dccp_delta_seqno(s1, s2);
-
- WARN_ON(delta < 0);
- delta -= ndp + 1;
-
- return delta > 0 ? delta : 0;
-}
-
-/**
- * dccp_loss_free - Evaluate condition for data loss from RFC 4340, 7.7.1
- */
-static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
-{
- return dccp_loss_count(s1, s2, ndp) == 0;
-}
-
-enum {
- DCCP_MIB_NUM = 0,
- DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */
- DCCP_MIB_ESTABRESETS, /* EstabResets */
- DCCP_MIB_CURRESTAB, /* CurrEstab */
- DCCP_MIB_OUTSEGS, /* OutSegs */
- DCCP_MIB_OUTRSTS,
- DCCP_MIB_ABORTONTIMEOUT,
- DCCP_MIB_TIMEOUTS,
- DCCP_MIB_ABORTFAILED,
- DCCP_MIB_PASSIVEOPENS,
- DCCP_MIB_ATTEMPTFAILS,
- DCCP_MIB_OUTDATAGRAMS,
- DCCP_MIB_INERRS,
- DCCP_MIB_OPTMANDATORYERROR,
- DCCP_MIB_INVALIDOPT,
- __DCCP_MIB_MAX
-};
-
-#define DCCP_MIB_MAX __DCCP_MIB_MAX
-struct dccp_mib {
- unsigned long mibs[DCCP_MIB_MAX];
-};
-
-DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
-#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field)
-#define __DCCP_INC_STATS(field) __SNMP_INC_STATS(dccp_statistics, field)
-#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field)
-
-/*
- * Checksumming routines
- */
-static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb)
-{
- const struct dccp_hdr* dh = dccp_hdr(skb);
-
- if (dh->dccph_cscov == 0)
- return skb->len;
- return (dh->dccph_doff + dh->dccph_cscov - 1) * sizeof(u32);
-}
-
-static inline void dccp_csum_outgoing(struct sk_buff *skb)
-{
- unsigned int cov = dccp_csum_coverage(skb);
-
- if (cov >= skb->len)
- dccp_hdr(skb)->dccph_cscov = 0;
-
- skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
-}
-
-void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
-
-int dccp_retransmit_skb(struct sock *sk);
-
-void dccp_send_ack(struct sock *sk);
-void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
- struct request_sock *rsk);
-
-void dccp_send_sync(struct sock *sk, const u64 seq,
- const enum dccp_pkt_type pkt_type);
-
-/*
- * TX Packet Dequeueing Interface
- */
-void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb);
-bool dccp_qpolicy_full(struct sock *sk);
-void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb);
-struct sk_buff *dccp_qpolicy_top(struct sock *sk);
-struct sk_buff *dccp_qpolicy_pop(struct sock *sk);
-bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param);
-
-/*
- * TX Packet Output and TX Timers
- */
-void dccp_write_xmit(struct sock *sk);
-void dccp_write_space(struct sock *sk);
-void dccp_flush_write_queue(struct sock *sk, long *time_budget);
-
-void dccp_init_xmit_timers(struct sock *sk);
-static inline void dccp_clear_xmit_timers(struct sock *sk)
-{
- inet_csk_clear_xmit_timers(sk);
-}
-
-unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
-
-const char *dccp_packet_name(const int type);
-
-void dccp_set_state(struct sock *sk, const int state);
-void dccp_done(struct sock *sk);
-
-int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
- struct sk_buff const *skb);
-
-int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
-
-struct sock *dccp_create_openreq_child(const struct sock *sk,
- const struct request_sock *req,
- const struct sk_buff *skb);
-
-int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
-
-struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst,
- struct request_sock *req_unhash,
- bool *own_req);
-struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req);
-
-int dccp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb);
-int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
- struct dccp_hdr *dh, unsigned int len);
-int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct dccp_hdr *dh, const unsigned int len);
-
-void dccp_destruct_common(struct sock *sk);
-int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
-void dccp_destroy_sock(struct sock *sk);
-
-void dccp_close(struct sock *sk, long timeout);
-struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst,
- struct request_sock *req);
-
-int dccp_connect(struct sock *sk);
-int dccp_disconnect(struct sock *sk, int flags);
-int dccp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen);
-int dccp_setsockopt(struct sock *sk, int level, int optname,
- sockptr_t optval, unsigned int optlen);
-int dccp_ioctl(struct sock *sk, int cmd, int *karg);
-int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
-int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
- int *addr_len);
-void dccp_shutdown(struct sock *sk, int how);
-int inet_dccp_listen(struct socket *sock, int backlog);
-__poll_t dccp_poll(struct file *file, struct socket *sock,
- poll_table *wait);
-int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
-void dccp_req_err(struct sock *sk, u64 seq);
-
-struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *skb);
-int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code);
-void dccp_send_close(struct sock *sk, const int active);
-int dccp_invalid_packet(struct sk_buff *skb);
-u32 dccp_sample_rtt(struct sock *sk, long delta);
-
-static inline bool dccp_bad_service_code(const struct sock *sk,
- const __be32 service)
-{
- const struct dccp_sock *dp = dccp_sk(sk);
-
- if (dp->dccps_service == service)
- return false;
- return !dccp_list_has_service(dp->dccps_service_list, service);
-}
-
-/**
- * dccp_skb_cb - DCCP per-packet control information
- * @dccpd_type: one of %dccp_pkt_type (or unknown)
- * @dccpd_ccval: CCVal field (5.1), see e.g. RFC 4342, 8.1
- * @dccpd_reset_code: one of %dccp_reset_codes
- * @dccpd_reset_data: Data1..3 fields (depend on @dccpd_reset_code)
- * @dccpd_opt_len: total length of all options (5.8) in the packet
- * @dccpd_seq: sequence number
- * @dccpd_ack_seq: acknowledgment number subheader field value
- *
- * This is used for transmission as well as for reception.
- */
-struct dccp_skb_cb {
- union {
- struct inet_skb_parm h4;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_skb_parm h6;
-#endif
- } header;
- __u8 dccpd_type:4;
- __u8 dccpd_ccval:4;
- __u8 dccpd_reset_code,
- dccpd_reset_data[3];
- __u16 dccpd_opt_len;
- __u64 dccpd_seq;
- __u64 dccpd_ack_seq;
-};
-
-#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
-
-/* RFC 4340, sec. 7.7 */
-static inline int dccp_non_data_packet(const struct sk_buff *skb)
-{
- const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
-
- return type == DCCP_PKT_ACK ||
- type == DCCP_PKT_CLOSE ||
- type == DCCP_PKT_CLOSEREQ ||
- type == DCCP_PKT_RESET ||
- type == DCCP_PKT_SYNC ||
- type == DCCP_PKT_SYNCACK;
-}
-
-/* RFC 4340, sec. 7.7 */
-static inline int dccp_data_packet(const struct sk_buff *skb)
-{
- const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
-
- return type == DCCP_PKT_DATA ||
- type == DCCP_PKT_DATAACK ||
- type == DCCP_PKT_REQUEST ||
- type == DCCP_PKT_RESPONSE;
-}
-
-static inline int dccp_packet_without_ack(const struct sk_buff *skb)
-{
- const __u8 type = DCCP_SKB_CB(skb)->dccpd_type;
-
- return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST;
-}
-
-#define DCCP_PKT_WITHOUT_ACK_SEQ (UINT48_MAX << 2)
-
-static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
-{
- struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh +
- sizeof(*dh));
- dh->dccph_seq2 = 0;
- dh->dccph_seq = htons((gss >> 32) & 0xfffff);
- dhx->dccph_seq_low = htonl(gss & 0xffffffff);
-}
-
-static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
- const u64 gsr)
-{
- dhack->dccph_reserved1 = 0;
- dhack->dccph_ack_nr_high = htons(gsr >> 32);
- dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff);
-}
-
-static inline void dccp_update_gsr(struct sock *sk, u64 seq)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- if (after48(seq, dp->dccps_gsr))
- dp->dccps_gsr = seq;
- /* Sequence validity window depends on remote Sequence Window (7.5.1) */
- dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
- /*
- * Adjust SWL so that it is not below ISR. In contrast to RFC 4340,
- * 7.5.1 we perform this check beyond the initial handshake: W/W' are
- * always > 32, so for the first W/W' packets in the lifetime of a
- * connection we always have to adjust SWL.
- * A second reason why we are doing this is that the window depends on
- * the feature-remote value of Sequence Window: nothing stops the peer
- * from updating this value while we are busy adjusting SWL for the
- * first W packets (we would have to count from scratch again then).
- * Therefore it is safer to always make sure that the Sequence Window
- * is not artificially extended by a peer who grows SWL downwards by
- * continually updating the feature-remote Sequence-Window.
- * If sequence numbers wrap it is bad luck. But that will take a while
- * (48 bit), and this measure prevents Sequence-number attacks.
- */
- if (before48(dp->dccps_swl, dp->dccps_isr))
- dp->dccps_swl = dp->dccps_isr;
- dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
-}
-
-static inline void dccp_update_gss(struct sock *sk, u64 seq)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- dp->dccps_gss = seq;
- /* Ack validity window depends on local Sequence Window value (7.5.1) */
- dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
- /* Adjust AWL so that it is not below ISS - see comment above for SWL */
- if (before48(dp->dccps_awl, dp->dccps_iss))
- dp->dccps_awl = dp->dccps_iss;
- dp->dccps_awh = dp->dccps_gss;
-}
-
-static inline int dccp_ackvec_pending(const struct sock *sk)
-{
- return dccp_sk(sk)->dccps_hc_rx_ackvec != NULL &&
- !dccp_ackvec_is_empty(dccp_sk(sk)->dccps_hc_rx_ackvec);
-}
-
-static inline int dccp_ack_pending(const struct sock *sk)
-{
- return dccp_ackvec_pending(sk) || inet_csk_ack_scheduled(sk);
-}
-
-int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val);
-int dccp_feat_finalise_settings(struct dccp_sock *dp);
-int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq);
-int dccp_feat_insert_opts(struct dccp_sock*, struct dccp_request_sock*,
- struct sk_buff *skb);
-int dccp_feat_activate_values(struct sock *sk, struct list_head *fn);
-void dccp_feat_list_purge(struct list_head *fn_list);
-
-int dccp_insert_options(struct sock *sk, struct sk_buff *skb);
-int dccp_insert_options_rsk(struct dccp_request_sock *, struct sk_buff *);
-u32 dccp_timestamp(void);
-void dccp_timestamping_init(void);
-int dccp_insert_option(struct sk_buff *skb, unsigned char option,
- const void *value, unsigned char len);
-
-#ifdef CONFIG_SYSCTL
-int dccp_sysctl_init(void);
-void dccp_sysctl_exit(void);
-#else
-static inline int dccp_sysctl_init(void)
-{
- return 0;
-}
-
-static inline void dccp_sysctl_exit(void)
-{
-}
-#endif
-
-#endif /* _DCCP_H */
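
The mod-2^48 sequence arithmetic above survives wrap-around because SUB48 is addition of the 48-bit two's complement, and dccp_delta_seqno() then maps the unsigned distance into a signed one so "before" and "after" stay meaningful across the wrap. A stand-alone demonstration reusing the macros as defined above; delta_seqno() is an illustrative wrapper, not kernel code:

#include <assert.h>
#include <stdint.h>

#define INT48_MIN        0x800000000000LL              /* 2^47 */
#define UINT48_MAX       0xFFFFFFFFFFFFLL              /* 2^48 - 1 */
#define COMPLEMENT48(x)  (0x1000000000000LL - (x))     /* 2^48 - x */
#define TO_SIGNED48(x)   (((x) < INT48_MIN) ? (x) : -COMPLEMENT48(x))
#define ADD48(a, b)      (((a) + (b)) & UINT48_MAX)
#define SUB48(a, b)      ADD48((a), COMPLEMENT48(b))

static int64_t delta_seqno(uint64_t s1, uint64_t s2)
{
        return TO_SIGNED48(SUB48(s2, s1));
}

int main(void)
{
        /* plain distance */
        assert(delta_seqno(10, 15) == 5);
        /* wrap-around: 2 follows UINT48_MAX - 1 by 3 steps mod 2^48 */
        assert(delta_seqno(UINT48_MAX, 2) == 3);
        /* negative distance means s2 is older than s1 */
        assert(delta_seqno(15, 10) == -5);
        return 0;
}
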
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
deleted file mode 100644
index f5019d95c3ae..000000000000
--- a/net/dccp/diag.c
+++ /dev/null
@@ -1,85 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/dccp/diag.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@mandriva.com>
- */
-
-
-#include <linux/module.h>
-#include <linux/inet_diag.h>
-
-#include "ccid.h"
-#include "dccp.h"
-
-static void dccp_get_info(struct sock *sk, struct tcp_info *info)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- const struct inet_connection_sock *icsk = inet_csk(sk);
-
- memset(info, 0, sizeof(*info));
-
- info->tcpi_state = sk->sk_state;
- info->tcpi_retransmits = icsk->icsk_retransmits;
- info->tcpi_probes = icsk->icsk_probes_out;
- info->tcpi_backoff = icsk->icsk_backoff;
- info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
-
- if (dp->dccps_hc_rx_ackvec != NULL)
- info->tcpi_options |= TCPI_OPT_SACK;
-
- if (dp->dccps_hc_rx_ccid != NULL)
- ccid_hc_rx_get_info(dp->dccps_hc_rx_ccid, sk, info);
-
- if (dp->dccps_hc_tx_ccid != NULL)
- ccid_hc_tx_get_info(dp->dccps_hc_tx_ccid, sk, info);
-}
-
-static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
- void *_info)
-{
- r->idiag_rqueue = r->idiag_wqueue = 0;
-
- if (_info != NULL)
- dccp_get_info(sk, _info);
-}
-
-static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r)
-{
- inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r);
-}
-
-static int dccp_diag_dump_one(struct netlink_callback *cb,
- const struct inet_diag_req_v2 *req)
-{
- return inet_diag_dump_one_icsk(&dccp_hashinfo, cb, req);
-}
-
-static const struct inet_diag_handler dccp_diag_handler = {
- .owner = THIS_MODULE,
- .dump = dccp_diag_dump,
- .dump_one = dccp_diag_dump_one,
- .idiag_get_info = dccp_diag_get_info,
- .idiag_type = IPPROTO_DCCP,
- .idiag_info_size = sizeof(struct tcp_info),
-};
-
-static int __init dccp_diag_init(void)
-{
- return inet_diag_register(&dccp_diag_handler);
-}
-
-static void __exit dccp_diag_fini(void)
-{
- inet_diag_unregister(&dccp_diag_handler);
-}
-
-module_init(dccp_diag_init);
-module_exit(dccp_diag_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
-MODULE_DESCRIPTION("DCCP inet_diag handler");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-33 /* AF_INET - IPPROTO_DCCP */);
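For context, this handler answered the sock_diag netlink queries behind tools such as `ss --dccp`. A sketch of the kind of dump request it served, using the standard uapi structures (error handling omitted):

#include <linux/inet_diag.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <netinet/in.h>
#include <sys/socket.h>

/* Ask the kernel to dump all IPv4 DCCP sockets; replies arrive via recv(). */
static int dump_dccp_sockets(void)
{
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
	struct {
		struct nlmsghdr nlh;
		struct inet_diag_req_v2 req;
	} msg = {
		.nlh = {
			.nlmsg_len   = sizeof(msg),
			.nlmsg_type  = SOCK_DIAG_BY_FAMILY,
			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
		},
		.req = {
			.sdiag_family   = AF_INET,
			.sdiag_protocol = IPPROTO_DCCP,
			.idiag_states   = ~0U,	/* sockets in any state */
		},
	};

	if (fd < 0 || send(fd, &msg, sizeof(msg), 0) < 0)
		return -1;
	return fd;
}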
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
deleted file mode 100644
index f7554dcdaaba..000000000000
--- a/net/dccp/feat.c
+++ /dev/null
@@ -1,1581 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/dccp/feat.c
- *
- * Feature negotiation for the DCCP protocol (RFC 4340, section 6)
- *
- * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
- * Rewrote from scratch, some bits from earlier code by
- * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
- *
- * ASSUMPTIONS
- * -----------
- * o Feature negotiation is coordinated with connection setup (as in TCP); wild
- * changes of parameters of an established connection are not supported.
- * o Changing non-negotiable (NN) values is supported in state OPEN/PARTOPEN.
- * o All currently known SP features have 1-byte quantities. If in the future
- * extensions of RFCs 4340..42 define features with item lengths larger than
- * one byte, a feature-specific extension of the code will be required.
- */
-#include <linux/module.h>
-#include <linux/slab.h>
-#include "ccid.h"
-#include "feat.h"
-
-/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */
-unsigned long sysctl_dccp_sequence_window __read_mostly = 100;
-int sysctl_dccp_rx_ccid __read_mostly = 2,
- sysctl_dccp_tx_ccid __read_mostly = 2;
-
-/*
- * Feature activation handlers.
- *
- * These all use a u64 argument, to provide enough room for NN/SP features. At
- * this stage the negotiated values have been checked to be within their range.
- */
-static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct ccid *new_ccid = ccid_new(ccid, sk, rx);
-
- if (new_ccid == NULL)
- return -ENOMEM;
-
- if (rx) {
- ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- dp->dccps_hc_rx_ccid = new_ccid;
- } else {
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- dp->dccps_hc_tx_ccid = new_ccid;
- }
- return 0;
-}
-
-static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- if (rx) {
- dp->dccps_r_seq_win = seq_win;
- /* propagate changes to update SWL/SWH */
- dccp_update_gsr(sk, dp->dccps_gsr);
- } else {
- dp->dccps_l_seq_win = seq_win;
- /* propagate changes to update AWL */
- dccp_update_gss(sk, dp->dccps_gss);
- }
- return 0;
-}
-
-static int dccp_hdlr_ack_ratio(struct sock *sk, u64 ratio, bool rx)
-{
- if (rx)
- dccp_sk(sk)->dccps_r_ack_ratio = ratio;
- else
- dccp_sk(sk)->dccps_l_ack_ratio = ratio;
- return 0;
-}
-
-static int dccp_hdlr_ackvec(struct sock *sk, u64 enable, bool rx)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- if (rx) {
- if (enable && dp->dccps_hc_rx_ackvec == NULL) {
- dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(gfp_any());
- if (dp->dccps_hc_rx_ackvec == NULL)
- return -ENOMEM;
- } else if (!enable) {
- dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
- dp->dccps_hc_rx_ackvec = NULL;
- }
- }
- return 0;
-}
-
-static int dccp_hdlr_ndp(struct sock *sk, u64 enable, bool rx)
-{
- if (!rx)
- dccp_sk(sk)->dccps_send_ndp_count = (enable > 0);
- return 0;
-}
-
-/*
- * Minimum Checksum Coverage is located at the RX side (9.2.1). This means that
- * `rx' holds when the sending peer informs us of its partial coverage via a
- * ChangeR() option. In the other case, we are the sender and the receiver
- * announces its coverage via ChangeL() options. The policy here is to honour
- * such communication by enabling the corresponding partial coverage - but only
- * if it has not been set manually before; the warning below flags a mismatch
- * under which the peer will drop all of our packets.
- */
-static int dccp_hdlr_min_cscov(struct sock *sk, u64 cscov, bool rx)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- if (rx)
- dp->dccps_pcrlen = cscov;
- else {
- if (dp->dccps_pcslen == 0)
- dp->dccps_pcslen = cscov;
- else if (cscov > dp->dccps_pcslen)
- DCCP_WARN("CsCov %u too small, peer requires >= %u\n",
- dp->dccps_pcslen, (u8)cscov);
- }
- return 0;
-}
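The userspace-facing side of this handler was the pair of checksum-coverage socket options. A sketch of how an application used to request partial coverage, assuming the historical DCCP_SOCKOPT_* constants from <linux/dccp.h>:

int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
int cscov = 5;	/* 1..15 selects partial coverage, RFC 4340, 9.2 */

setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV, &cscov, sizeof(cscov));
setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_RECV_CSCOV, &cscov, sizeof(cscov));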
-
-static const struct {
- u8 feat_num; /* DCCPF_xxx */
- enum dccp_feat_type rxtx; /* RX or TX */
- enum dccp_feat_type reconciliation; /* SP or NN */
- u8 default_value; /* as in 6.4 */
- int (*activation_hdlr)(struct sock *sk, u64 val, bool rx);
-/*
- * Lookup table for location and type of features (from RFC 4340/4342)
- * +--------------------------+----+-----+----+----+---------+-----------+
- * | Feature                  | Location | Reconc. | Initial |  Section  |
- * |                          | RX | TX  | SP | NN |  Value  | Reference |
- * +--------------------------+----+-----+----+----+---------+-----------+
- * | DCCPF_CCID               |    |  X  | X  |    |    2    | 10        |
- * | DCCPF_SHORT_SEQNOS       |    |  X  | X  |    |    0    |  7.6.1    |
- * | DCCPF_SEQUENCE_WINDOW    |    |  X  |    | X  |  100    |  7.5.2    |
- * | DCCPF_ECN_INCAPABLE      | X  |     | X  |    |    0    | 12.1      |
- * | DCCPF_ACK_RATIO          |    |  X  |    | X  |    2    | 11.3      |
- * | DCCPF_SEND_ACK_VECTOR    | X  |     | X  |    |    0    | 11.5      |
- * | DCCPF_SEND_NDP_COUNT     |    |  X  | X  |    |    0    |  7.7.2    |
- * | DCCPF_MIN_CSUM_COVER     | X  |     | X  |    |    0    |  9.2.1    |
- * | DCCPF_DATA_CHECKSUM      | X  |     | X  |    |    0    |  9.3.1    |
- * | DCCPF_SEND_LEV_RATE      | X  |     | X  |    |    0    | 4342/8.4  |
- * +--------------------------+----+-----+----+----+---------+-----------+
- */
-} dccp_feat_table[] = {
- { DCCPF_CCID, FEAT_AT_TX, FEAT_SP, 2, dccp_hdlr_ccid },
- { DCCPF_SHORT_SEQNOS, FEAT_AT_TX, FEAT_SP, 0, NULL },
- { DCCPF_SEQUENCE_WINDOW, FEAT_AT_TX, FEAT_NN, 100, dccp_hdlr_seq_win },
- { DCCPF_ECN_INCAPABLE, FEAT_AT_RX, FEAT_SP, 0, NULL },
- { DCCPF_ACK_RATIO, FEAT_AT_TX, FEAT_NN, 2, dccp_hdlr_ack_ratio},
- { DCCPF_SEND_ACK_VECTOR, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_ackvec },
- { DCCPF_SEND_NDP_COUNT, FEAT_AT_TX, FEAT_SP, 0, dccp_hdlr_ndp },
- { DCCPF_MIN_CSUM_COVER, FEAT_AT_RX, FEAT_SP, 0, dccp_hdlr_min_cscov},
- { DCCPF_DATA_CHECKSUM, FEAT_AT_RX, FEAT_SP, 0, NULL },
- { DCCPF_SEND_LEV_RATE, FEAT_AT_RX, FEAT_SP, 0, NULL },
-};
-#define DCCP_FEAT_SUPPORTED_MAX ARRAY_SIZE(dccp_feat_table)
-
-/**
- * dccp_feat_index - Hash function to map feature number into array position
- * @feat_num: feature to hash, one of %dccp_feature_numbers
- *
- * Returns consecutive array index or -1 if the feature is not understood.
- */
-static int dccp_feat_index(u8 feat_num)
-{
- /* The first 9 entries are occupied by the types from RFC 4340, 6.4 */
- if (feat_num > DCCPF_RESERVED && feat_num <= DCCPF_DATA_CHECKSUM)
- return feat_num - 1;
-
- /*
- * Other features: add cases for new feature types here after adding
- * them to the above table.
- */
- switch (feat_num) {
- case DCCPF_SEND_LEV_RATE:
- return DCCP_FEAT_SUPPORTED_MAX - 1;
- }
- return -1;
-}
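To make the mapping concrete, a hypothetical self-check (the expected indices follow from the lookup table above; the RFC 4340, 6.4 features occupy slots 0..8 and the RFC 4342 extension takes the last slot):

static void __maybe_unused dccp_feat_index_examples(void)
{
	BUG_ON(dccp_feat_index(DCCPF_CCID) != 0);
	BUG_ON(dccp_feat_index(DCCPF_DATA_CHECKSUM) != 8);
	BUG_ON(dccp_feat_index(DCCPF_SEND_LEV_RATE) != 9);
	BUG_ON(dccp_feat_index(DCCPF_MIN_CCID_SPECIFIC) != -1);	/* CCID-specific */
}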
-
-static u8 dccp_feat_type(u8 feat_num)
-{
- int idx = dccp_feat_index(feat_num);
-
- if (idx < 0)
- return FEAT_UNKNOWN;
- return dccp_feat_table[idx].reconciliation;
-}
-
-static int dccp_feat_default_value(u8 feat_num)
-{
- int idx = dccp_feat_index(feat_num);
- /*
- * There are no default values for unknown features, so encountering a
- * negative index here indicates a serious problem somewhere else.
- */
- DCCP_BUG_ON(idx < 0);
-
- return idx < 0 ? 0 : dccp_feat_table[idx].default_value;
-}
-
-/*
- * Debugging and verbose-printing section
- */
-static const char *dccp_feat_fname(const u8 feat)
-{
- static const char *const feature_names[] = {
- [DCCPF_RESERVED] = "Reserved",
- [DCCPF_CCID] = "CCID",
- [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos",
- [DCCPF_SEQUENCE_WINDOW] = "Sequence Window",
- [DCCPF_ECN_INCAPABLE] = "ECN Incapable",
- [DCCPF_ACK_RATIO] = "Ack Ratio",
- [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector",
- [DCCPF_SEND_NDP_COUNT] = "Send NDP Count",
- [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage",
- [DCCPF_DATA_CHECKSUM] = "Send Data Checksum",
- };
- if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
- return feature_names[DCCPF_RESERVED];
-
- if (feat == DCCPF_SEND_LEV_RATE)
- return "Send Loss Event Rate";
- if (feat >= DCCPF_MIN_CCID_SPECIFIC)
- return "CCID-specific";
-
- return feature_names[feat];
-}
-
-static const char *const dccp_feat_sname[] = {
- "DEFAULT", "INITIALISING", "CHANGING", "UNSTABLE", "STABLE",
-};
-
-#ifdef CONFIG_IP_DCCP_DEBUG
-static const char *dccp_feat_oname(const u8 opt)
-{
- switch (opt) {
- case DCCPO_CHANGE_L: return "Change_L";
- case DCCPO_CONFIRM_L: return "Confirm_L";
- case DCCPO_CHANGE_R: return "Change_R";
- case DCCPO_CONFIRM_R: return "Confirm_R";
- }
- return NULL;
-}
-
-static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val)
-{
- u8 i, type = dccp_feat_type(feat_num);
-
- if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL))
- dccp_pr_debug_cat("(NULL)");
- else if (type == FEAT_SP)
- for (i = 0; i < val->sp.len; i++)
- dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]);
- else if (type == FEAT_NN)
- dccp_pr_debug_cat("%llu", (unsigned long long)val->nn);
- else
- dccp_pr_debug_cat("unknown type %u", type);
-}
-
-static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len)
-{
- u8 type = dccp_feat_type(feat_num);
- dccp_feat_val fval = { .sp.vec = list, .sp.len = len };
-
- if (type == FEAT_NN)
- fval.nn = dccp_decode_value_var(list, len);
- dccp_feat_printval(feat_num, &fval);
-}
-
-static void dccp_feat_print_entry(struct dccp_feat_entry const *entry)
-{
- dccp_debug(" * %s %s = ", entry->is_local ? "local" : "remote",
- dccp_feat_fname(entry->feat_num));
- dccp_feat_printval(entry->feat_num, &entry->val);
- dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state],
- entry->needs_confirm ? "(Confirm pending)" : "");
-}
-
-#define dccp_feat_print_opt(opt, feat, val, len, mandatory) do { \
- dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\
- dccp_feat_printvals(feat, val, len); \
- dccp_pr_debug_cat(") %s\n", mandatory ? "!" : ""); } while (0)
-
-#define dccp_feat_print_fnlist(fn_list) { \
- const struct dccp_feat_entry *___entry; \
- \
- dccp_pr_debug("List Dump:\n"); \
- list_for_each_entry(___entry, fn_list, node) \
- dccp_feat_print_entry(___entry); \
-}
-#else /* ! CONFIG_IP_DCCP_DEBUG */
-#define dccp_feat_print_opt(opt, feat, val, len, mandatory)
-#define dccp_feat_print_fnlist(fn_list)
-#endif
-
-static int __dccp_feat_activate(struct sock *sk, const int idx,
- const bool is_local, dccp_feat_val const *fval)
-{
- bool rx;
- u64 val;
-
- if (idx < 0 || idx >= DCCP_FEAT_SUPPORTED_MAX)
- return -1;
- if (dccp_feat_table[idx].activation_hdlr == NULL)
- return 0;
-
- if (fval == NULL) {
- val = dccp_feat_table[idx].default_value;
- } else if (dccp_feat_table[idx].reconciliation == FEAT_SP) {
- if (fval->sp.vec == NULL) {
- /*
- * This can happen when an empty Confirm is sent
- * for an SP (i.e. known) feature. In this case
- * we would be using the default anyway.
- */
- DCCP_CRIT("Feature #%d undefined: using default", idx);
- val = dccp_feat_table[idx].default_value;
- } else {
- val = fval->sp.vec[0];
- }
- } else {
- val = fval->nn;
- }
-
- /* Location is RX if this is a local-RX or remote-TX feature */
- rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX));
-
- dccp_debug(" -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX",
- dccp_feat_fname(dccp_feat_table[idx].feat_num),
- fval ? "" : "default ", (unsigned long long)val);
-
- return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
-}
-
-/**
- * dccp_feat_activate - Activate feature value on socket
- * @sk: fully connected DCCP socket (after handshake is complete)
- * @feat_num: feature to activate, one of %dccp_feature_numbers
- * @local: whether local (1) or remote (0) @feat_num is meant
- * @fval: the value (SP or NN) to activate, or NULL to use the default value
- *
- * For general use this function is preferable over __dccp_feat_activate().
- */
-static int dccp_feat_activate(struct sock *sk, u8 feat_num, bool local,
- dccp_feat_val const *fval)
-{
- return __dccp_feat_activate(sk, dccp_feat_index(feat_num), local, fval);
-}
-
-/* Test for "Req'd" feature (RFC 4340, 6.4) */
-static inline int dccp_feat_must_be_understood(u8 feat_num)
-{
- return feat_num == DCCPF_CCID || feat_num == DCCPF_SHORT_SEQNOS ||
- feat_num == DCCPF_SEQUENCE_WINDOW;
-}
-
-/* copy constructor, fval must not already contain allocated memory */
-static int dccp_feat_clone_sp_val(dccp_feat_val *fval, u8 const *val, u8 len)
-{
- fval->sp.len = len;
- if (fval->sp.len > 0) {
- fval->sp.vec = kmemdup(val, len, gfp_any());
- if (fval->sp.vec == NULL) {
- fval->sp.len = 0;
- return -ENOMEM;
- }
- }
- return 0;
-}
-
-static void dccp_feat_val_destructor(u8 feat_num, dccp_feat_val *val)
-{
- if (unlikely(val == NULL))
- return;
- if (dccp_feat_type(feat_num) == FEAT_SP)
- kfree(val->sp.vec);
- memset(val, 0, sizeof(*val));
-}
-
-static struct dccp_feat_entry *
- dccp_feat_clone_entry(struct dccp_feat_entry const *original)
-{
- struct dccp_feat_entry *new;
- u8 type = dccp_feat_type(original->feat_num);
-
- if (type == FEAT_UNKNOWN)
- return NULL;
-
- new = kmemdup(original, sizeof(struct dccp_feat_entry), gfp_any());
- if (new == NULL)
- return NULL;
-
- if (type == FEAT_SP && dccp_feat_clone_sp_val(&new->val,
- original->val.sp.vec,
- original->val.sp.len)) {
- kfree(new);
- return NULL;
- }
- return new;
-}
-
-static void dccp_feat_entry_destructor(struct dccp_feat_entry *entry)
-{
- if (entry != NULL) {
- dccp_feat_val_destructor(entry->feat_num, &entry->val);
- kfree(entry);
- }
-}
-
-/*
- * List management functions
- *
- * Feature negotiation lists rely on and maintain the following invariants:
- * - each feat_num in the list is known, i.e. we know its type and default value
- * - each feat_num/is_local combination is unique (old entries are overwritten)
- * - SP values are always freshly allocated
- * - list is sorted in increasing order of feature number (faster lookup)
- */
-static struct dccp_feat_entry *dccp_feat_list_lookup(struct list_head *fn_list,
- u8 feat_num, bool is_local)
-{
- struct dccp_feat_entry *entry;
-
- list_for_each_entry(entry, fn_list, node) {
- if (entry->feat_num == feat_num && entry->is_local == is_local)
- return entry;
- else if (entry->feat_num > feat_num)
- break;
- }
- return NULL;
-}
-
-/**
- * dccp_feat_entry_new - Central list update routine (called by all others)
- * @head: list to add to
- * @feat: feature number
- * @local: whether the local (1) or remote (0) feature with number @feat is meant
- *
- * This is the only constructor and serves to ensure the above invariants.
- */
-static struct dccp_feat_entry *
- dccp_feat_entry_new(struct list_head *head, u8 feat, bool local)
-{
- struct dccp_feat_entry *entry;
-
- list_for_each_entry(entry, head, node)
- if (entry->feat_num == feat && entry->is_local == local) {
- dccp_feat_val_destructor(entry->feat_num, &entry->val);
- return entry;
- } else if (entry->feat_num > feat) {
- head = &entry->node;
- break;
- }
-
- entry = kmalloc(sizeof(*entry), gfp_any());
- if (entry != NULL) {
- entry->feat_num = feat;
- entry->is_local = local;
- list_add_tail(&entry->node, head);
- }
- return entry;
-}
-
-/**
- * dccp_feat_push_change - Add/overwrite a Change option in the list
- * @fn_list: feature-negotiation list to update
- * @feat: one of %dccp_feature_numbers
- * @local: whether local (1) or remote (0) @feat_num is meant
- * @mandatory: whether to use Mandatory feature negotiation options
- * @fval: pointer to NN/SP value to be inserted (will be copied)
- */
-static int dccp_feat_push_change(struct list_head *fn_list, u8 feat, u8 local,
- u8 mandatory, dccp_feat_val *fval)
-{
- struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
-
- if (new == NULL)
- return -ENOMEM;
-
- new->feat_num = feat;
- new->is_local = local;
- new->state = FEAT_INITIALISING;
- new->needs_confirm = false;
- new->empty_confirm = false;
- new->val = *fval;
- new->needs_mandatory = mandatory;
-
- return 0;
-}
-
-/**
- * dccp_feat_push_confirm - Add a Confirm entry to the FN list
- * @fn_list: feature-negotiation list to add to
- * @feat: one of %dccp_feature_numbers
- * @local: whether local (1) or remote (0) @feat_num is being confirmed
- * @fval: pointer to NN/SP value to be inserted or NULL
- *
- * Returns 0 on success, a Reset code for further processing otherwise.
- */
-static int dccp_feat_push_confirm(struct list_head *fn_list, u8 feat, u8 local,
- dccp_feat_val *fval)
-{
- struct dccp_feat_entry *new = dccp_feat_entry_new(fn_list, feat, local);
-
- if (new == NULL)
- return DCCP_RESET_CODE_TOO_BUSY;
-
- new->feat_num = feat;
- new->is_local = local;
- new->state = FEAT_STABLE; /* transition in 6.6.2 */
- new->needs_confirm = true;
- new->empty_confirm = (fval == NULL);
- new->val.nn = 0; /* zeroes the whole structure */
- if (!new->empty_confirm)
- new->val = *fval;
- new->needs_mandatory = false;
-
- return 0;
-}
-
-static int dccp_push_empty_confirm(struct list_head *fn_list, u8 feat, u8 local)
-{
- return dccp_feat_push_confirm(fn_list, feat, local, NULL);
-}
-
-static inline void dccp_feat_list_pop(struct dccp_feat_entry *entry)
-{
- list_del(&entry->node);
- dccp_feat_entry_destructor(entry);
-}
-
-void dccp_feat_list_purge(struct list_head *fn_list)
-{
- struct dccp_feat_entry *entry, *next;
-
- list_for_each_entry_safe(entry, next, fn_list, node)
- dccp_feat_entry_destructor(entry);
- INIT_LIST_HEAD(fn_list);
-}
-EXPORT_SYMBOL_GPL(dccp_feat_list_purge);
-
-/* generate @to as full clone of @from - @to must not contain any nodes */
-int dccp_feat_clone_list(struct list_head const *from, struct list_head *to)
-{
- struct dccp_feat_entry *entry, *new;
-
- INIT_LIST_HEAD(to);
- list_for_each_entry(entry, from, node) {
- new = dccp_feat_clone_entry(entry);
- if (new == NULL)
- goto cloning_failed;
- list_add_tail(&new->node, to);
- }
- return 0;
-
-cloning_failed:
- dccp_feat_list_purge(to);
- return -ENOMEM;
-}
-
-/**
- * dccp_feat_valid_nn_length - Enforce length constraints on NN options
- * @feat_num: feature to return length of, one of %dccp_feature_numbers
- *
- * Length is between 0 and %DCCP_OPTVAL_MAXLEN. Used for outgoing packets only;
- * incoming options are accepted as long as their values are valid.
- */
-static u8 dccp_feat_valid_nn_length(u8 feat_num)
-{
- if (feat_num == DCCPF_ACK_RATIO) /* RFC 4340, 11.3 and 6.6.8 */
- return 2;
- if (feat_num == DCCPF_SEQUENCE_WINDOW) /* RFC 4340, 7.5.2 and 6.5 */
- return 6;
- return 0;
-}
-
-static u8 dccp_feat_is_valid_nn_val(u8 feat_num, u64 val)
-{
- switch (feat_num) {
- case DCCPF_ACK_RATIO:
- return val <= DCCPF_ACK_RATIO_MAX;
- case DCCPF_SEQUENCE_WINDOW:
- return val >= DCCPF_SEQ_WMIN && val <= DCCPF_SEQ_WMAX;
- }
- return 0; /* feature unknown - so we can't tell */
-}
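For illustration, with the limits defined in feat.h:

dccp_feat_is_valid_nn_val(DCCPF_ACK_RATIO, 2);		/* -> 1, <= 0xFFFF       */
dccp_feat_is_valid_nn_val(DCCPF_SEQUENCE_WINDOW, 16);	/* -> 0, below Wmin = 32 */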
-
-/* check that SP values are within the ranges defined in RFC 4340 */
-static u8 dccp_feat_is_valid_sp_val(u8 feat_num, u8 val)
-{
- switch (feat_num) {
- case DCCPF_CCID:
- return val == DCCPC_CCID2 || val == DCCPC_CCID3;
- /* Type-check Boolean feature values: */
- case DCCPF_SHORT_SEQNOS:
- case DCCPF_ECN_INCAPABLE:
- case DCCPF_SEND_ACK_VECTOR:
- case DCCPF_SEND_NDP_COUNT:
- case DCCPF_DATA_CHECKSUM:
- case DCCPF_SEND_LEV_RATE:
- return val < 2;
- case DCCPF_MIN_CSUM_COVER:
- return val < 16;
- }
- return 0; /* feature unknown */
-}
-
-static u8 dccp_feat_sp_list_ok(u8 feat_num, u8 const *sp_list, u8 sp_len)
-{
- if (sp_list == NULL || sp_len < 1)
- return 0;
- while (sp_len--)
- if (!dccp_feat_is_valid_sp_val(feat_num, *sp_list++))
- return 0;
- return 1;
-}
-
-/**
- * dccp_feat_insert_opts - Generate FN options from current list state
- * @skb: next sk_buff to be sent to the peer
- * @dp: for client during handshake and general negotiation
- * @dreq: used by the server only (all Changes/Confirms in LISTEN/RESPOND)
- */
-int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
- struct sk_buff *skb)
-{
- struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
- struct dccp_feat_entry *pos, *next;
- u8 opt, type, len, *ptr, nn_in_nbo[DCCP_OPTVAL_MAXLEN];
- bool rpt;
-
- /* put entries into @skb in the order they appear in the list */
- list_for_each_entry_safe_reverse(pos, next, fn, node) {
- opt = dccp_feat_genopt(pos);
- type = dccp_feat_type(pos->feat_num);
- rpt = false;
-
- if (pos->empty_confirm) {
- len = 0;
- ptr = NULL;
- } else {
- if (type == FEAT_SP) {
- len = pos->val.sp.len;
- ptr = pos->val.sp.vec;
- rpt = pos->needs_confirm;
- } else if (type == FEAT_NN) {
- len = dccp_feat_valid_nn_length(pos->feat_num);
- ptr = nn_in_nbo;
- dccp_encode_value_var(pos->val.nn, ptr, len);
- } else {
- DCCP_BUG("unknown feature %u", pos->feat_num);
- return -1;
- }
- }
- dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0);
-
- if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt))
- return -1;
- if (pos->needs_mandatory && dccp_insert_option_mandatory(skb))
- return -1;
-
- if (skb->sk->sk_state == DCCP_OPEN &&
- (opt == DCCPO_CONFIRM_R || opt == DCCPO_CONFIRM_L)) {
- /*
- * Confirms don't get retransmitted (6.6.3) once the
- * connection is in state OPEN
- */
- dccp_feat_list_pop(pos);
- } else {
- /*
- * Enter CHANGING after transmitting the Change
- * option (6.6.2).
- */
- if (pos->state == FEAT_INITIALISING)
- pos->state = FEAT_CHANGING;
- }
- }
- return 0;
-}
-
-/**
- * __feat_register_nn - Register new NN value on socket
- * @fn: feature-negotiation list to register with
- * @feat: an NN feature from %dccp_feature_numbers
- * @mandatory: use Mandatory option if 1
- * @nn_val: value to register (restricted to 4 bytes)
- *
- * Note that NN features are local by definition (RFC 4340, 6.3.2).
- */
-static int __feat_register_nn(struct list_head *fn, u8 feat,
- u8 mandatory, u64 nn_val)
-{
- dccp_feat_val fval = { .nn = nn_val };
-
- if (dccp_feat_type(feat) != FEAT_NN ||
- !dccp_feat_is_valid_nn_val(feat, nn_val))
- return -EINVAL;
-
- /* Don't bother with default values, they will be activated anyway. */
- if (nn_val - (u64)dccp_feat_default_value(feat) == 0)
- return 0;
-
- return dccp_feat_push_change(fn, feat, 1, mandatory, &fval);
-}
-
-/**
- * __feat_register_sp - Register new SP value/list on socket
- * @fn: feature-negotiation list to register with
- * @feat: an SP feature from %dccp_feature_numbers
- * @is_local: whether the local (1) or the remote (0) @feat is meant
- * @mandatory: use Mandatory option if 1
- * @sp_val: SP value followed by optional preference list
- * @sp_len: length of @sp_val in bytes
- */
-static int __feat_register_sp(struct list_head *fn, u8 feat, u8 is_local,
- u8 mandatory, u8 const *sp_val, u8 sp_len)
-{
- dccp_feat_val fval;
-
- if (dccp_feat_type(feat) != FEAT_SP ||
- !dccp_feat_sp_list_ok(feat, sp_val, sp_len))
- return -EINVAL;
-
- /* Avoid negotiating alien CCIDs by only advertising supported ones */
- if (feat == DCCPF_CCID && !ccid_support_check(sp_val, sp_len))
- return -EOPNOTSUPP;
-
- if (dccp_feat_clone_sp_val(&fval, sp_val, sp_len))
- return -ENOMEM;
-
- if (dccp_feat_push_change(fn, feat, is_local, mandatory, &fval)) {
- kfree(fval.sp.vec);
- return -ENOMEM;
- }
-
- return 0;
-}
-
-/**
- * dccp_feat_register_sp - Register requests to change SP feature values
- * @sk: client or listening socket
- * @feat: one of %dccp_feature_numbers
- * @is_local: whether the local (1) or remote (0) @feat is meant
- * @list: array of preferred values, in descending order of preference
- * @len: length of @list in bytes
- */
-int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
- u8 const *list, u8 len)
-{ /* any changes must be registered before establishing the connection */
- if (sk->sk_state != DCCP_CLOSED)
- return -EISCONN;
- if (dccp_feat_type(feat) != FEAT_SP)
- return -EINVAL;
- return __feat_register_sp(&dccp_sk(sk)->dccps_featneg, feat, is_local,
- 0, list, len);
-}
-
-/**
- * dccp_feat_nn_get - Query current/pending value of NN feature
- * @sk: DCCP socket of an established connection
- * @feat: NN feature number from %dccp_feature_numbers
- *
- * For a known NN feature, returns value currently being negotiated, or
- * current (confirmed) value if no negotiation is going on.
- */
-u64 dccp_feat_nn_get(struct sock *sk, u8 feat)
-{
- if (dccp_feat_type(feat) == FEAT_NN) {
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_feat_entry *entry;
-
- entry = dccp_feat_list_lookup(&dp->dccps_featneg, feat, 1);
- if (entry != NULL)
- return entry->val.nn;
-
- switch (feat) {
- case DCCPF_ACK_RATIO:
- return dp->dccps_l_ack_ratio;
- case DCCPF_SEQUENCE_WINDOW:
- return dp->dccps_l_seq_win;
- }
- }
- DCCP_BUG("attempt to look up unsupported feature %u", feat);
- return 0;
-}
-EXPORT_SYMBOL_GPL(dccp_feat_nn_get);
-
-/**
- * dccp_feat_signal_nn_change - Update NN values for an established connection
- * @sk: DCCP socket of an established connection
- * @feat: NN feature number from %dccp_feature_numbers
- * @nn_val: the new value to use
- *
- * This function is used to communicate NN updates out-of-band.
- */
-int dccp_feat_signal_nn_change(struct sock *sk, u8 feat, u64 nn_val)
-{
- struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
- dccp_feat_val fval = { .nn = nn_val };
- struct dccp_feat_entry *entry;
-
- if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
- return 0;
-
- if (dccp_feat_type(feat) != FEAT_NN ||
- !dccp_feat_is_valid_nn_val(feat, nn_val))
- return -EINVAL;
-
- if (nn_val == dccp_feat_nn_get(sk, feat))
- return 0; /* already set or negotiation under way */
-
- entry = dccp_feat_list_lookup(fn, feat, 1);
- if (entry != NULL) {
- dccp_pr_debug("Clobbering existing NN entry %llu -> %llu\n",
- (unsigned long long)entry->val.nn,
- (unsigned long long)nn_val);
- dccp_feat_list_pop(entry);
- }
-
- inet_csk_schedule_ack(sk);
- return dccp_feat_push_change(fn, feat, 1, 0, &fval);
-}
-EXPORT_SYMBOL_GPL(dccp_feat_signal_nn_change);
-
-/*
- * Tracking features whose values depend on the choice of CCID
- *
- * This is designed with an extension in mind so that a list walk could be done
- * before activating any features. However, the existing framework has worked
- * satisfactorily so far, so the automatic verification is left open.
- * When adding new CCIDs, add a corresponding dependency table here.
- */
-static const struct ccid_dependency *dccp_feat_ccid_deps(u8 ccid, bool is_local)
-{
- static const struct ccid_dependency ccid2_dependencies[2][2] = {
- /*
- * CCID2 mandates Ack Vectors (RFC 4341, 4.): as CCID is a TX
- * feature and Send Ack Vector is an RX feature, `is_local'
- * needs to be reversed.
- */
- { /* Dependencies of the receiver-side (remote) CCID2 */
- {
- .dependent_feat = DCCPF_SEND_ACK_VECTOR,
- .is_local = true,
- .is_mandatory = true,
- .val = 1
- },
- { 0, 0, 0, 0 }
- },
- { /* Dependencies of the sender-side (local) CCID2 */
- {
- .dependent_feat = DCCPF_SEND_ACK_VECTOR,
- .is_local = false,
- .is_mandatory = true,
- .val = 1
- },
- { 0, 0, 0, 0 }
- }
- };
- static const struct ccid_dependency ccid3_dependencies[2][5] = {
- { /*
- * Dependencies of the receiver-side CCID3
- */
- { /* locally disable Ack Vectors */
- .dependent_feat = DCCPF_SEND_ACK_VECTOR,
- .is_local = true,
- .is_mandatory = false,
- .val = 0
- },
- { /* see below why Send Loss Event Rate is on */
- .dependent_feat = DCCPF_SEND_LEV_RATE,
- .is_local = true,
- .is_mandatory = true,
- .val = 1
- },
- { /* NDP Count is needed as per RFC 4342, 6.1.1 */
- .dependent_feat = DCCPF_SEND_NDP_COUNT,
- .is_local = false,
- .is_mandatory = true,
- .val = 1
- },
- { 0, 0, 0, 0 },
- },
- { /*
- * CCID3 at the TX side: we request that the HC-receiver
- * will not send Ack Vectors (they will be ignored, so
- * Mandatory is not set); we enable Send Loss Event Rate
- * (Mandatory since the implementation does not support
- * the Loss Intervals option of RFC 4342, 8.6).
- * The last two options are for peer's information only.
- */
- {
- .dependent_feat = DCCPF_SEND_ACK_VECTOR,
- .is_local = false,
- .is_mandatory = false,
- .val = 0
- },
- {
- .dependent_feat = DCCPF_SEND_LEV_RATE,
- .is_local = false,
- .is_mandatory = true,
- .val = 1
- },
- { /* this CCID does not support Ack Ratio */
- .dependent_feat = DCCPF_ACK_RATIO,
- .is_local = true,
- .is_mandatory = false,
- .val = 0
- },
- { /* tell receiver we are sending NDP counts */
- .dependent_feat = DCCPF_SEND_NDP_COUNT,
- .is_local = true,
- .is_mandatory = false,
- .val = 1
- },
- { 0, 0, 0, 0 }
- }
- };
- switch (ccid) {
- case DCCPC_CCID2:
- return ccid2_dependencies[is_local];
- case DCCPC_CCID3:
- return ccid3_dependencies[is_local];
- default:
- return NULL;
- }
-}
-
-/**
- * dccp_feat_propagate_ccid - Resolve dependencies of features on choice of CCID
- * @fn: feature-negotiation list to update
- * @id: CCID number to track
- * @is_local: whether TX CCID (1) or RX CCID (0) is meant
- *
- * This function needs to be called after registering all other features.
- */
-static int dccp_feat_propagate_ccid(struct list_head *fn, u8 id, bool is_local)
-{
- const struct ccid_dependency *table = dccp_feat_ccid_deps(id, is_local);
- int i, rc = (table == NULL);
-
- for (i = 0; rc == 0 && table[i].dependent_feat != DCCPF_RESERVED; i++)
- if (dccp_feat_type(table[i].dependent_feat) == FEAT_SP)
- rc = __feat_register_sp(fn, table[i].dependent_feat,
- table[i].is_local,
- table[i].is_mandatory,
- &table[i].val, 1);
- else
- rc = __feat_register_nn(fn, table[i].dependent_feat,
- table[i].is_mandatory,
- table[i].val);
- return rc;
-}
-
-/**
- * dccp_feat_finalise_settings - Finalise settings before starting negotiation
- * @dp: client or listening socket (settings will be inherited)
- *
- * This is called after all registrations (socket initialisation, sysctls, and
- * sockopt calls), and before sending the first packet containing Change options
- * (ie. client-Request or server-Response), to ensure internal consistency.
- */
-int dccp_feat_finalise_settings(struct dccp_sock *dp)
-{
- struct list_head *fn = &dp->dccps_featneg;
- struct dccp_feat_entry *entry;
- int i = 2, ccids[2] = { -1, -1 };
-
- /*
- * Propagating CCIDs:
- * 1) not useful to propagate CCID settings if this host advertises more
- * than one CCID: the choice of CCID may still change - if this is
- * the client, or if this is the server and the client sends
- * singleton CCID values.
- * 2) since propagate_ccid changes the list, we defer changing
- * the sorted list until after the traversal.
- */
- list_for_each_entry(entry, fn, node)
- if (entry->feat_num == DCCPF_CCID && entry->val.sp.len == 1)
- ccids[entry->is_local] = entry->val.sp.vec[0];
- while (i--)
- if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i))
- return -1;
- dccp_feat_print_fnlist(fn);
- return 0;
-}
-
-/**
- * dccp_feat_server_ccid_dependencies - Resolve CCID-dependent features
- * @dreq: server socket to resolve
- *
- * It is the server which resolves the dependencies once the CCID has been
- * fully negotiated. If no CCID has been negotiated, it uses the default CCID.
- */
-int dccp_feat_server_ccid_dependencies(struct dccp_request_sock *dreq)
-{
- struct list_head *fn = &dreq->dreq_featneg;
- struct dccp_feat_entry *entry;
- u8 is_local, ccid;
-
- for (is_local = 0; is_local <= 1; is_local++) {
- entry = dccp_feat_list_lookup(fn, DCCPF_CCID, is_local);
-
- if (entry != NULL && !entry->empty_confirm)
- ccid = entry->val.sp.vec[0];
- else
- ccid = dccp_feat_default_value(DCCPF_CCID);
-
- if (dccp_feat_propagate_ccid(fn, ccid, is_local))
- return -1;
- }
- return 0;
-}
-
-/* Select the first entry in @servlist that also occurs in @clilist (6.3.1) */
-static int dccp_feat_preflist_match(u8 *servlist, u8 slen, u8 *clilist, u8 clen)
-{
- u8 c, s;
-
- for (s = 0; s < slen; s++)
- for (c = 0; c < clen; c++)
- if (servlist[s] == clilist[c])
- return servlist[s];
- return -1;
-}
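A worked example with illustrative preference lists: the first server entry that also occurs on the client side wins, regardless of the client's own ordering.

u8 serv[] = { DCCPC_CCID3, DCCPC_CCID2 };	/* server prefers CCID-3    */
u8 cli[]  = { DCCPC_CCID2 };			/* client only offers CCID-2 */

int agreed = dccp_feat_preflist_match(serv, 2, cli, 1);		/* -> DCCPC_CCID2   */
int none   = dccp_feat_preflist_match(serv, 2, NULL, 0);	/* -> -1, no match  */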
-
-/**
- * dccp_feat_prefer - Move preferred entry to the start of array
- * @preferred_value: entry to move to start of array
- * @array: array of preferred entries
- * @array_len: size of the array
- *
- * Reorder the @array_len elements in @array so that @preferred_value comes
- * first. Returns >0 to indicate that @preferred_value does occur in @array.
- */
-static u8 dccp_feat_prefer(u8 preferred_value, u8 *array, u8 array_len)
-{
- u8 i, does_occur = 0;
-
- if (array != NULL) {
- for (i = 0; i < array_len; i++)
- if (array[i] == preferred_value) {
- array[i] = array[0];
- does_occur++;
- }
- if (does_occur)
- array[0] = preferred_value;
- }
- return does_occur;
-}
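For illustration, with hypothetical values:

u8 prefs[] = { 2, 3, 4 };

dccp_feat_prefer(3, prefs, 3);	/* prefs becomes { 3, 2, 4 }, returns 1       */
dccp_feat_prefer(5, prefs, 3);	/* no occurrence: prefs unchanged, returns 0  */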
-
-/**
- * dccp_feat_reconcile - Reconcile SP preference lists
- * @fv: SP list to reconcile into
- * @arr: received SP preference list
- * @len: length of @arr in bytes
- * @is_server: whether this side is the server (and @fv is the server's list)
- * @reorder: whether to reorder the list in @fv after reconciling with @arr
- * When successful, > 0 is returned and the reconciled list is in @fv.
- * A value of 0 means that negotiation failed (no shared entry).
- */
-static int dccp_feat_reconcile(dccp_feat_val *fv, u8 *arr, u8 len,
- bool is_server, bool reorder)
-{
- int rc;
-
- if (!fv->sp.vec || !arr) {
- DCCP_CRIT("NULL feature value or array");
- return 0;
- }
-
- if (is_server)
- rc = dccp_feat_preflist_match(fv->sp.vec, fv->sp.len, arr, len);
- else
- rc = dccp_feat_preflist_match(arr, len, fv->sp.vec, fv->sp.len);
-
- if (!reorder)
- return rc;
- if (rc < 0)
- return 0;
-
- /*
- * Reorder list: used for activating features and in dccp_insert_fn_opt.
- */
- return dccp_feat_prefer(rc, fv->sp.vec, fv->sp.len);
-}
-
-/**
- * dccp_feat_change_recv - Process incoming ChangeL/R options
- * @fn: feature-negotiation list to update
- * @is_mandatory: whether the Change was preceded by a Mandatory option
- * @opt: %DCCPO_CHANGE_L or %DCCPO_CHANGE_R
- * @feat: one of %dccp_feature_numbers
- * @val: NN value or SP value/preference list
- * @len: length of @val in bytes
- * @server: whether this node is the server (1) or the client (0)
- */
-static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
- u8 feat, u8 *val, u8 len, const bool server)
-{
- u8 defval, type = dccp_feat_type(feat);
- const bool local = (opt == DCCPO_CHANGE_R);
- struct dccp_feat_entry *entry;
- dccp_feat_val fval;
-
- if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */
- goto unknown_feature_or_value;
-
- dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
-
- /*
- * Negotiation of NN features: Change R is invalid, so there is no
- * simultaneous negotiation; hence we do not look up in the list.
- */
- if (type == FEAT_NN) {
- if (local || len > sizeof(fval.nn))
- goto unknown_feature_or_value;
-
- /* 6.3.2: "The feature remote MUST accept any valid value..." */
- fval.nn = dccp_decode_value_var(val, len);
- if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
- goto unknown_feature_or_value;
-
- return dccp_feat_push_confirm(fn, feat, local, &fval);
- }
-
- /*
- * Unidirectional/simultaneous negotiation of SP features (6.3.1)
- */
- entry = dccp_feat_list_lookup(fn, feat, local);
- if (entry == NULL) {
- /*
- * No particular preferences have been registered. We deal with
- * this situation by assuming that all valid values are equally
- * acceptable, and apply the following checks:
- * - if the peer's list is a singleton, we accept a valid value;
- * - if we are the server, we first try to see if the peer (the
- * client) advertises the default value. If yes, we use it,
- * otherwise we accept the preferred value;
- * - else if we are the client, we use the first list element.
- */
- if (dccp_feat_clone_sp_val(&fval, val, 1))
- return DCCP_RESET_CODE_TOO_BUSY;
-
- if (len > 1 && server) {
- defval = dccp_feat_default_value(feat);
- if (dccp_feat_preflist_match(&defval, 1, val, len) > -1)
- fval.sp.vec[0] = defval;
- } else if (!dccp_feat_is_valid_sp_val(feat, fval.sp.vec[0])) {
- kfree(fval.sp.vec);
- goto unknown_feature_or_value;
- }
-
- /* Treat unsupported CCIDs like invalid values */
- if (feat == DCCPF_CCID && !ccid_support_check(fval.sp.vec, 1)) {
- kfree(fval.sp.vec);
- goto not_valid_or_not_known;
- }
-
- if (dccp_feat_push_confirm(fn, feat, local, &fval)) {
- kfree(fval.sp.vec);
- return DCCP_RESET_CODE_TOO_BUSY;
- }
-
- return 0;
- } else if (entry->state == FEAT_UNSTABLE) { /* 6.6.2 */
- return 0;
- }
-
- if (dccp_feat_reconcile(&entry->val, val, len, server, true)) {
- entry->empty_confirm = false;
- } else if (is_mandatory) {
- return DCCP_RESET_CODE_MANDATORY_ERROR;
- } else if (entry->state == FEAT_INITIALISING) {
- /*
- * Failed simultaneous negotiation (server only): try to `save'
- * the connection by checking whether entry contains the default
- * value for @feat. If yes, send an empty Confirm to signal that
- * the received Change was not understood - which implies using
- * the default value.
- * If this also fails, we use Reset as the last resort.
- */
- WARN_ON(!server);
- defval = dccp_feat_default_value(feat);
- if (!dccp_feat_reconcile(&entry->val, &defval, 1, server, true))
- return DCCP_RESET_CODE_OPTION_ERROR;
- entry->empty_confirm = true;
- }
- entry->needs_confirm = true;
- entry->needs_mandatory = false;
- entry->state = FEAT_STABLE;
- return 0;
-
-unknown_feature_or_value:
- if (!is_mandatory)
- return dccp_push_empty_confirm(fn, feat, local);
-
-not_valid_or_not_known:
- return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
- : DCCP_RESET_CODE_OPTION_ERROR;
-}
-
-/**
- * dccp_feat_confirm_recv - Process received Confirm options
- * @fn: feature-negotiation list to update
- * @is_mandatory: whether @opt was preceded by a Mandatory option
- * @opt: %DCCPO_CONFIRM_L or %DCCPO_CONFIRM_R
- * @feat: one of %dccp_feature_numbers
- * @val: NN value or SP value/preference list
- * @len: length of @val in bytes
- * @server: whether this node is server (1) or client (0)
- */
-static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
- u8 feat, u8 *val, u8 len, const bool server)
-{
- u8 *plist, plen, type = dccp_feat_type(feat);
- const bool local = (opt == DCCPO_CONFIRM_R);
- struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local);
-
- dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
-
- if (entry == NULL) { /* nothing queued: ignore or handle error */
- if (is_mandatory && type == FEAT_UNKNOWN)
- return DCCP_RESET_CODE_MANDATORY_ERROR;
-
- if (!local && type == FEAT_NN) /* 6.3.2 */
- goto confirmation_failed;
- return 0;
- }
-
- if (entry->state != FEAT_CHANGING) /* 6.6.2 */
- return 0;
-
- if (len == 0) {
- if (dccp_feat_must_be_understood(feat)) /* 6.6.7 */
- goto confirmation_failed;
- /*
- * Empty Confirm during connection setup: this means reverting
- * to the `old' value, which in this case is the default. Since
- * we handle default values automatically when no other values
- * have been set, we revert to the old value by removing this
- * entry from the list.
- */
- dccp_feat_list_pop(entry);
- return 0;
- }
-
- if (type == FEAT_NN) {
- if (len > sizeof(entry->val.nn))
- goto confirmation_failed;
-
- if (entry->val.nn == dccp_decode_value_var(val, len))
- goto confirmation_succeeded;
-
- DCCP_WARN("Bogus Confirm for non-existing value\n");
- goto confirmation_failed;
- }
-
- /*
- * Parsing SP Confirms: the first element of @val is the preferred
- * SP value which the peer confirms, the remainder depends on @len.
- * Note that only the confirmed value needs to be a valid SP value.
- */
- if (!dccp_feat_is_valid_sp_val(feat, *val))
- goto confirmation_failed;
-
- if (len == 1) { /* peer didn't supply a preference list */
- plist = val;
- plen = len;
- } else { /* preferred value + preference list */
- plist = val + 1;
- plen = len - 1;
- }
-
- /* Check whether the peer got the reconciliation right (6.6.8) */
- if (dccp_feat_reconcile(&entry->val, plist, plen, server, 0) != *val) {
- DCCP_WARN("Confirm selected the wrong value %u\n", *val);
- return DCCP_RESET_CODE_OPTION_ERROR;
- }
- entry->val.sp.vec[0] = *val;
-
-confirmation_succeeded:
- entry->state = FEAT_STABLE;
- return 0;
-
-confirmation_failed:
- DCCP_WARN("Confirmation failed\n");
- return is_mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
- : DCCP_RESET_CODE_OPTION_ERROR;
-}
-
-/**
- * dccp_feat_handle_nn_established - Fast-path reception of NN options
- * @sk: socket of an established DCCP connection
- * @mandatory: whether @opt was preceded by a Mandatory option
- * @opt: %DCCPO_CHANGE_L | %DCCPO_CONFIRM_R (NN only)
- * @feat: NN number, one of %dccp_feature_numbers
- * @val: NN value
- * @len: length of @val in bytes
- *
- * This function combines the functionality of change_recv/confirm_recv, with
- * the following differences (reset codes are the same):
- * - cleanup after receiving the Confirm;
- * - values are directly activated after successful parsing;
- * - deliberately restricted to NN features.
- * The restriction to NN features is essential since SP features can have
- * unpredictable outcomes (depending on the remote configuration), and are inter-
- * dependent (CCIDs for instance cause further dependencies).
- */
-static u8 dccp_feat_handle_nn_established(struct sock *sk, u8 mandatory, u8 opt,
- u8 feat, u8 *val, u8 len)
-{
- struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
- const bool local = (opt == DCCPO_CONFIRM_R);
- struct dccp_feat_entry *entry;
- u8 type = dccp_feat_type(feat);
- dccp_feat_val fval;
-
- dccp_feat_print_opt(opt, feat, val, len, mandatory);
-
- /* Ignore non-mandatory unknown and non-NN features */
- if (type == FEAT_UNKNOWN) {
- if (local && !mandatory)
- return 0;
- goto fast_path_unknown;
- } else if (type != FEAT_NN) {
- return 0;
- }
-
- /*
- * We don't accept empty Confirms, since in fast-path feature
- * negotiation the values are enabled immediately after sending
- * the Change option.
- * Empty Changes on the other hand are invalid (RFC 4340, 6.1).
- */
- if (len == 0 || len > sizeof(fval.nn))
- goto fast_path_unknown;
-
- if (opt == DCCPO_CHANGE_L) {
- fval.nn = dccp_decode_value_var(val, len);
- if (!dccp_feat_is_valid_nn_val(feat, fval.nn))
- goto fast_path_unknown;
-
- if (dccp_feat_push_confirm(fn, feat, local, &fval) ||
- dccp_feat_activate(sk, feat, local, &fval))
- return DCCP_RESET_CODE_TOO_BUSY;
-
- /* set the `Ack Pending' flag to piggyback a Confirm */
- inet_csk_schedule_ack(sk);
-
- } else if (opt == DCCPO_CONFIRM_R) {
- entry = dccp_feat_list_lookup(fn, feat, local);
- if (entry == NULL || entry->state != FEAT_CHANGING)
- return 0;
-
- fval.nn = dccp_decode_value_var(val, len);
- /*
- * Just ignore a value that doesn't match our current value.
- * If the option changes twice within two RTTs, then at least
- * one CONFIRM will be received for the old value after a
- * new CHANGE was sent.
- */
- if (fval.nn != entry->val.nn)
- return 0;
-
- /* Only activate after receiving the Confirm option (6.6.1). */
- dccp_feat_activate(sk, feat, local, &fval);
-
- /* It has been confirmed - so remove the entry */
- dccp_feat_list_pop(entry);
-
- } else {
- DCCP_WARN("Received illegal option %u\n", opt);
- goto fast_path_failed;
- }
- return 0;
-
-fast_path_unknown:
- if (!mandatory)
- return dccp_push_empty_confirm(fn, feat, local);
-
-fast_path_failed:
- return mandatory ? DCCP_RESET_CODE_MANDATORY_ERROR
- : DCCP_RESET_CODE_OPTION_ERROR;
-}
-
-/**
- * dccp_feat_parse_options - Process Feature-Negotiation Options
- * @sk: for general use and used by the client during connection setup
- * @dreq: used by the server during connection setup
- * @mandatory: whether @opt was preceded by a Mandatory option
- * @opt: %DCCPO_CHANGE_L | %DCCPO_CHANGE_R | %DCCPO_CONFIRM_L | %DCCPO_CONFIRM_R
- * @feat: one of %dccp_feature_numbers
- * @val: value contents of @opt
- * @len: length of @val in bytes
- *
- * Returns 0 on success, a Reset code for ending the connection otherwise.
- */
-int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
- u8 mandatory, u8 opt, u8 feat, u8 *val, u8 len)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct list_head *fn = dreq ? &dreq->dreq_featneg : &dp->dccps_featneg;
- bool server = false;
-
- switch (sk->sk_state) {
- /*
- * Negotiation during connection setup
- */
- case DCCP_LISTEN:
- server = true;
- fallthrough;
- case DCCP_REQUESTING:
- switch (opt) {
- case DCCPO_CHANGE_L:
- case DCCPO_CHANGE_R:
- return dccp_feat_change_recv(fn, mandatory, opt, feat,
- val, len, server);
- case DCCPO_CONFIRM_R:
- case DCCPO_CONFIRM_L:
- return dccp_feat_confirm_recv(fn, mandatory, opt, feat,
- val, len, server);
- }
- break;
- /*
- * Support for exchanging NN options on an established connection.
- */
- case DCCP_OPEN:
- case DCCP_PARTOPEN:
- return dccp_feat_handle_nn_established(sk, mandatory, opt, feat,
- val, len);
- }
- return 0; /* ignore FN options in all other states */
-}
-
-/**
- * dccp_feat_init - Seed feature negotiation with host-specific defaults
- * @sk: Socket to initialize.
- *
- * This initialises global defaults, depending on the value of the sysctls.
- * These can later be overridden by registering changes via setsockopt calls.
- * The last link in the chain is finalise_settings, to make sure that between
- * here and the start of actual feature negotiation no inconsistencies enter.
- *
- * All features not appearing below either use defaults or are otherwise
- * adjusted later through dccp_feat_finalise_settings().
- */
-int dccp_feat_init(struct sock *sk)
-{
- struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
- u8 on = 1, off = 0;
- int rc;
- struct {
- u8 *val;
- u8 len;
- } tx, rx;
-
- /* Non-negotiable (NN) features */
- rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0,
- sysctl_dccp_sequence_window);
- if (rc)
- return rc;
-
- /* Server-priority (SP) features */
-
- /* Advertise that short seqnos are not supported (7.6.1) */
- rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1);
- if (rc)
- return rc;
-
- /* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */
- rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1);
- if (rc)
- return rc;
-
- /*
- * We advertise the available list of CCIDs and reorder according to
- * preferences, to avoid failure resulting from negotiating different
- * singleton values (which always leads to failure).
- * These settings can still (later) be overridden via sockopts.
- */
- if (ccid_get_builtin_ccids(&tx.val, &tx.len))
- return -ENOBUFS;
- if (ccid_get_builtin_ccids(&rx.val, &rx.len)) {
- kfree(tx.val);
- return -ENOBUFS;
- }
-
- if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
- !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
- goto free_ccid_lists;
-
- rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len);
- if (rc)
- goto free_ccid_lists;
-
- rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len);
-
-free_ccid_lists:
- kfree(tx.val);
- kfree(rx.val);
- return rc;
-}
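From userspace, these seeded defaults could be overridden per socket before connecting. A sketch using the historical sockopt API, with constants assumed as they existed in <linux/dccp.h>:

#include <stdint.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <linux/dccp.h>

static int open_dccp_ccid2(void)
{
	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
	uint8_t ccid = DCCPC_CCID2;	/* prefer CCID-2 in both directions */

	if (fd >= 0)
		setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID, &ccid, sizeof(ccid));
	return fd;
}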
-
-int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_feat_entry *cur, *next;
- int idx;
- dccp_feat_val *fvals[DCCP_FEAT_SUPPORTED_MAX][2] = {
- [0 ... DCCP_FEAT_SUPPORTED_MAX-1] = { NULL, NULL }
- };
-
- list_for_each_entry(cur, fn_list, node) {
- /*
- * An empty Confirm means that either an unknown feature type
- * or an invalid value was present. In the first case there is
- * nothing to activate, in the other the default value is used.
- */
- if (cur->empty_confirm)
- continue;
-
- idx = dccp_feat_index(cur->feat_num);
- if (idx < 0) {
- DCCP_BUG("Unknown feature %u", cur->feat_num);
- goto activation_failed;
- }
- if (cur->state != FEAT_STABLE) {
- DCCP_CRIT("Negotiation of %s %s failed in state %s",
- cur->is_local ? "local" : "remote",
- dccp_feat_fname(cur->feat_num),
- dccp_feat_sname[cur->state]);
- goto activation_failed;
- }
- fvals[idx][cur->is_local] = &cur->val;
- }
-
- /*
- * Activate in decreasing order of index, so that the CCIDs are always
- * activated as the last feature. This avoids activating a CCID before the
- * features it depends on (e.g. Send NDP Count, Send Ack Vector, and Ack
- * Ratio) have been initialised.
- */
- for (idx = DCCP_FEAT_SUPPORTED_MAX; --idx >= 0;)
- if (__dccp_feat_activate(sk, idx, 0, fvals[idx][0]) ||
- __dccp_feat_activate(sk, idx, 1, fvals[idx][1])) {
- DCCP_CRIT("Could not activate %d", idx);
- goto activation_failed;
- }
-
- /* Clean up Change options which have been confirmed already */
- list_for_each_entry_safe(cur, next, fn_list, node)
- if (!cur->needs_confirm)
- dccp_feat_list_pop(cur);
-
- dccp_pr_debug("Activation OK\n");
- return 0;
-
-activation_failed:
- /*
- * We clean up everything that may have been allocated, since
- * it is difficult to track at which stage negotiation failed.
- * This is ok, since all deallocation functions below are robust
- * against NULL arguments.
- */
- ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
- dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
- dp->dccps_hc_rx_ackvec = NULL;
- return -1;
-}
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
deleted file mode 100644
index 57d9c026aa3f..000000000000
--- a/net/dccp/feat.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _DCCP_FEAT_H
-#define _DCCP_FEAT_H
-/*
- * net/dccp/feat.h
- *
- * Feature negotiation for the DCCP protocol (RFC 4340, section 6)
- * Copyright (c) 2008 Gerrit Renker <gerrit@erg.abdn.ac.uk>
- * Copyright (c) 2005 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
- */
-#include <linux/types.h>
-#include "dccp.h"
-
-/*
- * Known limit values
- */
-/* Ack Ratio takes 2-byte integer values (11.3) */
-#define DCCPF_ACK_RATIO_MAX 0xFFFF
-/* Wmin=32 and Wmax=2^46-1 from 7.5.2 */
-#define DCCPF_SEQ_WMIN 32
-#define DCCPF_SEQ_WMAX 0x3FFFFFFFFFFFull
-/* Maximum number of SP values that fit in a single (Confirm) option */
-#define DCCP_FEAT_MAX_SP_VALS (DCCP_SINGLE_OPT_MAXLEN - 2)
-
-enum dccp_feat_type {
- FEAT_AT_RX = 1, /* located at RX side of half-connection */
- FEAT_AT_TX = 2, /* located at TX side of half-connection */
- FEAT_SP = 4, /* server-priority reconciliation (6.3.1) */
- FEAT_NN = 8, /* non-negotiable reconciliation (6.3.2) */
- FEAT_UNKNOWN = 0xFF /* not understood or invalid feature */
-};
-
-enum dccp_feat_state {
- FEAT_DEFAULT = 0, /* using default values from 6.4 */
- FEAT_INITIALISING, /* feature is being initialised */
- FEAT_CHANGING, /* Change sent but not confirmed yet */
- FEAT_UNSTABLE, /* local modification in state CHANGING */
- FEAT_STABLE /* both ends (think they) agree */
-};
-
-/**
- * dccp_feat_val - Container for SP or NN feature values
- * @nn: single NN value
- * @sp.vec: single SP value plus optional preference list
- * @sp.len: length of @sp.vec in bytes
- */
-typedef union {
- u64 nn;
- struct {
- u8 *vec;
- u8 len;
- } sp;
-} dccp_feat_val;
-
-/**
- * struct feat_entry - Data structure to perform feature negotiation
- * @val: feature's current value (SP features may have preference list)
- * @state: feature's current state
- * @feat_num: one of %dccp_feature_numbers
- * @needs_mandatory: whether Mandatory options should be sent
- * @needs_confirm: whether to send a Confirm instead of a Change
- * @empty_confirm: whether to send an empty Confirm (depends on @needs_confirm)
- * @is_local: feature location (1) or feature-remote (0)
- * @node: list pointers, entries arranged in FIFO order
- */
-struct dccp_feat_entry {
- dccp_feat_val val;
- enum dccp_feat_state state:8;
- u8 feat_num;
-
- bool needs_mandatory,
- needs_confirm,
- empty_confirm,
- is_local;
-
- struct list_head node;
-};
-
-static inline u8 dccp_feat_genopt(struct dccp_feat_entry *entry)
-{
- if (entry->needs_confirm)
- return entry->is_local ? DCCPO_CONFIRM_L : DCCPO_CONFIRM_R;
- return entry->is_local ? DCCPO_CHANGE_L : DCCPO_CHANGE_R;
-}
-
-/**
- * struct ccid_dependency - Track changes resulting from choosing a CCID
- * @dependent_feat: one of %dccp_feature_numbers
- * @is_local: local (1) or remote (0) @dependent_feat
- * @is_mandatory: whether presence of @dependent_feat is mission-critical or not
- * @val: corresponding default value for @dependent_feat (u8 is sufficient here)
- */
-struct ccid_dependency {
- u8 dependent_feat;
- bool is_local:1,
- is_mandatory:1;
- u8 val;
-};
-
-/*
- * Sysctls to seed defaults for feature negotiation
- */
-extern unsigned long sysctl_dccp_sequence_window;
-extern int sysctl_dccp_rx_ccid;
-extern int sysctl_dccp_tx_ccid;
-
-int dccp_feat_init(struct sock *sk);
-int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
- u8 const *list, u8 len);
-int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
- u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
-int dccp_feat_clone_list(struct list_head const *, struct list_head *);
-
-/*
- * Encoding variable-length options and their maximum length.
- *
- * This affects NN options (SP options are all u8) and other variable-length
- * options (see table 3 in RFC 4340). The limit is currently given the Sequence
- * Window NN value (sec. 7.5.2) and the NDP count (sec. 7.7) option, all other
- * options consume less than 6 bytes (timestamps are 4 bytes).
- * When updating this constant (e.g. due to new internet drafts / RFCs), make
- * sure that you also update all code which refers to it.
- */
-#define DCCP_OPTVAL_MAXLEN 6
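A minimal sketch of the big-endian, variable-length integer coding that the two declarations below imply (assumed to mirror the kernel helpers):

static void encode_value_var(uint64_t value, uint8_t *to, uint8_t len)
{
	while (len--)			/* most significant byte first */
		*to++ = (value >> (8 * len)) & 0xff;
}

static uint64_t decode_value_var(const uint8_t *bf, uint8_t len)
{
	uint64_t value = 0;

	while (len--)
		value = (value << 8) | *bf++;
	return value;
}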
-
-void dccp_encode_value_var(const u64 value, u8 *to, const u8 len);
-u64 dccp_decode_value_var(const u8 *bf, const u8 len);
-u64 dccp_feat_nn_get(struct sock *sk, u8 feat);
-
-int dccp_insert_option_mandatory(struct sk_buff *skb);
-int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *val, u8 len,
- bool repeat_first);
-#endif /* _DCCP_FEAT_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
deleted file mode 100644
index 2cbb757a894f..000000000000
--- a/net/dccp/input.c
+++ /dev/null
@@ -1,739 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/dccp/input.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#include <linux/dccp.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-
-#include <net/sock.h>
-
-#include "ackvec.h"
-#include "ccid.h"
-#include "dccp.h"
-
-/* rate-limit for syncs in reply to sequence-invalid packets; RFC 4340, 7.5.4 */
-int sysctl_dccp_sync_ratelimit __read_mostly = HZ / 8;
-
-static void dccp_enqueue_skb(struct sock *sk, struct sk_buff *skb)
-{
- __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- skb_set_owner_r(skb, sk);
- sk->sk_data_ready(sk);
-}
-
-static void dccp_fin(struct sock *sk, struct sk_buff *skb)
-{
- /*
- * On receiving Close/CloseReq, both RD/WR shutdown are performed.
- * RFC 4340, 8.3 says that we MAY send further Data/DataAcks after
- * receiving the closing segment, but there is no guarantee that such
- * data will be processed at all.
- */
- sk->sk_shutdown = SHUTDOWN_MASK;
- sock_set_flag(sk, SOCK_DONE);
- dccp_enqueue_skb(sk, skb);
-}
-
-static int dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
-{
- int queued = 0;
-
- switch (sk->sk_state) {
- /*
- * We ignore Close when received in one of the following states:
- * - CLOSED (may be a late or duplicate packet)
- * - PASSIVE_CLOSEREQ (the peer has sent a CloseReq earlier)
- * - RESPOND (already handled by dccp_check_req)
- */
- case DCCP_CLOSING:
- /*
- * Simultaneous-close: receiving a Close after sending one. This
- * can happen if both client and server perform active-close and
- * will result in an endless ping-pong of crossing and retrans-
- * mitted Close packets, which only terminates when one of the
- * nodes times out (min. 64 seconds). Quicker convergence can be
- * achieved when one of the nodes acts as tie-breaker.
- * This is ok as both ends are done with data transfer and each
- * end is just waiting for the other to acknowledge termination.
- */
- if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT)
- break;
- fallthrough;
- case DCCP_REQUESTING:
- case DCCP_ACTIVE_CLOSEREQ:
- dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
- dccp_done(sk);
- break;
- case DCCP_OPEN:
- case DCCP_PARTOPEN:
- /* Give waiting application a chance to read pending data */
- queued = 1;
- dccp_fin(sk, skb);
- dccp_set_state(sk, DCCP_PASSIVE_CLOSE);
- fallthrough;
- case DCCP_PASSIVE_CLOSE:
- /*
- * Retransmitted Close: we have already enqueued the first one.
- */
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
- }
- return queued;
-}
-
-static int dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
-{
- int queued = 0;
-
- /*
- * Step 7: Check for unexpected packet types
- * If (S.is_server and P.type == CloseReq)
- * Send Sync packet acknowledging P.seqno
- * Drop packet and return
- */
- if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
- dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_PKT_SYNC);
- return queued;
- }
-
- /* Step 13: process relevant Client states < CLOSEREQ */
- switch (sk->sk_state) {
- case DCCP_REQUESTING:
- dccp_send_close(sk, 0);
- dccp_set_state(sk, DCCP_CLOSING);
- break;
- case DCCP_OPEN:
- case DCCP_PARTOPEN:
- /* Give waiting application a chance to read pending data */
- queued = 1;
- dccp_fin(sk, skb);
- dccp_set_state(sk, DCCP_PASSIVE_CLOSEREQ);
- fallthrough;
- case DCCP_PASSIVE_CLOSEREQ:
- sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
- }
- return queued;
-}
-
-static u16 dccp_reset_code_convert(const u8 code)
-{
- static const u16 error_code[] = {
- [DCCP_RESET_CODE_CLOSED] = 0, /* normal termination */
- [DCCP_RESET_CODE_UNSPECIFIED] = 0, /* nothing known */
- [DCCP_RESET_CODE_ABORTED] = ECONNRESET,
-
- [DCCP_RESET_CODE_NO_CONNECTION] = ECONNREFUSED,
- [DCCP_RESET_CODE_CONNECTION_REFUSED] = ECONNREFUSED,
- [DCCP_RESET_CODE_TOO_BUSY] = EUSERS,
- [DCCP_RESET_CODE_AGGRESSION_PENALTY] = EDQUOT,
-
- [DCCP_RESET_CODE_PACKET_ERROR] = ENOMSG,
- [DCCP_RESET_CODE_BAD_INIT_COOKIE] = EBADR,
- [DCCP_RESET_CODE_BAD_SERVICE_CODE] = EBADRQC,
- [DCCP_RESET_CODE_OPTION_ERROR] = EILSEQ,
- [DCCP_RESET_CODE_MANDATORY_ERROR] = EOPNOTSUPP,
- };
-
- return code >= DCCP_MAX_RESET_CODES ? 0 : error_code[code];
-}
-
-static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb)
-{
- u16 err = dccp_reset_code_convert(dccp_hdr_reset(skb)->dccph_reset_code);
-
- sk->sk_err = err;
-
- /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */
- dccp_fin(sk, skb);
-
- if (err && !sock_flag(sk, SOCK_DEAD))
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
- dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
-}
-
-static void dccp_handle_ackvec_processing(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_ackvec *av = dccp_sk(sk)->dccps_hc_rx_ackvec;
-
- if (av == NULL)
- return;
- if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
- dccp_ackvec_clear_state(av, DCCP_SKB_CB(skb)->dccpd_ack_seq);
- dccp_ackvec_input(av, skb);
-}
-
-static void dccp_deliver_input_to_ccids(struct sock *sk, struct sk_buff *skb)
-{
- const struct dccp_sock *dp = dccp_sk(sk);
-
- /* Don't deliver to RX CCID when node has shut down read end. */
- if (!(sk->sk_shutdown & RCV_SHUTDOWN))
- ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
- /*
- * Until the TX queue has been drained, we cannot honour SHUT_WR, since
- * we need received feedback as input to adjust congestion control.
- */
- if (sk->sk_write_queue.qlen > 0 || !(sk->sk_shutdown & SEND_SHUTDOWN))
- ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
-}
-
-static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
-{
- const struct dccp_hdr *dh = dccp_hdr(skb);
- struct dccp_sock *dp = dccp_sk(sk);
- u64 lswl, lawl, seqno = DCCP_SKB_CB(skb)->dccpd_seq,
- ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
-
- /*
- * Step 5: Prepare sequence numbers for Sync
- * If P.type == Sync or P.type == SyncAck,
- * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
- * / * P is valid, so update sequence number variables
- * accordingly. After this update, P will pass the tests
- * in Step 6. A SyncAck is generated if necessary in
- * Step 15 * /
- * Update S.GSR, S.SWL, S.SWH
- * Otherwise,
- * Drop packet and return
- */
- if (dh->dccph_type == DCCP_PKT_SYNC ||
- dh->dccph_type == DCCP_PKT_SYNCACK) {
- if (between48(ackno, dp->dccps_awl, dp->dccps_awh) &&
- dccp_delta_seqno(dp->dccps_swl, seqno) >= 0)
- dccp_update_gsr(sk, seqno);
- else
- return -1;
- }
-
- /*
- * Step 6: Check sequence numbers
- * Let LSWL = S.SWL and LAWL = S.AWL
- * If P.type == CloseReq or P.type == Close or P.type == Reset,
- * LSWL := S.GSR + 1, LAWL := S.GAR
- * If LSWL <= P.seqno <= S.SWH
- * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
- * Update S.GSR, S.SWL, S.SWH
- * If P.type != Sync,
- * Update S.GAR
- */
- lswl = dp->dccps_swl;
- lawl = dp->dccps_awl;
-
- if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
- dh->dccph_type == DCCP_PKT_CLOSE ||
- dh->dccph_type == DCCP_PKT_RESET) {
- lswl = ADD48(dp->dccps_gsr, 1);
- lawl = dp->dccps_gar;
- }
-
- if (between48(seqno, lswl, dp->dccps_swh) &&
- (ackno == DCCP_PKT_WITHOUT_ACK_SEQ ||
- between48(ackno, lawl, dp->dccps_awh))) {
- dccp_update_gsr(sk, seqno);
-
- if (dh->dccph_type != DCCP_PKT_SYNC &&
- ackno != DCCP_PKT_WITHOUT_ACK_SEQ &&
- after48(ackno, dp->dccps_gar))
- dp->dccps_gar = ackno;
- } else {
- unsigned long now = jiffies;
- /*
- * Step 6: Check sequence numbers
- * Otherwise,
- * If P.type == Reset,
- * Send Sync packet acknowledging S.GSR
- * Otherwise,
- * Send Sync packet acknowledging P.seqno
- * Drop packet and return
- *
- * These Syncs are rate-limited as per RFC 4340, 7.5.4:
- * at most one Sync per sysctl_dccp_sync_ratelimit jiffies.
- */
- if (time_before(now, (dp->dccps_rate_last +
- sysctl_dccp_sync_ratelimit)))
- return -1;
-
- DCCP_WARN("Step 6 failed for %s packet, "
- "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
- "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
- "sending SYNC...\n", dccp_packet_name(dh->dccph_type),
- (unsigned long long) lswl, (unsigned long long) seqno,
- (unsigned long long) dp->dccps_swh,
- (ackno == DCCP_PKT_WITHOUT_ACK_SEQ) ? "doesn't exist"
- : "exists",
- (unsigned long long) lawl, (unsigned long long) ackno,
- (unsigned long long) dp->dccps_awh);
-
- dp->dccps_rate_last = now;
-
- if (dh->dccph_type == DCCP_PKT_RESET)
- seqno = dp->dccps_gsr;
- dccp_send_sync(sk, seqno, DCCP_PKT_SYNC);
- return -1;
- }
-
- return 0;
-}
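
The window tests above all use circular 48-bit sequence arithmetic. Below is a minimal sketch of what helpers like ADD48() and between48() have to compute, assuming the usual mod-2^48 conventions; the bodies are illustrative, not the kernel's dccp.h.

#include <stdint.h>
#include <stdbool.h>

#define UINT48_MAX ((1ULL << 48) - 1)

/* Addition modulo 2^48, e.g. LSWL := S.GSR + 1 above. */
static uint64_t add48(uint64_t a, uint64_t b)
{
	return (a + b) & UINT48_MAX;
}

/* Signed distance from seq1 to seq2 on the 48-bit circle. */
static int64_t delta48(uint64_t seq1, uint64_t seq2)
{
	uint64_t d = (seq2 - seq1) & UINT48_MAX;

	/* sign-extend bit 47 (assumes arithmetic right shift) */
	return (int64_t)(d << 16) >> 16;
}

/* True iff low <= seq <= high in circular 48-bit order. */
static bool between48(uint64_t seq, uint64_t low, uint64_t high)
{
	return delta48(low, seq) >= 0 && delta48(seq, high) >= 0;
}
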
-
-static int __dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct dccp_hdr *dh, const unsigned int len)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- switch (dccp_hdr(skb)->dccph_type) {
- case DCCP_PKT_DATAACK:
- case DCCP_PKT_DATA:
- /*
- * FIXME: schedule DATA_DROPPED (RFC 4340, 11.7.2) if and when
- * - sk_shutdown == RCV_SHUTDOWN, use Code 1, "Not Listening"
- * - sk_receive_queue is full, use Code 2, "Receive Buffer"
- */
- dccp_enqueue_skb(sk, skb);
- return 0;
- case DCCP_PKT_ACK:
- goto discard;
- case DCCP_PKT_RESET:
- /*
- * Step 9: Process Reset
- * If P.type == Reset,
- * Tear down connection
- * S.state := TIMEWAIT
- * Set TIMEWAIT timer
- * Drop packet and return
- */
- dccp_rcv_reset(sk, skb);
- return 0;
- case DCCP_PKT_CLOSEREQ:
- if (dccp_rcv_closereq(sk, skb))
- return 0;
- goto discard;
- case DCCP_PKT_CLOSE:
- if (dccp_rcv_close(sk, skb))
- return 0;
- goto discard;
- case DCCP_PKT_REQUEST:
- /* Step 7
- * or (S.is_server and P.type == Response)
- * or (S.is_client and P.type == Request)
- * or (S.state >= OPEN and P.type == Request
- * and P.seqno >= S.OSR)
- * or (S.state >= OPEN and P.type == Response
- * and P.seqno >= S.OSR)
- * or (S.state == RESPOND and P.type == Data),
- * Send Sync packet acknowledging P.seqno
- * Drop packet and return
- */
- if (dp->dccps_role != DCCP_ROLE_LISTEN)
- goto send_sync;
- goto check_seq;
- case DCCP_PKT_RESPONSE:
- if (dp->dccps_role != DCCP_ROLE_CLIENT)
- goto send_sync;
-check_seq:
- if (dccp_delta_seqno(dp->dccps_osr,
- DCCP_SKB_CB(skb)->dccpd_seq) >= 0) {
-send_sync:
- dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
- DCCP_PKT_SYNC);
- }
- break;
- case DCCP_PKT_SYNC:
- dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
- DCCP_PKT_SYNCACK);
- /*
- * From RFC 4340, sec. 5.7
- *
- * As with DCCP-Ack packets, DCCP-Sync and DCCP-SyncAck packets
- * MAY have non-zero-length application data areas, whose
- * contents receivers MUST ignore.
- */
- goto discard;
- }
-
- DCCP_INC_STATS(DCCP_MIB_INERRS);
-discard:
- __kfree_skb(skb);
- return 0;
-}
-
-int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct dccp_hdr *dh, const unsigned int len)
-{
- if (dccp_check_seqno(sk, skb))
- goto discard;
-
- if (dccp_parse_options(sk, NULL, skb))
- return 1;
-
- dccp_handle_ackvec_processing(sk, skb);
- dccp_deliver_input_to_ccids(sk, skb);
-
- return __dccp_rcv_established(sk, skb, dh, len);
-discard:
- __kfree_skb(skb);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rcv_established);
-
-static int dccp_rcv_request_sent_state_process(struct sock *sk,
- struct sk_buff *skb,
- const struct dccp_hdr *dh,
- const unsigned int len)
-{
- /*
- * Step 4: Prepare sequence numbers in REQUEST
- * If S.state == REQUEST,
- * If (P.type == Response or P.type == Reset)
- * and S.AWL <= P.ackno <= S.AWH,
- * / * Set sequence number variables corresponding to the
- * other endpoint, so P will pass the tests in Step 6 * /
- * Set S.GSR, S.ISR, S.SWL, S.SWH
- * / * Response processing continues in Step 10; Reset
- * processing continues in Step 9 * /
- */
- if (dh->dccph_type == DCCP_PKT_RESPONSE) {
- const struct inet_connection_sock *icsk = inet_csk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- long tstamp = dccp_timestamp();
-
- if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
- dp->dccps_awl, dp->dccps_awh)) {
- dccp_pr_debug("invalid ackno: S.AWL=%llu, "
- "P.ackno=%llu, S.AWH=%llu\n",
- (unsigned long long)dp->dccps_awl,
- (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
- (unsigned long long)dp->dccps_awh);
- goto out_invalid_packet;
- }
-
- /*
- * If option processing (Step 8) failed, return 1 here so that
- * dccp_v4_do_rcv() sends a Reset. The Reset code depends on
- * the option type and is set in dccp_parse_options().
- */
- if (dccp_parse_options(sk, NULL, skb))
- return 1;
-
- /* Obtain usec RTT sample from SYN exchange (used by TFRC). */
- if (likely(dp->dccps_options_received.dccpor_timestamp_echo))
- dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * (tstamp -
- dp->dccps_options_received.dccpor_timestamp_echo));
-
- /* Stop the REQUEST timer */
- inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
- WARN_ON(sk->sk_send_head == NULL);
- kfree_skb(sk->sk_send_head);
- sk->sk_send_head = NULL;
-
- /*
- * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect
- * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH
- * is done as part of activating the feature values below, since
- * these settings depend on the local/remote Sequence Window
- * features, which were undefined or not confirmed until now.
- */
- dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
-
- dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
-
- /*
- * Step 10: Process REQUEST state (second part)
- * If S.state == REQUEST,
- * / * If we get here, P is a valid Response from the
- * server (see Step 4), and we should move to
- * PARTOPEN state. PARTOPEN means send an Ack,
- * don't send Data packets, retransmit Acks
- * periodically, and always include any Init Cookie
- * from the Response * /
- * S.state := PARTOPEN
- * Set PARTOPEN timer
- * Continue with S.state == PARTOPEN
- * / * Step 12 will send the Ack completing the
- * three-way handshake * /
- */
- dccp_set_state(sk, DCCP_PARTOPEN);
-
- /*
- * If feature negotiation was successful, activate features now;
- * an activation failure means that this host could not activate
- * one or more features (e.g. insufficient memory), which would
- * leave at least one feature in an undefined state.
- */
- if (dccp_feat_activate_values(sk, &dp->dccps_featneg))
- goto unable_to_proceed;
-
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- sk->sk_state_change(sk);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
- }
-
- if (sk->sk_write_pending || inet_csk_in_pingpong_mode(sk) ||
- icsk->icsk_accept_queue.rskq_defer_accept) {
- /* Save one ACK. Data will be ready after
- * several ticks, if write_pending is set.
- *
- * It may be deleted, but with this feature tcpdumps
- * look so _wonderfully_ clever, that I was not able
- * to stand against the temptation 8) --ANK
- */
- /*
- * OK, in DCCP we can just as well do a similar trick; it's
- * even in the draft, but there is no need for us to
- * schedule an ack here, as dccp_sendmsg does this for
- * us, as also stated in the draft. -acme
- */
- __kfree_skb(skb);
- return 0;
- }
- dccp_send_ack(sk);
- return -1;
- }
-
-out_invalid_packet:
- /* dccp_v4_do_rcv will send a reset */
- DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
- return 1;
-
-unable_to_proceed:
- DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_ABORTED;
- /*
- * We mark this socket as no longer usable, so that the loop in
- * dccp_sendmsg() terminates and the application gets notified.
- */
- dccp_set_state(sk, DCCP_CLOSED);
- sk->sk_err = ECOMM;
- return 1;
-}
-
-static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
- struct sk_buff *skb,
- const struct dccp_hdr *dh,
- const unsigned int len)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- u32 sample = dp->dccps_options_received.dccpor_timestamp_echo;
- int queued = 0;
-
- switch (dh->dccph_type) {
- case DCCP_PKT_RESET:
- inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
- break;
- case DCCP_PKT_DATA:
- if (sk->sk_state == DCCP_RESPOND)
- break;
- fallthrough;
- case DCCP_PKT_DATAACK:
- case DCCP_PKT_ACK:
- /*
- * FIXME: we should be resetting the PARTOPEN (DELACK) timer
- * here but only if we haven't used the DELACK timer for
- * something else, like sending a delayed ack for a TIMESTAMP
- * echo, etc. For now we're not clearing it; sending an extra
- * ACK when there is nothing else to do in DELACK is not a big
- * deal after all.
- */
-
- /* Stop the PARTOPEN timer */
- if (sk->sk_state == DCCP_PARTOPEN)
- inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
-
- /* Obtain usec RTT sample from SYN exchange (used by TFRC). */
- if (likely(sample)) {
- long delta = dccp_timestamp() - sample;
-
- dp->dccps_syn_rtt = dccp_sample_rtt(sk, 10 * delta);
- }
-
- dp->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
- dccp_set_state(sk, DCCP_OPEN);
-
- if (dh->dccph_type == DCCP_PKT_DATAACK ||
- dh->dccph_type == DCCP_PKT_DATA) {
- __dccp_rcv_established(sk, skb, dh, len);
- queued = 1; /* packet was queued
- (by __dccp_rcv_established) */
- }
- break;
- }
-
- return queued;
-}
-
-int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
- struct dccp_hdr *dh, unsigned int len)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- const int old_state = sk->sk_state;
- bool acceptable;
- int queued = 0;
-
- /*
- * Step 3: Process LISTEN state
- *
- * If S.state == LISTEN,
- * If P.type == Request or P contains a valid Init Cookie option,
- * (* Must scan the packet's options to check for Init
- * Cookies. Only Init Cookies are processed here,
- * however; other options are processed in Step 8. This
- * scan need only be performed if the endpoint uses Init
- * Cookies *)
- * (* Generate a new socket and switch to that socket *)
- * Set S := new socket for this port pair
- * S.state = RESPOND
- * Choose S.ISS (initial seqno) or set from Init Cookies
- * Initialize S.GAR := S.ISS
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
- * Continue with S.state == RESPOND
- * (* A Response packet will be generated in Step 11 *)
- * Otherwise,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- */
- if (sk->sk_state == DCCP_LISTEN) {
- if (dh->dccph_type == DCCP_PKT_REQUEST) {
- /* It is possible that we process SYN packets from backlog,
- * so we need to make sure to disable BH and RCU right there.
- */
- rcu_read_lock();
- local_bh_disable();
- acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0;
- local_bh_enable();
- rcu_read_unlock();
- if (!acceptable)
- return 1;
- consume_skb(skb);
- return 0;
- }
- if (dh->dccph_type == DCCP_PKT_RESET)
- goto discard;
-
- /* Caller (dccp_v4_do_rcv) will send Reset */
- dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
- return 1;
- } else if (sk->sk_state == DCCP_CLOSED) {
- dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
- return 1;
- }
-
- /* Step 6: Check sequence numbers (omitted in LISTEN/REQUEST state) */
- if (sk->sk_state != DCCP_REQUESTING && dccp_check_seqno(sk, skb))
- goto discard;
-
- /*
- * Step 7: Check for unexpected packet types
- * If (S.is_server and P.type == Response)
- * or (S.is_client and P.type == Request)
- * or (S.state == RESPOND and P.type == Data),
- * Send Sync packet acknowledging P.seqno
- * Drop packet and return
- */
- if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
- dh->dccph_type == DCCP_PKT_RESPONSE) ||
- (dp->dccps_role == DCCP_ROLE_CLIENT &&
- dh->dccph_type == DCCP_PKT_REQUEST) ||
- (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
- dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
- goto discard;
- }
-
- /* Step 8: Process options */
- if (dccp_parse_options(sk, NULL, skb))
- return 1;
-
- /*
- * Step 9: Process Reset
- * If P.type == Reset,
- * Tear down connection
- * S.state := TIMEWAIT
- * Set TIMEWAIT timer
- * Drop packet and return
- */
- if (dh->dccph_type == DCCP_PKT_RESET) {
- dccp_rcv_reset(sk, skb);
- return 0;
- } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { /* Step 13 */
- if (dccp_rcv_closereq(sk, skb))
- return 0;
- goto discard;
- } else if (dh->dccph_type == DCCP_PKT_CLOSE) { /* Step 14 */
- if (dccp_rcv_close(sk, skb))
- return 0;
- goto discard;
- }
-
- switch (sk->sk_state) {
- case DCCP_REQUESTING:
- queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
- if (queued >= 0)
- return queued;
-
- __kfree_skb(skb);
- return 0;
-
- case DCCP_PARTOPEN:
- /* Step 8: if using Ack Vectors, mark packet acknowledgeable */
- dccp_handle_ackvec_processing(sk, skb);
- dccp_deliver_input_to_ccids(sk, skb);
- fallthrough;
- case DCCP_RESPOND:
- queued = dccp_rcv_respond_partopen_state_process(sk, skb,
- dh, len);
- break;
- }
-
- if (dh->dccph_type == DCCP_PKT_ACK ||
- dh->dccph_type == DCCP_PKT_DATAACK) {
- switch (old_state) {
- case DCCP_PARTOPEN:
- sk->sk_state_change(sk);
- sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
- break;
- }
- } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
- dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
- goto discard;
- }
-
- if (!queued) {
-discard:
- __kfree_skb(skb);
- }
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_rcv_state_process);
-
-/**
- * dccp_sample_rtt - Validate and finalise computation of RTT sample
- * @sk: socket structure
- * @delta: number of microseconds between packet and acknowledgment
- *
- * The routine is kept generic to work in different contexts. It should be
- * called immediately when the ACK used for the RTT sample arrives.
- */
-u32 dccp_sample_rtt(struct sock *sk, long delta)
-{
- /* dccpor_elapsed_time is either zeroed out or set and > 0 */
- delta -= dccp_sk(sk)->dccps_options_received.dccpor_elapsed_time * 10;
-
- if (unlikely(delta <= 0)) {
- DCCP_WARN("unusable RTT sample %ld, using min\n", delta);
- return DCCP_SANE_RTT_MIN;
- }
- if (unlikely(delta > DCCP_SANE_RTT_MAX)) {
- DCCP_WARN("RTT sample %ld too large, using max\n", delta);
- return DCCP_SANE_RTT_MAX;
- }
-
- return delta;
-}
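
For context: DCCP timestamp/elapsed-time options use 10-microsecond units, which is why the callers above multiply by 10 before sampling. A hedged usage sketch with assumed bounds follows (the real DCCP_SANE_RTT_MIN/MAX constants are defined in the kernel headers):

#include <stdio.h>

#define SANE_RTT_MIN 100		/* assumed lower bound, in usec */
#define SANE_RTT_MAX (3 * 1000 * 1000)	/* assumed upper bound, in usec */

/* Mirror of the clamping above: subtract peer hold time, then clamp. */
static long sample_rtt(long delta_us, long elapsed_10us)
{
	delta_us -= elapsed_10us * 10;	/* elapsed time has 10 usec units */

	if (delta_us <= 0)
		return SANE_RTT_MIN;
	if (delta_us > SANE_RTT_MAX)
		return SANE_RTT_MAX;
	return delta_us;
}

int main(void)
{
	/* 250 ms on the wire minus 50 ms peer hold time -> 200000 usec */
	printf("%ld\n", sample_rtt(250000, 5000));
	return 0;
}
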
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
deleted file mode 100644
index 2045ddac0fe9..000000000000
--- a/net/dccp/ipv4.c
+++ /dev/null
@@ -1,1101 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/dccp/ipv4.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#include <linux/dccp.h>
-#include <linux/icmp.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/random.h>
-
-#include <net/icmp.h>
-#include <net/inet_common.h>
-#include <net/inet_dscp.h>
-#include <net/inet_hashtables.h>
-#include <net/inet_sock.h>
-#include <net/protocol.h>
-#include <net/sock.h>
-#include <net/timewait_sock.h>
-#include <net/tcp_states.h>
-#include <net/xfrm.h>
-#include <net/secure_seq.h>
-#include <net/netns/generic.h>
-#include <net/rstreason.h>
-
-#include "ackvec.h"
-#include "ccid.h"
-#include "dccp.h"
-#include "feat.h"
-
-struct dccp_v4_pernet {
- struct sock *v4_ctl_sk;
-};
-
-static unsigned int dccp_v4_pernet_id __read_mostly;
-
-/*
- * The per-net v4_ctl_sk socket is used for responding to
- * Out-of-the-blue (OOTB) packets. A control sock will be created
- * for this purpose at initialization time.
- */
-
-int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
-{
- const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
- struct inet_sock *inet = inet_sk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- __be16 orig_sport, orig_dport;
- __be32 daddr, nexthop;
- struct flowi4 *fl4;
- struct rtable *rt;
- int err;
- struct ip_options_rcu *inet_opt;
-
- dp->dccps_role = DCCP_ROLE_CLIENT;
-
- if (addr_len < sizeof(struct sockaddr_in))
- return -EINVAL;
-
- if (usin->sin_family != AF_INET)
- return -EAFNOSUPPORT;
-
- nexthop = daddr = usin->sin_addr.s_addr;
-
- inet_opt = rcu_dereference_protected(inet->inet_opt,
- lockdep_sock_is_held(sk));
- if (inet_opt != NULL && inet_opt->opt.srr) {
- if (daddr == 0)
- return -EINVAL;
- nexthop = inet_opt->opt.faddr;
- }
-
- orig_sport = inet->inet_sport;
- orig_dport = usin->sin_port;
- fl4 = &inet->cork.fl.u.ip4;
- rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
- sk->sk_bound_dev_if, IPPROTO_DCCP, orig_sport,
- orig_dport, sk);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
-
- if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
- ip_rt_put(rt);
- return -ENETUNREACH;
- }
-
- if (inet_opt == NULL || !inet_opt->opt.srr)
- daddr = fl4->daddr;
-
- if (inet->inet_saddr == 0) {
- err = inet_bhash2_update_saddr(sk, &fl4->saddr, AF_INET);
- if (err) {
- ip_rt_put(rt);
- return err;
- }
- } else {
- sk_rcv_saddr_set(sk, inet->inet_saddr);
- }
-
- inet->inet_dport = usin->sin_port;
- sk_daddr_set(sk, daddr);
-
- inet_csk(sk)->icsk_ext_hdr_len = 0;
- if (inet_opt)
- inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
- /*
- * Socket identity is still unknown (sport may be zero).
- * However we set state to DCCP_REQUESTING and, without releasing the
- * socket lock, select a source port, enter ourselves into the hash
- * tables and complete initialization after this.
- */
- dccp_set_state(sk, DCCP_REQUESTING);
- err = inet_hash_connect(&dccp_death_row, sk);
- if (err != 0)
- goto failure;
-
- rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
- inet->inet_sport, inet->inet_dport, sk);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- rt = NULL;
- goto failure;
- }
- /* OK, now commit destination to socket. */
- sk_setup_caps(sk, &rt->dst);
-
- dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- inet->inet_dport);
- atomic_set(&inet->inet_id, get_random_u16());
-
- err = dccp_connect(sk);
- rt = NULL;
- if (err != 0)
- goto failure;
-out:
- return err;
-failure:
- /*
- * This unhashes the socket and releases the local port, if necessary.
- */
- dccp_set_state(sk, DCCP_CLOSED);
- inet_bhash2_reset_saddr(sk);
- ip_rt_put(rt);
- sk->sk_route_caps = 0;
- inet->inet_dport = 0;
- goto out;
-}
-EXPORT_SYMBOL_GPL(dccp_v4_connect);
-
-/*
- * This routine does path mtu discovery as defined in RFC1191.
- */
-static inline void dccp_do_pmtu_discovery(struct sock *sk,
- const struct iphdr *iph,
- u32 mtu)
-{
- struct dst_entry *dst;
- const struct inet_sock *inet = inet_sk(sk);
- const struct dccp_sock *dp = dccp_sk(sk);
-
- /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs
- * sent out by Linux are always < 576 bytes, so they should go through
- * unfragmented).
- */
- if (sk->sk_state == DCCP_LISTEN)
- return;
-
- dst = inet_csk_update_pmtu(sk, mtu);
- if (!dst)
- return;
-
- /* Something is about to go wrong... Remember the soft error
- * in case this connection is not able to recover.
- */
- if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
- WRITE_ONCE(sk->sk_err_soft, EMSGSIZE);
-
- mtu = dst_mtu(dst);
-
- if (inet->pmtudisc != IP_PMTUDISC_DONT &&
- ip_sk_accept_pmtu(sk) &&
- inet_csk(sk)->icsk_pmtu_cookie > mtu) {
- dccp_sync_mss(sk, mtu);
-
- /*
- * From RFC 4340, sec. 14.1:
- *
- * DCCP-Sync packets are the best choice for upward
- * probing, since DCCP-Sync probes do not risk application
- * data loss.
- */
- dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
- } /* else let the usual retransmit timer handle it */
-}
-
-static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk)
-{
- struct dst_entry *dst = __sk_dst_check(sk, 0);
-
- if (dst)
- dst->ops->redirect(dst, sk, skb);
-}
-
-void dccp_req_err(struct sock *sk, u64 seq)
-{
- struct request_sock *req = inet_reqsk(sk);
- struct net *net = sock_net(sk);
-
- /*
- * ICMPs are not backlogged, hence we cannot get an established
- * socket here.
- */
- if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
- __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
- } else {
- /*
- * Still in RESPOND, just remove it silently.
- * There is no good way to pass the error to the newly
- * created socket, and POSIX does not want network
- * errors returned from accept().
- */
- inet_csk_reqsk_queue_drop(req->rsk_listener, req);
- }
- reqsk_put(req);
-}
-EXPORT_SYMBOL(dccp_req_err);
-
-/*
- * This routine is called by the ICMP module when it gets some sort of error
- * condition. If err < 0 then the socket should be closed and the error
- * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
- * After adjustment the header points to the first 8 bytes of the dccp header. We
- * need to find the appropriate port.
- *
- * The locking strategy used here is very "optimistic". When someone else
- * accesses the socket the ICMP is just dropped and for some paths there is no
- * check at all. A more general error queue to queue errors for later handling
- * is probably better.
- */
-static int dccp_v4_err(struct sk_buff *skb, u32 info)
-{
- const struct iphdr *iph = (struct iphdr *)skb->data;
- const u8 offset = iph->ihl << 2;
- const struct dccp_hdr *dh;
- struct dccp_sock *dp;
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- struct sock *sk;
- __u64 seq;
- int err;
- struct net *net = dev_net(skb->dev);
-
- if (!pskb_may_pull(skb, offset + sizeof(*dh)))
- return -EINVAL;
- dh = (struct dccp_hdr *)(skb->data + offset);
- if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
- return -EINVAL;
- iph = (struct iphdr *)skb->data;
- dh = (struct dccp_hdr *)(skb->data + offset);
-
- sk = __inet_lookup_established(net, &dccp_hashinfo,
- iph->daddr, dh->dccph_dport,
- iph->saddr, ntohs(dh->dccph_sport),
- inet_iif(skb), 0);
- if (!sk) {
- __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
- return -ENOENT;
- }
-
- if (sk->sk_state == DCCP_TIME_WAIT) {
- inet_twsk_put(inet_twsk(sk));
- return 0;
- }
- seq = dccp_hdr_seq(dh);
- if (sk->sk_state == DCCP_NEW_SYN_RECV) {
- dccp_req_err(sk, seq);
- return 0;
- }
-
- bh_lock_sock(sk);
- /* If too many ICMPs get dropped on busy
- * servers this needs to be solved differently.
- */
- if (sock_owned_by_user(sk))
- __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
-
- if (sk->sk_state == DCCP_CLOSED)
- goto out;
-
- dp = dccp_sk(sk);
- if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
- !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
- __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
- goto out;
- }
-
- switch (type) {
- case ICMP_REDIRECT:
- if (!sock_owned_by_user(sk))
- dccp_do_redirect(skb, sk);
- goto out;
- case ICMP_SOURCE_QUENCH:
- /* Just silently ignore these. */
- goto out;
- case ICMP_PARAMETERPROB:
- err = EPROTO;
- break;
- case ICMP_DEST_UNREACH:
- if (code > NR_ICMP_UNREACH)
- goto out;
-
- if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
- if (!sock_owned_by_user(sk))
- dccp_do_pmtu_discovery(sk, iph, info);
- goto out;
- }
-
- err = icmp_err_convert[code].errno;
- break;
- case ICMP_TIME_EXCEEDED:
- err = EHOSTUNREACH;
- break;
- default:
- goto out;
- }
-
- switch (sk->sk_state) {
- case DCCP_REQUESTING:
- case DCCP_RESPOND:
- if (!sock_owned_by_user(sk)) {
- __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
- sk->sk_err = err;
-
- sk_error_report(sk);
-
- dccp_done(sk);
- } else {
- WRITE_ONCE(sk->sk_err_soft, err);
- }
- goto out;
- }
-
- /* If we've already connected we will keep trying
- * until we time out, or the user gives up.
- *
- * rfc1122 4.2.3.9 allows considering as hard errors
- * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
- * but it is obsoleted by pmtu discovery).
- *
- * Note that in the modern internet, where routing is unreliable
- * and broken firewalls sit in every dark corner, sending random
- * errors ordered by their masters, even these two messages finally
- * lose their original sense (even Linux sends invalid PORT_UNREACHs)
- *
- * Now we are in compliance with RFCs.
- * --ANK (980905)
- */
-
- if (!sock_owned_by_user(sk) && inet_test_bit(RECVERR, sk)) {
- sk->sk_err = err;
- sk_error_report(sk);
- } else { /* Only an error on timeout */
- WRITE_ONCE(sk->sk_err_soft, err);
- }
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
- return 0;
-}
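
The "icmp type << 8 | icmp code" convention described in the comment before dccp_v4_err() is just bit packing; a trivial illustration:

/* Pack/unpack the "type << 8 | code" soft-error encoding noted above. */
static inline int icmp_err_pack(unsigned char type, unsigned char code)
{
	return (type << 8) | code;
}

static inline unsigned char icmp_err_type(int err)
{
	return err >> 8;
}

static inline unsigned char icmp_err_code(int err)
{
	return err & 0xff;
}
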
-
-static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
- __be32 src, __be32 dst)
-{
- return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum);
-}
-
-void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb)
-{
- const struct inet_sock *inet = inet_sk(sk);
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- dccp_csum_outgoing(skb);
- dh->dccph_checksum = dccp_v4_csum_finish(skb,
- inet->inet_saddr,
- inet->inet_daddr);
-}
-EXPORT_SYMBOL_GPL(dccp_v4_send_check);
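
dccp_v4_csum_finish() folds the IPv4 pseudo-header (source, destination, protocol 33, length) into the one's-complement sum already accumulated in skb->csum. A simplified, self-contained sketch of that RFC 1071 style computation follows; the kernel uses its optimised csum helpers instead.

#include <stddef.h>
#include <stdint.h>

/* One's-complement sum over a buffer, RFC 1071 style. */
static uint32_t csum_add(uint32_t sum, const uint8_t *data, size_t len)
{
	while (len > 1) {
		sum += (data[0] << 8) | data[1];
		data += 2;
		len -= 2;
	}
	if (len)		/* odd trailing byte, zero-padded */
		sum += data[0] << 8;
	return sum;
}

/* Fold in the pseudo-header and finish; addresses in network byte order. */
static uint16_t pseudo_csum_finish(uint32_t sum, const uint8_t saddr[4],
				   const uint8_t daddr[4], uint16_t len)
{
	sum = csum_add(sum, saddr, 4);
	sum = csum_add(sum, daddr, 4);
	sum += 33 + len;	/* IPPROTO_DCCP word and DCCP length word */
	while (sum >> 16)	/* fold carries back into 16 bits */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;	/* value of the 16-bit checksum field */
}
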
-
-static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
-{
- return secure_dccp_sequence_number(ip_hdr(skb)->daddr,
- ip_hdr(skb)->saddr,
- dccp_hdr(skb)->dccph_dport,
- dccp_hdr(skb)->dccph_sport);
-}
-
-/*
- * The three way handshake has completed - we got a valid ACK or DATAACK -
- * now create the new socket.
- *
- * This is the equivalent of TCP's tcp_v4_syn_recv_sock
- */
-struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
- struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst,
- struct request_sock *req_unhash,
- bool *own_req)
-{
- struct inet_request_sock *ireq;
- struct inet_sock *newinet;
- struct sock *newsk;
-
- if (sk_acceptq_is_full(sk))
- goto exit_overflow;
-
- newsk = dccp_create_openreq_child(sk, req, skb);
- if (newsk == NULL)
- goto exit_nonewsk;
-
- newinet = inet_sk(newsk);
- ireq = inet_rsk(req);
- RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
- newinet->mc_index = inet_iif(skb);
- newinet->mc_ttl = ip_hdr(skb)->ttl;
- atomic_set(&newinet->inet_id, get_random_u16());
-
- if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
- goto put_and_exit;
-
- sk_setup_caps(newsk, dst);
-
- dccp_sync_mss(newsk, dst_mtu(dst));
-
- if (__inet_inherit_port(sk, newsk) < 0)
- goto put_and_exit;
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
- if (*own_req)
- ireq->ireq_opt = NULL;
- else
- newinet->inet_opt = NULL;
- return newsk;
-
-exit_overflow:
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
-exit_nonewsk:
- dst_release(dst);
-exit:
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
- return NULL;
-put_and_exit:
- newinet->inet_opt = NULL;
- inet_csk_prepare_forced_close(newsk);
- dccp_done(newsk);
- goto exit;
-}
-EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
-
-static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
- struct sk_buff *skb)
-{
- struct rtable *rt;
- const struct iphdr *iph = ip_hdr(skb);
- struct flowi4 fl4 = {
- .flowi4_oif = inet_iif(skb),
- .daddr = iph->saddr,
- .saddr = iph->daddr,
- .flowi4_tos = inet_dscp_to_dsfield(inet_sk_dscp(inet_sk(sk))),
- .flowi4_scope = ip_sock_rt_scope(sk),
- .flowi4_proto = sk->sk_protocol,
- .fl4_sport = dccp_hdr(skb)->dccph_dport,
- .fl4_dport = dccp_hdr(skb)->dccph_sport,
- };
-
- security_skb_classify_flow(skb, flowi4_to_flowi_common(&fl4));
- rt = ip_route_output_flow(net, &fl4, sk);
- if (IS_ERR(rt)) {
- IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
- return NULL;
- }
-
- return &rt->dst;
-}
-
-static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req)
-{
- int err = -1;
- struct sk_buff *skb;
- struct dst_entry *dst;
- struct flowi4 fl4;
-
- dst = inet_csk_route_req(sk, &fl4, req);
- if (dst == NULL)
- goto out;
-
- skb = dccp_make_response(sk, dst, req);
- if (skb != NULL) {
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr,
- ireq->ir_rmt_addr);
- rcu_read_lock();
- err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
- ireq->ir_rmt_addr,
- rcu_dereference(ireq->ireq_opt),
- READ_ONCE(inet_sk(sk)->tos));
- rcu_read_unlock();
- err = net_xmit_eval(err);
- }
-
-out:
- dst_release(dst);
- return err;
-}
-
-static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
- enum sk_rst_reason reason)
-{
- int err;
- const struct iphdr *rxiph;
- struct sk_buff *skb;
- struct dst_entry *dst;
- struct net *net = dev_net(skb_dst(rxskb)->dev);
- struct dccp_v4_pernet *pn;
- struct sock *ctl_sk;
-
- /* Never send a reset in response to a reset. */
- if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
- return;
-
- if (skb_rtable(rxskb)->rt_type != RTN_LOCAL)
- return;
-
- pn = net_generic(net, dccp_v4_pernet_id);
- ctl_sk = pn->v4_ctl_sk;
- dst = dccp_v4_route_skb(net, ctl_sk, rxskb);
- if (dst == NULL)
- return;
-
- skb = dccp_ctl_make_reset(ctl_sk, rxskb);
- if (skb == NULL)
- goto out;
-
- rxiph = ip_hdr(rxskb);
- dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr,
- rxiph->daddr);
- skb_dst_set(skb, dst_clone(dst));
-
- local_bh_disable();
- bh_lock_sock(ctl_sk);
- err = ip_build_and_send_pkt(skb, ctl_sk,
- rxiph->daddr, rxiph->saddr, NULL,
- inet_sk(ctl_sk)->tos);
- bh_unlock_sock(ctl_sk);
-
- if (net_xmit_eval(err) == 0) {
- __DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
- __DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
- }
- local_bh_enable();
-out:
- dst_release(dst);
-}
-
-static void dccp_v4_reqsk_destructor(struct request_sock *req)
-{
- dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
-}
-
-void dccp_syn_ack_timeout(const struct request_sock *req)
-{
-}
-EXPORT_SYMBOL(dccp_syn_ack_timeout);
-
-static struct request_sock_ops dccp_request_sock_ops __read_mostly = {
- .family = PF_INET,
- .obj_size = sizeof(struct dccp_request_sock),
- .rtx_syn_ack = dccp_v4_send_response,
- .send_ack = dccp_reqsk_send_ack,
- .destructor = dccp_v4_reqsk_destructor,
- .send_reset = dccp_v4_ctl_send_reset,
- .syn_ack_timeout = dccp_syn_ack_timeout,
-};
-
-int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
-{
- struct inet_request_sock *ireq;
- struct request_sock *req;
- struct dccp_request_sock *dreq;
- const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-
- /* Never answer DCCP_PKT_REQUESTs sent to broadcast or multicast */
- if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
- return 0; /* discard, don't send a reset here */
-
- if (dccp_bad_service_code(sk, service)) {
- dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
- goto drop;
- }
- /*
- * TW buckets are converted to open requests without
- * limitations; they conserve resources and the peer is
- * evidently a real one.
- */
- dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
- if (inet_csk_reqsk_queue_is_full(sk))
- goto drop;
-
- if (sk_acceptq_is_full(sk))
- goto drop;
-
- req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
- if (req == NULL)
- goto drop;
-
- if (dccp_reqsk_init(req, dccp_sk(sk), skb))
- goto drop_and_free;
-
- dreq = dccp_rsk(req);
- if (dccp_parse_options(sk, dreq, skb))
- goto drop_and_free;
-
- ireq = inet_rsk(req);
- sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
- sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->ir_mark = inet_request_mark(sk, skb);
- ireq->ireq_family = AF_INET;
- ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
-
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
- /*
- * Step 3: Process LISTEN state
- *
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
- *
- * Setting S.SWL/S.SWH is deferred to dccp_create_openreq_child().
- */
- dreq->dreq_isr = dcb->dccpd_seq;
- dreq->dreq_gsr = dreq->dreq_isr;
- dreq->dreq_iss = dccp_v4_init_sequence(skb);
- dreq->dreq_gss = dreq->dreq_iss;
- dreq->dreq_service = service;
-
- if (dccp_v4_send_response(sk, req))
- goto drop_and_free;
-
- if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
- reqsk_free(req);
- else
- reqsk_put(req);
-
- return 0;
-
-drop_and_free:
- reqsk_free(req);
-drop:
- __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
- return -1;
-}
-EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
-
-int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- if (sk->sk_state == DCCP_OPEN) { /* Fast path */
- if (dccp_rcv_established(sk, skb, dh, skb->len))
- goto reset;
- return 0;
- }
-
- /*
- * Step 3: Process LISTEN state
- * If P.type == Request or P contains a valid Init Cookie option,
- * (* Must scan the packet's options to check for Init
- * Cookies. Only Init Cookies are processed here,
- * however; other options are processed in Step 8. This
- * scan need only be performed if the endpoint uses Init
- * Cookies *)
- * (* Generate a new socket and switch to that socket *)
- * Set S := new socket for this port pair
- * S.state = RESPOND
- * Choose S.ISS (initial seqno) or set from Init Cookies
- * Initialize S.GAR := S.ISS
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
- * Continue with S.state == RESPOND
- * (* A Response packet will be generated in Step 11 *)
- * Otherwise,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- *
- * NOTE: the check for the packet types is done in
- * dccp_rcv_state_process
- */
-
- if (dccp_rcv_state_process(sk, skb, dh, skb->len))
- goto reset;
- return 0;
-
-reset:
- dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
- kfree_skb(skb);
- return 0;
-}
-EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
-
-/**
- * dccp_invalid_packet - check for malformed packets
- * @skb: Packet to validate
- *
- * Implements RFC 4340, 8.5: Step 1: Check header basics
- * Packets that fail these checks are ignored and do not receive Resets.
- */
-int dccp_invalid_packet(struct sk_buff *skb)
-{
- const struct dccp_hdr *dh;
- unsigned int cscov;
- u8 dccph_doff;
-
- if (skb->pkt_type != PACKET_HOST)
- return 1;
-
- /* If the packet is shorter than 12 bytes, drop packet and return */
- if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
- DCCP_WARN("pskb_may_pull failed\n");
- return 1;
- }
-
- dh = dccp_hdr(skb);
-
- /* If P.type is not understood, drop packet and return */
- if (dh->dccph_type >= DCCP_PKT_INVALID) {
- DCCP_WARN("invalid packet type\n");
- return 1;
- }
-
- /*
- * If P.Data Offset is too small for packet type, drop packet and return
- */
- dccph_doff = dh->dccph_doff;
- if (dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
- DCCP_WARN("P.Data Offset(%u) too small\n", dccph_doff);
- return 1;
- }
- /*
- * If P.Data Offset is too large for packet, drop packet and return
- */
- if (!pskb_may_pull(skb, dccph_doff * sizeof(u32))) {
- DCCP_WARN("P.Data Offset(%u) too large\n", dccph_doff);
- return 1;
- }
- dh = dccp_hdr(skb);
- /*
- * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
- * has short sequence numbers), drop packet and return
- */
- if ((dh->dccph_type < DCCP_PKT_DATA ||
- dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) {
- DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n",
- dccp_packet_name(dh->dccph_type));
- return 1;
- }
-
- /*
- * If P.CsCov is too large for the packet size, drop packet and return.
- * This must come _before_ checksumming (not after, as RFC 4340 suggests).
- */
- cscov = dccp_csum_coverage(skb);
- if (cscov > skb->len) {
- DCCP_WARN("P.CsCov %u exceeds packet length %d\n",
- dh->dccph_cscov, skb->len);
- return 1;
- }
-
- /* If header checksum is incorrect, drop packet and return.
- * (This step is completed in the AF-dependent functions.) */
- skb->csum = skb_checksum(skb, 0, cscov, 0);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(dccp_invalid_packet);
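
The CsCov semantics behind the coverage check above come from RFC 4340, sec. 9: a value of 0 means the checksum covers the whole packet, while a value n > 0 covers the header plus n - 1 words of application data. A hedged sketch of that computation (mirroring, not reproducing, the kernel's dccp_csum_coverage() helper):

#include <stddef.h>

/* Checksum coverage in bytes; doff and cscov are in 32-bit words. */
static size_t csum_coverage(size_t pkt_len, unsigned int doff,
			    unsigned int cscov)
{
	if (cscov == 0)
		return pkt_len;			/* cover everything */
	return (doff + cscov - 1) * 4;		/* header + (n - 1) words */
}

If this value exceeds the packet length, the packet is malformed, which is exactly the "P.CsCov too large" check performed above.
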
-
-/* this is called when real data arrives */
-static int dccp_v4_rcv(struct sk_buff *skb)
-{
- const struct dccp_hdr *dh;
- const struct iphdr *iph;
- bool refcounted;
- struct sock *sk;
- int min_cov;
-
- /* Step 1: Check header basics */
-
- if (dccp_invalid_packet(skb))
- goto discard_it;
-
- iph = ip_hdr(skb);
- /* Step 1: If header checksum is incorrect, drop packet and return */
- if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) {
- DCCP_WARN("dropped packet with invalid checksum\n");
- goto discard_it;
- }
-
- dh = dccp_hdr(skb);
-
- DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh);
- DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
-
- dccp_pr_debug("%8.8s src=%pI4@%-5d dst=%pI4@%-5d seq=%llu",
- dccp_packet_name(dh->dccph_type),
- &iph->saddr, ntohs(dh->dccph_sport),
- &iph->daddr, ntohs(dh->dccph_dport),
- (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq);
-
- if (dccp_packet_without_ack(skb)) {
- DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
- dccp_pr_debug_cat("\n");
- } else {
- DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
- dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long)
- DCCP_SKB_CB(skb)->dccpd_ack_seq);
- }
-
-lookup:
- sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
- dh->dccph_sport, dh->dccph_dport, 0, &refcounted);
- if (!sk) {
- dccp_pr_debug("failed to look up flow ID in table and "
- "get corresponding socket\n");
- goto no_dccp_socket;
- }
-
- /*
- * Step 2:
- * ... or S.state == TIMEWAIT,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- */
- if (sk->sk_state == DCCP_TIME_WAIT) {
- dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
- inet_twsk_put(inet_twsk(sk));
- goto no_dccp_socket;
- }
-
- if (sk->sk_state == DCCP_NEW_SYN_RECV) {
- struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk;
-
- sk = req->rsk_listener;
- if (unlikely(sk->sk_state != DCCP_LISTEN)) {
- inet_csk_reqsk_queue_drop_and_put(sk, req);
- goto lookup;
- }
- sock_hold(sk);
- refcounted = true;
- nsk = dccp_check_req(sk, skb, req);
- if (!nsk) {
- reqsk_put(req);
- goto discard_and_relse;
- }
- if (nsk == sk) {
- reqsk_put(req);
- } else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
- goto discard_and_relse;
- } else {
- sock_put(sk);
- return 0;
- }
- }
- /*
- * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
- * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
- * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
- */
- min_cov = dccp_sk(sk)->dccps_pcrlen;
- if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) {
- dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
- dh->dccph_cscov, min_cov);
- /* FIXME: "Such packets SHOULD be reported using Data Dropped
- * options (Section 11.7) with Drop Code 0, Protocol
- * Constraints." */
- goto discard_and_relse;
- }
-
- if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
- goto discard_and_relse;
- nf_reset_ct(skb);
-
- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4, refcounted);
-
-no_dccp_socket:
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
- goto discard_it;
- /*
- * Step 2:
- * If no socket ...
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- */
- if (dh->dccph_type != DCCP_PKT_RESET) {
- DCCP_SKB_CB(skb)->dccpd_reset_code =
- DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v4_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
- }
-
-discard_it:
- kfree_skb(skb);
- return 0;
-
-discard_and_relse:
- if (refcounted)
- sock_put(sk);
- goto discard_it;
-}
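
The MinCsCov rule quoted in dccp_v4_rcv() above reduces to a small acceptance predicate; an illustrative sketch:

#include <stdbool.h>

/*
 * RFC 4340, sec. 9.2.1, as checked in dccp_v4_rcv():
 * - MinCsCov == 0: accept only CsCov == 0 (full coverage);
 * - MinCsCov  > 0: additionally accept CsCov >= MinCsCov.
 */
static bool cscov_acceptable(unsigned int cscov, unsigned int min_cov)
{
	if (cscov == 0)		/* full coverage is always acceptable */
		return true;
	return min_cov > 0 && cscov >= min_cov;
}
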
-
-static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
- .queue_xmit = ip_queue_xmit,
- .send_check = dccp_v4_send_check,
- .rebuild_header = inet_sk_rebuild_header,
- .conn_request = dccp_v4_conn_request,
- .syn_recv_sock = dccp_v4_request_recv_sock,
- .net_header_len = sizeof(struct iphdr),
- .setsockopt = ip_setsockopt,
- .getsockopt = ip_getsockopt,
-};
-
-static int dccp_v4_init_sock(struct sock *sk)
-{
- static __u8 dccp_v4_ctl_sock_initialized;
- int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized);
-
- if (err == 0) {
- if (unlikely(!dccp_v4_ctl_sock_initialized))
- dccp_v4_ctl_sock_initialized = 1;
- inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops;
- }
-
- return err;
-}
-
-static struct timewait_sock_ops dccp_timewait_sock_ops = {
- .twsk_obj_size = sizeof(struct inet_timewait_sock),
-};
-
-static struct proto dccp_v4_prot = {
- .name = "DCCP",
- .owner = THIS_MODULE,
- .close = dccp_close,
- .connect = dccp_v4_connect,
- .disconnect = dccp_disconnect,
- .ioctl = dccp_ioctl,
- .init = dccp_v4_init_sock,
- .setsockopt = dccp_setsockopt,
- .getsockopt = dccp_getsockopt,
- .sendmsg = dccp_sendmsg,
- .recvmsg = dccp_recvmsg,
- .backlog_rcv = dccp_v4_do_rcv,
- .hash = inet_hash,
- .unhash = inet_unhash,
- .accept = inet_csk_accept,
- .get_port = inet_csk_get_port,
- .shutdown = dccp_shutdown,
- .destroy = dccp_destroy_sock,
- .orphan_count = &dccp_orphan_count,
- .max_header = MAX_DCCP_HEADER,
- .obj_size = sizeof(struct dccp_sock),
- .slab_flags = SLAB_TYPESAFE_BY_RCU,
- .rsk_prot = &dccp_request_sock_ops,
- .twsk_prot = &dccp_timewait_sock_ops,
- .h.hashinfo = &dccp_hashinfo,
-};
-
-static const struct net_protocol dccp_v4_protocol = {
- .handler = dccp_v4_rcv,
- .err_handler = dccp_v4_err,
- .no_policy = 1,
- .icmp_strict_tag_validation = 1,
-};
-
-static const struct proto_ops inet_dccp_ops = {
- .family = PF_INET,
- .owner = THIS_MODULE,
- .release = inet_release,
- .bind = inet_bind,
- .connect = inet_stream_connect,
- .socketpair = sock_no_socketpair,
- .accept = inet_accept,
- .getname = inet_getname,
- /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
- .poll = dccp_poll,
- .ioctl = inet_ioctl,
- .gettstamp = sock_gettstamp,
- /* FIXME: work on inet_listen to rename it to sock_common_listen */
- .listen = inet_dccp_listen,
- .shutdown = inet_shutdown,
- .setsockopt = sock_common_setsockopt,
- .getsockopt = sock_common_getsockopt,
- .sendmsg = inet_sendmsg,
- .recvmsg = sock_common_recvmsg,
- .mmap = sock_no_mmap,
-};
-
-static struct inet_protosw dccp_v4_protosw = {
- .type = SOCK_DCCP,
- .protocol = IPPROTO_DCCP,
- .prot = &dccp_v4_prot,
- .ops = &inet_dccp_ops,
- .flags = INET_PROTOSW_ICSK,
-};
-
-static int __net_init dccp_v4_init_net(struct net *net)
-{
- struct dccp_v4_pernet *pn = net_generic(net, dccp_v4_pernet_id);
-
- if (dccp_hashinfo.bhash == NULL)
- return -ESOCKTNOSUPPORT;
-
- return inet_ctl_sock_create(&pn->v4_ctl_sk, PF_INET,
- SOCK_DCCP, IPPROTO_DCCP, net);
-}
-
-static void __net_exit dccp_v4_exit_net(struct net *net)
-{
- struct dccp_v4_pernet *pn = net_generic(net, dccp_v4_pernet_id);
-
- inet_ctl_sock_destroy(pn->v4_ctl_sk);
-}
-
-static void __net_exit dccp_v4_exit_batch(struct list_head *net_exit_list)
-{
- inet_twsk_purge(&dccp_hashinfo);
-}
-
-static struct pernet_operations dccp_v4_ops = {
- .init = dccp_v4_init_net,
- .exit = dccp_v4_exit_net,
- .exit_batch = dccp_v4_exit_batch,
- .id = &dccp_v4_pernet_id,
- .size = sizeof(struct dccp_v4_pernet),
-};
-
-static int __init dccp_v4_init(void)
-{
- int err = proto_register(&dccp_v4_prot, 1);
-
- if (err)
- goto out;
-
- inet_register_protosw(&dccp_v4_protosw);
-
- err = register_pernet_subsys(&dccp_v4_ops);
- if (err)
- goto out_destroy_ctl_sock;
-
- err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
- if (err)
- goto out_proto_unregister;
-
-out:
- return err;
-out_proto_unregister:
- unregister_pernet_subsys(&dccp_v4_ops);
-out_destroy_ctl_sock:
- inet_unregister_protosw(&dccp_v4_protosw);
- proto_unregister(&dccp_v4_prot);
- goto out;
-}
-
-static void __exit dccp_v4_exit(void)
-{
- inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
- unregister_pernet_subsys(&dccp_v4_ops);
- inet_unregister_protosw(&dccp_v4_protosw);
- proto_unregister(&dccp_v4_prot);
-}
-
-module_init(dccp_v4_init);
-module_exit(dccp_v4_exit);
-
-/*
- * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
- * values directly. Also cover the case where the protocol is not specified,
- * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
- */
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 33, 6);
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 0, 6);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
-MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
deleted file mode 100644
index e24dbffabfc1..000000000000
--- a/net/dccp/ipv6.c
+++ /dev/null
@@ -1,1174 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DCCP over IPv6
- * Linux INET6 implementation
- *
- * Based on net/dccp6/ipv6.c
- *
- * Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- */
-
-#include <linux/module.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <linux/xfrm.h>
-#include <linux/string.h>
-
-#include <net/addrconf.h>
-#include <net/inet_common.h>
-#include <net/inet_hashtables.h>
-#include <net/inet_sock.h>
-#include <net/inet6_connection_sock.h>
-#include <net/inet6_hashtables.h>
-#include <net/ip6_route.h>
-#include <net/ipv6.h>
-#include <net/protocol.h>
-#include <net/transp_v6.h>
-#include <net/ip6_checksum.h>
-#include <net/xfrm.h>
-#include <net/secure_seq.h>
-#include <net/netns/generic.h>
-#include <net/sock.h>
-#include <net/rstreason.h>
-
-#include "dccp.h"
-#include "ipv6.h"
-#include "feat.h"
-
-struct dccp_v6_pernet {
- struct sock *v6_ctl_sk;
-};
-
-static unsigned int dccp_v6_pernet_id __read_mostly;
-
-/* The per-net v6_ctl_sk is used for sending RSTs and ACKs */
-
-static const struct inet_connection_sock_af_ops dccp_ipv6_mapped;
-static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops;
-
-/* add pseudo-header to DCCP checksum stored in skb->csum */
-static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
- const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
-}
-
-static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- dccp_csum_outgoing(skb);
- dh->dccph_checksum = dccp_v6_csum_finish(skb, &np->saddr, &sk->sk_v6_daddr);
-}
-
-static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
-{
- return secure_dccpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
- ipv6_hdr(skb)->saddr.s6_addr32,
- dccp_hdr(skb)->dccph_dport,
- dccp_hdr(skb)->dccph_sport);
-}
-
-static int dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, __be32 info)
-{
- const struct ipv6hdr *hdr;
- const struct dccp_hdr *dh;
- struct dccp_sock *dp;
- struct ipv6_pinfo *np;
- struct sock *sk;
- int err;
- __u64 seq;
- struct net *net = dev_net(skb->dev);
-
- if (!pskb_may_pull(skb, offset + sizeof(*dh)))
- return -EINVAL;
- dh = (struct dccp_hdr *)(skb->data + offset);
- if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
- return -EINVAL;
- hdr = (const struct ipv6hdr *)skb->data;
- dh = (struct dccp_hdr *)(skb->data + offset);
-
- sk = __inet6_lookup_established(net, &dccp_hashinfo,
- &hdr->daddr, dh->dccph_dport,
- &hdr->saddr, ntohs(dh->dccph_sport),
- inet6_iif(skb), 0);
-
- if (!sk) {
- __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
- ICMP6_MIB_INERRORS);
- return -ENOENT;
- }
-
- if (sk->sk_state == DCCP_TIME_WAIT) {
- inet_twsk_put(inet_twsk(sk));
- return 0;
- }
- seq = dccp_hdr_seq(dh);
- if (sk->sk_state == DCCP_NEW_SYN_RECV) {
- dccp_req_err(sk, seq);
- return 0;
- }
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk))
- __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
-
- if (sk->sk_state == DCCP_CLOSED)
- goto out;
-
- dp = dccp_sk(sk);
- if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) &&
- !between48(seq, dp->dccps_awl, dp->dccps_awh)) {
- __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
- goto out;
- }
-
- np = inet6_sk(sk);
-
- if (type == NDISC_REDIRECT) {
- if (!sock_owned_by_user(sk)) {
- struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
-
- if (dst)
- dst->ops->redirect(dst, sk, skb);
- }
- goto out;
- }
-
- if (type == ICMPV6_PKT_TOOBIG) {
- struct dst_entry *dst = NULL;
-
- if (!ip6_sk_accept_pmtu(sk))
- goto out;
-
- if (sock_owned_by_user(sk))
- goto out;
- if ((1 << sk->sk_state) & (DCCPF_LISTEN | DCCPF_CLOSED))
- goto out;
-
- dst = inet6_csk_update_pmtu(sk, ntohl(info));
- if (!dst)
- goto out;
-
- if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst))
- dccp_sync_mss(sk, dst_mtu(dst));
- goto out;
- }
-
- icmpv6_err_convert(type, code, &err);
-
- /* Might be for a request_sock */
- switch (sk->sk_state) {
- case DCCP_REQUESTING:
- case DCCP_RESPOND: /* Cannot happen.
- It can, if SYNs are crossed. --ANK */
- if (!sock_owned_by_user(sk)) {
- __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
- sk->sk_err = err;
- /*
- * Wake people up to see the error
- * (see connect in sock.c)
- */
- sk_error_report(sk);
- dccp_done(sk);
- } else {
- WRITE_ONCE(sk->sk_err_soft, err);
- }
- goto out;
- }
-
- if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) {
- sk->sk_err = err;
- sk_error_report(sk);
- } else {
- WRITE_ONCE(sk->sk_err_soft, err);
- }
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
- return 0;
-}
-
-static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
-{
- struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct sk_buff *skb;
- struct in6_addr *final_p, final;
- struct flowi6 fl6;
- int err = -1;
- struct dst_entry *dst;
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_DCCP;
- fl6.daddr = ireq->ir_v6_rmt_addr;
- fl6.saddr = ireq->ir_v6_loc_addr;
- fl6.flowlabel = 0;
- fl6.flowi6_oif = ireq->ir_iif;
- fl6.fl6_dport = ireq->ir_rmt_port;
- fl6.fl6_sport = htons(ireq->ir_num);
- security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
-
- rcu_read_lock();
- final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
- rcu_read_unlock();
-
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- dst = NULL;
- goto done;
- }
-
- skb = dccp_make_response(sk, dst, req);
- if (skb != NULL) {
- struct dccp_hdr *dh = dccp_hdr(skb);
- struct ipv6_txoptions *opt;
-
- dh->dccph_checksum = dccp_v6_csum_finish(skb,
- &ireq->ir_v6_loc_addr,
- &ireq->ir_v6_rmt_addr);
- fl6.daddr = ireq->ir_v6_rmt_addr;
- rcu_read_lock();
- opt = ireq->ipv6_opt;
- if (!opt)
- opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
- np->tclass, READ_ONCE(sk->sk_priority));
- rcu_read_unlock();
- err = net_xmit_eval(err);
- }
-
-done:
- dst_release(dst);
- return err;
-}
-
-static void dccp_v6_reqsk_destructor(struct request_sock *req)
-{
- dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- kfree(inet_rsk(req)->ipv6_opt);
- kfree_skb(inet_rsk(req)->pktopts);
-}
-
-static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb,
- enum sk_rst_reason reason)
-{
- const struct ipv6hdr *rxip6h;
- struct sk_buff *skb;
- struct flowi6 fl6;
- struct net *net = dev_net(skb_dst(rxskb)->dev);
- struct dccp_v6_pernet *pn;
- struct sock *ctl_sk;
- struct dst_entry *dst;
-
- if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET)
- return;
-
- if (!ipv6_unicast_destination(rxskb))
- return;
-
- pn = net_generic(net, dccp_v6_pernet_id);
- ctl_sk = pn->v6_ctl_sk;
- skb = dccp_ctl_make_reset(ctl_sk, rxskb);
- if (skb == NULL)
- return;
-
- rxip6h = ipv6_hdr(rxskb);
- dccp_hdr(skb)->dccph_checksum = dccp_v6_csum_finish(skb, &rxip6h->saddr,
- &rxip6h->daddr);
-
- memset(&fl6, 0, sizeof(fl6));
- fl6.daddr = rxip6h->saddr;
- fl6.saddr = rxip6h->daddr;
-
- fl6.flowi6_proto = IPPROTO_DCCP;
- fl6.flowi6_oif = inet6_iif(rxskb);
- fl6.fl6_dport = dccp_hdr(skb)->dccph_dport;
- fl6.fl6_sport = dccp_hdr(skb)->dccph_sport;
- security_skb_classify_flow(rxskb, flowi6_to_flowi_common(&fl6));
-
- /* sk = NULL, but it is safe for now. RST socket required. */
- dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
- if (!IS_ERR(dst)) {
- skb_dst_set(skb, dst);
- ip6_xmit(ctl_sk, skb, &fl6, 0, NULL, 0, 0);
- DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
- DCCP_INC_STATS(DCCP_MIB_OUTRSTS);
- return;
- }
-
- kfree_skb(skb);
-}
-
-static struct request_sock_ops dccp6_request_sock_ops = {
- .family = AF_INET6,
- .obj_size = sizeof(struct dccp6_request_sock),
- .rtx_syn_ack = dccp_v6_send_response,
- .send_ack = dccp_reqsk_send_ack,
- .destructor = dccp_v6_reqsk_destructor,
- .send_reset = dccp_v6_ctl_send_reset,
- .syn_ack_timeout = dccp_syn_ack_timeout,
-};
-
-static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
-{
- struct request_sock *req;
- struct dccp_request_sock *dreq;
- struct inet_request_sock *ireq;
- struct ipv6_pinfo *np = inet6_sk(sk);
- const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-
- if (skb->protocol == htons(ETH_P_IP))
- return dccp_v4_conn_request(sk, skb);
-
- if (!ipv6_unicast_destination(skb))
- return 0; /* discard, don't send a reset here */
-
- if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
- __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
- return 0;
- }
-
- if (dccp_bad_service_code(sk, service)) {
- dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
- goto drop;
- }
- /*
- * There are no SYN attacks on IPv6, yet...
- */
- dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
- if (inet_csk_reqsk_queue_is_full(sk))
- goto drop;
-
- if (sk_acceptq_is_full(sk))
- goto drop;
-
- req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
- if (req == NULL)
- goto drop;
-
- if (dccp_reqsk_init(req, dccp_sk(sk), skb))
- goto drop_and_free;
-
- dreq = dccp_rsk(req);
- if (dccp_parse_options(sk, dreq, skb))
- goto drop_and_free;
-
- ireq = inet_rsk(req);
- ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
- ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
- ireq->ir_rmt_addr = LOOPBACK4_IPV6;
- ireq->ir_loc_addr = LOOPBACK4_IPV6;
-
- ireq->ireq_family = AF_INET6;
- ireq->ir_mark = inet_request_mark(sk, skb);
-
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
- if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
- np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
- np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
- refcount_inc(&skb->users);
- ireq->pktopts = skb;
- }
- ireq->ir_iif = READ_ONCE(sk->sk_bound_dev_if);
-
- /* So that link locals have meaning */
- if (!ireq->ir_iif &&
- ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
- ireq->ir_iif = inet6_iif(skb);
-
- /*
- * Step 3: Process LISTEN state
- *
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
- *
- * Setting S.SWL/S.SWH is deferred to dccp_create_openreq_child().
- */
- dreq->dreq_isr = dcb->dccpd_seq;
- dreq->dreq_gsr = dreq->dreq_isr;
- dreq->dreq_iss = dccp_v6_init_sequence(skb);
- dreq->dreq_gss = dreq->dreq_iss;
- dreq->dreq_service = service;
-
- if (dccp_v6_send_response(sk, req))
- goto drop_and_free;
-
- if (unlikely(!inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT)))
- reqsk_free(req);
- else
- reqsk_put(req);
-
- return 0;
-
-drop_and_free:
- reqsk_free(req);
-drop:
- __DCCP_INC_STATS(DCCP_MIB_ATTEMPTFAILS);
- return -1;
-}
-
-static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
- struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst,
- struct request_sock *req_unhash,
- bool *own_req)
-{
- struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *newnp;
- const struct ipv6_pinfo *np = inet6_sk(sk);
- struct ipv6_txoptions *opt;
- struct inet_sock *newinet;
- struct dccp6_sock *newdp6;
- struct sock *newsk;
-
- if (skb->protocol == htons(ETH_P_IP)) {
- /*
- * v6 mapped
- */
- newsk = dccp_v4_request_recv_sock(sk, skb, req, dst,
- req_unhash, own_req);
- if (newsk == NULL)
- return NULL;
-
- newdp6 = (struct dccp6_sock *)newsk;
- newinet = inet_sk(newsk);
- newinet->pinet6 = &newdp6->inet6;
- newnp = inet6_sk(newsk);
-
- memcpy(newnp, np, sizeof(struct ipv6_pinfo));
-
- newnp->saddr = newsk->sk_v6_rcv_saddr;
-
- inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
- newsk->sk_backlog_rcv = dccp_v4_do_rcv;
- newnp->pktoptions = NULL;
- newnp->opt = NULL;
- newnp->ipv6_mc_list = NULL;
- newnp->ipv6_ac_list = NULL;
- newnp->ipv6_fl_list = NULL;
- newnp->mcast_oif = inet_iif(skb);
- newnp->mcast_hops = ip_hdr(skb)->ttl;
-
- /*
- * No need to charge this sock to the relevant IPv6 refcnt debug socks count
- * here, dccp_create_openreq_child now does this for us, see the comment in
- * that function for the gory details. -acme
- */
-
- /* This is a tricky place. Until this moment the IPv4
- code worked with the IPv6 icsk.icsk_af_ops.
- Sync it now.
- */
- dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
-
- return newsk;
- }
-
- if (sk_acceptq_is_full(sk))
- goto out_overflow;
-
- if (!dst) {
- struct flowi6 fl6;
-
- dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
- if (!dst)
- goto out;
- }
-
- newsk = dccp_create_openreq_child(sk, req, skb);
- if (newsk == NULL)
- goto out_nonewsk;
-
- /*
- * No need to charge this sock to the relevant IPv6 refcnt debug socks
- * count here, dccp_create_openreq_child now does this for us, see the
- * comment in that function for the gory details. -acme
- */
-
- ip6_dst_store(newsk, dst, NULL, NULL);
- newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM |
- NETIF_F_TSO);
- newdp6 = (struct dccp6_sock *)newsk;
- newinet = inet_sk(newsk);
- newinet->pinet6 = &newdp6->inet6;
- newnp = inet6_sk(newsk);
-
- memcpy(newnp, np, sizeof(struct ipv6_pinfo));
-
- newnp->saddr = ireq->ir_v6_loc_addr;
-
- /* Now IPv6 options...
-
- First: no IPv4 options.
- */
- newinet->inet_opt = NULL;
-
- /* Clone RX bits */
- newnp->rxopt.all = np->rxopt.all;
-
- newnp->ipv6_mc_list = NULL;
- newnp->ipv6_ac_list = NULL;
- newnp->ipv6_fl_list = NULL;
- newnp->pktoptions = NULL;
- newnp->opt = NULL;
- newnp->mcast_oif = inet6_iif(skb);
- newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
-
- /*
- * Clone native IPv6 options from listening socket (if any)
- *
- * Yes, keeping a reference count would be much more clever, but we do
- * one more thing here: reattach optmem to newsk.
- */
- opt = ireq->ipv6_opt;
- if (!opt)
- opt = rcu_dereference(np->opt);
- if (opt) {
- opt = ipv6_dup_options(newsk, opt);
- RCU_INIT_POINTER(newnp->opt, opt);
- }
- inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (opt)
- inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
- opt->opt_flen;
-
- dccp_sync_mss(newsk, dst_mtu(dst));
-
- if (__inet_inherit_port(sk, newsk) < 0) {
- inet_csk_prepare_forced_close(newsk);
- dccp_done(newsk);
- goto out;
- }
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
- /* Clone pktoptions received with SYN, if we own the req */
- if (*own_req && ireq->pktopts) {
- newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
- consume_skb(ireq->pktopts);
- ireq->pktopts = NULL;
- }
-
- return newsk;
-
-out_overflow:
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
-out_nonewsk:
- dst_release(dst);
-out:
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
- return NULL;
-}
-
-/* The socket must have its spinlock held when we get
- * here.
- *
- * We have a potential double-lock case here, so even when
- * doing backlog processing we use the BH locking scheme.
- * This is because we cannot sleep with the original spinlock
- * held.
- */
-static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
-{
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct sk_buff *opt_skb = NULL;
-
- /* Imagine: socket is IPv6. IPv4 packet arrives,
- goes to the IPv4 receive handler and is backlogged.
- From the backlog it always goes here. Kerboom...
- Fortunately, dccp_rcv_established and rcv_established
- handle them correctly, but that is not the case with
- dccp_v6_hnd_req and dccp_v6_ctl_send_reset(). --ANK
- */
-
- if (skb->protocol == htons(ETH_P_IP))
- return dccp_v4_do_rcv(sk, skb);
-
- if (sk_filter(sk, skb))
- goto discard;
-
- /*
- * socket locking is here for SMP purposes as backlog rcv is currently
- * called with bh processing disabled.
- */
-
- /* Do Stevens' IPV6_PKTOPTIONS.
-
- Yes, guys, it is the only place in our code where we
- can do this without affecting IPv4.
- The rest of the code is protocol independent,
- and I do not like the idea of uglifying IPv4.
-
- Actually, the whole idea behind IPV6_PKTOPTIONS
- does not look very well thought out. For now we latch
- the options received in the last packet enqueued
- by tcp. Feel free to propose a better solution.
- --ANK (980728)
- */
- if (np->rxopt.all && sk->sk_state != DCCP_LISTEN)
- opt_skb = skb_clone_and_charge_r(skb, sk);
-
- if (sk->sk_state == DCCP_OPEN) { /* Fast path */
- if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
- goto reset;
- if (opt_skb)
- goto ipv6_pktoptions;
- return 0;
- }
-
- /*
- * Step 3: Process LISTEN state
- * If S.state == LISTEN,
- * If P.type == Request or P contains a valid Init Cookie option,
- * (* Must scan the packet's options to check for Init
- * Cookies. Only Init Cookies are processed here,
- * however; other options are processed in Step 8. This
- * scan need only be performed if the endpoint uses Init
- * Cookies *)
- * (* Generate a new socket and switch to that socket *)
- * Set S := new socket for this port pair
- * S.state = RESPOND
- * Choose S.ISS (initial seqno) or set from Init Cookies
- * Initialize S.GAR := S.ISS
- * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
- * Continue with S.state == RESPOND
- * (* A Response packet will be generated in Step 11 *)
- * Otherwise,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- *
- * NOTE: the check for the packet types is done in
- * dccp_rcv_state_process
- */
-
- if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
- goto reset;
- if (opt_skb)
- goto ipv6_pktoptions;
- return 0;
-
-reset:
- dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
-discard:
- if (opt_skb != NULL)
- __kfree_skb(opt_skb);
- kfree_skb(skb);
- return 0;
-
-/* Handle the IPV6_PKTOPTIONS skb in a similar way
- * to how it is done in net/ipv6/tcp_ipv6.c
- */
-ipv6_pktoptions:
- if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
- if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
- WRITE_ONCE(np->mcast_oif, inet6_iif(opt_skb));
- if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
- WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit);
- if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
- np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
- if (inet6_test_bit(REPFLOW, sk))
- np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
- if (ipv6_opt_accepted(sk, opt_skb,
- &DCCP_SKB_CB(opt_skb)->header.h6)) {
- memmove(IP6CB(opt_skb),
- &DCCP_SKB_CB(opt_skb)->header.h6,
- sizeof(struct inet6_skb_parm));
- opt_skb = xchg(&np->pktoptions, opt_skb);
- } else {
- __kfree_skb(opt_skb);
- opt_skb = xchg(&np->pktoptions, NULL);
- }
- }
-
- kfree_skb(opt_skb);
- return 0;
-}
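The `(1 << sk->sk_state)` tests above and in dccp_v6_err() depend on the DCCPF_* flags being defined as one bit per state, which lets several states be checked with a single AND. A minimal sketch of the idiom with illustrative names (the real DCCPF_* values are derived from the TCPF_* flags):

/* One flag bit per state: (1 << state) selects the matching flag. */
enum sketch_state { ST_OPEN = 1, ST_LISTEN, ST_CLOSED };
enum sketch_flags {
	STF_OPEN   = 1 << ST_OPEN,
	STF_LISTEN = 1 << ST_LISTEN,
	STF_CLOSED = 1 << ST_CLOSED,
};

/* True when @state is any of the states named in @mask,
 * e.g. state_in(sk_state, STF_CLOSED | STF_LISTEN). */
static inline int state_in(int state, int mask)
{
	return (1 << state) & mask;
}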
-
-static int dccp_v6_rcv(struct sk_buff *skb)
-{
- const struct dccp_hdr *dh;
- bool refcounted;
- struct sock *sk;
- int min_cov;
-
- /* Step 1: Check header basics */
-
- if (dccp_invalid_packet(skb))
- goto discard_it;
-
- /* Step 1: If header checksum is incorrect, drop packet and return. */
- if (dccp_v6_csum_finish(skb, &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr)) {
- DCCP_WARN("dropped packet with invalid checksum\n");
- goto discard_it;
- }
-
- dh = dccp_hdr(skb);
-
- DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh);
- DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
-
- if (dccp_packet_without_ack(skb))
- DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
- else
- DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
-
-lookup:
- sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh),
- dh->dccph_sport, dh->dccph_dport,
- inet6_iif(skb), 0, &refcounted);
- if (!sk) {
- dccp_pr_debug("failed to look up flow ID in table and "
- "get corresponding socket\n");
- goto no_dccp_socket;
- }
-
- /*
- * Step 2:
- * ... or S.state == TIMEWAIT,
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- */
- if (sk->sk_state == DCCP_TIME_WAIT) {
- dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
- inet_twsk_put(inet_twsk(sk));
- goto no_dccp_socket;
- }
-
- if (sk->sk_state == DCCP_NEW_SYN_RECV) {
- struct request_sock *req = inet_reqsk(sk);
- struct sock *nsk;
-
- sk = req->rsk_listener;
- if (unlikely(sk->sk_state != DCCP_LISTEN)) {
- inet_csk_reqsk_queue_drop_and_put(sk, req);
- goto lookup;
- }
- sock_hold(sk);
- refcounted = true;
- nsk = dccp_check_req(sk, skb, req);
- if (!nsk) {
- reqsk_put(req);
- goto discard_and_relse;
- }
- if (nsk == sk) {
- reqsk_put(req);
- } else if (dccp_child_process(sk, nsk, skb)) {
- dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
- goto discard_and_relse;
- } else {
- sock_put(sk);
- return 0;
- }
- }
- /*
- * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
- * o if MinCsCov = 0, only packets with CsCov = 0 are accepted
- * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
- */
- min_cov = dccp_sk(sk)->dccps_pcrlen;
- if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) {
- dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
- dh->dccph_cscov, min_cov);
- /* FIXME: send Data Dropped option (see also dccp_v4_rcv) */
- goto discard_and_relse;
- }
-
- if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
- goto discard_and_relse;
- nf_reset_ct(skb);
-
- return __sk_receive_skb(sk, skb, 1, dh->dccph_doff * 4,
- refcounted) ? -1 : 0;
-
-no_dccp_socket:
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
- goto discard_it;
- /*
- * Step 2:
- * If no socket ...
- * Generate Reset(No Connection) unless P.type == Reset
- * Drop packet and return
- */
- if (dh->dccph_type != DCCP_PKT_RESET) {
- DCCP_SKB_CB(skb)->dccpd_reset_code =
- DCCP_RESET_CODE_NO_CONNECTION;
- dccp_v6_ctl_send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
- }
-
-discard_it:
- kfree_skb(skb);
- return 0;
-
-discard_and_relse:
- if (refcounted)
- sock_put(sk);
- goto discard_it;
-}
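The MinCsCov check above implements RFC 4340, sec. 9.2.1 as a rejection test. Written as an acceptance predicate, the same rule reads as follows (a kernel-style sketch; cscov_acceptable is an illustrative name, not a kernel function):

/* Returns true when a packet with checksum coverage @cscov is
 * acceptable for a socket whose MinCsCov is @min_cov:
 *   - min_cov == 0: only full coverage (cscov == 0) is accepted
 *   - min_cov  > 0: partial coverage with cscov >= min_cov also passes
 */
static bool cscov_acceptable(int cscov, int min_cov)
{
	if (cscov == 0)		/* full checksum coverage always passes */
		return true;
	return min_cov > 0 && cscov >= min_cov;
}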
-
-static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
- int addr_len)
-{
- struct sockaddr_in6 *usin = (struct sockaddr_in6 *)uaddr;
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- struct in6_addr *saddr = NULL, *final_p, final;
- struct ipv6_txoptions *opt;
- struct flowi6 fl6;
- struct dst_entry *dst;
- int addr_type;
- int err;
-
- dp->dccps_role = DCCP_ROLE_CLIENT;
-
- if (addr_len < SIN6_LEN_RFC2133)
- return -EINVAL;
-
- if (usin->sin6_family != AF_INET6)
- return -EAFNOSUPPORT;
-
- memset(&fl6, 0, sizeof(fl6));
-
- if (inet6_test_bit(SNDFLOW, sk)) {
- fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
- IP6_ECN_flow_init(fl6.flowlabel);
- if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
- struct ip6_flowlabel *flowlabel;
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
- if (IS_ERR(flowlabel))
- return -EINVAL;
- fl6_sock_release(flowlabel);
- }
- }
- /*
- * connect() to INADDR_ANY means loopback (BSD'ism).
- */
- if (ipv6_addr_any(&usin->sin6_addr))
- usin->sin6_addr.s6_addr[15] = 1;
-
- addr_type = ipv6_addr_type(&usin->sin6_addr);
-
- if (addr_type & IPV6_ADDR_MULTICAST)
- return -ENETUNREACH;
-
- if (addr_type & IPV6_ADDR_LINKLOCAL) {
- if (addr_len >= sizeof(struct sockaddr_in6) &&
- usin->sin6_scope_id) {
- /* If interface is set while binding, indices
- * must coincide.
- */
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id)
- return -EINVAL;
-
- sk->sk_bound_dev_if = usin->sin6_scope_id;
- }
-
- /* Connect to link-local address requires an interface */
- if (!sk->sk_bound_dev_if)
- return -EINVAL;
- }
-
- sk->sk_v6_daddr = usin->sin6_addr;
- np->flow_label = fl6.flowlabel;
-
- /*
- * DCCP over IPv4
- */
- if (addr_type == IPV6_ADDR_MAPPED) {
- u32 exthdrlen = icsk->icsk_ext_hdr_len;
- struct sockaddr_in sin;
-
- net_dbg_ratelimited("connect: ipv4 mapped\n");
-
- if (ipv6_only_sock(sk))
- return -ENETUNREACH;
-
- sin.sin_family = AF_INET;
- sin.sin_port = usin->sin6_port;
- sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
-
- icsk->icsk_af_ops = &dccp_ipv6_mapped;
- sk->sk_backlog_rcv = dccp_v4_do_rcv;
-
- err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
- if (err) {
- icsk->icsk_ext_hdr_len = exthdrlen;
- icsk->icsk_af_ops = &dccp_ipv6_af_ops;
- sk->sk_backlog_rcv = dccp_v6_do_rcv;
- goto failure;
- }
- np->saddr = sk->sk_v6_rcv_saddr;
- return err;
- }
-
- if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
- saddr = &sk->sk_v6_rcv_saddr;
-
- fl6.flowi6_proto = IPPROTO_DCCP;
- fl6.daddr = sk->sk_v6_daddr;
- fl6.saddr = saddr ? *saddr : np->saddr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.fl6_dport = usin->sin6_port;
- fl6.fl6_sport = inet->inet_sport;
- security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
-
- opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
- final_p = fl6_update_dst(&fl6, opt, &final);
-
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
- goto failure;
- }
-
- if (saddr == NULL) {
- saddr = &fl6.saddr;
-
- err = inet_bhash2_update_saddr(sk, saddr, AF_INET6);
- if (err)
- goto failure;
- }
-
- /* set the source address */
- np->saddr = *saddr;
- inet->inet_rcv_saddr = LOOPBACK4_IPV6;
-
- ip6_dst_store(sk, dst, NULL, NULL);
-
- icsk->icsk_ext_hdr_len = 0;
- if (opt)
- icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
-
- inet->inet_dport = usin->sin6_port;
-
- dccp_set_state(sk, DCCP_REQUESTING);
- err = inet6_hash_connect(&dccp_death_row, sk);
- if (err)
- goto late_failure;
-
- dp->dccps_iss = secure_dccpv6_sequence_number(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport);
- err = dccp_connect(sk);
- if (err)
- goto late_failure;
-
- return 0;
-
-late_failure:
- dccp_set_state(sk, DCCP_CLOSED);
- inet_bhash2_reset_saddr(sk);
- __sk_dst_reset(sk);
-failure:
- inet->inet_dport = 0;
- sk->sk_route_caps = 0;
- return err;
-}
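The `usin->sin6_addr.s6_addr[15] = 1` assignment near the top of dccp_v6_connect() works because the unspecified address and loopback differ only in their last byte. A sketch of the rewrite (illustrative only):

/* connect() to :: is treated as connect() to ::1 (BSD'ism) */
struct in6_addr a = IN6ADDR_ANY_INIT;	/* ::  (all zero bytes) */
a.s6_addr[15] = 1;			/* now ::1, loopback    */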
-
-static const struct inet_connection_sock_af_ops dccp_ipv6_af_ops = {
- .queue_xmit = inet6_csk_xmit,
- .send_check = dccp_v6_send_check,
- .rebuild_header = inet6_sk_rebuild_header,
- .conn_request = dccp_v6_conn_request,
- .syn_recv_sock = dccp_v6_request_recv_sock,
- .net_header_len = sizeof(struct ipv6hdr),
- .setsockopt = ipv6_setsockopt,
- .getsockopt = ipv6_getsockopt,
-};
-
-/*
- * DCCP over IPv4 via INET6 API
- */
-static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
- .queue_xmit = ip_queue_xmit,
- .send_check = dccp_v4_send_check,
- .rebuild_header = inet_sk_rebuild_header,
- .conn_request = dccp_v6_conn_request,
- .syn_recv_sock = dccp_v6_request_recv_sock,
- .net_header_len = sizeof(struct iphdr),
- .setsockopt = ipv6_setsockopt,
- .getsockopt = ipv6_getsockopt,
-};
-
-static void dccp_v6_sk_destruct(struct sock *sk)
-{
- dccp_destruct_common(sk);
- inet6_sock_destruct(sk);
-}
-
-/* NOTE: A lot of things are set to zero explicitly by the call to
- * sk_alloc(), so they need not be done here.
- */
-static int dccp_v6_init_sock(struct sock *sk)
-{
- static __u8 dccp_v6_ctl_sock_initialized;
- int err = dccp_init_sock(sk, dccp_v6_ctl_sock_initialized);
-
- if (err == 0) {
- if (unlikely(!dccp_v6_ctl_sock_initialized))
- dccp_v6_ctl_sock_initialized = 1;
- inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
- sk->sk_destruct = dccp_v6_sk_destruct;
- }
-
- return err;
-}
-
-static struct timewait_sock_ops dccp6_timewait_sock_ops = {
- .twsk_obj_size = sizeof(struct dccp6_timewait_sock),
-};
-
-static struct proto dccp_v6_prot = {
- .name = "DCCPv6",
- .owner = THIS_MODULE,
- .close = dccp_close,
- .connect = dccp_v6_connect,
- .disconnect = dccp_disconnect,
- .ioctl = dccp_ioctl,
- .init = dccp_v6_init_sock,
- .setsockopt = dccp_setsockopt,
- .getsockopt = dccp_getsockopt,
- .sendmsg = dccp_sendmsg,
- .recvmsg = dccp_recvmsg,
- .backlog_rcv = dccp_v6_do_rcv,
- .hash = inet6_hash,
- .unhash = inet_unhash,
- .accept = inet_csk_accept,
- .get_port = inet_csk_get_port,
- .shutdown = dccp_shutdown,
- .destroy = dccp_destroy_sock,
- .orphan_count = &dccp_orphan_count,
- .max_header = MAX_DCCP_HEADER,
- .obj_size = sizeof(struct dccp6_sock),
- .ipv6_pinfo_offset = offsetof(struct dccp6_sock, inet6),
- .slab_flags = SLAB_TYPESAFE_BY_RCU,
- .rsk_prot = &dccp6_request_sock_ops,
- .twsk_prot = &dccp6_timewait_sock_ops,
- .h.hashinfo = &dccp_hashinfo,
-};
-
-static const struct inet6_protocol dccp_v6_protocol = {
- .handler = dccp_v6_rcv,
- .err_handler = dccp_v6_err,
- .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
-};
-
-static const struct proto_ops inet6_dccp_ops = {
- .family = PF_INET6,
- .owner = THIS_MODULE,
- .release = inet6_release,
- .bind = inet6_bind,
- .connect = inet_stream_connect,
- .socketpair = sock_no_socketpair,
- .accept = inet_accept,
- .getname = inet6_getname,
- .poll = dccp_poll,
- .ioctl = inet6_ioctl,
- .gettstamp = sock_gettstamp,
- .listen = inet_dccp_listen,
- .shutdown = inet_shutdown,
- .setsockopt = sock_common_setsockopt,
- .getsockopt = sock_common_getsockopt,
- .sendmsg = inet_sendmsg,
- .recvmsg = sock_common_recvmsg,
- .mmap = sock_no_mmap,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = inet6_compat_ioctl,
-#endif
-};
-
-static struct inet_protosw dccp_v6_protosw = {
- .type = SOCK_DCCP,
- .protocol = IPPROTO_DCCP,
- .prot = &dccp_v6_prot,
- .ops = &inet6_dccp_ops,
- .flags = INET_PROTOSW_ICSK,
-};
-
-static int __net_init dccp_v6_init_net(struct net *net)
-{
- struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id);
-
- if (dccp_hashinfo.bhash == NULL)
- return -ESOCKTNOSUPPORT;
-
- return inet_ctl_sock_create(&pn->v6_ctl_sk, PF_INET6,
- SOCK_DCCP, IPPROTO_DCCP, net);
-}
-
-static void __net_exit dccp_v6_exit_net(struct net *net)
-{
- struct dccp_v6_pernet *pn = net_generic(net, dccp_v6_pernet_id);
-
- inet_ctl_sock_destroy(pn->v6_ctl_sk);
-}
-
-static struct pernet_operations dccp_v6_ops = {
- .init = dccp_v6_init_net,
- .exit = dccp_v6_exit_net,
- .id = &dccp_v6_pernet_id,
- .size = sizeof(struct dccp_v6_pernet),
-};
-
-static int __init dccp_v6_init(void)
-{
- int err = proto_register(&dccp_v6_prot, 1);
-
- if (err)
- goto out;
-
- inet6_register_protosw(&dccp_v6_protosw);
-
- err = register_pernet_subsys(&dccp_v6_ops);
- if (err)
- goto out_destroy_ctl_sock;
-
- err = inet6_add_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
- if (err)
- goto out_unregister_proto;
-
-out:
- return err;
-out_unregister_proto:
- unregister_pernet_subsys(&dccp_v6_ops);
-out_destroy_ctl_sock:
- inet6_unregister_protosw(&dccp_v6_protosw);
- proto_unregister(&dccp_v6_prot);
- goto out;
-}
-
-static void __exit dccp_v6_exit(void)
-{
- inet6_del_protocol(&dccp_v6_protocol, IPPROTO_DCCP);
- unregister_pernet_subsys(&dccp_v6_ops);
- inet6_unregister_protosw(&dccp_v6_protosw);
- proto_unregister(&dccp_v6_prot);
-}
-
-module_init(dccp_v6_init);
-module_exit(dccp_v6_exit);
-
-/*
- * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
- * values directly. Also cover the case where the protocol is not specified,
- * i.e. net-pf-PF_INET6-proto-0-type-SOCK_DCCP
- */
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 33, 6);
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 0, 6);
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
-MODULE_DESCRIPTION("DCCPv6 - Datagram Congestion Controlled Protocol");
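Since PF_INET6 is 10, the two aliases above should expand to the strings that modprobe resolves when an application creates a DCCP socket before the module is loaded (expansion shown for illustration):

MODULE_ALIAS("net-pf-10-proto-33-type-6");	/* PF_INET6, IPPROTO_DCCP, SOCK_DCCP */
MODULE_ALIAS("net-pf-10-proto-0-type-6");	/* PF_INET6, protocol unspecified    */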
diff --git a/net/dccp/ipv6.h b/net/dccp/ipv6.h
deleted file mode 100644
index c5d14c48def1..000000000000
--- a/net/dccp/ipv6.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-#ifndef _DCCP_IPV6_H
-#define _DCCP_IPV6_H
-/*
- * net/dccp/ipv6.h
- *
- * An implementation of the DCCP protocol
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- */
-
-#include <linux/dccp.h>
-#include <linux/ipv6.h>
-
-struct dccp6_sock {
- struct dccp_sock dccp;
- struct ipv6_pinfo inet6;
-};
-
-struct dccp6_request_sock {
- struct dccp_request_sock dccp;
-};
-
-struct dccp6_timewait_sock {
- struct inet_timewait_sock inet;
-};
-
-#endif /* _DCCP_IPV6_H */
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
deleted file mode 100644
index fecc8190064f..000000000000
--- a/net/dccp/minisocks.c
+++ /dev/null
@@ -1,266 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/dccp/minisocks.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#include <linux/dccp.h>
-#include <linux/gfp.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/timer.h>
-
-#include <net/sock.h>
-#include <net/xfrm.h>
-#include <net/inet_timewait_sock.h>
-#include <net/rstreason.h>
-
-#include "ackvec.h"
-#include "ccid.h"
-#include "dccp.h"
-#include "feat.h"
-
-struct inet_timewait_death_row dccp_death_row = {
- .tw_refcount = REFCOUNT_INIT(1),
- .sysctl_max_tw_buckets = NR_FILE * 2,
- .hashinfo = &dccp_hashinfo,
-};
-
-EXPORT_SYMBOL_GPL(dccp_death_row);
-
-void dccp_time_wait(struct sock *sk, int state, int timeo)
-{
- struct inet_timewait_sock *tw;
-
- tw = inet_twsk_alloc(sk, &dccp_death_row, state);
-
- if (tw != NULL) {
- const struct inet_connection_sock *icsk = inet_csk(sk);
- const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
-#if IS_ENABLED(CONFIG_IPV6)
- if (tw->tw_family == PF_INET6) {
- tw->tw_v6_daddr = sk->sk_v6_daddr;
- tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- tw->tw_ipv6only = sk->sk_ipv6only;
- }
-#endif
-
- /* Get the TIME_WAIT timeout firing. */
- if (timeo < rto)
- timeo = rto;
-
- if (state == DCCP_TIME_WAIT)
- timeo = DCCP_TIMEWAIT_LEN;
-
- /* Linkage updates.
- * Note that access to tw after this point is illegal.
- */
- inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo);
- } else {
- /* Sorry, if we're out of memory, just CLOSE this
- * socket up. We've got bigger problems than
- * non-graceful socket closings.
- */
- DCCP_WARN("time wait bucket table overflow\n");
- }
-
- dccp_done(sk);
-}
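The shift expression used for rto in dccp_time_wait() is fixed-point arithmetic for 3.5 RTO, the minimum interval the TIME_WAIT timer should cover:

/* (icsk_rto << 2) - (icsk_rto >> 1)
 *	= 4 * RTO - RTO / 2
 *	= 3.5 * RTO		(e.g. RTO = 200ms  =>  timeo >= 700ms)
 */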
-
-struct sock *dccp_create_openreq_child(const struct sock *sk,
- const struct request_sock *req,
- const struct sk_buff *skb)
-{
- /*
- * Step 3: Process LISTEN state
- *
- * (* Generate a new socket and switch to that socket *)
- * Set S := new socket for this port pair
- */
- struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
-
- if (newsk != NULL) {
- struct dccp_request_sock *dreq = dccp_rsk(req);
- struct inet_connection_sock *newicsk = inet_csk(newsk);
- struct dccp_sock *newdp = dccp_sk(newsk);
-
- newdp->dccps_role = DCCP_ROLE_SERVER;
- newdp->dccps_hc_rx_ackvec = NULL;
- newdp->dccps_service_list = NULL;
- newdp->dccps_hc_rx_ccid = NULL;
- newdp->dccps_hc_tx_ccid = NULL;
- newdp->dccps_service = dreq->dreq_service;
- newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
- newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
- newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
-
- INIT_LIST_HEAD(&newdp->dccps_featneg);
- /*
- * Step 3: Process LISTEN state
- *
- * Choose S.ISS (initial seqno) or set from Init Cookies
- * Initialize S.GAR := S.ISS
- * Set S.ISR, S.GSR from packet (or Init Cookies)
- *
- * Setting AWL/AWH and SWL/SWH happens as part of the feature
- * activation below, as these windows all depend on the local
- * and remote Sequence Window feature values (7.5.2).
- */
- newdp->dccps_iss = dreq->dreq_iss;
- newdp->dccps_gss = dreq->dreq_gss;
- newdp->dccps_gar = newdp->dccps_iss;
- newdp->dccps_isr = dreq->dreq_isr;
- newdp->dccps_gsr = dreq->dreq_gsr;
-
- /*
- * Activate features: initialise CCIDs, sequence windows etc.
- */
- if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
- sk_free_unlock_clone(newsk);
- return NULL;
- }
- dccp_init_xmit_timers(newsk);
-
- __DCCP_INC_STATS(DCCP_MIB_PASSIVEOPENS);
- }
- return newsk;
-}
-
-EXPORT_SYMBOL_GPL(dccp_create_openreq_child);
-
-/*
- * Process an incoming packet for RESPOND sockets represented
- * as a request_sock.
- */
-struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req)
-{
- struct sock *child = NULL;
- struct dccp_request_sock *dreq = dccp_rsk(req);
- bool own_req;
-
- /* TCP/DCCP listeners became lockless.
- * DCCP stores complex state in its request_sock, so we need
- * a protection for them, now this code runs without being protected
- * by the parent (listener) lock.
- */
- spin_lock_bh(&dreq->dreq_lock);
-
- /* Check for retransmitted REQUEST */
- if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
-
- if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_gsr)) {
- dccp_pr_debug("Retransmitted REQUEST\n");
- dreq->dreq_gsr = DCCP_SKB_CB(skb)->dccpd_seq;
- /*
- * Send another RESPONSE packet
- * To protect against Request floods, increment retrans
- * counter (backoff, monitored by dccp_response_timer).
- */
- inet_rtx_syn_ack(sk, req);
- }
- /* Network Duplicate, discard packet */
- goto out;
- }
-
- DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
-
- if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK &&
- dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK)
- goto drop;
-
- /* Invalid ACK */
- if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
- dreq->dreq_iss, dreq->dreq_gss)) {
- dccp_pr_debug("Invalid ACK number: ack_seq=%llu, "
- "dreq_iss=%llu, dreq_gss=%llu\n",
- (unsigned long long)
- DCCP_SKB_CB(skb)->dccpd_ack_seq,
- (unsigned long long) dreq->dreq_iss,
- (unsigned long long) dreq->dreq_gss);
- goto drop;
- }
-
- if (dccp_parse_options(sk, dreq, skb))
- goto drop;
-
- child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL,
- req, &own_req);
- if (child) {
- child = inet_csk_complete_hashdance(sk, child, req, own_req);
- goto out;
- }
-
- DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
-drop:
- if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
- req->rsk_ops->send_reset(sk, skb, SK_RST_REASON_NOT_SPECIFIED);
-
- inet_csk_reqsk_queue_drop(sk, req);
-out:
- spin_unlock_bh(&dreq->dreq_lock);
- return child;
-}
-
-EXPORT_SYMBOL_GPL(dccp_check_req);
-
-/*
- * Queue segment on the new socket if the new socket is active,
- * otherwise we just short-circuit this and continue with
- * the new socket.
- */
-int dccp_child_process(struct sock *parent, struct sock *child,
- struct sk_buff *skb)
- __releases(child)
-{
- int ret = 0;
- const int state = child->sk_state;
-
- if (!sock_owned_by_user(child)) {
- ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb),
- skb->len);
-
- /* Wakeup parent, send SIGIO */
- if (state == DCCP_RESPOND && child->sk_state != state)
- parent->sk_data_ready(parent);
- } else {
- /* Alas, it is possible again, because we do the lookup
- * in the main socket hash table and the lock on the
- * listening socket no longer protects us.
- */
- __sk_add_backlog(child, skb);
- }
-
- bh_unlock_sock(child);
- sock_put(child);
- return ret;
-}
-
-EXPORT_SYMBOL_GPL(dccp_child_process);
-
-void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
- struct request_sock *rsk)
-{
- DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state");
-}
-
-EXPORT_SYMBOL_GPL(dccp_reqsk_send_ack);
-
-int dccp_reqsk_init(struct request_sock *req,
- struct dccp_sock const *dp, struct sk_buff const *skb)
-{
- struct dccp_request_sock *dreq = dccp_rsk(req);
-
- spin_lock_init(&dreq->dreq_lock);
- inet_rsk(req)->ir_rmt_port = dccp_hdr(skb)->dccph_sport;
- inet_rsk(req)->ir_num = ntohs(dccp_hdr(skb)->dccph_dport);
- inet_rsk(req)->acked = 0;
- dreq->dreq_timestamp_echo = 0;
-
- /* inherit feature negotiation options from listening socket */
- return dccp_feat_clone_list(&dp->dccps_featneg, &dreq->dreq_featneg);
-}
-
-EXPORT_SYMBOL_GPL(dccp_reqsk_init);
diff --git a/net/dccp/options.c b/net/dccp/options.c
deleted file mode 100644
index db62d4767024..000000000000
--- a/net/dccp/options.c
+++ /dev/null
@@ -1,609 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/dccp/options.c
- *
- * An implementation of the DCCP protocol
- * Copyright (c) 2005 Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
- * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
- * Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
- */
-#include <linux/dccp.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/unaligned.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-
-#include "ackvec.h"
-#include "ccid.h"
-#include "dccp.h"
-#include "feat.h"
-
-u64 dccp_decode_value_var(const u8 *bf, const u8 len)
-{
- u64 value = 0;
-
- if (len >= DCCP_OPTVAL_MAXLEN)
- value += ((u64)*bf++) << 40;
- if (len > 4)
- value += ((u64)*bf++) << 32;
- if (len > 3)
- value += ((u64)*bf++) << 24;
- if (len > 2)
- value += ((u64)*bf++) << 16;
- if (len > 1)
- value += ((u64)*bf++) << 8;
- if (len > 0)
- value += *bf;
-
- return value;
-}
-
-/**
- * dccp_parse_options - Parse DCCP options present in @skb
- * @sk: client|server|listening dccp socket (when @dreq != NULL)
- * @dreq: request socket to use during connection setup, or NULL
- * @skb: frame to parse
- */
-int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
- struct sk_buff *skb)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- const struct dccp_hdr *dh = dccp_hdr(skb);
- const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
- unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
- unsigned char *opt_ptr = options;
- const unsigned char *opt_end = (unsigned char *)dh +
- (dh->dccph_doff * 4);
- struct dccp_options_received *opt_recv = &dp->dccps_options_received;
- unsigned char opt, len;
- unsigned char *value;
- u32 elapsed_time;
- __be32 opt_val;
- int rc;
- int mandatory = 0;
-
- memset(opt_recv, 0, sizeof(*opt_recv));
-
- opt = len = 0;
- while (opt_ptr != opt_end) {
- opt = *opt_ptr++;
- len = 0;
- value = NULL;
-
- /* Check if this isn't a single byte option */
- if (opt > DCCPO_MAX_RESERVED) {
- if (opt_ptr == opt_end)
- goto out_nonsensical_length;
-
- len = *opt_ptr++;
- if (len < 2)
- goto out_nonsensical_length;
- /*
- * Remove the type and len fields, leaving
- * just the value size
- */
- len -= 2;
- value = opt_ptr;
- opt_ptr += len;
-
- if (opt_ptr > opt_end)
- goto out_nonsensical_length;
- }
-
- /*
- * CCID-specific options are ignored during connection setup, as
- * negotiation may still be in progress (see RFC 4340, 10.3).
- * The same applies to Ack Vectors, as these depend on the CCID.
- */
- if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
- opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
- goto ignore_option;
-
- switch (opt) {
- case DCCPO_PADDING:
- break;
- case DCCPO_MANDATORY:
- if (mandatory)
- goto out_invalid_option;
- if (pkt_type != DCCP_PKT_DATA)
- mandatory = 1;
- break;
- case DCCPO_NDP_COUNT:
- if (len > 6)
- goto out_invalid_option;
-
- opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
- dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk),
- (unsigned long long)opt_recv->dccpor_ndp);
- break;
- case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R:
- if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */
- break;
- if (len == 0)
- goto out_invalid_option;
- rc = dccp_feat_parse_options(sk, dreq, mandatory, opt,
- *value, value + 1, len - 1);
- if (rc)
- goto out_featneg_failed;
- break;
- case DCCPO_TIMESTAMP:
- if (len != 4)
- goto out_invalid_option;
- /*
- * RFC 4340 13.1: "The precise time corresponding to
- * Timestamp Value zero is not specified". We use
- * zero to indicate absence of a meaningful timestamp.
- */
- opt_val = get_unaligned((__be32 *)value);
- if (unlikely(opt_val == 0)) {
- DCCP_WARN("Timestamp with zero value\n");
- break;
- }
-
- if (dreq != NULL) {
- dreq->dreq_timestamp_echo = ntohl(opt_val);
- dreq->dreq_timestamp_time = dccp_timestamp();
- } else {
- opt_recv->dccpor_timestamp =
- dp->dccps_timestamp_echo = ntohl(opt_val);
- dp->dccps_timestamp_time = dccp_timestamp();
- }
- dccp_pr_debug("%s rx opt: TIMESTAMP=%u, ackno=%llu\n",
- dccp_role(sk), ntohl(opt_val),
- (unsigned long long)
- DCCP_SKB_CB(skb)->dccpd_ack_seq);
- /* schedule an Ack in case this sender is quiescent */
- inet_csk_schedule_ack(sk);
- break;
- case DCCPO_TIMESTAMP_ECHO:
- if (len != 4 && len != 6 && len != 8)
- goto out_invalid_option;
-
- opt_val = get_unaligned((__be32 *)value);
- opt_recv->dccpor_timestamp_echo = ntohl(opt_val);
-
- dccp_pr_debug("%s rx opt: TIMESTAMP_ECHO=%u, len=%d, "
- "ackno=%llu", dccp_role(sk),
- opt_recv->dccpor_timestamp_echo,
- len + 2,
- (unsigned long long)
- DCCP_SKB_CB(skb)->dccpd_ack_seq);
-
- value += 4;
-
- if (len == 4) { /* no elapsed time included */
- dccp_pr_debug_cat("\n");
- break;
- }
-
- if (len == 6) { /* 2-byte elapsed time */
- __be16 opt_val2 = get_unaligned((__be16 *)value);
- elapsed_time = ntohs(opt_val2);
- } else { /* 4-byte elapsed time */
- opt_val = get_unaligned((__be32 *)value);
- elapsed_time = ntohl(opt_val);
- }
-
- dccp_pr_debug_cat(", ELAPSED_TIME=%u\n", elapsed_time);
-
- /* Give precedence to the biggest ELAPSED_TIME */
- if (elapsed_time > opt_recv->dccpor_elapsed_time)
- opt_recv->dccpor_elapsed_time = elapsed_time;
- break;
- case DCCPO_ELAPSED_TIME:
- if (dccp_packet_without_ack(skb)) /* RFC 4340, 13.2 */
- break;
-
- if (len == 2) {
- __be16 opt_val2 = get_unaligned((__be16 *)value);
- elapsed_time = ntohs(opt_val2);
- } else if (len == 4) {
- opt_val = get_unaligned((__be32 *)value);
- elapsed_time = ntohl(opt_val);
- } else {
- goto out_invalid_option;
- }
-
- if (elapsed_time > opt_recv->dccpor_elapsed_time)
- opt_recv->dccpor_elapsed_time = elapsed_time;
-
- dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
- dccp_role(sk), elapsed_time);
- break;
- case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
- if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
- pkt_type, opt, value, len))
- goto out_invalid_option;
- break;
- case DCCPO_ACK_VECTOR_0:
- case DCCPO_ACK_VECTOR_1:
- if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */
- break;
- /*
- * Ack vectors are processed by the TX CCID if it is
- * interested. The RX CCID need not parse Ack Vectors,
- * since it is only interested in clearing old state.
- */
- fallthrough;
- case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
- if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
- pkt_type, opt, value, len))
- goto out_invalid_option;
- break;
- default:
- DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
- "implemented, ignoring", sk, opt, len);
- break;
- }
-ignore_option:
- if (opt != DCCPO_MANDATORY)
- mandatory = 0;
- }
-
- /* mandatory was the last byte in option list -> reset connection */
- if (mandatory)
- goto out_invalid_option;
-
-out_nonsensical_length:
- /* RFC 4340, 5.8: ignore option and all remaining option space */
- return 0;
-
-out_invalid_option:
- DCCP_INC_STATS(DCCP_MIB_INVALIDOPT);
- rc = DCCP_RESET_CODE_OPTION_ERROR;
-out_featneg_failed:
- DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc);
- DCCP_SKB_CB(skb)->dccpd_reset_code = rc;
- DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt;
- DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0;
- DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0;
- return -1;
-}
-
-EXPORT_SYMBOL_GPL(dccp_parse_options);
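dccp_parse_options() walks the option space as a stream of type bytes, where any type above DCCPO_MAX_RESERVED (31) is followed by a total-length byte and a value (RFC 4340, 5.8). As a hedged illustration of one well-formed 8-byte option area (type values per RFC 4340):

/*   00                   Padding    - single-byte option, no length byte
 *   29 06 xx xx xx xx    Timestamp  - type 41 > 31, so the next byte
 *                                     (0x06) is the total length;
 *                                     len -= 2 leaves a 4-byte value
 *   00                   Padding    - keeps the area a multiple of 4
 */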
-
-void dccp_encode_value_var(const u64 value, u8 *to, const u8 len)
-{
- if (len >= DCCP_OPTVAL_MAXLEN)
- *to++ = (value & 0xFF0000000000ull) >> 40;
- if (len > 4)
- *to++ = (value & 0xFF00000000ull) >> 32;
- if (len > 3)
- *to++ = (value & 0xFF000000) >> 24;
- if (len > 2)
- *to++ = (value & 0xFF0000) >> 16;
- if (len > 1)
- *to++ = (value & 0xFF00) >> 8;
- if (len > 0)
- *to++ = (value & 0xFF);
-}
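dccp_encode_value_var() is the exact inverse of dccp_decode_value_var() for values that fit into @len bytes: both touch the same big-endian byte positions. A self-contained userspace sketch of the same round trip (plain C, independent of the kernel helpers):

#include <stdint.h>
#include <stdio.h>

/* Store the low @len bytes of @value big-endian into @buf (len <= 6),
 * mirroring dccp_encode_value_var(). */
static void encode_var(uint64_t value, uint8_t *buf, int len)
{
	for (int i = 0; i < len; i++)
		buf[i] = value >> (8 * (len - 1 - i));
}

/* Inverse of encode_var(), mirroring dccp_decode_value_var(). */
static uint64_t decode_var(const uint8_t *buf, int len)
{
	uint64_t value = 0;

	for (int i = 0; i < len; i++)
		value = (value << 8) | buf[i];
	return value;
}

int main(void)
{
	uint8_t buf[6];

	encode_var(0x0102030405ULL, buf, 5);
	/* prints 102030405: the 5-byte round trip is lossless */
	printf("%llx\n", (unsigned long long)decode_var(buf, 5));
	return 0;
}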
-
-static inline u8 dccp_ndp_len(const u64 ndp)
-{
- if (likely(ndp <= 0xFF))
- return 1;
- return likely(ndp <= USHRT_MAX) ? 2 : (ndp <= UINT_MAX ? 4 : 6);
-}
-
-int dccp_insert_option(struct sk_buff *skb, const unsigned char option,
- const void *value, const unsigned char len)
-{
- unsigned char *to;
-
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN)
- return -1;
-
- DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2;
-
- to = skb_push(skb, len + 2);
- *to++ = option;
- *to++ = len + 2;
-
- memcpy(to, value, len);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_insert_option);
-
-static int dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- u64 ndp = dp->dccps_ndp_count;
-
- if (dccp_non_data_packet(skb))
- ++dp->dccps_ndp_count;
- else
- dp->dccps_ndp_count = 0;
-
- if (ndp > 0) {
- unsigned char *ptr;
- const int ndp_len = dccp_ndp_len(ndp);
- const int len = ndp_len + 2;
-
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
- return -1;
-
- DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
- ptr = skb_push(skb, len);
- *ptr++ = DCCPO_NDP_COUNT;
- *ptr++ = len;
- dccp_encode_value_var(ndp, ptr, ndp_len);
- }
-
- return 0;
-}
-
-static inline int dccp_elapsed_time_len(const u32 elapsed_time)
-{
- return elapsed_time == 0 ? 0 : elapsed_time <= 0xFFFF ? 2 : 4;
-}
-
-static int dccp_insert_option_timestamp(struct sk_buff *skb)
-{
- __be32 now = htonl(dccp_timestamp());
- /* yes this will overflow but that is the point as we want a
- * 10 usec 32 bit timer which means it wraps every 11.9 hours */
-
- return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now));
-}
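The wrap interval quoted in the comment checks out: a 32-bit counter of 10 µs ticks wraps after 2^32 × 10 µs ≈ 42 950 s, which is roughly 11.9 hours.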
-
-static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
- struct dccp_request_sock *dreq,
- struct sk_buff *skb)
-{
- __be32 tstamp_echo;
- unsigned char *to;
- u32 elapsed_time, elapsed_time_len, len;
-
- if (dreq != NULL) {
- elapsed_time = dccp_timestamp() - dreq->dreq_timestamp_time;
- tstamp_echo = htonl(dreq->dreq_timestamp_echo);
- dreq->dreq_timestamp_echo = 0;
- } else {
- elapsed_time = dccp_timestamp() - dp->dccps_timestamp_time;
- tstamp_echo = htonl(dp->dccps_timestamp_echo);
- dp->dccps_timestamp_echo = 0;
- }
-
- elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
- len = 6 + elapsed_time_len;
-
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
- return -1;
-
- DCCP_SKB_CB(skb)->dccpd_opt_len += len;
-
- to = skb_push(skb, len);
- *to++ = DCCPO_TIMESTAMP_ECHO;
- *to++ = len;
-
- memcpy(to, &tstamp_echo, 4);
- to += 4;
-
- if (elapsed_time_len == 2) {
- const __be16 var16 = htons((u16)elapsed_time);
- memcpy(to, &var16, 2);
- } else if (elapsed_time_len == 4) {
- const __be32 var32 = htonl(elapsed_time);
- memcpy(to, &var32, 4);
- }
-
- return 0;
-}
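For the 2-byte elapsed-time case, the option built above ends up on the wire as shown below (a sketch; the type value 42 is DCCPO_TIMESTAMP_ECHO per RFC 4340, 13.3):

/*  +------+------+-------------------+-----------------+
 *  | 0x2A | 0x08 |  Timestamp Echo   |  Elapsed Time   |
 *  | type | len  |     (4 bytes)     |    (2 bytes)    |
 *  +------+------+-------------------+-----------------+
 *  len = 6 + elapsed_time_len = 8 bytes in total
 */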
-
-static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- const u16 buflen = dccp_ackvec_buflen(av);
- /* Figure out how many options we need to represent the ackvec */
- const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN);
- u16 len = buflen + 2 * nr_opts;
- u8 i, nonce = 0;
- const unsigned char *tail, *from;
- unsigned char *to;
-
- if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
- DCCP_WARN("Lacking space for %u bytes on %s packet\n", len,
- dccp_packet_name(dcb->dccpd_type));
- return -1;
- }
- /*
- * Since Ack Vectors are variable-length, we cannot always predict
- * their size. To catch exception cases where the space is running out
- * on the skb, a separate Sync is scheduled to carry the Ack Vector.
- */
- if (len > DCCPAV_MIN_OPTLEN &&
- len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) {
- DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), "
- "MPS=%u ==> reduce payload size?\n", len, skb->len,
- dcb->dccpd_opt_len, dp->dccps_mss_cache);
- dp->dccps_sync_scheduled = 1;
- return 0;
- }
- dcb->dccpd_opt_len += len;
-
- to = skb_push(skb, len);
- len = buflen;
- from = av->av_buf + av->av_buf_head;
- tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
-
- for (i = 0; i < nr_opts; ++i) {
- int copylen = len;
-
- if (len > DCCP_SINGLE_OPT_MAXLEN)
- copylen = DCCP_SINGLE_OPT_MAXLEN;
-
- /*
- * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via
- * its type; ack_nonce is the sum of all individual buf_nonce's.
- */
- nonce ^= av->av_buf_nonce[i];
-
- *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i];
- *to++ = copylen + 2;
-
- /* Check if buf_head wraps */
- if (from + copylen > tail) {
- const u16 tailsize = tail - from;
-
- memcpy(to, from, tailsize);
- to += tailsize;
- len -= tailsize;
- copylen -= tailsize;
- from = av->av_buf;
- }
-
- memcpy(to, from, copylen);
- from += copylen;
- to += copylen;
- len -= copylen;
- }
- /*
- * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340.
- */
- if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce))
- return -ENOBUFS;
- return 0;
-}
-
-/**
- * dccp_insert_option_mandatory - Mandatory option (5.8.2)
- * @skb: frame into which to insert option
- *
- * Note that since we are using skb_push, this function needs to be called
- * _after_ inserting the option it is supposed to influence (stack order).
- */
-int dccp_insert_option_mandatory(struct sk_buff *skb)
-{
- if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN)
- return -1;
-
- DCCP_SKB_CB(skb)->dccpd_opt_len++;
- *(u8 *)skb_push(skb, 1) = DCCPO_MANDATORY;
- return 0;
-}
-
-/**
- * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb
- * @skb: frame to insert feature negotiation option into
- * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R
- * @feat: one out of %dccp_feature_numbers
- * @val: NN value or SP array (preferred element first) to copy
- * @len: true length of @val in bytes (excluding first element repetition)
- * @repeat_first: whether to copy the first element of @val twice
- *
- * The last argument is used to construct Confirm options, where the preferred
- * value and the preference list appear separately (RFC 4340, 6.3.1). Preference
- * lists are kept such that the preferred entry is always first, so we only need
- * to copy twice, and avoid the overhead of cloning into a bigger array.
- */
-int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
- u8 *val, u8 len, bool repeat_first)
-{
- u8 tot_len, *to;
-
- /* take the `Feature' field and possible repetition into account */
- if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) {
- DCCP_WARN("length %u for feature %u too large\n", len, feat);
- return -1;
- }
-
- if (unlikely(val == NULL || len == 0))
- len = repeat_first = false;
- tot_len = 3 + repeat_first + len;
-
- if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) {
- DCCP_WARN("packet too small for feature %d option!\n", feat);
- return -1;
- }
- DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len;
-
- to = skb_push(skb, tot_len);
- *to++ = type;
- *to++ = tot_len;
- *to++ = feat;
-
- if (repeat_first)
- *to++ = *val;
- if (len)
- memcpy(to, val, len);
- return 0;
-}
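As an example, a Confirm option for an SP feature with preference list {A, B} and repeat_first set would be pushed as six bytes (a sketch; A and B stand for arbitrary feature values):

/*  +------+---------+------+-----+-----+-----+
 *  | type | tot_len | feat |  A  |  A  |  B  |
 *  +------+---------+------+-----+-----+-----+
 *  tot_len = 3 + repeat_first + len = 3 + 1 + 2 = 6; the chosen value A
 *  appears once on its own and again as the head of the preference list
 *  (RFC 4340, 6.3.1).
 */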
-
-/* The length of all options needs to be a multiple of 4 (5.8) */
-static void dccp_insert_option_padding(struct sk_buff *skb)
-{
- int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
-
- if (padding != 0) {
- padding = 4 - padding;
- memset(skb_push(skb, padding), 0, padding);
- DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
- }
-}
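Since DCCPO_PADDING is 0, the memset() doubles as the option insertion. For example, an option length of 13 yields padding = 4 - (13 % 4) = 3, so three zero bytes are pushed and dccpd_opt_len becomes 16.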
-
-int dccp_insert_options(struct sock *sk, struct sk_buff *skb)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
-
- if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb))
- return -1;
-
- if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
-
- /* Feature Negotiation */
- if (dccp_feat_insert_opts(dp, NULL, skb))
- return -1;
-
- if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) {
- /*
- * Obtain RTT sample from Request/Response exchange.
- * This is currently used for TFRC initialisation.
- */
- if (dccp_insert_option_timestamp(skb))
- return -1;
-
- } else if (dccp_ackvec_pending(sk) &&
- dccp_insert_option_ackvec(sk, skb)) {
- return -1;
- }
- }
-
- if (dp->dccps_hc_rx_insert_options) {
- if (ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb))
- return -1;
- dp->dccps_hc_rx_insert_options = 0;
- }
-
- if (dp->dccps_timestamp_echo != 0 &&
- dccp_insert_option_timestamp_echo(dp, NULL, skb))
- return -1;
-
- dccp_insert_option_padding(skb);
- return 0;
-}
-
-int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb)
-{
- DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
-
- if (dccp_feat_insert_opts(NULL, dreq, skb))
- return -1;
-
- /* Obtain RTT sample from Response/Ack exchange (used by TFRC). */
- if (dccp_insert_option_timestamp(skb))
- return -1;
-
- if (dreq->dreq_timestamp_echo != 0 &&
- dccp_insert_option_timestamp_echo(NULL, dreq, skb))
- return -1;
-
- dccp_insert_option_padding(skb);
- return 0;
-}
diff --git a/net/dccp/output.c b/net/dccp/output.c
deleted file mode 100644
index 39cf3430177a..000000000000
--- a/net/dccp/output.c
+++ /dev/null
@@ -1,708 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/dccp/output.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#include <linux/dccp.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/sched/signal.h>
-
-#include <net/inet_sock.h>
-#include <net/sock.h>
-
-#include "ackvec.h"
-#include "ccid.h"
-#include "dccp.h"
-
-static inline void dccp_event_ack_sent(struct sock *sk)
-{
- inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
-}
-
-/* enqueue @skb on sk_send_head for retransmission, return clone to send now */
-static struct sk_buff *dccp_skb_entail(struct sock *sk, struct sk_buff *skb)
-{
- skb_set_owner_w(skb, sk);
- WARN_ON(sk->sk_send_head);
- sk->sk_send_head = skb;
- return skb_clone(sk->sk_send_head, gfp_any());
-}
-
-/*
- * All SKB's seen here are completely headerless. It is our
- * job to build the DCCP header, and pass the packet down to
- * IP so it can do the same plus pass the packet off to the
- * device.
- */
-static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
-{
- if (likely(skb != NULL)) {
- struct inet_sock *inet = inet_sk(sk);
- const struct inet_connection_sock *icsk = inet_csk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
- struct dccp_hdr *dh;
- /* XXX For now we're using only 48-bit sequence numbers */
- const u32 dccp_header_size = sizeof(*dh) +
- sizeof(struct dccp_hdr_ext) +
- dccp_packet_hdr_len(dcb->dccpd_type);
- int err, set_ack = 1;
- u64 ackno = dp->dccps_gsr;
- /*
- * Increment GSS here already in case the option code needs it.
- * Update GSS for real only if option processing below succeeds.
- */
- dcb->dccpd_seq = ADD48(dp->dccps_gss, 1);
-
- switch (dcb->dccpd_type) {
- case DCCP_PKT_DATA:
- set_ack = 0;
- fallthrough;
- case DCCP_PKT_DATAACK:
- case DCCP_PKT_RESET:
- break;
-
- case DCCP_PKT_REQUEST:
- set_ack = 0;
- /* Use ISS on the first (non-retransmitted) Request. */
- if (icsk->icsk_retransmits == 0)
- dcb->dccpd_seq = dp->dccps_iss;
- fallthrough;
-
- case DCCP_PKT_SYNC:
- case DCCP_PKT_SYNCACK:
- ackno = dcb->dccpd_ack_seq;
- fallthrough;
- default:
- /*
- * Set owner/destructor: some skbs are allocated via
- * alloc_skb (e.g. when retransmission may happen).
- * Only Data, DataAck, and Reset packets should come
- * through here with skb->sk set.
- */
- WARN_ON(skb->sk);
- skb_set_owner_w(skb, sk);
- break;
- }
-
- if (dccp_insert_options(sk, skb)) {
- kfree_skb(skb);
- return -EPROTO;
- }
-
- /* Build DCCP header and checksum it. */
- dh = dccp_zeroed_hdr(skb, dccp_header_size);
- dh->dccph_type = dcb->dccpd_type;
- dh->dccph_sport = inet->inet_sport;
- dh->dccph_dport = inet->inet_dport;
- dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4;
- dh->dccph_ccval = dcb->dccpd_ccval;
- dh->dccph_cscov = dp->dccps_pcslen;
- /* XXX For now we're using only 48-bit sequence numbers */
- dh->dccph_x = 1;
-
- dccp_update_gss(sk, dcb->dccpd_seq);
- dccp_hdr_set_seq(dh, dp->dccps_gss);
- if (set_ack)
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
-
- switch (dcb->dccpd_type) {
- case DCCP_PKT_REQUEST:
- dccp_hdr_request(skb)->dccph_req_service =
- dp->dccps_service;
- /*
- * Limit Ack window to ISS <= P.ackno <= GSS, so that
- * only Responses to Requests we sent are considered.
- */
- dp->dccps_awl = dp->dccps_iss;
- break;
- case DCCP_PKT_RESET:
- dccp_hdr_reset(skb)->dccph_reset_code =
- dcb->dccpd_reset_code;
- break;
- }
-
- icsk->icsk_af_ops->send_check(sk, skb);
-
- if (set_ack)
- dccp_event_ack_sent(sk);
-
- DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
-
- err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
- return net_xmit_eval(err);
- }
- return -ENOBUFS;
-}
-
-/**
- * dccp_determine_ccmps - Find out about CCID-specific packet-size limits
- * @dp: socket to find packet size limits of
- *
- * We only consider the HC-sender CCID for setting the CCMPS (RFC 4340, 14.),
- * since the RX CCID is restricted to feedback packets (Acks), which are small
- * in comparison with the data traffic. A value of 0 means "no current CCMPS".
- */
-static u32 dccp_determine_ccmps(const struct dccp_sock *dp)
-{
- const struct ccid *tx_ccid = dp->dccps_hc_tx_ccid;
-
- if (tx_ccid == NULL || tx_ccid->ccid_ops == NULL)
- return 0;
- return tx_ccid->ccid_ops->ccid_ccmps;
-}
-
-unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- u32 ccmps = dccp_determine_ccmps(dp);
- u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
-
- /* Account for header lengths and IPv4/v6 option overhead */
- cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
- sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
-
- /*
- * Leave enough headroom for common DCCP header options.
- * This only considers options which may appear on DCCP-Data packets, as
- * per table 3 in RFC 4340, 5.8. When running out of space for other
- * options (eg. Ack Vector which can take up to 255 bytes), it is better
- * to schedule a separate Ack. Thus we leave headroom for the following:
- * - 1 byte for Slow Receiver (11.6)
- * - 6 bytes for Timestamp (13.1)
- * - 10 bytes for Timestamp Echo (13.3)
- * - 8 bytes for NDP count (7.7, when activated)
- * - 6 bytes for Data Checksum (9.3)
- * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
- */
- cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
- (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
-
- /* And store cached results */
- icsk->icsk_pmtu_cookie = pmtu;
- WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
-
- return cur_mps;
-}
-
-EXPORT_SYMBOL_GPL(dccp_sync_mss);
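As a worked example under stated assumptions (IPv6 minimum link MTU of 1280, no extension headers so icsk_ext_hdr_len == 0, NDP counts and Ack Vectors disabled, no CCID limit):

cur_mps = 1280
	- 40                          /* sizeof(struct ipv6hdr)           */
	- 16                          /* dccp_hdr (12) + dccp_hdr_ext (4) */
	- roundup(1 + 6 + 10 + 6, 4)  /* = 24: Slow Receiver, Timestamp,
	                               * Timestamp Echo, Data Checksum    */
	= 1200;

so such a socket would cache an MPS of 1200 bytes.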
-
-void dccp_write_space(struct sock *sk)
-{
- struct socket_wq *wq;
-
- rcu_read_lock();
- wq = rcu_dereference(sk->sk_wq);
- if (skwq_has_sleeper(wq))
- wake_up_interruptible(&wq->wait);
- /* Should agree with poll, otherwise some programs break */
- if (sock_writeable(sk))
- sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT);
-
- rcu_read_unlock();
-}
-
-/**
- * dccp_wait_for_ccid - Await CCID send permission
- * @sk: socket to wait for
- * @delay: timeout in jiffies
- *
- * This is used by CCIDs which need to delay the send time in process context.
- */
-static int dccp_wait_for_ccid(struct sock *sk, unsigned long delay)
-{
- DEFINE_WAIT(wait);
- long remaining;
-
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- sk->sk_write_pending++;
- release_sock(sk);
-
- remaining = schedule_timeout(delay);
-
- lock_sock(sk);
- sk->sk_write_pending--;
- finish_wait(sk_sleep(sk), &wait);
-
- if (signal_pending(current) || sk->sk_err)
- return -1;
- return remaining;
-}
-
-/**
- * dccp_xmit_packet - Send data packet under control of CCID
- * @sk: socket to send data packet on
- *
- * Transmits next-queued payload and informs CCID to account for the packet.
- */
-static void dccp_xmit_packet(struct sock *sk)
-{
- int err, len;
- struct dccp_sock *dp = dccp_sk(sk);
- struct sk_buff *skb = dccp_qpolicy_pop(sk);
-
- if (unlikely(skb == NULL))
- return;
- len = skb->len;
-
- if (sk->sk_state == DCCP_PARTOPEN) {
- const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
- /*
- * See 8.1.5 - Handshake Completion.
- *
- * For robustness we resend Confirm options until the client has
- * entered OPEN. During the initial feature negotiation, the MPS
- * is smaller than usual, reduced by the Change/Confirm options.
- */
- if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
- DCCP_WARN("Payload too large (%d) for featneg.\n", len);
- dccp_send_ack(sk);
- dccp_feat_list_purge(&dp->dccps_featneg);
- }
-
- inet_csk_schedule_ack(sk);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- inet_csk(sk)->icsk_rto,
- DCCP_RTO_MAX);
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
- } else if (dccp_ack_pending(sk)) {
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATAACK;
- } else {
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_DATA;
- }
-
- err = dccp_transmit_skb(sk, skb);
- if (err)
- dccp_pr_debug("transmit_skb() returned err=%d\n", err);
- /*
- * Register this one as sent even if an error occurred. To the remote
- * end a local packet drop is indistinguishable from network loss, i.e.
- * any local drop will eventually be reported via receiver feedback.
- */
- ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
-
- /*
- * If the CCID needs to transfer additional header options out-of-band
- * (e.g. Ack Vectors or feature-negotiation options), it activates this
- * flag to schedule a Sync. The Sync will automatically incorporate all
- * currently pending header options, thus clearing the backlog.
- */
- if (dp->dccps_sync_scheduled)
- dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC);
-}
-
-/**
- * dccp_flush_write_queue - Drain queue at end of connection
- * @sk: socket to be drained
- * @time_budget: time allowed to drain the queue
- *
- * Since dccp_sendmsg queues packets without waiting for them to be sent, it may
- * happen that the TX queue is not empty at the end of a connection. We give the
- * HC-sender CCID a grace period of up to @time_budget jiffies. If this function
- * returns with a non-empty write queue, it will be purged later.
- */
-void dccp_flush_write_queue(struct sock *sk, long *time_budget)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct sk_buff *skb;
- long delay, rc;
-
- while (*time_budget > 0 && (skb = skb_peek(&sk->sk_write_queue))) {
- rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
- switch (ccid_packet_dequeue_eval(rc)) {
- case CCID_PACKET_WILL_DEQUEUE_LATER:
- /*
- * If the CCID determines when to send, the next sending
- * time is unknown or the CCID may not even send again
- * (e.g. the remote host crashed or Acks were lost).
- */
- DCCP_WARN("CCID did not manage to send all packets\n");
- return;
- case CCID_PACKET_DELAY:
- delay = msecs_to_jiffies(rc);
- if (delay > *time_budget)
- return;
- rc = dccp_wait_for_ccid(sk, delay);
- if (rc < 0)
- return;
- *time_budget -= (delay - rc);
- /* check again if we can send now */
- break;
- case CCID_PACKET_SEND_AT_ONCE:
- dccp_xmit_packet(sk);
- break;
- case CCID_PACKET_ERR:
- skb_dequeue(&sk->sk_write_queue);
- kfree_skb(skb);
- dccp_pr_debug("packet discarded due to err=%ld\n", rc);
- }
- }
-}
-
-void dccp_write_xmit(struct sock *sk)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct sk_buff *skb;
-
- while ((skb = dccp_qpolicy_top(sk))) {
- int rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
-
- switch (ccid_packet_dequeue_eval(rc)) {
- case CCID_PACKET_WILL_DEQUEUE_LATER:
- return;
- case CCID_PACKET_DELAY:
- sk_reset_timer(sk, &dp->dccps_xmit_timer,
- jiffies + msecs_to_jiffies(rc));
- return;
- case CCID_PACKET_SEND_AT_ONCE:
- dccp_xmit_packet(sk);
- break;
- case CCID_PACKET_ERR:
- dccp_qpolicy_drop(sk, skb);
- dccp_pr_debug("packet discarded due to err=%d\n", rc);
- }
- }
-}
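
Both loops above route the CCID verdict through ccid_packet_dequeue_eval(). As a reading aid, the sketch below restates that mapping: negative return codes mean error, zero means send immediately, small positive values are a delay in milliseconds, and larger sentinel values pass through unchanged. The sentinel constants here are assumptions modelled on net/dccp/ccid.h, not authoritative values:

    /* Hypothetical restatement of the dequeueing decision, for reading
     * the switch statements above.
     */
    enum pkt_decision {
        PKT_SEND_AT_ONCE       = 0x00000,  /* rc == 0: green light */
        PKT_DELAY_MAX          = 0x0ffff,  /* largest delay, in msecs */
        PKT_DELAY              = 0x10000,  /* positive rc: wait rc msecs */
        PKT_WILL_DEQUEUE_LATER = 0x20000,  /* CCID dequeues on its own */
        PKT_ERR                = 0x30000,  /* negative rc: drop packet */
    };

    static inline int pkt_dequeue_eval(int rc)
    {
        if (rc < 0)
            return PKT_ERR;
        if (rc == 0)
            return PKT_SEND_AT_ONCE;
        if (rc <= PKT_DELAY_MAX)
            return PKT_DELAY;
        return rc;    /* WILL_DEQUEUE_LATER etc. pass through */
    }
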
-
-/**
- * dccp_retransmit_skb - Retransmit Request, Close, or CloseReq packets
- * @sk: socket to perform retransmit on
- *
- * There are only four retransmittable packet types in DCCP:
- * - Request in client-REQUEST state (sec. 8.1.1),
- * - CloseReq in server-CLOSEREQ state (sec. 8.3),
- * - Close in node-CLOSING state (sec. 8.3),
- * - Acks in client-PARTOPEN state (sec. 8.1.5, handled by dccp_delack_timer()).
- * This function expects sk->sk_send_head to contain the original skb.
- */
-int dccp_retransmit_skb(struct sock *sk)
-{
- WARN_ON(sk->sk_send_head == NULL);
-
- if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk) != 0)
- return -EHOSTUNREACH; /* Routing failure or similar. */
-
- /* this count is used to distinguish original and retransmitted skb */
- inet_csk(sk)->icsk_retransmits++;
-
- return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC));
-}
-
-struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst,
- struct request_sock *req)
-{
- struct dccp_hdr *dh;
- struct dccp_request_sock *dreq;
- const u32 dccp_header_size = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_response);
- struct sk_buff *skb;
-
- /* sk is marked const to clearly express that we don't hold the socket
- * lock. sock_wmalloc() changes sk->sk_wmem_alloc atomically,
- * so it is safe to promote sk to non-const.
- */
- skb = sock_wmalloc((struct sock *)sk, MAX_DCCP_HEADER, 1,
- GFP_ATOMIC);
- if (!skb)
- return NULL;
-
- skb_reserve(skb, MAX_DCCP_HEADER);
-
- skb_dst_set(skb, dst_clone(dst));
-
- dreq = dccp_rsk(req);
- if (inet_rsk(req)->acked) /* increase GSS upon retransmission */
- dccp_inc_seqno(&dreq->dreq_gss);
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
- DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_gss;
-
- /* Resolve feature dependencies resulting from choice of CCID */
- if (dccp_feat_server_ccid_dependencies(dreq))
- goto response_failed;
-
- if (dccp_insert_options_rsk(dreq, skb))
- goto response_failed;
-
- /* Build and checksum header */
- dh = dccp_zeroed_hdr(skb, dccp_header_size);
-
- dh->dccph_sport = htons(inet_rsk(req)->ir_num);
- dh->dccph_dport = inet_rsk(req)->ir_rmt_port;
- dh->dccph_doff = (dccp_header_size +
- DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
- dh->dccph_type = DCCP_PKT_RESPONSE;
- dh->dccph_x = 1;
- dccp_hdr_set_seq(dh, dreq->dreq_gss);
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_gsr);
- dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
-
- dccp_csum_outgoing(skb);
-
- /* We use `acked' to remember that a Response was already sent. */
- inet_rsk(req)->acked = 1;
- DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
- return skb;
-response_failed:
- kfree_skb(skb);
- return NULL;
-}
-
-EXPORT_SYMBOL_GPL(dccp_make_response);
-
-/* answer offending packet in @rcv_skb with Reset from control socket @ctl */
-struct sk_buff *dccp_ctl_make_reset(struct sock *sk, struct sk_buff *rcv_skb)
-{
- struct dccp_hdr *rxdh = dccp_hdr(rcv_skb), *dh;
- struct dccp_skb_cb *dcb = DCCP_SKB_CB(rcv_skb);
- const u32 dccp_hdr_reset_len = sizeof(struct dccp_hdr) +
- sizeof(struct dccp_hdr_ext) +
- sizeof(struct dccp_hdr_reset);
- struct dccp_hdr_reset *dhr;
- struct sk_buff *skb;
-
- skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);
- if (skb == NULL)
- return NULL;
-
- skb_reserve(skb, sk->sk_prot->max_header);
-
- /* Swap the send and the receive. */
- dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
- dh->dccph_type = DCCP_PKT_RESET;
- dh->dccph_sport = rxdh->dccph_dport;
- dh->dccph_dport = rxdh->dccph_sport;
- dh->dccph_doff = dccp_hdr_reset_len / 4;
- dh->dccph_x = 1;
-
- dhr = dccp_hdr_reset(skb);
- dhr->dccph_reset_code = dcb->dccpd_reset_code;
-
- switch (dcb->dccpd_reset_code) {
- case DCCP_RESET_CODE_PACKET_ERROR:
- dhr->dccph_reset_data[0] = rxdh->dccph_type;
- break;
- case DCCP_RESET_CODE_OPTION_ERROR:
- case DCCP_RESET_CODE_MANDATORY_ERROR:
- memcpy(dhr->dccph_reset_data, dcb->dccpd_reset_data, 3);
- break;
- }
- /*
- * From RFC 4340, 8.3.1:
- * If P.ackno exists, set R.seqno := P.ackno + 1.
- * Else set R.seqno := 0.
- */
- if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
- dccp_hdr_set_seq(dh, ADD48(dcb->dccpd_ack_seq, 1));
- dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dcb->dccpd_seq);
-
- dccp_csum_outgoing(skb);
- return skb;
-}
-
-EXPORT_SYMBOL_GPL(dccp_ctl_make_reset);
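
The ADD48() used above implements RFC 4340's 48-bit sequence arithmetic: additions wrap modulo 2^48. The kernel's own helper lives in the DCCP headers; the self-contained version below is an illustration:

    #include <stdint.h>

    #define SEQ48_MASK 0xffffffffffffULL    /* 2^48 - 1 */

    /* Wrap-around addition in the 48-bit DCCP sequence space:
     * add48(0xffffffffffff, 1) == 0.
     */
    static inline uint64_t add48(uint64_t seq, uint64_t n)
    {
        return (seq + n) & SEQ48_MASK;
    }
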
-
-/* send Reset on established socket, to close or abort the connection */
-int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
-{
- struct sk_buff *skb;
- /*
- * FIXME: what if rebuild_header fails?
- * Should we be doing a rebuild_header here?
- */
- int err = inet_csk(sk)->icsk_af_ops->rebuild_header(sk);
-
- if (err != 0)
- return err;
-
- skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1, GFP_ATOMIC);
- if (skb == NULL)
- return -ENOBUFS;
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, sk->sk_prot->max_header);
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET;
- DCCP_SKB_CB(skb)->dccpd_reset_code = code;
-
- return dccp_transmit_skb(sk, skb);
-}
-
-/*
- * Do all connect socket setups that can be done AF independent.
- */
-int dccp_connect(struct sock *sk)
-{
- struct sk_buff *skb;
- struct dccp_sock *dp = dccp_sk(sk);
- struct dst_entry *dst = __sk_dst_get(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- sk->sk_err = 0;
- sock_reset_flag(sk, SOCK_DONE);
-
- dccp_sync_mss(sk, dst_mtu(dst));
-
- /* do not connect if feature negotiation setup fails */
- if (dccp_feat_finalise_settings(dccp_sk(sk)))
- return -EPROTO;
-
- /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
- dp->dccps_gar = dp->dccps_iss;
-
- skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
- if (unlikely(skb == NULL))
- return -ENOBUFS;
-
- /* Reserve space for headers. */
- skb_reserve(skb, sk->sk_prot->max_header);
-
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
-
- dccp_transmit_skb(sk, dccp_skb_entail(sk, skb));
- DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
-
- /* Timer for repeating the REQUEST until an answer. */
- icsk->icsk_retransmits = 0;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- icsk->icsk_rto, DCCP_RTO_MAX);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_connect);
-
-void dccp_send_ack(struct sock *sk)
-{
- /* If we have been reset, we may not send again. */
- if (sk->sk_state != DCCP_CLOSED) {
- struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header,
- GFP_ATOMIC);
-
- if (skb == NULL) {
- inet_csk_schedule_ack(sk);
- inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
- TCP_DELACK_MAX,
- DCCP_RTO_MAX);
- return;
- }
-
- /* Reserve space for headers */
- skb_reserve(skb, sk->sk_prot->max_header);
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
- dccp_transmit_skb(sk, skb);
- }
-}
-
-EXPORT_SYMBOL_GPL(dccp_send_ack);
-
-#if 0
-/* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */
-void dccp_send_delayed_ack(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- /*
- * FIXME: tune this timer. elapsed time fixes the skew, so no problem
- * with using 2s, and active senders also piggyback the ACK into a
- * DATAACK packet, so this is really for quiescent senders.
- */
- unsigned long timeout = jiffies + 2 * HZ;
-
- /* Use the new timeout only if there wasn't an earlier one. */
- if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {
- /* If delack timer was blocked or is about to expire,
- * send ACK now.
- *
- * FIXME: check the "about to expire" part
- */
- if (icsk->icsk_ack.blocked) {
- dccp_send_ack(sk);
- return;
- }
-
- if (!time_before(timeout, icsk_delack_timeout(icsk)))
- timeout = icsk_delack_timeout(icsk);
- }
- icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
- sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
-}
-#endif
-
-void dccp_send_sync(struct sock *sk, const u64 ackno,
- const enum dccp_pkt_type pkt_type)
-{
- /*
- * We are not putting this on the write queue, so
- * dccp_transmit_skb() will set the ownership to this
- * sock.
- */
- struct sk_buff *skb = alloc_skb(sk->sk_prot->max_header, GFP_ATOMIC);
-
- if (skb == NULL) {
- /* FIXME: how to make sure the sync is sent? */
- DCCP_CRIT("could not send %s", dccp_packet_name(pkt_type));
- return;
- }
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, sk->sk_prot->max_header);
- DCCP_SKB_CB(skb)->dccpd_type = pkt_type;
- DCCP_SKB_CB(skb)->dccpd_ack_seq = ackno;
-
- /*
- * Clear the flag in case the Sync was scheduled for out-of-band data,
- * such as carrying a long Ack Vector.
- */
- dccp_sk(sk)->dccps_sync_scheduled = 0;
-
- dccp_transmit_skb(sk, skb);
-}
-
-EXPORT_SYMBOL_GPL(dccp_send_sync);
-
-/*
- * Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This
- * cannot be allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under
- * any circumstances.
- */
-void dccp_send_close(struct sock *sk, const int active)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct sk_buff *skb;
- const gfp_t prio = active ? GFP_KERNEL : GFP_ATOMIC;
-
- skb = alloc_skb(sk->sk_prot->max_header, prio);
- if (skb == NULL)
- return;
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, sk->sk_prot->max_header);
- if (dp->dccps_role == DCCP_ROLE_SERVER && !dp->dccps_server_timewait)
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSEREQ;
- else
- DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
-
- if (active) {
- skb = dccp_skb_entail(sk, skb);
- /*
- * Retransmission timer for active-close: RFC 4340, 8.3 requires
- * to retransmit the Close/CloseReq until the CLOSING/CLOSEREQ
- * state can be left. The initial timeout is 2 RTTs.
- * Since RTT measurement is done by the CCIDs, there is no easy
- * way to get an RTT sample. The fallback RTT from RFC 4340, 3.4
- * is too low (200ms); we use a high value to avoid unnecessary
- * retransmissions when the link RTT is > 0.2 seconds.
- * FIXME: Let main module sample RTTs and use that instead.
- */
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
- }
- dccp_transmit_skb(sk, skb);
-}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
deleted file mode 100644
index fcc5c9d64f46..000000000000
--- a/net/dccp/proto.c
+++ /dev/null
@@ -1,1293 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/dccp/proto.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#include <linux/dccp.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/in.h>
-#include <linux/if_arp.h>
-#include <linux/init.h>
-#include <linux/random.h>
-#include <linux/slab.h>
-#include <net/checksum.h>
-
-#include <net/inet_sock.h>
-#include <net/inet_common.h>
-#include <net/sock.h>
-#include <net/xfrm.h>
-
-#include <asm/ioctls.h>
-#include <linux/spinlock.h>
-#include <linux/timer.h>
-#include <linux/delay.h>
-#include <linux/poll.h>
-
-#include "ccid.h"
-#include "dccp.h"
-#include "feat.h"
-
-#define CREATE_TRACE_POINTS
-#include "trace.h"
-
-DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
-
-EXPORT_SYMBOL_GPL(dccp_statistics);
-
-DEFINE_PER_CPU(unsigned int, dccp_orphan_count);
-EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count);
-
-struct inet_hashinfo dccp_hashinfo;
-EXPORT_SYMBOL_GPL(dccp_hashinfo);
-
-/* The maximum TX queue length, in packets. 0 means no limit. */
-int sysctl_dccp_tx_qlen __read_mostly = 5;
-
-#ifdef CONFIG_IP_DCCP_DEBUG
-static const char *dccp_state_name(const int state)
-{
- static const char *const dccp_state_names[] = {
- [DCCP_OPEN] = "OPEN",
- [DCCP_REQUESTING] = "REQUESTING",
- [DCCP_PARTOPEN] = "PARTOPEN",
- [DCCP_LISTEN] = "LISTEN",
- [DCCP_RESPOND] = "RESPOND",
- [DCCP_CLOSING] = "CLOSING",
- [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
- [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
- [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
- [DCCP_TIME_WAIT] = "TIME_WAIT",
- [DCCP_CLOSED] = "CLOSED",
- };
-
- if (state >= DCCP_MAX_STATES)
- return "INVALID STATE!";
- else
- return dccp_state_names[state];
-}
-#endif
-
-void dccp_set_state(struct sock *sk, const int state)
-{
- const int oldstate = sk->sk_state;
-
- dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
- dccp_state_name(oldstate), dccp_state_name(state));
- WARN_ON(state == oldstate);
-
- switch (state) {
- case DCCP_OPEN:
- if (oldstate != DCCP_OPEN)
- DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
- /* Client retransmits all Confirm options until entering OPEN */
- if (oldstate == DCCP_PARTOPEN)
- dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
- break;
-
- case DCCP_CLOSED:
- if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
- oldstate == DCCP_CLOSING)
- DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
-
- sk->sk_prot->unhash(sk);
- if (inet_csk(sk)->icsk_bind_hash != NULL &&
- !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
- inet_put_port(sk);
- fallthrough;
- default:
- if (oldstate == DCCP_OPEN)
- DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
- }
-
- /* Change state AFTER socket is unhashed to avoid closed
- * socket sitting in hash tables.
- */
- inet_sk_set_state(sk, state);
-}
-
-EXPORT_SYMBOL_GPL(dccp_set_state);
-
-static void dccp_finish_passive_close(struct sock *sk)
-{
- switch (sk->sk_state) {
- case DCCP_PASSIVE_CLOSE:
- /* Node (client or server) has received Close packet. */
- dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
- dccp_set_state(sk, DCCP_CLOSED);
- break;
- case DCCP_PASSIVE_CLOSEREQ:
- /*
- * Client received CloseReq. We set the `active' flag so that
- * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
- */
- dccp_send_close(sk, 1);
- dccp_set_state(sk, DCCP_CLOSING);
- }
-}
-
-void dccp_done(struct sock *sk)
-{
- dccp_set_state(sk, DCCP_CLOSED);
- dccp_clear_xmit_timers(sk);
-
- sk->sk_shutdown = SHUTDOWN_MASK;
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- else
- inet_csk_destroy_sock(sk);
-}
-
-EXPORT_SYMBOL_GPL(dccp_done);
-
-const char *dccp_packet_name(const int type)
-{
- static const char *const dccp_packet_names[] = {
- [DCCP_PKT_REQUEST] = "REQUEST",
- [DCCP_PKT_RESPONSE] = "RESPONSE",
- [DCCP_PKT_DATA] = "DATA",
- [DCCP_PKT_ACK] = "ACK",
- [DCCP_PKT_DATAACK] = "DATAACK",
- [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
- [DCCP_PKT_CLOSE] = "CLOSE",
- [DCCP_PKT_RESET] = "RESET",
- [DCCP_PKT_SYNC] = "SYNC",
- [DCCP_PKT_SYNCACK] = "SYNCACK",
- };
-
- if (type >= DCCP_NR_PKT_TYPES)
- return "INVALID";
- else
- return dccp_packet_names[type];
-}
-
-EXPORT_SYMBOL_GPL(dccp_packet_name);
-
-void dccp_destruct_common(struct sock *sk)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- dp->dccps_hc_tx_ccid = NULL;
-}
-EXPORT_SYMBOL_GPL(dccp_destruct_common);
-
-static void dccp_sk_destruct(struct sock *sk)
-{
- dccp_destruct_common(sk);
- inet_sock_destruct(sk);
-}
-
-int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- pr_warn_once("DCCP is deprecated and scheduled to be removed in 2025, "
- "please contact the netdev mailing list\n");
-
- icsk->icsk_rto = DCCP_TIMEOUT_INIT;
- icsk->icsk_syn_retries = sysctl_dccp_request_retries;
- sk->sk_state = DCCP_CLOSED;
- sk->sk_write_space = dccp_write_space;
- sk->sk_destruct = dccp_sk_destruct;
- icsk->icsk_sync_mss = dccp_sync_mss;
- dp->dccps_mss_cache = 536;
- dp->dccps_rate_last = jiffies;
- dp->dccps_role = DCCP_ROLE_UNDEFINED;
- dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
- dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
-
- dccp_init_xmit_timers(sk);
-
- INIT_LIST_HEAD(&dp->dccps_featneg);
- /* control socket doesn't need feat nego */
- if (likely(ctl_sock_initialized))
- return dccp_feat_init(sk);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_init_sock);
-
-void dccp_destroy_sock(struct sock *sk)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- __skb_queue_purge(&sk->sk_write_queue);
- if (sk->sk_send_head != NULL) {
- kfree_skb(sk->sk_send_head);
- sk->sk_send_head = NULL;
- }
-
- /* Clean up a referenced DCCP bind bucket. */
- if (inet_csk(sk)->icsk_bind_hash != NULL)
- inet_put_port(sk);
-
- kfree(dp->dccps_service_list);
- dp->dccps_service_list = NULL;
-
- if (dp->dccps_hc_rx_ackvec != NULL) {
- dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
- dp->dccps_hc_rx_ackvec = NULL;
- }
- ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- dp->dccps_hc_rx_ccid = NULL;
-
- /* clean up feature negotiation state */
- dccp_feat_list_purge(&dp->dccps_featneg);
-}
-
-EXPORT_SYMBOL_GPL(dccp_destroy_sock);
-
-static inline int dccp_need_reset(int state)
-{
- return state != DCCP_CLOSED && state != DCCP_LISTEN &&
- state != DCCP_REQUESTING;
-}
-
-int dccp_disconnect(struct sock *sk, int flags)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_sock *inet = inet_sk(sk);
- struct dccp_sock *dp = dccp_sk(sk);
- const int old_state = sk->sk_state;
-
- if (old_state != DCCP_CLOSED)
- dccp_set_state(sk, DCCP_CLOSED);
-
- /*
- * This corresponds to the ABORT function of RFC 793, sec. 3.8.
- * TCP uses an RST segment; DCCP uses a Reset packet with Code 2, "Aborted".
- */
- if (old_state == DCCP_LISTEN) {
- inet_csk_listen_stop(sk);
- } else if (dccp_need_reset(old_state)) {
- dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
- sk->sk_err = ECONNRESET;
- } else if (old_state == DCCP_REQUESTING)
- sk->sk_err = ECONNRESET;
-
- dccp_clear_xmit_timers(sk);
- ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- dp->dccps_hc_rx_ccid = NULL;
-
- __skb_queue_purge(&sk->sk_receive_queue);
- __skb_queue_purge(&sk->sk_write_queue);
- if (sk->sk_send_head != NULL) {
- __kfree_skb(sk->sk_send_head);
- sk->sk_send_head = NULL;
- }
-
- inet->inet_dport = 0;
-
- inet_bhash2_reset_saddr(sk);
-
- sk->sk_shutdown = 0;
- sock_reset_flag(sk, SOCK_DONE);
-
- icsk->icsk_backoff = 0;
- inet_csk_delack_init(sk);
- __sk_dst_reset(sk);
-
- WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
-
- sk_error_report(sk);
- return 0;
-}
-
-EXPORT_SYMBOL_GPL(dccp_disconnect);
-
-/*
- * Wait for a DCCP event.
- *
- * Note that we don't need to lock the socket, as the upper poll layers
- * take care of normal races (between the test and the event) and we don't
- * go look at any of the socket buffers directly.
- */
-__poll_t dccp_poll(struct file *file, struct socket *sock,
- poll_table *wait)
-{
- struct sock *sk = sock->sk;
- __poll_t mask;
- u8 shutdown;
- int state;
-
- sock_poll_wait(file, sock, wait);
-
- state = inet_sk_state_load(sk);
- if (state == DCCP_LISTEN)
- return inet_csk_listen_poll(sk);
-
- /* Socket is not locked. We are protected from async events
-    by the poll logic, and correct handling of state changes
-    made by other threads is impossible in any case.
- */
-
- mask = 0;
- if (READ_ONCE(sk->sk_err))
- mask = EPOLLERR;
- shutdown = READ_ONCE(sk->sk_shutdown);
-
- if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
- mask |= EPOLLHUP;
- if (shutdown & RCV_SHUTDOWN)
- mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
-
- /* Connected? */
- if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
- if (atomic_read(&sk->sk_rmem_alloc) > 0)
- mask |= EPOLLIN | EPOLLRDNORM;
-
- if (!(shutdown & SEND_SHUTDOWN)) {
- if (sk_stream_is_writeable(sk)) {
- mask |= EPOLLOUT | EPOLLWRNORM;
- } else { /* send SIGIO later */
- sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-
- /* Race breaker. If space is freed after
- * wspace test but before the flags are set,
- * IO signal will be lost.
- */
- if (sk_stream_is_writeable(sk))
- mask |= EPOLLOUT | EPOLLWRNORM;
- }
- }
- }
- return mask;
-}
-EXPORT_SYMBOL_GPL(dccp_poll);
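
From userspace, the mask computed above is consumed through the ordinary poll(2) interface; nothing DCCP-specific is involved beyond the socket itself. A minimal sketch (the helper name is illustrative):

    #include <poll.h>

    /* Wait up to 5 s until a connected DCCP socket is readable or
     * writable; on return > 0, pfd.revents holds the mask computed above.
     */
    static int wait_for_io(int fd)
    {
        struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };

        return poll(&pfd, 1, 5000);
    }
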
-
-int dccp_ioctl(struct sock *sk, int cmd, int *karg)
-{
- int rc = -ENOTCONN;
-
- lock_sock(sk);
-
- if (sk->sk_state == DCCP_LISTEN)
- goto out;
-
- switch (cmd) {
- case SIOCOUTQ: {
- *karg = sk_wmem_alloc_get(sk);
- /* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP
- * and is always 0, as with UDP.
- */
-
- rc = 0;
- }
- break;
- case SIOCINQ: {
- struct sk_buff *skb;
- *karg = 0;
-
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb != NULL) {
- /*
- * We will only return the amount of this packet since
- * that is all that will be read.
- */
- *karg = skb->len;
- }
- rc = 0;
- }
- break;
- default:
- rc = -ENOIOCTLCMD;
- break;
- }
-out:
- release_sock(sk);
- return rc;
-}
-
-EXPORT_SYMBOL_GPL(dccp_ioctl);
-
-static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
- sockptr_t optval, unsigned int optlen)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_service_list *sl = NULL;
-
- if (service == DCCP_SERVICE_INVALID_VALUE ||
- optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
- return -EINVAL;
-
- if (optlen > sizeof(service)) {
- sl = kmalloc(optlen, GFP_KERNEL);
- if (sl == NULL)
- return -ENOMEM;
-
- sl->dccpsl_nr = optlen / sizeof(u32) - 1;
- if (copy_from_sockptr_offset(sl->dccpsl_list, optval,
- sizeof(service), optlen - sizeof(service)) ||
- dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
- kfree(sl);
- return -EFAULT;
- }
- }
-
- lock_sock(sk);
- dp->dccps_service = service;
-
- kfree(dp->dccps_service_list);
-
- dp->dccps_service_list = sl;
- release_sock(sk);
- return 0;
-}
-
-static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
-{
- u8 *list, len;
- int i, rc;
-
- if (cscov < 0 || cscov > 15)
- return -EINVAL;
- /*
- * Populate a list of permissible values, in the range cscov...15. This
- * is necessary since feature negotiation of single values only works if
- * both sides happen to choose the same value. Since the list starts
- * lowest-value first, negotiation will pick the smallest shared value.
- */
- if (cscov == 0)
- return 0;
- len = 16 - cscov;
-
- list = kmalloc(len, GFP_KERNEL);
- if (list == NULL)
- return -ENOBUFS;
-
- for (i = 0; i < len; i++)
- list[i] = cscov++;
-
- rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
-
- if (rc == 0) {
- if (rx)
- dccp_sk(sk)->dccps_pcrlen = cscov;
- else
- dccp_sk(sk)->dccps_pcslen = cscov;
- }
- kfree(list);
- return rc;
-}
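
A worked example of the list built above: for cscov = 12, len = 16 - 12 = 4, and the loop produces the candidate set {12, 13, 14, 15} with the preferred (lowest) value first, so negotiation settles on the smallest coverage both endpoints admit:

    /* Equivalent of the loop above for cscov = 12. */
    unsigned char list[4];
    int i, v;

    for (i = 0, v = 12; i < 4; i++)
        list[i] = v++;        /* list = {12, 13, 14, 15} */
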
-
-static int dccp_setsockopt_ccid(struct sock *sk, int type,
- sockptr_t optval, unsigned int optlen)
-{
- u8 *val;
- int rc = 0;
-
- if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
- return -EINVAL;
-
- val = memdup_sockptr(optval, optlen);
- if (IS_ERR(val))
- return PTR_ERR(val);
-
- lock_sock(sk);
- if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
- rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
-
- if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
- rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
- release_sock(sk);
-
- kfree(val);
- return rc;
-}
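
Userspace reaches this path with a plain setsockopt() on SOL_DCCP, passing an ordered array of u8 CCID preferences, as described in Documentation/networking/dccp.rst. A sketch (the wrapper name is illustrative):

    #include <linux/dccp.h>
    #include <sys/socket.h>

    /* Prefer CCID-2 (TCP-like), fall back to CCID-3 (TFRC), for both
     * the TX and RX halves of the connection.
     */
    static int set_ccid_prefs(int fd)
    {
        __u8 prefs[] = { DCCPC_CCID2, DCCPC_CCID3 };

        return setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID,
                          prefs, sizeof(prefs));
    }
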
-
-static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
- sockptr_t optval, unsigned int optlen)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- int val, err = 0;
-
- switch (optname) {
- case DCCP_SOCKOPT_PACKET_SIZE:
- DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
- return 0;
- case DCCP_SOCKOPT_CHANGE_L:
- case DCCP_SOCKOPT_CHANGE_R:
- DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
- return 0;
- case DCCP_SOCKOPT_CCID:
- case DCCP_SOCKOPT_RX_CCID:
- case DCCP_SOCKOPT_TX_CCID:
- return dccp_setsockopt_ccid(sk, optname, optval, optlen);
- }
-
- if (optlen < (int)sizeof(int))
- return -EINVAL;
-
- if (copy_from_sockptr(&val, optval, sizeof(int)))
- return -EFAULT;
-
- if (optname == DCCP_SOCKOPT_SERVICE)
- return dccp_setsockopt_service(sk, val, optval, optlen);
-
- lock_sock(sk);
- switch (optname) {
- case DCCP_SOCKOPT_SERVER_TIMEWAIT:
- if (dp->dccps_role != DCCP_ROLE_SERVER)
- err = -EOPNOTSUPP;
- else
- dp->dccps_server_timewait = (val != 0);
- break;
- case DCCP_SOCKOPT_SEND_CSCOV:
- err = dccp_setsockopt_cscov(sk, val, false);
- break;
- case DCCP_SOCKOPT_RECV_CSCOV:
- err = dccp_setsockopt_cscov(sk, val, true);
- break;
- case DCCP_SOCKOPT_QPOLICY_ID:
- if (sk->sk_state != DCCP_CLOSED)
- err = -EISCONN;
- else if (val < 0 || val >= DCCPQ_POLICY_MAX)
- err = -EINVAL;
- else
- dp->dccps_qpolicy = val;
- break;
- case DCCP_SOCKOPT_QPOLICY_TXQLEN:
- if (val < 0)
- err = -EINVAL;
- else
- dp->dccps_tx_qlen = val;
- break;
- default:
- err = -ENOPROTOOPT;
- break;
- }
- release_sock(sk);
-
- return err;
-}
-
-int dccp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
- unsigned int optlen)
-{
- if (level != SOL_DCCP)
- return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
- optname, optval,
- optlen);
- return do_dccp_setsockopt(sk, level, optname, optval, optlen);
-}
-
-EXPORT_SYMBOL_GPL(dccp_setsockopt);
-
-static int dccp_getsockopt_service(struct sock *sk, int len,
- __be32 __user *optval,
- int __user *optlen)
-{
- const struct dccp_sock *dp = dccp_sk(sk);
- const struct dccp_service_list *sl;
- int err = -ENOENT, slen = 0, total_len = sizeof(u32);
-
- lock_sock(sk);
- if ((sl = dp->dccps_service_list) != NULL) {
- slen = sl->dccpsl_nr * sizeof(u32);
- total_len += slen;
- }
-
- err = -EINVAL;
- if (total_len > len)
- goto out;
-
- err = 0;
- if (put_user(total_len, optlen) ||
- put_user(dp->dccps_service, optval) ||
- (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
- err = -EFAULT;
-out:
- release_sock(sk);
- return err;
-}
-
-static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- struct dccp_sock *dp;
- int val, len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- if (len < (int)sizeof(int))
- return -EINVAL;
-
- dp = dccp_sk(sk);
-
- switch (optname) {
- case DCCP_SOCKOPT_PACKET_SIZE:
- DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
- return 0;
- case DCCP_SOCKOPT_SERVICE:
- return dccp_getsockopt_service(sk, len,
- (__be32 __user *)optval, optlen);
- case DCCP_SOCKOPT_GET_CUR_MPS:
- val = READ_ONCE(dp->dccps_mss_cache);
- break;
- case DCCP_SOCKOPT_AVAILABLE_CCIDS:
- return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
- case DCCP_SOCKOPT_TX_CCID:
- val = ccid_get_current_tx_ccid(dp);
- if (val < 0)
- return -ENOPROTOOPT;
- break;
- case DCCP_SOCKOPT_RX_CCID:
- val = ccid_get_current_rx_ccid(dp);
- if (val < 0)
- return -ENOPROTOOPT;
- break;
- case DCCP_SOCKOPT_SERVER_TIMEWAIT:
- val = dp->dccps_server_timewait;
- break;
- case DCCP_SOCKOPT_SEND_CSCOV:
- val = dp->dccps_pcslen;
- break;
- case DCCP_SOCKOPT_RECV_CSCOV:
- val = dp->dccps_pcrlen;
- break;
- case DCCP_SOCKOPT_QPOLICY_ID:
- val = dp->dccps_qpolicy;
- break;
- case DCCP_SOCKOPT_QPOLICY_TXQLEN:
- val = dp->dccps_tx_qlen;
- break;
- case 128 ... 191:
- return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
- len, (u32 __user *)optval, optlen);
- case 192 ... 255:
- return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
- len, (u32 __user *)optval, optlen);
- default:
- return -ENOPROTOOPT;
- }
-
- len = sizeof(val);
- if (put_user(len, optlen) || copy_to_user(optval, &val, len))
- return -EFAULT;
-
- return 0;
-}
-
-int dccp_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
-{
- if (level != SOL_DCCP)
- return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
- optname, optval,
- optlen);
- return do_dccp_getsockopt(sk, level, optname, optval, optlen);
-}
-
-EXPORT_SYMBOL_GPL(dccp_getsockopt);
-
-static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
-{
- struct cmsghdr *cmsg;
-
- /*
- * Assign an (opaque) qpolicy priority value to skb->priority.
- *
- * We are overloading this skb field for use with the qpolicy subsystem.
- * The skb->priority is normally used for the SO_PRIORITY option, which
- * is initialised from sk_priority. Since the assignment of sk_priority
- * to skb->priority happens later (on layer 3), we overload this field
- * for use with queueing priorities as long as the skb is on layer 4.
- * The default priority value (if nothing is set) is 0.
- */
- skb->priority = 0;
-
- for_each_cmsghdr(cmsg, msg) {
- if (!CMSG_OK(msg, cmsg))
- return -EINVAL;
-
- if (cmsg->cmsg_level != SOL_DCCP)
- continue;
-
- if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
- !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
- return -EINVAL;
-
- switch (cmsg->cmsg_type) {
- case DCCP_SCM_PRIORITY:
- if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
- return -EINVAL;
- skb->priority = *(__u32 *)CMSG_DATA(cmsg);
- break;
- default:
- return -EINVAL;
- }
- }
- return 0;
-}
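
The control message parsed above is attached from userspace as a SOL_DCCP / DCCP_SCM_PRIORITY cmsg carrying a __u32, after DCCP_SOCKOPT_QPOLICY_ID has selected the priority policy. A sketch of the sending side (helper name illustrative, error handling trimmed):

    #include <linux/dccp.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Send one packet with an (opaque) qpolicy priority attached. */
    static ssize_t send_prio(int fd, const void *buf, size_t len, __u32 prio)
    {
        char cbuf[CMSG_SPACE(sizeof(prio))];
        struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
        struct msghdr msg = {
            .msg_iov = &iov, .msg_iovlen = 1,
            .msg_control = cbuf, .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);

        cmsg->cmsg_level = SOL_DCCP;
        cmsg->cmsg_type  = DCCP_SCM_PRIORITY;
        cmsg->cmsg_len   = CMSG_LEN(sizeof(prio));
        memcpy(CMSG_DATA(cmsg), &prio, sizeof(prio));

        return sendmsg(fd, &msg, 0);
    }
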
-
-int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
-{
- const struct dccp_sock *dp = dccp_sk(sk);
- const int flags = msg->msg_flags;
- const int noblock = flags & MSG_DONTWAIT;
- struct sk_buff *skb;
- int rc, size;
- long timeo;
-
- trace_dccp_probe(sk, len);
-
- if (len > READ_ONCE(dp->dccps_mss_cache))
- return -EMSGSIZE;
-
- lock_sock(sk);
-
- timeo = sock_sndtimeo(sk, noblock);
-
- /*
- * We have to use sk_stream_wait_connect here to set sk_write_pending,
- * so that the trick in dccp_rcv_request_sent_state_process works.
- */
- /* Wait for a connection to finish. */
- if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
- if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
- goto out_release;
-
- size = sk->sk_prot->max_header + len;
- release_sock(sk);
- skb = sock_alloc_send_skb(sk, size, noblock, &rc);
- lock_sock(sk);
- if (skb == NULL)
- goto out_release;
-
- if (dccp_qpolicy_full(sk)) {
- rc = -EAGAIN;
- goto out_discard;
- }
-
- if (sk->sk_state == DCCP_CLOSED) {
- rc = -ENOTCONN;
- goto out_discard;
- }
-
- /* We need to check dccps_mss_cache after socket is locked. */
- if (len > dp->dccps_mss_cache) {
- rc = -EMSGSIZE;
- goto out_discard;
- }
-
- skb_reserve(skb, sk->sk_prot->max_header);
- rc = memcpy_from_msg(skb_put(skb, len), msg, len);
- if (rc != 0)
- goto out_discard;
-
- rc = dccp_msghdr_parse(msg, skb);
- if (rc != 0)
- goto out_discard;
-
- dccp_qpolicy_push(sk, skb);
- /*
- * The xmit_timer is set if the TX CCID is rate-based and will expire
- * when congestion control permits releasing further packets into the
- * network. Window-based CCIDs do not use this timer.
- */
- if (!timer_pending(&dp->dccps_xmit_timer))
- dccp_write_xmit(sk);
-out_release:
- release_sock(sk);
- return rc ? : len;
-out_discard:
- kfree_skb(skb);
- goto out_release;
-}
-
-EXPORT_SYMBOL_GPL(dccp_sendmsg);
-
-int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
- int *addr_len)
-{
- const struct dccp_hdr *dh;
- long timeo;
-
- lock_sock(sk);
-
- if (sk->sk_state == DCCP_LISTEN) {
- len = -ENOTCONN;
- goto out;
- }
-
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- do {
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
-
- if (skb == NULL)
- goto verify_sock_status;
-
- dh = dccp_hdr(skb);
-
- switch (dh->dccph_type) {
- case DCCP_PKT_DATA:
- case DCCP_PKT_DATAACK:
- goto found_ok_skb;
-
- case DCCP_PKT_CLOSE:
- case DCCP_PKT_CLOSEREQ:
- if (!(flags & MSG_PEEK))
- dccp_finish_passive_close(sk);
- fallthrough;
- case DCCP_PKT_RESET:
- dccp_pr_debug("found fin (%s) ok!\n",
- dccp_packet_name(dh->dccph_type));
- len = 0;
- goto found_fin_ok;
- default:
- dccp_pr_debug("packet_type=%s\n",
- dccp_packet_name(dh->dccph_type));
- sk_eat_skb(sk, skb);
- }
-verify_sock_status:
- if (sock_flag(sk, SOCK_DONE)) {
- len = 0;
- break;
- }
-
- if (sk->sk_err) {
- len = sock_error(sk);
- break;
- }
-
- if (sk->sk_shutdown & RCV_SHUTDOWN) {
- len = 0;
- break;
- }
-
- if (sk->sk_state == DCCP_CLOSED) {
- if (!sock_flag(sk, SOCK_DONE)) {
- /* This occurs when the user tries to read
- * from a never-connected socket.
- */
- len = -ENOTCONN;
- break;
- }
- len = 0;
- break;
- }
-
- if (!timeo) {
- len = -EAGAIN;
- break;
- }
-
- if (signal_pending(current)) {
- len = sock_intr_errno(timeo);
- break;
- }
-
- sk_wait_data(sk, &timeo, NULL);
- continue;
- found_ok_skb:
- if (len > skb->len)
- len = skb->len;
- else if (len < skb->len)
- msg->msg_flags |= MSG_TRUNC;
-
- if (skb_copy_datagram_msg(skb, 0, msg, len)) {
- /* Exception. Bailout! */
- len = -EFAULT;
- break;
- }
- if (flags & MSG_TRUNC)
- len = skb->len;
- found_fin_ok:
- if (!(flags & MSG_PEEK))
- sk_eat_skb(sk, skb);
- break;
- } while (1);
-out:
- release_sock(sk);
- return len;
-}
-
-EXPORT_SYMBOL_GPL(dccp_recvmsg);
-
-int inet_dccp_listen(struct socket *sock, int backlog)
-{
- struct sock *sk = sock->sk;
- unsigned char old_state;
- int err;
-
- lock_sock(sk);
-
- err = -EINVAL;
- if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
- goto out;
-
- old_state = sk->sk_state;
- if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
- goto out;
-
- WRITE_ONCE(sk->sk_max_ack_backlog, backlog);
- /* Really, if the socket is already in listen state
- * we can only allow the backlog to be adjusted.
- */
- if (old_state != DCCP_LISTEN) {
- struct dccp_sock *dp = dccp_sk(sk);
-
- dp->dccps_role = DCCP_ROLE_LISTEN;
-
- /* do not start to listen if feature negotiation setup fails */
- if (dccp_feat_finalise_settings(dp)) {
- err = -EPROTO;
- goto out;
- }
-
- err = inet_csk_listen_start(sk);
- if (err)
- goto out;
- }
- err = 0;
-
-out:
- release_sock(sk);
- return err;
-}
-
-EXPORT_SYMBOL_GPL(inet_dccp_listen);
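
A listener reaching this function must also carry a service code: DCCP requires DCCP_SOCKOPT_SERVICE to be set before listen() (and before connect() on the client). A minimal server-side sketch, with an arbitrary example service code and error handling trimmed:

    #include <arpa/inet.h>
    #include <linux/dccp.h>
    #include <netinet/in.h>
    #include <stdint.h>
    #include <sys/socket.h>

    static int dccp_listener(uint16_t port)
    {
        int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
        uint32_t service = htonl(42);    /* arbitrary example code */
        struct sockaddr_in sa = {
            .sin_family = AF_INET,
            .sin_port   = htons(port),
        };

        setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
                   &service, sizeof(service));
        bind(fd, (struct sockaddr *)&sa, sizeof(sa));
        listen(fd, 16);
        return fd;
    }
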
-
-static void dccp_terminate_connection(struct sock *sk)
-{
- u8 next_state = DCCP_CLOSED;
-
- switch (sk->sk_state) {
- case DCCP_PASSIVE_CLOSE:
- case DCCP_PASSIVE_CLOSEREQ:
- dccp_finish_passive_close(sk);
- break;
- case DCCP_PARTOPEN:
- dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
- inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
- fallthrough;
- case DCCP_OPEN:
- dccp_send_close(sk, 1);
-
- if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
- !dccp_sk(sk)->dccps_server_timewait)
- next_state = DCCP_ACTIVE_CLOSEREQ;
- else
- next_state = DCCP_CLOSING;
- fallthrough;
- default:
- dccp_set_state(sk, next_state);
- }
-}
-
-void dccp_close(struct sock *sk, long timeout)
-{
- struct dccp_sock *dp = dccp_sk(sk);
- struct sk_buff *skb;
- u32 data_was_unread = 0;
- int state;
-
- lock_sock(sk);
-
- sk->sk_shutdown = SHUTDOWN_MASK;
-
- if (sk->sk_state == DCCP_LISTEN) {
- dccp_set_state(sk, DCCP_CLOSED);
-
- /* Special case. */
- inet_csk_listen_stop(sk);
-
- goto adjudge_to_death;
- }
-
- sk_stop_timer(sk, &dp->dccps_xmit_timer);
-
- /*
- * We need to flush the receive buffers. We do this only on the
- * descriptor close, not protocol-sourced closes, because the
- * reader process may not have drained the data yet!
- */
- while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- data_was_unread += skb->len;
- __kfree_skb(skb);
- }
-
- /* If socket has been already reset kill it. */
- if (sk->sk_state == DCCP_CLOSED)
- goto adjudge_to_death;
-
- if (data_was_unread) {
- /* Unread data was tossed, send an appropriate Reset Code */
- DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
- dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
- dccp_set_state(sk, DCCP_CLOSED);
- } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
- /* Check zero linger _after_ checking for unread data. */
- sk->sk_prot->disconnect(sk, 0);
- } else if (sk->sk_state != DCCP_CLOSED) {
- /*
- * Normal connection termination. May need to wait if there are
- * still packets in the TX queue that are delayed by the CCID.
- */
- dccp_flush_write_queue(sk, &timeout);
- dccp_terminate_connection(sk);
- }
-
- /*
- * Flush write queue. This may be necessary in several cases:
- * - we have been closed by the peer but still have application data;
- * - abortive termination (unread data or zero linger time);
- * - normal termination, but the queue could not be flushed within the
- *   time limit.
- */
- __skb_queue_purge(&sk->sk_write_queue);
-
- sk_stream_wait_close(sk, timeout);
-
-adjudge_to_death:
- state = sk->sk_state;
- sock_hold(sk);
- sock_orphan(sk);
-
- /*
- * This is the last release_sock in this socket's life. It will remove
- * the backlog.
- */
- release_sock(sk);
- /*
- * Now socket is owned by kernel and we acquire BH lock
- * to finish close. No need to check for user refs.
- */
- local_bh_disable();
- bh_lock_sock(sk);
- WARN_ON(sock_owned_by_user(sk));
-
- this_cpu_inc(dccp_orphan_count);
-
- /* Have we already been destroyed by a softirq or backlog? */
- if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
- goto out;
-
- if (sk->sk_state == DCCP_CLOSED)
- inet_csk_destroy_sock(sk);
-
- /* Otherwise, socket is reprieved until protocol close. */
-
-out:
- bh_unlock_sock(sk);
- local_bh_enable();
- sock_put(sk);
-}
-
-EXPORT_SYMBOL_GPL(dccp_close);
-
-void dccp_shutdown(struct sock *sk, int how)
-{
- dccp_pr_debug("called shutdown(%x)\n", how);
-}
-
-EXPORT_SYMBOL_GPL(dccp_shutdown);
-
-static inline int __init dccp_mib_init(void)
-{
- dccp_statistics = alloc_percpu(struct dccp_mib);
- if (!dccp_statistics)
- return -ENOMEM;
- return 0;
-}
-
-static inline void dccp_mib_exit(void)
-{
- free_percpu(dccp_statistics);
-}
-
-static int thash_entries;
-module_param(thash_entries, int, 0444);
-MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
-
-#ifdef CONFIG_IP_DCCP_DEBUG
-bool dccp_debug;
-module_param(dccp_debug, bool, 0644);
-MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
-
-EXPORT_SYMBOL_GPL(dccp_debug);
-#endif
-
-static int __init dccp_init(void)
-{
- unsigned long goal;
- unsigned long nr_pages = totalram_pages();
- int ehash_order, bhash_order, i;
- int rc;
-
- BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
- sizeof_field(struct sk_buff, cb));
- rc = inet_hashinfo2_init_mod(&dccp_hashinfo);
- if (rc)
- goto out_fail;
- rc = -ENOBUFS;
- dccp_hashinfo.bind_bucket_cachep =
- kmem_cache_create("dccp_bind_bucket",
- sizeof(struct inet_bind_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
- if (!dccp_hashinfo.bind_bucket_cachep)
- goto out_free_hashinfo2;
- dccp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("dccp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
- if (!dccp_hashinfo.bind2_bucket_cachep)
- goto out_free_bind_bucket_cachep;
-
- /*
- * Size and allocate the main established and bind bucket
- * hash tables.
- *
- * The methodology is similar to that of the buffer cache.
- */
- if (nr_pages >= (128 * 1024))
- goal = nr_pages >> (21 - PAGE_SHIFT);
- else
- goal = nr_pages >> (23 - PAGE_SHIFT);
-
- if (thash_entries)
- goal = (thash_entries *
- sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
- for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
- ;
- do {
- unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
- sizeof(struct inet_ehash_bucket);
-
- while (hash_size & (hash_size - 1))
- hash_size--;
- dccp_hashinfo.ehash_mask = hash_size - 1;
- dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
- __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
- } while (!dccp_hashinfo.ehash && --ehash_order > 0);
-
- if (!dccp_hashinfo.ehash) {
- DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind2_bucket_cachep;
- }
-
- for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
- INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
-
- if (inet_ehash_locks_alloc(&dccp_hashinfo))
- goto out_free_dccp_ehash;
-
- bhash_order = ehash_order;
-
- do {
- dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
- sizeof(struct inet_bind_hashbucket);
- if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
- bhash_order > 0)
- continue;
- dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
- __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
- } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
-
- if (!dccp_hashinfo.bhash) {
- DCCP_CRIT("Failed to allocate DCCP bind hash table");
- goto out_free_dccp_locks;
- }
-
- dccp_hashinfo.bhash2 = (struct inet_bind_hashbucket *)
- __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
-
- if (!dccp_hashinfo.bhash2) {
- DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
- goto out_free_dccp_bhash;
- }
-
- for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
- spin_lock_init(&dccp_hashinfo.bhash[i].lock);
- INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
- spin_lock_init(&dccp_hashinfo.bhash2[i].lock);
- INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
- }
-
- dccp_hashinfo.pernet = false;
-
- rc = dccp_mib_init();
- if (rc)
- goto out_free_dccp_bhash2;
-
- rc = dccp_ackvec_init();
- if (rc)
- goto out_free_dccp_mib;
-
- rc = dccp_sysctl_init();
- if (rc)
- goto out_ackvec_exit;
-
- rc = ccid_initialize_builtins();
- if (rc)
- goto out_sysctl_exit;
-
- dccp_timestamping_init();
-
- return 0;
-
-out_sysctl_exit:
- dccp_sysctl_exit();
-out_ackvec_exit:
- dccp_ackvec_exit();
-out_free_dccp_mib:
- dccp_mib_exit();
-out_free_dccp_bhash2:
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
-out_free_dccp_bhash:
- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
-out_free_dccp_locks:
- inet_ehash_locks_free(&dccp_hashinfo);
-out_free_dccp_ehash:
- free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-out_free_bind2_bucket_cachep:
- kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
-out_free_bind_bucket_cachep:
- kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
-out_free_hashinfo2:
- inet_hashinfo2_free_mod(&dccp_hashinfo);
-out_fail:
- dccp_hashinfo.bhash = NULL;
- dccp_hashinfo.bhash2 = NULL;
- dccp_hashinfo.ehash = NULL;
- dccp_hashinfo.bind_bucket_cachep = NULL;
- dccp_hashinfo.bind2_bucket_cachep = NULL;
- return rc;
-}
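
The sizing logic above grows the established-hash goal with available memory: with 4 KiB pages, machines at or above 512 MiB get one table page per 2 MiB of RAM (nr_pages >> (21 - PAGE_SHIFT)), smaller ones get one per 8 MiB, and thash_entries overrides both. A sketch of the order computation under the 4 KiB-page assumption:

    /* Sketch of the goal/order computation above, assuming 4 KiB pages. */
    #define PAGE_SHIFT 12

    static int ehash_order(unsigned long nr_pages)
    {
        unsigned long goal;    /* target table size, in pages */
        int order = 0;

        if (nr_pages >= 128 * 1024)    /* >= 512 MiB of RAM */
            goal = nr_pages >> (21 - PAGE_SHIFT);    /* 1 page / 2 MiB */
        else
            goal = nr_pages >> (23 - PAGE_SHIFT);    /* 1 page / 8 MiB */

        while ((1UL << order) < goal)
            order++;
        return order;    /* table spans 2^order pages */
    }
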
-
-static void __exit dccp_fini(void)
-{
- int bhash_order = get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket));
-
- ccid_cleanup_builtins();
- dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
- free_pages((unsigned long)dccp_hashinfo.ehash,
- get_order((dccp_hashinfo.ehash_mask + 1) *
- sizeof(struct inet_ehash_bucket)));
- inet_ehash_locks_free(&dccp_hashinfo);
- kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
- dccp_ackvec_exit();
- dccp_sysctl_exit();
- inet_hashinfo2_free_mod(&dccp_hashinfo);
-}
-
-module_init(dccp_init);
-module_exit(dccp_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
-MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/qpolicy.c b/net/dccp/qpolicy.c
deleted file mode 100644
index 5ba204ec0aca..000000000000
--- a/net/dccp/qpolicy.c
+++ /dev/null
@@ -1,136 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/dccp/qpolicy.c
- *
- * Policy-based packet dequeueing interface for DCCP.
- *
- * Copyright (c) 2008 Tomasz Grobelny <tomasz@grobelny.oswiecenia.net>
- */
-#include "dccp.h"
-
-/*
- * Simple Dequeueing Policy:
- * If tx_qlen is nonzero, enqueue at most tx_qlen elements.
- */
-static void qpolicy_simple_push(struct sock *sk, struct sk_buff *skb)
-{
- skb_queue_tail(&sk->sk_write_queue, skb);
-}
-
-static bool qpolicy_simple_full(struct sock *sk)
-{
- return dccp_sk(sk)->dccps_tx_qlen &&
- sk->sk_write_queue.qlen >= dccp_sk(sk)->dccps_tx_qlen;
-}
-
-static struct sk_buff *qpolicy_simple_top(struct sock *sk)
-{
- return skb_peek(&sk->sk_write_queue);
-}
-
-/*
- * Priority-based Dequeueing Policy:
- * If tx_qlen is nonzero and the queue has reached its upper bound
- * of tx_qlen elements, replace older packets lowest-priority-first.
- */
-static struct sk_buff *qpolicy_prio_best_skb(struct sock *sk)
-{
- struct sk_buff *skb, *best = NULL;
-
- skb_queue_walk(&sk->sk_write_queue, skb)
- if (best == NULL || skb->priority > best->priority)
- best = skb;
- return best;
-}
-
-static struct sk_buff *qpolicy_prio_worst_skb(struct sock *sk)
-{
- struct sk_buff *skb, *worst = NULL;
-
- skb_queue_walk(&sk->sk_write_queue, skb)
- if (worst == NULL || skb->priority < worst->priority)
- worst = skb;
- return worst;
-}
-
-static bool qpolicy_prio_full(struct sock *sk)
-{
- if (qpolicy_simple_full(sk))
- dccp_qpolicy_drop(sk, qpolicy_prio_worst_skb(sk));
- return false;
-}
-
-/**
- * struct dccp_qpolicy_operations - TX Packet Dequeueing Interface
- * @push: add a new @skb to the write queue
- * @full: indicates that no more packets will be admitted
- * @top: peeks at whatever the queueing policy defines as its `top'
- * @params: bitmask of cmsg parameters accepted by this policy
- */
-struct dccp_qpolicy_operations {
- void (*push) (struct sock *sk, struct sk_buff *skb);
- bool (*full) (struct sock *sk);
- struct sk_buff* (*top) (struct sock *sk);
- __be32 params;
-};
-
-static struct dccp_qpolicy_operations qpol_table[DCCPQ_POLICY_MAX] = {
- [DCCPQ_POLICY_SIMPLE] = {
- .push = qpolicy_simple_push,
- .full = qpolicy_simple_full,
- .top = qpolicy_simple_top,
- .params = 0,
- },
- [DCCPQ_POLICY_PRIO] = {
- .push = qpolicy_simple_push,
- .full = qpolicy_prio_full,
- .top = qpolicy_prio_best_skb,
- .params = DCCP_SCM_PRIORITY,
- },
-};
-
-/*
- * Externally visible interface
- */
-void dccp_qpolicy_push(struct sock *sk, struct sk_buff *skb)
-{
- qpol_table[dccp_sk(sk)->dccps_qpolicy].push(sk, skb);
-}
-
-bool dccp_qpolicy_full(struct sock *sk)
-{
- return qpol_table[dccp_sk(sk)->dccps_qpolicy].full(sk);
-}
-
-void dccp_qpolicy_drop(struct sock *sk, struct sk_buff *skb)
-{
- if (skb != NULL) {
- skb_unlink(skb, &sk->sk_write_queue);
- kfree_skb(skb);
- }
-}
-
-struct sk_buff *dccp_qpolicy_top(struct sock *sk)
-{
- return qpol_table[dccp_sk(sk)->dccps_qpolicy].top(sk);
-}
-
-struct sk_buff *dccp_qpolicy_pop(struct sock *sk)
-{
- struct sk_buff *skb = dccp_qpolicy_top(sk);
-
- if (skb != NULL) {
- /* Clear any skb fields that we used internally */
- skb->priority = 0;
- skb_unlink(skb, &sk->sk_write_queue);
- }
- return skb;
-}
-
-bool dccp_qpolicy_param_ok(struct sock *sk, __be32 param)
-{
- /* check if exactly one bit is set */
- if (!param || (param & (param - 1)))
- return false;
- return (qpol_table[dccp_sk(sk)->dccps_qpolicy].params & param) == param;
-}
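
The `param & (param - 1)` test above is the standard single-bit idiom: subtracting one flips the lowest set bit and everything below it, so the AND is zero exactly when at most one bit was set. Spelled out:

    /* x = 0b0100: x - 1 = 0b0011, x & (x - 1) = 0      -> one bit set
     * y = 0b0110: y - 1 = 0b0101, y & (y - 1) = 0b0100 -> more than one
     */
    static inline int has_single_bit(unsigned int x)
    {
        return x != 0 && (x & (x - 1)) == 0;
    }
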
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
deleted file mode 100644
index b15845fd6300..000000000000
--- a/net/dccp/sysctl.c
+++ /dev/null
@@ -1,107 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/dccp/sysctl.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@mandriva.com>
- */
-
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include "dccp.h"
-#include "feat.h"
-
-/* Boundary values */
-static int u8_max = 0xFF;
-static unsigned long seqw_min = DCCPF_SEQ_WMIN,
- seqw_max = 0xFFFFFFFF; /* maximum on 32 bit */
-
-static struct ctl_table dccp_default_table[] = {
- {
- .procname = "seq_window",
- .data = &sysctl_dccp_sequence_window,
- .maxlen = sizeof(sysctl_dccp_sequence_window),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax,
- .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */
- .extra2 = &seqw_max,
- },
- {
- .procname = "rx_ccid",
- .data = &sysctl_dccp_rx_ccid,
- .maxlen = sizeof(sysctl_dccp_rx_ccid),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &u8_max, /* RFC 4340, 10. */
- },
- {
- .procname = "tx_ccid",
- .data = &sysctl_dccp_tx_ccid,
- .maxlen = sizeof(sysctl_dccp_tx_ccid),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &u8_max, /* RFC 4340, 10. */
- },
- {
- .procname = "request_retries",
- .data = &sysctl_dccp_request_retries,
- .maxlen = sizeof(sysctl_dccp_request_retries),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ONE,
- .extra2 = &u8_max,
- },
- {
- .procname = "retries1",
- .data = &sysctl_dccp_retries1,
- .maxlen = sizeof(sysctl_dccp_retries1),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &u8_max,
- },
- {
- .procname = "retries2",
- .data = &sysctl_dccp_retries2,
- .maxlen = sizeof(sysctl_dccp_retries2),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = &u8_max,
- },
- {
- .procname = "tx_qlen",
- .data = &sysctl_dccp_tx_qlen,
- .maxlen = sizeof(sysctl_dccp_tx_qlen),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- },
- {
- .procname = "sync_ratelimit",
- .data = &sysctl_dccp_sync_ratelimit,
- .maxlen = sizeof(sysctl_dccp_sync_ratelimit),
- .mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
- },
-};
-
-static struct ctl_table_header *dccp_table_header;
-
-int __init dccp_sysctl_init(void)
-{
- dccp_table_header = register_net_sysctl(&init_net, "net/dccp/default",
- dccp_default_table);
-
- return dccp_table_header != NULL ? 0 : -ENOMEM;
-}
-
-void dccp_sysctl_exit(void)
-{
- if (dccp_table_header != NULL) {
- unregister_net_sysctl_table(dccp_table_header);
- dccp_table_header = NULL;
- }
-}
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
deleted file mode 100644
index 232ac4ae0a73..000000000000
--- a/net/dccp/timer.c
+++ /dev/null
@@ -1,272 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/dccp/timer.c
- *
- * An implementation of the DCCP protocol
- * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
- */
-
-#include <linux/dccp.h>
-#include <linux/skbuff.h>
-#include <linux/export.h>
-
-#include "dccp.h"
-
-/* sysctl variables governing numbers of retransmission attempts */
-int sysctl_dccp_request_retries __read_mostly = TCP_SYN_RETRIES;
-int sysctl_dccp_retries1 __read_mostly = TCP_RETR1;
-int sysctl_dccp_retries2 __read_mostly = TCP_RETR2;
-
-static void dccp_write_err(struct sock *sk)
-{
- sk->sk_err = READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT;
- sk_error_report(sk);
-
- dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
- dccp_done(sk);
- __DCCP_INC_STATS(DCCP_MIB_ABORTONTIMEOUT);
-}
-
-/* A write timeout has occurred. Process the after effects. */
-static int dccp_write_timeout(struct sock *sk)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
- int retry_until;
-
- if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
- if (icsk->icsk_retransmits != 0)
- dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ?
- : sysctl_dccp_request_retries;
- } else {
- if (icsk->icsk_retransmits >= sysctl_dccp_retries1) {
- /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu
- black hole detection. :-(
-
-    This is the place to implement it. It is not implemented, and I
-    do not want to implement it. It is disgusting. It does not work
-    in any case. Let me cite the same draft, which requires us to
-    implement this:
-
- "The one security concern raised by this memo is that ICMP black holes
- are often caused by over-zealous security administrators who block
- all ICMP messages. It is vitally important that those who design and
- deploy security systems understand the impact of strict filtering on
- upper-layer protocols. The safest web site in the world is worthless
- if most TCP implementations cannot transfer data from it. It would
- be far nicer to have all of the black holes fixed rather than fixing
- all of the TCP implementations."
-
- Golden words :-).
- */
-
- dst_negative_advice(sk);
- }
-
- retry_until = sysctl_dccp_retries2;
- /*
- * FIXME: see tcp_write_timeout and tcp_out_of_resources
- */
- }
-
- if (icsk->icsk_retransmits >= retry_until) {
- /* Has it gone just too far? */
- dccp_write_err(sk);
- return 1;
- }
- return 0;
-}
-
-/*
- * The DCCP retransmit timer.
- */
-static void dccp_retransmit_timer(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- /*
- * If more than 4 MSL (8 minutes) has passed, a Reset (code Aborted)
- * was sent and there is no need to retransmit: this sock is dead.
- */
- if (dccp_write_timeout(sk))
- return;
-
- /*
- * We want to know the number of packets retransmitted, not the
- * total number of retransmissions of clones of original packets.
- */
- if (icsk->icsk_retransmits == 0)
- __DCCP_INC_STATS(DCCP_MIB_TIMEOUTS);
-
- if (dccp_retransmit_skb(sk) != 0) {
- /*
- * Retransmission failed because of local congestion,
- * do not backoff.
- */
- if (--icsk->icsk_retransmits == 0)
- icsk->icsk_retransmits = 1;
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- min(icsk->icsk_rto,
- TCP_RESOURCE_PROBE_INTERVAL),
- DCCP_RTO_MAX);
- return;
- }
-
- icsk->icsk_backoff++;
-
- icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
- DCCP_RTO_MAX);
- if (icsk->icsk_retransmits > sysctl_dccp_retries1)
- __sk_dst_reset(sk);
-}
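
The `icsk_rto = min(icsk_rto << 1, DCCP_RTO_MAX)` step above is classic binary exponential backoff: every timeout doubles the retransmission timeout until it saturates at the cap. In isolation, with an assumed 64-second cap (DCCP_RTO_MAX itself is defined in this module's headers):

    #define RTO_MAX_MS (64 * 1000)    /* assumed cap, in milliseconds */

    /* Double the RTO on each timeout, saturating at the cap:
     * 1000 -> 2000 -> 4000 -> ... -> 64000 -> 64000 -> ...
     */
    static unsigned int backoff_rto(unsigned int rto_ms)
    {
        return 2 * rto_ms < RTO_MAX_MS ? 2 * rto_ms : RTO_MAX_MS;
    }
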
-
-static void dccp_write_timer(struct timer_list *t)
-{
- struct inet_connection_sock *icsk =
- from_timer(icsk, t, icsk_retransmit_timer);
- struct sock *sk = &icsk->icsk_inet.sk;
- int event = 0;
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- /* Try again later */
- sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
- jiffies + (HZ / 20));
- goto out;
- }
-
- if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending)
- goto out;
-
- if (time_after(icsk_timeout(icsk), jiffies)) {
- sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
- icsk_timeout(icsk));
- goto out;
- }
-
- event = icsk->icsk_pending;
- icsk->icsk_pending = 0;
-
- switch (event) {
- case ICSK_TIME_RETRANS:
- dccp_retransmit_timer(sk);
- break;
- }
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
-
-static void dccp_keepalive_timer(struct timer_list *t)
-{
- struct sock *sk = from_timer(sk, t, sk_timer);
-
- pr_err("dccp should not use a keepalive timer !\n");
- sock_put(sk);
-}
-
-/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
-static void dccp_delack_timer(struct timer_list *t)
-{
- struct inet_connection_sock *icsk =
- from_timer(icsk, t, icsk_delack_timer);
- struct sock *sk = &icsk->icsk_inet.sk;
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk)) {
- /* Try again later. */
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
- sk_reset_timer(sk, &icsk->icsk_delack_timer,
- jiffies + TCP_DELACK_MIN);
- goto out;
- }
-
- if (sk->sk_state == DCCP_CLOSED ||
- !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
- goto out;
- if (time_after(icsk_delack_timeout(icsk), jiffies)) {
- sk_reset_timer(sk, &icsk->icsk_delack_timer,
- icsk_delack_timeout(icsk));
- goto out;
- }
-
- icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
-
- if (inet_csk_ack_scheduled(sk)) {
- if (!inet_csk_in_pingpong_mode(sk)) {
- /* Delayed ACK missed: inflate ATO. */
- icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1,
- icsk->icsk_rto);
- } else {
- /* Delayed ACK missed: leave pingpong mode and
- * deflate ATO.
- */
- inet_csk_exit_pingpong_mode(sk);
- icsk->icsk_ack.ato = TCP_ATO_MIN;
- }
- dccp_send_ack(sk);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKS);
- }
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
-
-/**
- * dccp_write_xmitlet - Workhorse for CCID packet dequeueing interface
- * @t: pointer to the tasklet associated with this handler
- *
- * See the comments above %ccid_dequeueing_decision for supported modes.
- */
-static void dccp_write_xmitlet(struct tasklet_struct *t)
-{
- struct dccp_sock *dp = from_tasklet(dp, t, dccps_xmitlet);
- struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk;
-
- bh_lock_sock(sk);
- if (sock_owned_by_user(sk))
- sk_reset_timer(sk, &dccp_sk(sk)->dccps_xmit_timer, jiffies + 1);
- else
- dccp_write_xmit(sk);
- bh_unlock_sock(sk);
- sock_put(sk);
-}
-
-static void dccp_write_xmit_timer(struct timer_list *t)
-{
- struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer);
-
- dccp_write_xmitlet(&dp->dccps_xmitlet);
-}
-
-void dccp_init_xmit_timers(struct sock *sk)
-{
- struct dccp_sock *dp = dccp_sk(sk);
-
- tasklet_setup(&dp->dccps_xmitlet, dccp_write_xmitlet);
- timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0);
- inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer,
- &dccp_keepalive_timer);
-}
-
-static ktime_t dccp_timestamp_seed;
-/**
- * dccp_timestamp - 10s of microseconds time source
- * Returns the number of 10s of microseconds since loading DCCP. This is the
- * native DCCP time difference format (RFC 4340, sec. 13).
- * Please note: this value wraps around roughly every 11.9 hours.
- */
-u32 dccp_timestamp(void)
-{
- u64 delta = (u64)ktime_us_delta(ktime_get_real(), dccp_timestamp_seed);
-
- do_div(delta, 10);
- return delta;
-}
-EXPORT_SYMBOL_GPL(dccp_timestamp);
-
-void __init dccp_timestamping_init(void)
-{
- dccp_timestamp_seed = ktime_get_real();
-}
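The wrap-around noted in the removed kernel-doc follows directly from the representation: a u32 counter of 10-microsecond ticks wraps after 2^32 x 10 us, about 42950 seconds or 11.93 hours. A quick check:

#include <stdio.h>

int main(void)
{
	/* dccp_timestamp() returned a u32 counting 10-microsecond units. */
	double wrap_s = 4294967296.0 * 10e-6;	/* 2^32 ticks * 10 us */

	printf("wraps after %.2f s = %.2f hours\n", wrap_s, wrap_s / 3600.0);
	return 0;	/* prints ~42949.67 s = ~11.93 hours */
}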
diff --git a/net/dccp/trace.h b/net/dccp/trace.h
deleted file mode 100644
index 5a43b3508c7f..000000000000
--- a/net/dccp/trace.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM dccp
-
-#if !defined(_TRACE_DCCP_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_DCCP_H
-
-#include <net/sock.h>
-#include "dccp.h"
-#include "ccids/ccid3.h"
-#include <linux/tracepoint.h>
-#include <trace/events/net_probe_common.h>
-
-TRACE_EVENT(dccp_probe,
-
- TP_PROTO(struct sock *sk, size_t size),
-
- TP_ARGS(sk, size),
-
- TP_STRUCT__entry(
- /* sockaddr_in6 is always bigger than sockaddr_in */
- __array(__u8, saddr, sizeof(struct sockaddr_in6))
- __array(__u8, daddr, sizeof(struct sockaddr_in6))
- __field(__u16, sport)
- __field(__u16, dport)
- __field(__u16, size)
- __field(__u16, tx_s)
- __field(__u32, tx_rtt)
- __field(__u32, tx_p)
- __field(__u32, tx_x_calc)
- __field(__u64, tx_x_recv)
- __field(__u64, tx_x)
- __field(__u32, tx_t_ipi)
- ),
-
- TP_fast_assign(
- const struct inet_sock *inet = inet_sk(sk);
- struct ccid3_hc_tx_sock *hc = NULL;
-
- if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
- hc = ccid3_hc_tx_sk(sk);
-
- memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
- memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
-
- TP_STORE_ADDR_PORTS(__entry, inet, sk);
-
- /* For filtering use */
- __entry->sport = ntohs(inet->inet_sport);
- __entry->dport = ntohs(inet->inet_dport);
-
- __entry->size = size;
- if (hc) {
- __entry->tx_s = hc->tx_s;
- __entry->tx_rtt = hc->tx_rtt;
- __entry->tx_p = hc->tx_p;
- __entry->tx_x_calc = hc->tx_x_calc;
- __entry->tx_x_recv = hc->tx_x_recv >> 6;
- __entry->tx_x = hc->tx_x >> 6;
- __entry->tx_t_ipi = hc->tx_t_ipi;
- } else {
- __entry->tx_s = 0;
- memset_startat(__entry, 0, tx_rtt);
- }
- ),
-
- TP_printk("src=%pISpc dest=%pISpc size=%d tx_s=%d tx_rtt=%d "
- "tx_p=%d tx_x_calc=%u tx_x_recv=%llu tx_x=%llu tx_t_ipi=%d",
- __entry->saddr, __entry->daddr, __entry->size,
- __entry->tx_s, __entry->tx_rtt, __entry->tx_p,
- __entry->tx_x_calc, __entry->tx_x_recv, __entry->tx_x,
- __entry->tx_t_ipi)
-);
-
-#endif /* _TRACE_DCCP_H */
-
-/* This part must be outside protection */
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE trace
-#include <trace/define_trace.h>
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index d6e3db300acb..02602704bdea 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -775,7 +775,7 @@ static int devlink_info_version_put(struct devlink_info_req *req, int attr,
req->version_cb(version_name, version_type,
req->version_cb_priv);
- if (!req->msg)
+ if (!req->msg || !*version_value)
return 0;
nest = nla_nest_start_noflag(req->msg, attr);
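The added `!*version_value` test makes devlink skip version attributes whose value string is empty instead of emitting an empty nest. A minimal sketch of the same guard (names here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stddef.h>

/* Emit a version attribute only when there is a message buffer to fill
 * and the value string is non-empty, as in the hunk above. */
static bool should_emit_version(const void *msg, const char *value)
{
	return msg != NULL && value != NULL && *value != '\0';
}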
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 1b1b700ec05e..0d1e56965af0 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -761,6 +761,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
if (res)
goto err_unregister;
+ if (protocol_version == PRP_V1) {
+ eth_hw_addr_set(slave[1], slave[0]->dev_addr);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR, slave[1]);
+ }
+
if (interlink) {
res = hsr_add_port(hsr, interlink, HSR_PT_INTERLINK, extack);
if (res)
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index d7ae32473c41..192893c3f2ec 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -78,6 +78,15 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
eth_hw_addr_set(master->dev, dev->dev_addr);
call_netdevice_notifiers(NETDEV_CHANGEADDR,
master->dev);
+
+ if (hsr->prot_version == PRP_V1) {
+ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
+ if (port) {
+ eth_hw_addr_set(port->dev, dev->dev_addr);
+ call_netdevice_notifiers(NETDEV_CHANGEADDR,
+ port->dev);
+ }
+ }
}
/* Make sure we recognize frames from ourselves in hsr_rcv() */
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 1bc47b17a296..135ec5fce019 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -155,6 +155,7 @@ struct hsr_port {
struct hsr_priv *hsr;
enum hsr_port_type type;
struct rcu_head rcu;
+ unsigned char original_macaddress[ETH_ALEN];
};
struct hsr_frame_info;
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 2a802a5de2ac..b87b6a6fe070 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -196,6 +196,7 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
port->hsr = hsr;
port->dev = dev;
port->type = type;
+ ether_addr_copy(port->original_macaddress, dev->dev_addr);
if (type != HSR_PT_MASTER) {
res = hsr_portdev_setup(hsr, dev, port, extack);
@@ -232,6 +233,7 @@ void hsr_del_port(struct hsr_port *port)
if (!port->hsr->fwd_offloaded)
dev_set_promiscuity(port->dev, -1);
netdev_upper_dev_unlink(port->dev, master->dev);
+ eth_hw_addr_set(port->dev, port->original_macaddress);
}
kfree_rcu(port, rcu);
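The hsr_slave.c hunks pair a save on hsr_add_port() with a restore on hsr_del_port(), so the MAC rewriting done for PRP slave B (and propagated on NETDEV_CHANGEADDR) is undone when a port leaves the device. A standalone sketch of that save/restore pairing (the struct and names are illustrative):

#include <string.h>

#define ETH_ALEN 6

struct port {
	unsigned char dev_addr[ETH_ALEN];	/* current device address */
	unsigned char original_mac[ETH_ALEN];	/* saved at add time */
};

static void port_add(struct port *p)
{
	/* like ether_addr_copy(port->original_macaddress, dev->dev_addr) */
	memcpy(p->original_mac, p->dev_addr, ETH_ALEN);
}

static void port_del(struct port *p)
{
	/* like eth_hw_addr_set(port->dev, port->original_macaddress) */
	memcpy(p->dev_addr, p->original_mac, ETH_ALEN);
}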
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 6d2c97f8e9ef..12850a277251 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -425,7 +425,7 @@ config INET_DIAG
tristate "INET: socket monitoring interface"
default y
help
- Support for INET (TCP, DCCP, etc) socket monitoring interface used by
+ Support for INET (TCP, UDP, etc) socket monitoring interface used by
native Linux tools such as ss. ss is included in iproute2, currently
downloadable at:
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5df1f1325259..76e38092cd8a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1328,10 +1328,7 @@ int inet_sk_rebuild_header(struct sock *sk)
/* Routing failed... */
sk->sk_route_caps = 0;
- /*
- * Other protocols have to map its equivalent state to TCP_SYN_SENT.
- * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
- */
+
if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 77e5705ac799..c47d3828d4f6 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1792,12 +1792,12 @@ static int inet_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
struct ifaddrmsg *ifm;
int err, i;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ ifm = nlmsg_payload(nlh, sizeof(*ifm));
+ if (!ifm) {
NL_SET_ERR_MSG(extack, "ipv4: Invalid header for address dump request");
return -EINVAL;
}
- ifm = nlmsg_data(nlh);
if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for address dump request");
return -EINVAL;
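This hunk is the first of many in the series converting an explicit nlmsg_len check plus nlmsg_data() into a single nlmsg_payload() call, which returns the payload pointer only when the message is large enough to hold the requested header. A sketch of the presumed semantics (the header layout is simplified):

#include <stddef.h>

struct nlmsghdr_sketch {
	unsigned int nlmsg_len;		/* total message length */
	/* ... remaining netlink header fields ... */
};

#define NLMSG_HDRLEN_SKETCH	sizeof(struct nlmsghdr_sketch)

/* Return the payload if the message can hold 'len' payload bytes,
 * otherwise NULL -- collapsing the old two-step check-then-cast. */
static void *nlmsg_payload_sketch(struct nlmsghdr_sketch *nlh, size_t len)
{
	if (nlh->nlmsg_len < NLMSG_HDRLEN_SKETCH + len)
		return NULL;
	return (char *)nlh + NLMSG_HDRLEN_SKETCH;
}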
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3f4e629998fa..57f088e5540e 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -948,12 +948,12 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
if (filter->rtnl_held)
ASSERT_RTNL();
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG(extack, "Invalid header for FIB dump request");
return -EINVAL;
}
- rtm = nlmsg_data(nlh);
if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
rtm->rtm_scope) {
NL_SET_ERR_MSG(extack, "Invalid values in header for FIB dump request");
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index dd5cf8914a28..20915895bdaa 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -330,7 +330,7 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret,
struct inet_bind2_bucket **tb2_ret,
struct inet_bind_hashbucket **head2_ret, int *port_ret)
{
- struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk);
int i, low, high, attempt_half, port, l3mdev;
struct inet_bind_hashbucket *head, *head2;
struct net *net = sock_net(sk);
@@ -512,10 +512,10 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
*/
int inet_csk_get_port(struct sock *sk, unsigned short snum)
{
- struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
bool found_port = false, check_bind_conflict = true;
bool bhash_created = false, bhash2_created = false;
+ struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk);
int ret = -EADDRINUSE, port = snum, l3mdev;
struct inet_bind_hashbucket *head, *head2;
struct inet_bind2_bucket *tb2 = NULL;
@@ -767,7 +767,6 @@ void inet_csk_init_xmit_timers(struct sock *sk,
timer_setup(&sk->sk_timer, keepalive_handler, 0);
icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
-EXPORT_SYMBOL(inet_csk_init_xmit_timers);
void inet_csk_clear_xmit_timers(struct sock *sk)
{
@@ -780,7 +779,6 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
sk_stop_timer(sk, &icsk->icsk_delack_timer);
sk_stop_timer(sk, &sk->sk_timer);
}
-EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
void inet_csk_clear_xmit_timers_sync(struct sock *sk)
{
@@ -831,7 +829,6 @@ no_route:
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
-EXPORT_SYMBOL_GPL(inet_csk_route_req);
struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct sock *newsk,
@@ -898,7 +895,6 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
req->num_retrans++;
return err;
}
-EXPORT_SYMBOL(inet_rtx_syn_ack);
static struct request_sock *
reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener,
@@ -1026,9 +1022,10 @@ static bool reqsk_queue_unlink(struct request_sock *req)
bool found = false;
if (sk_hashed(sk)) {
- struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
- spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
+ struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk);
+ spinlock_t *lock;
+ lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
spin_lock(lock);
found = __sk_nulls_del_node_init_rcu(sk);
spin_unlock(lock);
@@ -1058,14 +1055,13 @@ bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
return __inet_csk_reqsk_queue_drop(sk, req, false);
}
-EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
{
inet_csk_reqsk_queue_drop(sk, req);
reqsk_put(req);
}
-EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
+EXPORT_IPV6_MOD(inet_csk_reqsk_queue_drop_and_put);
static void reqsk_timer_handler(struct timer_list *t)
{
@@ -1209,7 +1205,6 @@ bool inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
inet_csk_reqsk_queue_added(sk);
return true;
}
-EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
const gfp_t priority)
@@ -1290,7 +1285,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
return newsk;
}
-EXPORT_SYMBOL_GPL(inet_csk_clone_lock);
/*
* At this point, there should be no process reference to this
@@ -1322,7 +1316,7 @@ void inet_csk_destroy_sock(struct sock *sk)
EXPORT_SYMBOL(inet_csk_destroy_sock);
/* This function allows to force a closure of a socket after the call to
- * tcp/dccp_create_openreq_child().
+ * tcp_create_openreq_child().
*/
void inet_csk_prepare_forced_close(struct sock *sk)
__releases(&sk->sk_lock.slock)
@@ -1380,7 +1374,6 @@ int inet_csk_listen_start(struct sock *sk)
inet_sk_set_state(sk, TCP_CLOSE);
return err;
}
-EXPORT_SYMBOL_GPL(inet_csk_listen_start);
static void inet_child_forget(struct sock *sk, struct request_sock *req,
struct sock *child)
@@ -1475,7 +1468,6 @@ child_put:
sock_put(child);
return NULL;
}
-EXPORT_SYMBOL(inet_csk_complete_hashdance);
/*
* This routine closes sockets which have been at least partially
@@ -1590,4 +1582,3 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
out:
return dst;
}
-EXPORT_SYMBOL_GPL(inet_csk_update_pmtu);
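With DCCP gone, these connection-sock helpers (and the inet_hashtables/inet_timewait_sock ones below) no longer need to be exported for another protocol module; the only remaining modular user is IPv6, so a few symbols switch to EXPORT_IPV6_MOD. Presumably that macro expands along these lines (a sketch, not the exact kernel definition):

/* Export the symbol only when IPv6 itself is built as a module and can
 * therefore still reference it; otherwise keep the symbol private. */
#if IS_MODULE(CONFIG_IPV6)
#define EXPORT_IPV6_MOD(sym)	EXPORT_SYMBOL(sym)
#else
#define EXPORT_IPV6_MOD(sym)
#endif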
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c2bb91d9e9ff..907bad776b42 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1369,8 +1369,6 @@ static int inet_diag_type2proto(int type)
switch (type) {
case TCPDIAG_GETSOCK:
return IPPROTO_TCP;
- case DCCPDIAG_GETSOCK:
- return IPPROTO_DCCP;
default:
return 0;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 5bf163f756e9..da85cc30e382 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -176,7 +176,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
*/
static void __inet_put_port(struct sock *sk)
{
- struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk);
struct inet_bind_hashbucket *head, *head2;
struct net *net = sock_net(sk);
struct inet_bind_bucket *tb;
@@ -215,7 +215,7 @@ EXPORT_SYMBOL(inet_put_port);
int __inet_inherit_port(const struct sock *sk, struct sock *child)
{
- struct inet_hashinfo *table = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_hashinfo *table = tcp_get_hashinfo(sk);
unsigned short port = inet_sk(child)->inet_num;
struct inet_bind_hashbucket *head, *head2;
bool created_inet_bind_bucket = false;
@@ -668,7 +668,7 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk,
*/
bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
- struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk);
struct inet_ehash_bucket *head;
struct hlist_nulls_head *list;
spinlock_t *lock;
@@ -713,7 +713,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
}
return ok;
}
-EXPORT_SYMBOL_GPL(inet_ehash_nolisten);
+EXPORT_IPV6_MOD(inet_ehash_nolisten);
static int inet_reuseport_add_sock(struct sock *sk,
struct inet_listen_hashbucket *ilb)
@@ -740,7 +740,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
int __inet_hash(struct sock *sk, struct sock *osk)
{
- struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk);
struct inet_listen_hashbucket *ilb2;
int err = 0;
@@ -771,7 +771,7 @@ unlock:
return err;
}
-EXPORT_SYMBOL(__inet_hash);
+EXPORT_IPV6_MOD(__inet_hash);
int inet_hash(struct sock *sk)
{
@@ -782,11 +782,10 @@ int inet_hash(struct sock *sk)
return err;
}
-EXPORT_SYMBOL_GPL(inet_hash);
void inet_unhash(struct sock *sk)
{
- struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk);
if (sk_unhashed(sk))
return;
@@ -823,7 +822,7 @@ void inet_unhash(struct sock *sk)
spin_unlock_bh(lock);
}
}
-EXPORT_SYMBOL_GPL(inet_unhash);
+EXPORT_IPV6_MOD(inet_unhash);
static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
const struct net *net, unsigned short port,
@@ -874,7 +873,7 @@ inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net
struct inet_bind_hashbucket *
inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port)
{
- struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk);
u32 hash;
#if IS_ENABLED(CONFIG_IPV6)
@@ -902,7 +901,7 @@ static void inet_update_saddr(struct sock *sk, void *saddr, int family)
static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family, bool reset)
{
- struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk);
+ struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk);
struct inet_bind_hashbucket *head, *head2;
struct inet_bind2_bucket *tb2, *new_tb2;
int l3mdev = inet_sk_bound_l3mdev(sk);
@@ -982,14 +981,14 @@ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family)
{
return __inet_bhash2_update_saddr(sk, saddr, family, false);
}
-EXPORT_SYMBOL_GPL(inet_bhash2_update_saddr);
+EXPORT_IPV6_MOD(inet_bhash2_update_saddr);
void inet_bhash2_reset_saddr(struct sock *sk)
{
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
__inet_bhash2_update_saddr(sk, NULL, 0, true);
}
-EXPORT_SYMBOL_GPL(inet_bhash2_reset_saddr);
+EXPORT_IPV6_MOD(inet_bhash2_reset_saddr);
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
@@ -1214,7 +1213,6 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row,
return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
__inet_check_established);
}
-EXPORT_SYMBOL_GPL(inet_hash_connect);
static void init_hashinfo_lhash2(struct inet_hashinfo *h)
{
@@ -1265,7 +1263,6 @@ int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
init_hashinfo_lhash2(h);
return 0;
}
-EXPORT_SYMBOL_GPL(inet_hashinfo2_init_mod);
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
{
@@ -1305,7 +1302,6 @@ set_mask:
hashinfo->ehash_locks_mask = nblocks - 1;
return 0;
}
-EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc);
struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo,
unsigned int ehash_entries)
@@ -1341,7 +1337,6 @@ free_hashinfo:
err:
return NULL;
}
-EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_alloc);
void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo)
{
@@ -1352,4 +1347,3 @@ void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo)
vfree(hashinfo->ehash);
kfree(hashinfo);
}
-EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_free);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index aded4bf1bc16..67efe9501581 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -166,7 +166,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw,
spin_unlock(lock);
local_bh_enable();
}
-EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule);
static void tw_timer_handler(struct timer_list *t)
{
@@ -223,7 +222,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
return tw;
}
-EXPORT_SYMBOL_GPL(inet_twsk_alloc);
/* These are always called from BH context. See callers in
* tcp_input.c to verify this.
@@ -306,7 +304,6 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
mod_timer_pending(&tw->tw_timer, jiffies + timeo);
}
}
-EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
void inet_twsk_purge(struct inet_hashinfo *hashinfo)
@@ -365,4 +362,3 @@ restart:
rcu_read_unlock();
}
}
-EXPORT_SYMBOL_GPL(inet_twsk_purge);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 26d15f907551..f5b9004d6938 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1066,16 +1066,15 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net,
- struct list_head *dev_to_kill)
+static void __net_exit ipgre_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops,
- dev_to_kill);
+ ip_tunnel_delete_net(net, ipgre_net_id, &ipgre_link_ops, dev_to_kill);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit_batch_rtnl = ipgre_exit_batch_rtnl,
+ .exit_rtnl = ipgre_exit_rtnl,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1752,16 +1751,15 @@ static int __net_init ipgre_tap_init_net(struct net *net)
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net,
- struct list_head *dev_to_kill)
+static void __net_exit ipgre_tap_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops,
- dev_to_kill);
+ ip_tunnel_delete_net(net, gre_tap_net_id, &ipgre_tap_ops, dev_to_kill);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl,
+ .exit_rtnl = ipgre_tap_exit_rtnl,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1772,16 +1770,15 @@ static int __net_init erspan_init_net(struct net *net)
&erspan_link_ops, "erspan0");
}
-static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
+static void __net_exit erspan_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops,
- dev_to_kill);
+ ip_tunnel_delete_net(net, erspan_net_id, &erspan_link_ops, dev_to_kill);
}
static struct pernet_operations erspan_net_ops = {
.init = erspan_init_net,
- .exit_batch_rtnl = erspan_exit_batch_rtnl,
+ .exit_rtnl = erspan_exit_rtnl,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 1024f961ec9a..3913ec89ad20 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1174,13 +1174,16 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
- struct list_head *head,
- struct rtnl_link_ops *ops)
+void ip_tunnel_delete_net(struct net *net, unsigned int id,
+ struct rtnl_link_ops *ops,
+ struct list_head *head)
{
+ struct ip_tunnel_net *itn = net_generic(net, id);
struct net_device *dev, *aux;
int h;
+ ASSERT_RTNL_NET(net);
+
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == ops)
unregister_netdevice_queue(dev, head);
@@ -1198,21 +1201,7 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
unregister_netdevice_queue(t->dev, head);
}
}
-
-void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
- struct rtnl_link_ops *ops,
- struct list_head *dev_to_kill)
-{
- struct ip_tunnel_net *itn;
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list) {
- itn = net_generic(net, id);
- ip_tunnel_destroy(net, itn, dev_to_kill, ops);
- }
-}
-EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
+EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
int ip_tunnel_newlink(struct net *net, struct net_device *dev,
struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
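ip_tunnel.c anchors the pattern repeated across ip_gre, ip_vti, ipip, nexthop, ip6_gre, ip6_tnl, vti6 and sit in this series: the per-driver `.exit_batch_rtnl` loop over dying namespaces is replaced by a per-netns `.exit_rtnl` hook, with the iteration moving into the pernet core. The shape of the conversion, reduced to its essentials (the driver helper name is illustrative):

#include <linux/list.h>
#include <net/net_namespace.h>

static void driver_destroy_tunnels(struct net *net, struct list_head *kill);

/* Before: every driver open-coded the loop over the dying namespaces. */
static void old_exit_batch_rtnl(struct list_head *net_list,
				struct list_head *dev_to_kill)
{
	struct net *net;

	list_for_each_entry(net, net_list, exit_list)
		driver_destroy_tunnels(net, dev_to_kill);
}

/* After: the pernet core loops and the driver sees one netns at a time,
 * under the per-netns RTNL, hence the ASSERT_RTNL_NET(net) above. */
static void new_exit_rtnl(struct net *net, struct list_head *dev_to_kill)
{
	driver_destroy_tunnels(net, dev_to_kill);
}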
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 159b4473290e..686e4f3d83aa 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -523,16 +523,15 @@ static int __net_init vti_init_net(struct net *net)
return 0;
}
-static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net,
- struct list_head *dev_to_kill)
+static void __net_exit vti_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops,
- dev_to_kill);
+ ip_tunnel_delete_net(net, vti_net_id, &vti_link_ops, dev_to_kill);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit_batch_rtnl = vti_exit_batch_rtnl,
+ .exit_rtnl = vti_exit_rtnl,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bab0bf90c908..3e03af073a1c 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -604,16 +604,15 @@ static int __net_init ipip_init_net(struct net *net)
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net,
- struct list_head *dev_to_kill)
+static void __net_exit ipip_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops,
- dev_to_kill);
+ ip_tunnel_delete_net(net, ipip_net_id, &ipip_link_ops, dev_to_kill);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit_batch_rtnl = ipip_exit_batch_rtnl,
+ .exit_rtnl = ipip_exit_rtnl,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a8b04d4abcaa..3b5677d8467e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -2511,7 +2511,8 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
struct rtmsg *rtm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG(extack, "ipv4: Invalid header for multicast route get request");
return -EINVAL;
}
@@ -2520,7 +2521,6 @@ static int ipmr_rtm_valid_getroute_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv4_policy, extack);
- rtm = nlmsg_data(nlh);
if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
(rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
rtm->rtm_tos || rtm->rtm_table || rtm->rtm_protocol ||
@@ -2836,7 +2836,8 @@ static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
{
struct ifinfomsg *ifm;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ ifm = nlmsg_payload(nlh, sizeof(*ifm));
+ if (!ifm) {
NL_SET_ERR_MSG(extack, "ipv4: Invalid header for ipmr link dump");
return -EINVAL;
}
@@ -2846,7 +2847,6 @@ static int ipmr_valid_dumplink(const struct nlmsghdr *nlh,
return -EINVAL;
}
- ifm = nlmsg_data(nlh);
if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
ifm->ifi_change || ifm->ifi_index) {
NL_SET_ERR_MSG(extack, "Invalid values in header for ipmr link dump request");
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 467151517023..d9cf06b297d1 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -4040,14 +4040,11 @@ out:
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);
-static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
+static void __net_exit nexthop_net_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list)
- flush_all_nexthops(net);
+ ASSERT_RTNL_NET(net);
+ flush_all_nexthops(net);
}
static void __net_exit nexthop_net_exit(struct net *net)
@@ -4072,7 +4069,7 @@ static int __net_init nexthop_net_init(struct net *net)
static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
.exit = nexthop_net_exit,
- .exit_batch_rtnl = nexthop_net_exit_batch_rtnl,
+ .exit_rtnl = nexthop_net_exit_rtnl,
};
static const struct rtnl_msg_handler nexthop_rtnl_msg_handlers[] __initconst = {
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 10cbeb76c274..ea2f01584379 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -191,6 +191,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED),
SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED),
SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK),
+ SNMP_MIB_ITEM("PAWSTimewait", LINUX_MIB_PAWS_TW_REJECTED),
SNMP_MIB_ITEM("DelayedACKs", LINUX_MIB_DELAYEDACKS),
SNMP_MIB_ITEM("DelayedACKLocked", LINUX_MIB_DELAYEDACKLOCKED),
SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST),
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 753704f75b2c..49cffbe83802 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2699,8 +2699,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4,
if (fl4->saddr) {
if (ipv4_is_multicast(fl4->saddr) ||
- ipv4_is_lbcast(fl4->saddr) ||
- ipv4_is_zeronet(fl4->saddr)) {
+ ipv4_is_lbcast(fl4->saddr)) {
rth = ERR_PTR(-EINVAL);
goto out;
}
@@ -3206,7 +3205,8 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
struct rtmsg *rtm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG(extack,
"ipv4: Invalid header for route get request");
return -EINVAL;
@@ -3216,7 +3216,6 @@ static int inet_rtm_valid_getroute_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv4_policy, extack);
- rtm = nlmsg_data(nlh);
if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) ||
(rtm->rtm_dst_len && rtm->rtm_dst_len != 32) ||
rtm->rtm_table || rtm->rtm_protocol ||
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6edc441b3702..e0e96f8fd47c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1160,6 +1160,8 @@ restart:
if (skb)
copy = size_goal - skb->len;
+ trace_tcp_sendmsg_locked(sk, msg, skb, size_goal);
+
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
bool first_skb;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8cce0d5489da..d5b5c32115d2 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2417,7 +2417,8 @@ do_time_wait:
goto csum_error;
}
- tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
+ tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
+ &drop_reason);
switch (tw_status) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index fb9349be36b8..43d7852ce07e 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -97,7 +97,8 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq,
*/
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
- const struct tcphdr *th, u32 *tw_isn)
+ const struct tcphdr *th, u32 *tw_isn,
+ enum skb_drop_reason *drop_reason)
{
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
u32 rcv_nxt = READ_ONCE(tcptw->tw_rcv_nxt);
@@ -245,8 +246,10 @@ kill:
return TCP_TW_SYN;
}
- if (paws_reject)
- __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWSESTABREJECTED);
+ if (paws_reject) {
+ *drop_reason = SKB_DROP_REASON_TCP_RFC7323_TW_PAWS;
+ __NET_INC_STATS(twsk_net(tw), LINUX_MIB_PAWS_TW_REJECTED);
+ }
if (!th->rst) {
/* In this case we must reset the TIMEWAIT timer.
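tcp_timewait_state_process() now reports why a segment bounced off a TIME-WAIT socket through an out-parameter, and PAWS rejections get their own MIB counter plus the SKB_DROP_REASON_TCP_RFC7323_TW_PAWS reason. The general out-parameter pattern, as a self-contained sketch (the names below are hypothetical, not the kernel's):

#include <stdio.h>

enum drop_reason { DROP_NOT_SPECIFIED, DROP_PAWS_TW };

/* The processing function fills in *reason only when it rejects, so the
 * caller can report a precise cause instead of a generic drop. */
static int timewait_process(int paws_reject, enum drop_reason *reason)
{
	if (paws_reject) {
		*reason = DROP_PAWS_TW;
		return -1;
	}
	return 0;
}

int main(void)
{
	enum drop_reason reason = DROP_NOT_SPECIFIED;

	if (timewait_process(1, &reason))
		printf("dropped, reason=%d\n", reason);	/* reason=1 */
	return 0;
}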
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2742cc7602bb..f9f5b92cf4b6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1942,8 +1942,8 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
error = -EAGAIN;
do {
spin_lock_bh(&queue->lock);
- skb = __skb_try_recv_from_queue(sk, queue, flags, off,
- err, &last);
+ skb = __skb_try_recv_from_queue(queue, flags, off, err,
+ &last);
if (skb) {
if (!(flags & MSG_PEEK))
udp_skb_destructor(sk, skb);
@@ -1964,8 +1964,8 @@ struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags,
spin_lock(&sk_queue->lock);
skb_queue_splice_tail_init(sk_queue, queue);
- skb = __skb_try_recv_from_queue(sk, queue, flags, off,
- err, &last);
+ skb = __skb_try_recv_from_queue(queue, flags, off, err,
+ &last);
if (skb && !(flags & MSG_PEEK))
udp_skb_dtor_locked(sk, skb);
spin_unlock(&sk_queue->lock);
@@ -2897,20 +2897,40 @@ void udp_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
- if (udp_test_bit(ENCAP_ENABLED, sk))
+ if (udp_test_bit(ENCAP_ENABLED, sk)) {
static_branch_dec(&udp_encap_needed_key);
+ udp_tunnel_cleanup_gro(sk);
+ }
}
}
+typedef struct sk_buff *(*udp_gro_receive_t)(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb);
+
static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
struct sock *sk)
{
#ifdef CONFIG_XFRM
+ udp_gro_receive_t new_gro_receive;
+
if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
- if (family == AF_INET)
- WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
- else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
- WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv);
+ if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
+ new_gro_receive = ipv6_stub->xfrm6_gro_udp_encap_rcv;
+ else
+ new_gro_receive = xfrm4_gro_udp_encap_rcv;
+
+ if (udp_sk(sk)->gro_receive != new_gro_receive) {
+ /*
+ * With IPV6_ADDRFORM the gro callback could change
+ * after being set; unregister the old one, if valid.
+ */
+ if (udp_sk(sk)->gro_receive)
+ udp_tunnel_update_gro_rcv(sk, false);
+
+ WRITE_ONCE(udp_sk(sk)->gro_receive, new_gro_receive);
+ udp_tunnel_update_gro_rcv(sk, true);
+ }
}
#endif
}
@@ -2960,6 +2980,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
break;
case UDP_ENCAP:
+ sockopt_lock_sock(sk);
switch (val) {
case 0:
#ifdef CONFIG_XFRM
@@ -2983,6 +3004,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
err = -ENOPROTOOPT;
break;
}
+ sockopt_release_sock(sk);
break;
case UDP_NO_CHECK6_TX:
@@ -3000,13 +3022,14 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
break;
case UDP_GRO:
-
+ sockopt_lock_sock(sk);
/* when enabling GRO, accept the related GSO packet type */
if (valbool)
udp_tunnel_encap_enable(sk);
udp_assign_bit(GRO_ENABLED, sk, valbool);
udp_assign_bit(ACCEPT_L4, sk, valbool);
set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
+ sockopt_release_sock(sk);
break;
/*
@@ -3810,6 +3833,15 @@ fallback:
static int __net_init udp_pernet_init(struct net *net)
{
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+ int i;
+
+ /* No tunnel is configured */
+ for (i = 0; i < ARRAY_SIZE(net->ipv4.udp_tunnel_gro); ++i) {
+ INIT_HLIST_HEAD(&net->ipv4.udp_tunnel_gro[i].list);
+ RCU_INIT_POINTER(net->ipv4.udp_tunnel_gro[i].sk, NULL);
+ }
+#endif
udp_sysctl_init(net);
udp_set_table(net);
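The UDP_ENCAP and UDP_GRO cases now take the socket lock, since set_xfrm_gro_udp_encap_rcv() may unregister and re-register the GRO callback. From userspace nothing changes; enabling UDP GRO is still the usual setsockopt, as in this minimal sketch (with a fallback define in case the libc headers predate UDP_GRO):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/udp.h>

#ifndef UDP_GRO
#define UDP_GRO 104	/* from include/uapi/linux/udp.h */
#endif

int main(void)
{
	int one = 1;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	if (setsockopt(fd, IPPROTO_UDP, UDP_GRO, &one, sizeof(one)) < 0)
		perror("setsockopt(UDP_GRO)");
	return 0;
}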
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 2c0725583be3..787b2947d3a0 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -12,6 +12,169 @@
#include <net/udp.h>
#include <net/protocol.h>
#include <net/inet_common.h>
+#include <net/udp_tunnel.h>
+
+#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
+
+/*
+ * Dummy GRO tunnel callback, exists mainly to avoid dangling/NULL
+ * values for the udp tunnel static call.
+ */
+static struct sk_buff *dummy_gro_rcv(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ NAPI_GRO_CB(skb)->flush = 1;
+ return NULL;
+}
+
+typedef struct sk_buff *(*udp_tunnel_gro_rcv_t)(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb);
+
+struct udp_tunnel_type_entry {
+ udp_tunnel_gro_rcv_t gro_receive;
+ refcount_t count;
+};
+
+#define UDP_MAX_TUNNEL_TYPES (IS_ENABLED(CONFIG_GENEVE) + \
+ IS_ENABLED(CONFIG_VXLAN) * 2 + \
+ IS_ENABLED(CONFIG_NET_FOU) * 2 + \
+ IS_ENABLED(CONFIG_XFRM) * 2)
+
+DEFINE_STATIC_CALL(udp_tunnel_gro_rcv, dummy_gro_rcv);
+static DEFINE_STATIC_KEY_FALSE(udp_tunnel_static_call);
+static struct mutex udp_tunnel_gro_type_lock;
+static struct udp_tunnel_type_entry udp_tunnel_gro_types[UDP_MAX_TUNNEL_TYPES];
+static unsigned int udp_tunnel_gro_type_nr;
+static DEFINE_SPINLOCK(udp_tunnel_gro_lock);
+
+void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add)
+{
+ bool is_ipv6 = sk->sk_family == AF_INET6;
+ struct udp_sock *tup, *up = udp_sk(sk);
+ struct udp_tunnel_gro *udp_tunnel_gro;
+
+ spin_lock(&udp_tunnel_gro_lock);
+ udp_tunnel_gro = &net->ipv4.udp_tunnel_gro[is_ipv6];
+ if (add)
+ hlist_add_head(&up->tunnel_list, &udp_tunnel_gro->list);
+ else if (up->tunnel_list.pprev)
+ hlist_del_init(&up->tunnel_list);
+
+ if (udp_tunnel_gro->list.first &&
+ !udp_tunnel_gro->list.first->next) {
+ tup = hlist_entry(udp_tunnel_gro->list.first, struct udp_sock,
+ tunnel_list);
+
+ rcu_assign_pointer(udp_tunnel_gro->sk, (struct sock *)tup);
+ } else {
+ RCU_INIT_POINTER(udp_tunnel_gro->sk, NULL);
+ }
+
+ spin_unlock(&udp_tunnel_gro_lock);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_lookup);
+
+void udp_tunnel_update_gro_rcv(struct sock *sk, bool add)
+{
+ struct udp_tunnel_type_entry *cur = NULL;
+ struct udp_sock *up = udp_sk(sk);
+ int i, old_gro_type_nr;
+
+ if (!UDP_MAX_TUNNEL_TYPES || !up->gro_receive)
+ return;
+
+ mutex_lock(&udp_tunnel_gro_type_lock);
+
+ /* Check if the static call is permanently disabled. */
+ if (udp_tunnel_gro_type_nr > UDP_MAX_TUNNEL_TYPES)
+ goto out;
+
+ for (i = 0; i < udp_tunnel_gro_type_nr; i++)
+ if (udp_tunnel_gro_types[i].gro_receive == up->gro_receive)
+ cur = &udp_tunnel_gro_types[i];
+
+ old_gro_type_nr = udp_tunnel_gro_type_nr;
+ if (add) {
+ /*
+ * Update the matching entry, if found, or add a new one
+ * if needed
+ */
+ if (cur) {
+ refcount_inc(&cur->count);
+ goto out;
+ }
+
+ if (unlikely(udp_tunnel_gro_type_nr == UDP_MAX_TUNNEL_TYPES)) {
+ pr_err_once("Too many UDP tunnel types, please increase UDP_MAX_TUNNEL_TYPES\n");
+ /* Ensure static call will never be enabled */
+ udp_tunnel_gro_type_nr = UDP_MAX_TUNNEL_TYPES + 1;
+ } else {
+ cur = &udp_tunnel_gro_types[udp_tunnel_gro_type_nr++];
+ refcount_set(&cur->count, 1);
+ cur->gro_receive = up->gro_receive;
+ }
+ } else {
+ /*
+ * The stack only cleans up tunnels that were successfully
+ * added, so the lookup on removal should never fail.
+ */
+ if (WARN_ON_ONCE(!cur))
+ goto out;
+
+ if (!refcount_dec_and_test(&cur->count))
+ goto out;
+
+ /* Avoid gaps, so that the enabled tunnel always has id 0 */
+ *cur = udp_tunnel_gro_types[--udp_tunnel_gro_type_nr];
+ }
+
+ if (udp_tunnel_gro_type_nr == 1) {
+ static_call_update(udp_tunnel_gro_rcv,
+ udp_tunnel_gro_types[0].gro_receive);
+ static_branch_enable(&udp_tunnel_static_call);
+ } else if (old_gro_type_nr == 1) {
+ static_branch_disable(&udp_tunnel_static_call);
+ static_call_update(udp_tunnel_gro_rcv, dummy_gro_rcv);
+ }
+
+out:
+ mutex_unlock(&udp_tunnel_gro_type_lock);
+}
+EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_rcv);
+
+static void udp_tunnel_gro_init(void)
+{
+ mutex_init(&udp_tunnel_gro_type_lock);
+}
+
+static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ if (static_branch_likely(&udp_tunnel_static_call)) {
+ if (unlikely(gro_recursion_inc_test(skb))) {
+ NAPI_GRO_CB(skb)->flush |= 1;
+ return NULL;
+ }
+ return static_call(udp_tunnel_gro_rcv)(sk, head, skb);
+ }
+ return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
+}
+
+#else
+
+static void udp_tunnel_gro_init(void) {}
+
+static struct sk_buff *udp_tunnel_gro_rcv(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ return call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
+}
+
+#endif
static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
@@ -622,7 +785,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */
skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr));
- pp = call_gro_receive_sk(udp_sk(sk)->gro_receive, sk, head, skb);
+ pp = udp_tunnel_gro_rcv(sk, head, skb);
out:
skb_gro_flush_final(skb, pp, flush);
@@ -635,8 +798,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct iphdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net_rcu(skb->dev);
+ struct sock *sk;
int iif, sdif;
+ sk = udp_tunnel_sk(net, false);
+ if (sk && dport == htons(sk->sk_num))
+ return sk;
+
inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
@@ -767,5 +935,7 @@ int __init udpv4_offload_init(void)
.gro_complete = udp4_gro_complete,
},
};
+
+ udp_tunnel_gro_init();
return inet_add_offload(&net_hotdata.udpv4_offload, IPPROTO_UDP);
}
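The machinery added above avoids an indirect call in the common case: while exactly one tunnel type is registered, a static branch routes every GRO receive through a static call to that type's handler; with zero or several types it falls back to the per-socket callback. A plain-pointer model of the same dispatch policy (the kernel uses static_branch/static_call instead of ordinary pointers):

#include <stdio.h>

typedef int (*rcv_fn)(int pkt);

static rcv_fn fast_path;	/* non-NULL only while one type exists */
static int nr_types;

static void register_type(rcv_fn fn)
{
	nr_types++;
	fast_path = (nr_types == 1) ? fn : NULL;
}

static int dispatch(rcv_fn per_socket, int pkt)
{
	if (fast_path)			/* static_branch_likely() analogue */
		return fast_path(pkt);	/* static_call() analogue */
	return per_socket(pkt);		/* indirect-call fallback */
}

static int vxlan_like_rcv(int pkt) { return pkt + 1; }

int main(void)
{
	register_type(vxlan_like_rcv);
	printf("%d\n", dispatch(vxlan_like_rcv, 41));	/* prints 42 */
	return 0;
}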
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index 619a53eb672d..2326548997d3 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -58,6 +58,15 @@ error:
}
EXPORT_SYMBOL(udp_sock_create4);
+static bool sk_saddr_any(struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
+#else
+ return !sk->sk_rcv_saddr;
+#endif
+}
+
void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct udp_tunnel_sock_cfg *cfg)
{
@@ -80,6 +89,12 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_sk(sk)->gro_complete = cfg->gro_complete;
udp_tunnel_encap_enable(sk);
+
+ udp_tunnel_update_gro_rcv(sk, true);
+
+ if (!sk->sk_dport && !sk->sk_bound_dev_if && sk_saddr_any(sk) &&
+ sk->sk_kern_sock)
+ udp_tunnel_update_gro_lookup(net, sk, true);
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);
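With this change, a tunnel driver that hands the core a kernel socket bound to the wildcard address (no connected peer, no bound device) is also wired into the new GRO socket lookup. A sketch of the driver side, using only long-standing udp_tunnel_sock_cfg fields (the encap id and names are placeholders):

#include <net/udp_tunnel.h>

static int my_tunnel_rcv(struct sock *sk, struct sk_buff *skb);
static struct sk_buff *my_tunnel_gro_receive(struct sock *sk,
					     struct list_head *head,
					     struct sk_buff *skb);

static void my_tunnel_open(struct net *net, struct socket *sock)
{
	struct udp_tunnel_sock_cfg cfg = {
		.encap_type	= 1,	/* placeholder encap id */
		.encap_rcv	= my_tunnel_rcv,
		.gro_receive	= my_tunnel_gro_receive,
	};

	/* Registers the gro_receive type and, for wildcard-bound kernel
	 * sockets, publishes the socket for the GRO fast lookup. */
	setup_udp_tunnel_sock(net, sock, &cfg);
}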
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9ba83f0c9928..ec6c4ca6d6d9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5346,12 +5346,12 @@ static int inet6_valid_dump_ifaddr_req(const struct nlmsghdr *nlh,
struct ifaddrmsg *ifm;
int err, i;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ ifm = nlmsg_payload(nlh, sizeof(*ifm));
+ if (!ifm) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for address dump request");
return -EINVAL;
}
- ifm = nlmsg_data(nlh);
if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address dump request");
return -EINVAL;
@@ -5484,7 +5484,8 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
struct ifaddrmsg *ifm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ ifm = nlmsg_payload(nlh, sizeof(*ifm));
+ if (!ifm) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request");
return -EINVAL;
}
@@ -5493,7 +5494,6 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv6_policy, extack);
- ifm = nlmsg_data(nlh);
if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
return -EINVAL;
@@ -6112,7 +6112,8 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
{
struct ifinfomsg *ifm;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+ ifm = nlmsg_payload(nlh, sizeof(*ifm));
+ if (!ifm) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for link dump request");
return -EINVAL;
}
@@ -6122,7 +6123,6 @@ static int inet6_valid_dump_ifinfo(const struct nlmsghdr *nlh,
return -EINVAL;
}
- ifm = nlmsg_data(nlh);
if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
ifm->ifi_change || ifm->ifi_index) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for dump request");
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index ab054f329e12..fb63ffbcfc64 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -473,12 +473,12 @@ static int ip6addrlbl_valid_dump_req(const struct nlmsghdr *nlh,
{
struct ifaddrlblmsg *ifal;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
+ ifal = nlmsg_payload(nlh, sizeof(*ifal));
+ if (!ifal) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for address label dump request");
return -EINVAL;
}
- ifal = nlmsg_data(nlh);
if (ifal->__ifal_reserved || ifal->ifal_prefixlen ||
ifal->ifal_flags || ifal->ifal_index || ifal->ifal_seq) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for address label dump request");
@@ -543,7 +543,8 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
struct ifaddrlblmsg *ifal;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifal))) {
+ ifal = nlmsg_payload(nlh, sizeof(*ifal));
+ if (!ifal) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request");
return -EINVAL;
}
@@ -552,7 +553,6 @@ static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*ifal), tb,
IFAL_MAX, ifal_policy, extack);
- ifal = nlmsg_data(nlh);
if (ifal->__ifal_reserved || ifal->ifal_flags || ifal->ifal_seq) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request");
return -EINVAL;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index f60ec8b0f8ea..acaff1296783 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -715,6 +715,7 @@ const struct proto_ops inet6_stream_ops = {
#endif
.set_rcvlowat = tcp_set_rcvlowat,
};
+EXPORT_SYMBOL_GPL(inet6_stream_ops);
const struct proto_ops inet6_dgram_ops = {
.family = PF_INET6,
@@ -881,7 +882,6 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
}
return false;
}
-EXPORT_SYMBOL_GPL(ipv6_opt_accepted);
static struct packet_type ipv6_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_IPV6),
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index dbcf556a35bb..8f500eaf33cf 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -54,7 +54,6 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
return dst;
}
-EXPORT_SYMBOL(inet6_csk_route_req);
static inline
struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
@@ -137,4 +136,3 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
dst = inet6_csk_route_socket(sk, &fl6);
return IS_ERR(dst) ? NULL : dst;
}
-EXPORT_SYMBOL_GPL(inet6_csk_update_pmtu);
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
index 09065187378e..40df8bdfaacd 100644
--- a/net/ipv6/ioam6_iptunnel.c
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -38,6 +38,7 @@ struct ioam6_lwt_freq {
};
struct ioam6_lwt {
+ struct dst_entry null_dst;
struct dst_cache cache;
struct ioam6_lwt_freq freq;
atomic_t pkt_cnt;
@@ -177,6 +178,14 @@ static int ioam6_build_state(struct net *net, struct nlattr *nla,
if (err)
goto free_lwt;
+ /* This "fake" dst_entry will be stored in a dst_cache, which will call
+ * dst_hold() and dst_release() on it. We must ensure that dst_destroy()
+ * will never be called: its initial refcount is 1, plus 1 while it is
+ * stored in the cache, and +1/-1 each time the cache is read and then
+ * released. The count therefore never drops to zero.
+ */
+ dst_init(&ilwt->null_dst, NULL, NULL, DST_OBSOLETE_NONE, DST_NOCOUNT);
+
atomic_set(&ilwt->pkt_cnt, 0);
ilwt->freq.k = freq_k;
ilwt->freq.n = freq_n;
@@ -336,7 +345,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb,
static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL;
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct dst_entry *dst = NULL;
struct ioam6_lwt *ilwt;
int err = -EINVAL;
u32 pkt_cnt;
@@ -344,7 +354,7 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
goto drop;
- ilwt = ioam6_lwt_state(dst->lwtstate);
+ ilwt = ioam6_lwt_state(orig_dst->lwtstate);
/* Check for insertion frequency (i.e., "k over n" insertions) */
pkt_cnt = atomic_fetch_inc(&ilwt->pkt_cnt);
@@ -352,9 +362,20 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
goto out;
local_bh_disable();
- cache_dst = dst_cache_get(&ilwt->cache);
+ dst = dst_cache_get(&ilwt->cache);
local_bh_enable();
+ /* The sentinel tells us that the destination did not change after
+ * transformation and that we must use orig_dst instead of the cache.
+ */
+ if (dst == &ilwt->null_dst) {
+ dst_release(dst);
+
+ dst = orig_dst;
+ /* keep refcount balance: dst_release() is called at the end */
+ dst_hold(dst);
+ }
+
switch (ilwt->mode) {
case IOAM6_IPTUNNEL_MODE_INLINE:
do_inline:
@@ -362,7 +383,7 @@ do_inline:
if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
goto out;
- err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst);
+ err = ioam6_do_inline(net, skb, &ilwt->tuninfo, dst);
if (unlikely(err))
goto drop;
@@ -372,7 +393,7 @@ do_encap:
/* Encapsulation (ip6ip6) */
err = ioam6_do_encap(net, skb, &ilwt->tuninfo,
ilwt->has_tunsrc, &ilwt->tunsrc,
- &ilwt->tundst, cache_dst);
+ &ilwt->tundst, dst);
if (unlikely(err))
goto drop;
@@ -390,7 +411,7 @@ do_encap:
goto drop;
}
- if (unlikely(!cache_dst)) {
+ if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct flowi6 fl6;
@@ -401,20 +422,27 @@ do_encap:
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = hdr->nexthdr;
- cache_dst = ip6_route_output(net, NULL, &fl6);
- if (cache_dst->error) {
- err = cache_dst->error;
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ err = dst->error;
goto drop;
}
- /* cache only if we don't create a dst reference loop */
- if (dst->lwtstate != cache_dst->lwtstate) {
- local_bh_disable();
- dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr);
- local_bh_enable();
- }
-
- err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev));
+ /* If the destination is the same after transformation (which is
+ * a valid use case for IOAM), then we don't want to add it to
+ * the cache in order to avoid a reference loop. Instead, we add
+ * our fake dst_entry to the cache as a way to detect this case.
+ * Otherwise, we add the resolved destination to the cache.
+ */
+ local_bh_disable();
+ if (orig_dst->lwtstate == dst->lwtstate)
+ dst_cache_set_ip6(&ilwt->cache,
+ &ilwt->null_dst, &fl6.saddr);
+ else
+ dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
if (unlikely(err))
goto drop;
}
@@ -422,22 +450,26 @@ do_encap:
/* avoid lwtunnel_output() reentry loop when destination is the same
* after transformation (e.g., with the inline mode)
*/
- if (dst->lwtstate != cache_dst->lwtstate) {
+ if (orig_dst->lwtstate != dst->lwtstate) {
skb_dst_drop(skb);
- skb_dst_set(skb, cache_dst);
+ skb_dst_set(skb, dst);
return dst_output(net, sk, skb);
}
out:
- dst_release(cache_dst);
- return dst->lwtstate->orig_output(net, sk, skb);
+ dst_release(dst);
+ return orig_dst->lwtstate->orig_output(net, sk, skb);
drop:
- dst_release(cache_dst);
+ dst_release(dst);
kfree_skb(skb);
return err;
}
static void ioam6_destroy_state(struct lwtunnel_state *lwt)
{
+ /* Since the refcount of our "fake" dst_entry never drops to zero (see
+ * the comment in ioam6_build_state()), there is no need to remove the
+ * per-cpu cache entries before calling dst_cache_destroy().
+ */
dst_cache_destroy(&ioam6_lwt_state(lwt)->cache);
}
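The null_dst entry is a sentinel: the cache must distinguish "nothing cached yet" (do the route lookup) from "cached: the destination is unchanged" (reuse the packet's original dst). Storing the address of a statically known object encodes the second state without creating a reference loop. The pattern in miniature (names are illustrative):

#include <stdio.h>

struct dst { int id; };

static struct dst sentinel;	/* stands in for ilwt->null_dst */
static struct dst *cache;

static struct dst *resolve(struct dst *orig)
{
	if (cache == &sentinel)
		return orig;	/* destination unchanged: keep orig */
	if (cache)
		return cache;	/* cached transformed destination */
	/* slow path: pretend the lookup found the destination unchanged */
	cache = &sentinel;
	return orig;
}

int main(void)
{
	struct dst orig = { 1 };

	printf("%d %d\n", resolve(&orig)->id, resolve(&orig)->id); /* 1 1 */
	return 0;
}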
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 957ca98fa70f..2dc9dcffe2ca 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1570,7 +1570,7 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = {
.flags = INET6_PROTO_FINAL,
};
-static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
+static void __net_exit ip6gre_exit_rtnl_net(struct net *net, struct list_head *head)
{
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
struct net_device *dev, *aux;
@@ -1587,16 +1587,16 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
for (h = 0; h < IP6_GRE_HASH_SIZE; h++) {
struct ip6_tnl *t;
- t = rtnl_dereference(ign->tunnels[prio][h]);
+ t = rtnl_net_dereference(net, ign->tunnels[prio][h]);
while (t) {
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev,
- head);
- t = rtnl_dereference(t->next);
+ unregister_netdevice_queue(t->dev, head);
+
+ t = rtnl_net_dereference(net, t->next);
}
}
}
@@ -1640,19 +1640,9 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list)
- ip6gre_destroy_tunnels(net, dev_to_kill);
-}
-
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit_batch_rtnl = ip6gre_exit_batch_rtnl,
+ .exit_rtnl = ip6gre_exit_rtnl_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 581bc6289081..ef052ccd9380 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -259,7 +259,7 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
}
/*
- * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * xmit an sk_buff (used by TCP and SCTP)
* Note : socket lock is not held for SYNACK packets, but might be modified
* by calls to skb_set_owner_w() and ipv6_local_error(),
* which are using proper atomic operations or spinlocks.
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a04dd1bb4b19..894d3158a6f0 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -2210,7 +2210,7 @@ static struct xfrm6_tunnel mplsip6_handler __read_mostly = {
.priority = 1,
};
-static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
+static void __net_exit ip6_tnl_exit_rtnl_net(struct net *net, struct list_head *list)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct net_device *dev, *aux;
@@ -2222,25 +2222,27 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head
unregister_netdevice_queue(dev, list);
for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
- t = rtnl_dereference(ip6n->tnls_r_l[h]);
+ t = rtnl_net_dereference(net, ip6n->tnls_r_l[h]);
while (t) {
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
unregister_netdevice_queue(t->dev, list);
- t = rtnl_dereference(t->next);
+
+ t = rtnl_net_dereference(net, t->next);
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
+ t = rtnl_net_dereference(net, ip6n->tnls_wc[0]);
while (t) {
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
unregister_netdevice_queue(t->dev, list);
- t = rtnl_dereference(t->next);
+
+ t = rtnl_net_dereference(net, t->next);
}
}
@@ -2287,19 +2289,9 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list)
- ip6_tnl_destroy_tunnels(net, dev_to_kill);
-}
-
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl,
+ .exit_rtnl = ip6_tnl_exit_rtnl_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 09ec4b0ad7dc..40464a88bca6 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1112,21 +1112,21 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.get_link_net = ip6_tnl_get_link_net,
};
-static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
- struct list_head *list)
+static void __net_exit vti6_exit_rtnl_net(struct net *net, struct list_head *list)
{
- int h;
+ struct vti6_net *ip6n = net_generic(net, vti6_net_id);
struct ip6_tnl *t;
+ int h;
for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
- t = rtnl_dereference(ip6n->tnls_r_l[h]);
+ t = rtnl_net_dereference(net, ip6n->tnls_r_l[h]);
while (t) {
unregister_netdevice_queue(t->dev, list);
- t = rtnl_dereference(t->next);
+ t = rtnl_net_dereference(net, t->next);
}
}
- t = rtnl_dereference(ip6n->tnls_wc[0]);
+ t = rtnl_net_dereference(net, ip6n->tnls_wc[0]);
if (t)
unregister_netdevice_queue(t->dev, list);
}
@@ -1170,22 +1170,9 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct vti6_net *ip6n;
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list) {
- ip6n = net_generic(net, vti6_net_id);
- vti6_destroy_tunnels(ip6n, dev_to_kill);
- }
-}
-
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit_batch_rtnl = vti6_exit_batch_rtnl,
+ .exit_rtnl = vti6_exit_rtnl_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 96f1621e2381..945857a8bfe3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6030,7 +6030,8 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
struct rtmsg *rtm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG_MOD(extack,
"Invalid header for get route request");
return -EINVAL;
@@ -6040,7 +6041,6 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_ipv6_policy, extack);
- rtm = nlmsg_data(nlh);
if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
(rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
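The netlink request validators here (and in net/mpls/af_mpls.c below) switch from an open-coded length check plus nlmsg_data() to nlmsg_payload(), which returns the payload only when nlmsg_len actually covers it and NULL otherwise, so the header can be safely read before nlmsg_parse_deprecated() runs. A sketch of the contract these call sites rely on (the real helper lives in include/net/netlink.h):

    /* Sketch: nlmsg_payload() as used by the converted validators. */
    static inline void *nlmsg_payload(const struct nlmsghdr *nlh, size_t len)
    {
            if (nlh->nlmsg_len < nlmsg_msg_size(len))
                    return NULL;    /* header too short for @len payload */
            return nlmsg_data(nlh);
    }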
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 9a0f32acb750..a72dbca9e8fc 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1804,8 +1804,7 @@ static struct xfrm_tunnel mplsip_handler __read_mostly = {
};
#endif
-static void __net_exit sit_destroy_tunnels(struct net *net,
- struct list_head *head)
+static void __net_exit sit_exit_rtnl_net(struct net *net, struct list_head *head)
{
struct sit_net *sitn = net_generic(net, sit_net_id);
struct net_device *dev, *aux;
@@ -1820,15 +1819,15 @@ static void __net_exit sit_destroy_tunnels(struct net *net,
for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) {
struct ip_tunnel *t;
- t = rtnl_dereference(sitn->tunnels[prio][h]);
+ t = rtnl_net_dereference(net, sitn->tunnels[prio][h]);
while (t) {
/* If dev is in the same netns, it has already
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev,
- head);
- t = rtnl_dereference(t->next);
+ unregister_netdevice_queue(t->dev, head);
+
+ t = rtnl_net_dereference(net, t->next);
}
}
}
@@ -1881,19 +1880,9 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list,
- struct list_head *dev_to_kill)
-{
- struct net *net;
-
- ASSERT_RTNL();
- list_for_each_entry(net, net_list, exit_list)
- sit_destroy_tunnels(net, dev_to_kill);
-}
-
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit_batch_rtnl = sit_exit_batch_rtnl,
+ .exit_rtnl = sit_exit_rtnl_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b03c223eda4f..7dcb33f879ee 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1970,7 +1970,8 @@ do_time_wait:
goto csum_error;
}
- tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn);
+ tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn,
+ &drop_reason);
switch (tw_status) {
case TCP_TW_SYN:
{
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 024458ef163c..7317f8e053f1 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -46,6 +46,7 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
+#include <net/udp_tunnel.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
@@ -1825,6 +1826,7 @@ void udpv6_destroy_sock(struct sock *sk)
if (udp_test_bit(ENCAP_ENABLED, sk)) {
static_branch_dec(&udpv6_encap_needed_key);
udp_encap_disable();
+ udp_tunnel_cleanup_gro(sk);
}
}
}
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 404212dfc99a..d8445ac1b2e4 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -118,8 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net_rcu(skb->dev);
+ struct sock *sk;
int iif, sdif;
+ sk = udp_tunnel_sk(net, true);
+ if (sk && dport == htons(sk->sk_num))
+ return sk;
+
inet6_get_iif_sdif(skb, &iif, &sdif);
return __udp6_lib_lookup(net, &iph->saddr, sport,
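Together with the udp_tunnel_cleanup_gro() call added to udpv6_destroy_sock() above, this teaches IPv6 UDP GRO about the netns's cached tunnel socket: if one exists and the packet's destination port matches, the hash-table lookup is skipped entirely. Note the byte-order detail: sk->sk_num is the bound port in host order, so it is converted with htons() before comparing against the big-endian dport. A sketch of the fast path's shape, with a hypothetical wrapper name:

    /* Sketch: single cached tunnel socket fast path (hypothetical helper). */
    static struct sock *tunnel_gro_fast_lookup(struct net *net, __be16 dport)
    {
            struct sock *sk = udp_tunnel_sk(net, true);   /* IPv6 side */

            if (sk && dport == htons(sk->sk_num))   /* host -> wire order */
                    return sk;
            return NULL;    /* fall back to __udp6_lib_lookup() */
    }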
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 1f63b32d76d6..d536c97144e9 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -2095,12 +2095,12 @@ static int mpls_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh,
struct rtmsg *rtm;
int err, i;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG_MOD(extack, "Invalid header for FIB dump request");
return -EINVAL;
}
- rtm = nlmsg_data(nlh);
if (rtm->rtm_dst_len || rtm->rtm_src_len || rtm->rtm_tos ||
rtm->rtm_table || rtm->rtm_scope || rtm->rtm_type ||
rtm->rtm_flags) {
@@ -2288,7 +2288,8 @@ static int mpls_valid_getroute_req(struct sk_buff *skb,
struct rtmsg *rtm;
int i, err;
- if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ rtm = nlmsg_payload(nlh, sizeof(*rtm));
+ if (!rtm) {
NL_SET_ERR_MSG_MOD(extack,
"Invalid header for get route request");
return -EINVAL;
@@ -2298,7 +2299,6 @@ static int mpls_valid_getroute_req(struct sk_buff *skb,
return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
rtm_mpls_policy, extack);
- rtm = nlmsg_data(nlh);
if ((rtm->rtm_dst_len && rtm->rtm_dst_len != 20) ||
rtm->rtm_src_len || rtm->rtm_tos || rtm->rtm_table ||
rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) {
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 19eb9292bd60..0c24545f0e8d 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -28,6 +28,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC),
SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX),
SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC),
+ SNMP_MIB_ITEM("MPJoinRejected", MPTCP_MIB_JOINREJECTED),
SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX),
SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR),
SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 128282982843..250c6b77977e 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -23,6 +23,7 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */
MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */
MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */
+ MPTCP_MIB_JOINREJECTED, /* The PM rejected the JOIN request */
MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */
MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */
MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 31747f974941..1306d4dc287b 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -151,10 +151,13 @@ bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *entry;
+ bool ret;
entry = mptcp_pm_del_add_timer(msk, addr, false);
+ ret = entry;
kfree(entry);
- return entry;
+
+ return ret;
}
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
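The mptcp_remove_anno_list_by_saddr() change avoids evaluating `entry` after it has been kfree()d: the old code returned the freed pointer (implicitly converted to bool), which is a use-after-free read as far as the C object model and KASAN are concerned, even though only the truthiness was wanted. Capturing the result first is the standard fix, as in this sketch with a hypothetical name:

    /* Sketch: record "was an entry found?" before freeing it. */
    static bool remove_and_report(struct mptcp_sock *msk,
                                  const struct mptcp_addr_info *addr)
    {
            struct mptcp_pm_add_entry *entry;
            bool found;

            entry = mptcp_pm_del_add_timer(msk, addr, false);
            found = entry != NULL;    /* snapshot before the pointer dies */
            kfree(entry);             /* kfree(NULL) is a no-op */
            return found;
    }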
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 44f7ab463d75..26ffa06c21e8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3527,8 +3527,10 @@ bool mptcp_finish_join(struct sock *ssk)
return true;
}
- if (!mptcp_pm_allow_new_subflow(msk))
+ if (!mptcp_pm_allow_new_subflow(msk)) {
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_JOINREJECTED);
goto err_prohibited;
+ }
/* If we can't acquire msk socket lock here, let the release callback
* handle it
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d409586b5977..7aa38d74fef6 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -744,6 +744,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
struct mptcp_sched_ops *mptcp_sched_find(const char *name);
+int mptcp_validate_scheduler(struct mptcp_sched_ops *sched);
int mptcp_register_scheduler(struct mptcp_sched_ops *sched);
void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched);
void mptcp_sched_init(void);
diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index c16c6fbd4ba2..1e59072d478c 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -16,8 +16,7 @@
static DEFINE_SPINLOCK(mptcp_sched_list_lock);
static LIST_HEAD(mptcp_sched_list);
-static int mptcp_sched_default_get_send(struct mptcp_sock *msk,
- struct mptcp_sched_data *data)
+static int mptcp_sched_default_get_send(struct mptcp_sock *msk)
{
struct sock *ssk;
@@ -29,8 +28,7 @@ static int mptcp_sched_default_get_send(struct mptcp_sock *msk,
return 0;
}
-static int mptcp_sched_default_get_retrans(struct mptcp_sock *msk,
- struct mptcp_sched_data *data)
+static int mptcp_sched_default_get_retrans(struct mptcp_sock *msk)
{
struct sock *ssk;
@@ -84,10 +82,23 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen)
rcu_read_unlock();
}
-int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+int mptcp_validate_scheduler(struct mptcp_sched_ops *sched)
{
- if (!sched->get_send)
+ if (!sched->get_send) {
+ pr_err("%s does not implement required ops\n", sched->name);
return -EINVAL;
+ }
+
+ return 0;
+}
+
+int mptcp_register_scheduler(struct mptcp_sched_ops *sched)
+{
+ int ret;
+
+ ret = mptcp_validate_scheduler(sched);
+ if (ret)
+ return ret;
spin_lock(&mptcp_sched_list_lock);
if (mptcp_sched_find(sched->name)) {
@@ -157,7 +168,6 @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
int mptcp_sched_get_send(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
- struct mptcp_sched_data *data = NULL;
msk_owned_by_me(msk);
@@ -178,14 +188,13 @@ int mptcp_sched_get_send(struct mptcp_sock *msk)
}
if (msk->sched == &mptcp_sched_default || !msk->sched)
- return mptcp_sched_default_get_send(msk, data);
- return msk->sched->get_send(msk, data);
+ return mptcp_sched_default_get_send(msk);
+ return msk->sched->get_send(msk);
}
int mptcp_sched_get_retrans(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
- struct mptcp_sched_data *data = NULL;
msk_owned_by_me(msk);
@@ -199,8 +208,8 @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk)
}
if (msk->sched == &mptcp_sched_default || !msk->sched)
- return mptcp_sched_default_get_retrans(msk, data);
+ return mptcp_sched_default_get_retrans(msk);
if (msk->sched->get_retrans)
- return msk->sched->get_retrans(msk, data);
- return msk->sched->get_send(msk, data);
+ return msk->sched->get_retrans(msk);
+ return msk->sched->get_send(msk);
}
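The scheduler API loses the always-NULL `struct mptcp_sched_data *` argument from ->get_send()/->get_retrans(), and the required-ops check moves into mptcp_validate_scheduler() so it can be shared with callers other than mptcp_register_scheduler(). A minimal scheduler against the new signatures might look like the sketch below; the my_* names are hypothetical and the selection policy is deliberately trivial:

    /* Sketch: a scheduler using the new argument-less hooks. */
    static int my_get_send(struct mptcp_sock *msk)
    {
            struct mptcp_subflow_context *subflow;

            mptcp_for_each_subflow(msk, subflow) {
                    /* pick the first subflow and mark it for sending */
                    mptcp_subflow_set_scheduled(subflow, true);
                    return 0;
            }
            return -EINVAL;    /* no subflow available */
    }

    static struct mptcp_sched_ops my_sched = {
            .name     = "my_sched",
            .owner    = THIS_MODULE,
            .get_send = my_get_send,   /* required; checked at registration */
    };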
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 24c2de1891bd..15613d691bfe 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -247,6 +247,7 @@ again:
if (unlikely(req->syncookie)) {
if (!mptcp_can_accept_new_subflow(subflow_req->msk)) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINREJECTED);
subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
return -EPERM;
}
@@ -745,15 +746,11 @@ struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *op
EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc);
/* validate hmac received in third ACK */
-static bool subflow_hmac_valid(const struct request_sock *req,
+static bool subflow_hmac_valid(const struct mptcp_subflow_request_sock *subflow_req,
const struct mptcp_options_received *mp_opt)
{
- const struct mptcp_subflow_request_sock *subflow_req;
+ struct mptcp_sock *msk = subflow_req->msk;
u8 hmac[SHA256_DIGEST_SIZE];
- struct mptcp_sock *msk;
-
- subflow_req = mptcp_subflow_rsk(req);
- msk = subflow_req->msk;
subflow_generate_hmac(READ_ONCE(msk->remote_key),
READ_ONCE(msk->local_key),
@@ -899,13 +896,14 @@ create_child:
goto dispose_child;
}
- if (!subflow_hmac_valid(req, &mp_opt)) {
+ if (!subflow_hmac_valid(subflow_req, &mp_opt)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
goto dispose_child;
}
if (!mptcp_can_accept_new_subflow(owner)) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINREJECTED);
subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);
goto dispose_child;
}
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 4e0842df5234..2c260f33b55c 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -143,16 +143,15 @@ struct ncsi_channel_vlan_filter {
};
struct ncsi_channel_stats {
- u32 hnc_cnt_hi; /* Counter cleared */
- u32 hnc_cnt_lo; /* Counter cleared */
- u32 hnc_rx_bytes; /* Rx bytes */
- u32 hnc_tx_bytes; /* Tx bytes */
- u32 hnc_rx_uc_pkts; /* Rx UC packets */
- u32 hnc_rx_mc_pkts; /* Rx MC packets */
- u32 hnc_rx_bc_pkts; /* Rx BC packets */
- u32 hnc_tx_uc_pkts; /* Tx UC packets */
- u32 hnc_tx_mc_pkts; /* Tx MC packets */
- u32 hnc_tx_bc_pkts; /* Tx BC packets */
+ u64 hnc_cnt; /* Counter cleared */
+ u64 hnc_rx_bytes; /* Rx bytes */
+ u64 hnc_tx_bytes; /* Tx bytes */
+ u64 hnc_rx_uc_pkts; /* Rx UC packets */
+ u64 hnc_rx_mc_pkts; /* Rx MC packets */
+ u64 hnc_rx_bc_pkts; /* Rx BC packets */
+ u64 hnc_tx_uc_pkts; /* Tx UC packets */
+ u64 hnc_tx_mc_pkts; /* Tx MC packets */
+ u64 hnc_tx_bc_pkts; /* Tx BC packets */
u32 hnc_fcs_err; /* FCS errors */
u32 hnc_align_err; /* Alignment errors */
u32 hnc_false_carrier; /* False carrier detection */
@@ -181,7 +180,7 @@ struct ncsi_channel_stats {
u32 hnc_tx_1023_frames; /* Tx 512-1023 bytes frames */
u32 hnc_tx_1522_frames; /* Tx 1024-1522 bytes frames */
u32 hnc_tx_9022_frames; /* Tx 1523-9022 bytes frames */
- u32 hnc_rx_valid_bytes; /* Rx valid bytes */
+ u64 hnc_rx_valid_bytes; /* Rx valid bytes */
u32 hnc_rx_runt_pkts; /* Rx error runt packets */
u32 hnc_rx_jabber_pkts; /* Rx error jabber packets */
u32 ncsi_rx_cmds; /* Rx NCSI commands */
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index f2f3b5c1b941..24edb2737972 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -252,16 +252,15 @@ struct ncsi_rsp_gp_pkt {
/* Get Controller Packet Statistics */
struct ncsi_rsp_gcps_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
- __be32 cnt_hi; /* Counter cleared */
- __be32 cnt_lo; /* Counter cleared */
- __be32 rx_bytes; /* Rx bytes */
- __be32 tx_bytes; /* Tx bytes */
- __be32 rx_uc_pkts; /* Rx UC packets */
- __be32 rx_mc_pkts; /* Rx MC packets */
- __be32 rx_bc_pkts; /* Rx BC packets */
- __be32 tx_uc_pkts; /* Tx UC packets */
- __be32 tx_mc_pkts; /* Tx MC packets */
- __be32 tx_bc_pkts; /* Tx BC packets */
+ __be64 cnt; /* Counter cleared */
+ __be64 rx_bytes; /* Rx bytes */
+ __be64 tx_bytes; /* Tx bytes */
+ __be64 rx_uc_pkts; /* Rx UC packets */
+ __be64 rx_mc_pkts; /* Rx MC packets */
+ __be64 rx_bc_pkts; /* Rx BC packets */
+ __be64 tx_uc_pkts; /* Tx UC packets */
+ __be64 tx_mc_pkts; /* Tx MC packets */
+ __be64 tx_bc_pkts; /* Tx BC packets */
__be32 fcs_err; /* FCS errors */
__be32 align_err; /* Alignment errors */
__be32 false_carrier; /* False carrier detection */
@@ -290,11 +289,11 @@ struct ncsi_rsp_gcps_pkt {
__be32 tx_1023_frames; /* Tx 512-1023 bytes frames */
__be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */
__be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */
- __be32 rx_valid_bytes; /* Rx valid bytes */
+ __be64 rx_valid_bytes; /* Rx valid bytes */
__be32 rx_runt_pkts; /* Rx error runt packets */
__be32 rx_jabber_pkts; /* Rx error jabber packets */
__be32 checksum; /* Checksum */
-};
+} __packed __aligned(4);
/* Get NCSI Statistics */
struct ncsi_rsp_gns_pkt {
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 4a8ce2949fae..8668888c5a2f 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -926,16 +926,15 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr)
/* Update HNC's statistics */
ncs = &nc->stats;
- ncs->hnc_cnt_hi = ntohl(rsp->cnt_hi);
- ncs->hnc_cnt_lo = ntohl(rsp->cnt_lo);
- ncs->hnc_rx_bytes = ntohl(rsp->rx_bytes);
- ncs->hnc_tx_bytes = ntohl(rsp->tx_bytes);
- ncs->hnc_rx_uc_pkts = ntohl(rsp->rx_uc_pkts);
- ncs->hnc_rx_mc_pkts = ntohl(rsp->rx_mc_pkts);
- ncs->hnc_rx_bc_pkts = ntohl(rsp->rx_bc_pkts);
- ncs->hnc_tx_uc_pkts = ntohl(rsp->tx_uc_pkts);
- ncs->hnc_tx_mc_pkts = ntohl(rsp->tx_mc_pkts);
- ncs->hnc_tx_bc_pkts = ntohl(rsp->tx_bc_pkts);
+ ncs->hnc_cnt = be64_to_cpu(rsp->cnt);
+ ncs->hnc_rx_bytes = be64_to_cpu(rsp->rx_bytes);
+ ncs->hnc_tx_bytes = be64_to_cpu(rsp->tx_bytes);
+ ncs->hnc_rx_uc_pkts = be64_to_cpu(rsp->rx_uc_pkts);
+ ncs->hnc_rx_mc_pkts = be64_to_cpu(rsp->rx_mc_pkts);
+ ncs->hnc_rx_bc_pkts = be64_to_cpu(rsp->rx_bc_pkts);
+ ncs->hnc_tx_uc_pkts = be64_to_cpu(rsp->tx_uc_pkts);
+ ncs->hnc_tx_mc_pkts = be64_to_cpu(rsp->tx_mc_pkts);
+ ncs->hnc_tx_bc_pkts = be64_to_cpu(rsp->tx_bc_pkts);
ncs->hnc_fcs_err = ntohl(rsp->fcs_err);
ncs->hnc_align_err = ntohl(rsp->align_err);
ncs->hnc_false_carrier = ntohl(rsp->false_carrier);
@@ -964,7 +963,7 @@ static int ncsi_rsp_handler_gcps(struct ncsi_request *nr)
ncs->hnc_tx_1023_frames = ntohl(rsp->tx_1023_frames);
ncs->hnc_tx_1522_frames = ntohl(rsp->tx_1522_frames);
ncs->hnc_tx_9022_frames = ntohl(rsp->tx_9022_frames);
- ncs->hnc_rx_valid_bytes = ntohl(rsp->rx_valid_bytes);
+ ncs->hnc_rx_valid_bytes = be64_to_cpu(rsp->rx_valid_bytes);
ncs->hnc_rx_runt_pkts = ntohl(rsp->rx_runt_pkts);
ncs->hnc_rx_jabber_pkts = ntohl(rsp->rx_jabber_pkts);
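Widening the NC-SI controller statistics to 64 bits changes the wire layout of struct ncsi_rsp_gcps_pkt: the new __be64 members sit at 4-byte offsets because the response header is not a multiple of 8, so `__packed __aligned(4)` stops the compiler from padding the struct or assuming 8-byte alignment, and be64_to_cpu() replaces ntohl() for those fields. A hypothetical compile-time check making that assumption explicit:

    /* Sketch: not in the patch; documents the layout assumption. */
    static_assert(__alignof__(struct ncsi_rsp_gcps_pkt) == 4,
                  "GCPS response is only 4-byte aligned on the wire");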
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index a20986806fea..f60b81c66078 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -67,6 +67,29 @@ config RXKAD
See Documentation/networking/rxrpc.rst.
+config RXGK
+ bool "RxRPC GSSAPI security"
+ select CRYPTO_KRB5
+ select CRYPTO_MANAGER
+ select CRYPTO_KRB5ENC
+ select CRYPTO_AUTHENC
+ select CRYPTO_SKCIPHER
+ select CRYPTO_HASH_INFO
+ select CRYPTO_HMAC
+ select CRYPTO_CMAC
+ select CRYPTO_SHA1
+ select CRYPTO_SHA256
+ select CRYPTO_SHA512
+ select CRYPTO_CBC
+ select CRYPTO_CTS
+ select CRYPTO_AES
+ select CRYPTO_CAMELLIA
+ help
+ Provide the GSSAPI-based RxGK security class for AFS. Keys are added
+ with add_key().
+
+ See Documentation/networking/rxrpc.rst.
+
config RXPERF
tristate "RxRPC test service"
help
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 210b75e3179e..c0542bae719e 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -24,6 +24,7 @@ rxrpc-y := \
local_object.o \
misc.o \
net_ns.o \
+ oob.o \
output.o \
peer_event.o \
peer_object.o \
@@ -39,6 +40,9 @@ rxrpc-y := \
rxrpc-$(CONFIG_PROC_FS) += proc.o
rxrpc-$(CONFIG_RXKAD) += rxkad.o
rxrpc-$(CONFIG_SYSCTL) += sysctl.o
-
+rxrpc-$(CONFIG_RXGK) += \
+ rxgk.o \
+ rxgk_app.o \
+ rxgk_kdf.o
obj-$(CONFIG_RXPERF) += rxperf.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 86873399f7d5..3a558c1a541e 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -265,7 +265,10 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @gfp: Allocation flags
*
* Look up or create a remote transport endpoint record for the specified
- * address and return it with a ref held.
+ * address.
+ *
+ * Return: The peer record found with a reference, %NULL if no record is found
+ * or a negative error code if the address is invalid or unsupported.
*/
struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
struct sockaddr_rxrpc *srx, gfp_t gfp)
@@ -283,9 +286,11 @@ EXPORT_SYMBOL(rxrpc_kernel_lookup_peer);
/**
* rxrpc_kernel_get_peer - Get a reference on a peer
- * @peer: The peer to get a reference on.
+ * @peer: The peer to get a reference on (may be NULL).
+ *
+ * Get a reference for a remote peer record (if not NULL).
*
- * Get a record for the remote peer in a call.
+ * Return: The @peer argument.
*/
struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer)
{
@@ -296,6 +301,8 @@ EXPORT_SYMBOL(rxrpc_kernel_get_peer);
/**
* rxrpc_kernel_put_peer - Allow a kernel app to drop a peer reference
* @peer: The peer to drop a ref on
+ *
+ * Drop a reference on a peer record.
*/
void rxrpc_kernel_put_peer(struct rxrpc_peer *peer)
{
@@ -320,10 +327,12 @@ EXPORT_SYMBOL(rxrpc_kernel_put_peer);
*
* Allow a kernel service to begin a call on the nominated socket. This just
* sets up all the internal tracking structures and allocates connection and
- * call IDs as appropriate. The call to be used is returned.
+ * call IDs as appropriate.
*
* The default socket destination address and security may be overridden by
* supplying @srx and @key.
+ *
+ * Return: The new call or an error code.
*/
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct rxrpc_peer *peer,
@@ -437,6 +446,8 @@ EXPORT_SYMBOL(rxrpc_kernel_put_call);
*
* Allow a kernel service to find out whether a call is still alive - whether
* it has completed successfully and all received data has been consumed.
+ *
+ * Return: %true if the call is still ongoing and %false if it has completed.
*/
bool rxrpc_kernel_check_life(const struct socket *sock,
const struct rxrpc_call *call)
@@ -456,6 +467,8 @@ EXPORT_SYMBOL(rxrpc_kernel_check_life);
*
* Allow a kernel service to retrieve the epoch value from a service call to
* see if the client at the other end rebooted.
+ *
+ * Return: The epoch of the call's connection.
*/
u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call)
{
@@ -464,24 +477,20 @@ u32 rxrpc_kernel_get_epoch(struct socket *sock, struct rxrpc_call *call)
EXPORT_SYMBOL(rxrpc_kernel_get_epoch);
/**
- * rxrpc_kernel_new_call_notification - Get notifications of new calls
- * @sock: The socket to intercept received messages on
- * @notify_new_call: Function to be called when new calls appear
- * @discard_new_call: Function to discard preallocated calls
+ * rxrpc_kernel_set_notifications - Set table of callback operations
+ * @sock: The socket to install table upon
+ * @app_ops: Callback operation table to set
*
- * Allow a kernel service to be given notifications about new calls.
+ * Allow a kernel service to set a table of event notifications on a socket.
*/
-void rxrpc_kernel_new_call_notification(
- struct socket *sock,
- rxrpc_notify_new_call_t notify_new_call,
- rxrpc_discard_new_call_t discard_new_call)
+void rxrpc_kernel_set_notifications(struct socket *sock,
+ const struct rxrpc_kernel_ops *app_ops)
{
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- rx->notify_new_call = notify_new_call;
- rx->discard_new_call = discard_new_call;
+ rx->app_ops = app_ops;
}
-EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
+EXPORT_SYMBOL(rxrpc_kernel_set_notifications);
/**
* rxrpc_kernel_set_max_life - Set maximum lifespan on a call
@@ -624,7 +633,10 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
fallthrough;
case RXRPC_SERVER_BOUND:
case RXRPC_SERVER_LISTENING:
- ret = rxrpc_do_sendmsg(rx, m, len);
+ if (m->msg_flags & MSG_OOB)
+ ret = rxrpc_sendmsg_oob(rx, m, len);
+ else
+ ret = rxrpc_do_sendmsg(rx, m, len);
/* The socket has been unlocked */
goto out;
default:
@@ -659,7 +671,7 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- unsigned int min_sec_level;
+ unsigned int min_sec_level, val;
u16 service_upgrade[2];
int ret;
@@ -740,6 +752,26 @@ static int rxrpc_setsockopt(struct socket *sock, int level, int optname,
rx->service_upgrade.to = service_upgrade[1];
goto success;
+ case RXRPC_MANAGE_RESPONSE:
+ ret = -EINVAL;
+ if (optlen != sizeof(unsigned int))
+ goto error;
+ ret = -EISCONN;
+ if (rx->sk.sk_state != RXRPC_UNBOUND)
+ goto error;
+ ret = copy_safe_from_sockptr(&val, sizeof(val),
+ optval, optlen);
+ if (ret)
+ goto error;
+ ret = -EINVAL;
+ if (val > 1)
+ goto error;
+ if (val)
+ set_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags);
+ else
+ clear_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags);
+ goto success;
+
default:
break;
}
@@ -846,6 +878,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
rx->calls = RB_ROOT;
spin_lock_init(&rx->incoming_lock);
+ skb_queue_head_init(&rx->recvmsg_oobq);
+ rx->pending_oobq = RB_ROOT;
INIT_LIST_HEAD(&rx->sock_calls);
INIT_LIST_HEAD(&rx->to_be_accepted);
INIT_LIST_HEAD(&rx->recvmsg_q);
@@ -879,8 +913,10 @@ static int rxrpc_shutdown(struct socket *sock, int flags)
lock_sock(sk);
if (sk->sk_state < RXRPC_CLOSE) {
+ spin_lock_irq(&rx->recvmsg_lock);
sk->sk_state = RXRPC_CLOSE;
sk->sk_shutdown = SHUTDOWN_MASK;
+ spin_unlock_irq(&rx->recvmsg_lock);
} else {
ret = -ESHUTDOWN;
}
@@ -892,12 +928,30 @@ static int rxrpc_shutdown(struct socket *sock, int flags)
}
/*
+ * Purge the out-of-band queue.
+ */
+static void rxrpc_purge_oob_queue(struct sock *sk)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&rx->recvmsg_oobq)))
+ rxrpc_kernel_free_oob(skb);
+ while (!RB_EMPTY_ROOT(&rx->pending_oobq)) {
+ skb = rb_entry(rx->pending_oobq.rb_node, struct sk_buff, rbnode);
+ rb_erase(&skb->rbnode, &rx->pending_oobq);
+ rxrpc_kernel_free_oob(skb);
+ }
+}
+
+/*
* RxRPC socket destructor
*/
static void rxrpc_sock_destructor(struct sock *sk)
{
_enter("%p", sk);
+ rxrpc_purge_oob_queue(sk);
rxrpc_purge_queue(&sk->sk_receive_queue);
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
@@ -936,7 +990,9 @@ static int rxrpc_release_sock(struct sock *sk)
break;
}
+ spin_lock_irq(&rx->recvmsg_lock);
sk->sk_state = RXRPC_CLOSE;
+ spin_unlock_irq(&rx->recvmsg_lock);
if (rx->local && rx->local->service == rx) {
write_lock(&rx->local->services_lock);
@@ -948,6 +1004,7 @@ static int rxrpc_release_sock(struct sock *sk)
rxrpc_discard_prealloc(rx);
rxrpc_release_calls_on_socket(rx);
flush_workqueue(rxrpc_workqueue);
+ rxrpc_purge_oob_queue(sk);
rxrpc_purge_queue(&sk->sk_receive_queue);
rxrpc_unuse_local(rx->local, rxrpc_local_unuse_release_sock);
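The new RXRPC_MANAGE_RESPONSE socket option is a strict boolean (only 0 or 1 accepted) and must be set while the socket is still in RXRPC_UNBOUND state, otherwise -EISCONN is returned; it tells the socket layer that userspace wants CHALLENGE packets surfaced as out-of-band messages (via the MSG_OOB sendmsg()/recvmsg paths also added here) so it can build the security RESPONSE itself. A sketch of enabling it from userspace, assuming the usual AF_RXRPC setup:

    /* Sketch: opt in to user-managed RESPONSEs, before bind(). */
    int fd = socket(AF_RXRPC, SOCK_DGRAM, PF_INET6);
    unsigned int one = 1;

    if (setsockopt(fd, SOL_RXRPC, RXRPC_MANAGE_RESPONSE,
                   &one, sizeof(one)) < 0)
            perror("RXRPC_MANAGE_RESPONSE");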
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 3cc3af15086f..ca62a1db3286 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -31,6 +31,7 @@ struct key_preparsed_payload;
struct rxrpc_connection;
struct rxrpc_txbuf;
struct rxrpc_txqueue;
+struct rxgk_context;
/*
* Mark applied to socket buffers in skb->mark. skb->priority is used
@@ -39,6 +40,7 @@ struct rxrpc_txqueue;
enum rxrpc_skb_mark {
RXRPC_SKB_MARK_PACKET, /* Received packet */
RXRPC_SKB_MARK_ERROR, /* Error notification */
+ RXRPC_SKB_MARK_CHALLENGE, /* Challenge notification */
RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
@@ -146,10 +148,12 @@ struct rxrpc_backlog {
struct rxrpc_sock {
/* WARNING: sk has to be the first member */
struct sock sk;
- rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */
- rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */
+ const struct rxrpc_kernel_ops *app_ops; /* Table of kernel app notification funcs */
struct rxrpc_local *local; /* local endpoint */
struct rxrpc_backlog *backlog; /* Preallocation for services */
+ struct sk_buff_head recvmsg_oobq; /* OOB messages for recvmsg to pick up */
+ struct rb_root pending_oobq; /* OOB messages awaiting userspace to respond to */
+ u64 oob_id_counter; /* OOB message ID counter */
spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */
struct list_head sock_calls; /* List of calls owned by this socket */
struct list_head to_be_accepted; /* calls awaiting acceptance */
@@ -160,6 +164,7 @@ struct rxrpc_sock {
struct rb_root calls; /* User ID -> call mapping */
unsigned long flags;
#define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */
+#define RXRPC_SOCK_MANAGE_RESPONSE 1 /* User wants to manage RESPONSE packets */
rwlock_t call_lock; /* lock for calls */
u32 min_sec_level; /* minimum security level */
#define RXRPC_SECURITY_MAX RXRPC_SECURITY_ENCRYPT
@@ -203,7 +208,7 @@ struct rxrpc_host_header {
*/
struct rxrpc_skb_priv {
union {
- struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
+ struct rxrpc_connection *poke_conn; /* Conn referred to (poke packet) */
struct {
u16 offset; /* Offset of data */
u16 len; /* Length of data */
@@ -217,6 +222,19 @@ struct rxrpc_skb_priv {
u16 nr_acks; /* Number of acks+nacks */
u8 reason; /* Reason for ack */
} ack;
+ struct {
+ struct rxrpc_connection *conn; /* Connection referred to */
+ union {
+ u32 rxkad_nonce;
+ };
+ } chall;
+ struct {
+ rxrpc_serial_t challenge_serial;
+ u32 kvno;
+ u32 version;
+ u16 len;
+ u16 ticket_len;
+ } resp;
};
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
@@ -270,9 +288,24 @@ struct rxrpc_security {
/* issue a challenge */
int (*issue_challenge)(struct rxrpc_connection *);
+ /* Validate a challenge packet */
+ bool (*validate_challenge)(struct rxrpc_connection *conn,
+ struct sk_buff *skb);
+
+ /* Fill out the cmsg for recvmsg() to pass on a challenge to userspace.
+ * The security class gets to add additional information.
+ */
+ int (*challenge_to_recvmsg)(struct rxrpc_connection *conn,
+ struct sk_buff *challenge,
+ struct msghdr *msg);
+
+ /* Parse sendmsg() control message and respond to challenge. */
+ int (*sendmsg_respond_to_challenge)(struct sk_buff *challenge,
+ struct msghdr *msg);
+
/* respond to a challenge */
- int (*respond_to_challenge)(struct rxrpc_connection *,
- struct sk_buff *);
+ int (*respond_to_challenge)(struct rxrpc_connection *conn,
+ struct sk_buff *challenge);
/* verify a response */
int (*verify_response)(struct rxrpc_connection *,
@@ -280,6 +313,11 @@ struct rxrpc_security {
/* clear connection security */
void (*clear)(struct rxrpc_connection *);
+
+ /* Default ticket -> key decoder */
+ int (*default_decode_ticket)(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int ticket_offset, unsigned int ticket_len,
+ struct key **_key);
};
/*
@@ -526,7 +564,17 @@ struct rxrpc_connection {
struct rxrpc_crypt csum_iv; /* packet checksum base */
u32 nonce; /* response re-use preventer */
} rxkad;
+ struct {
+ struct rxgk_context *keys[4]; /* (Re-)keying buffer */
+ u64 start_time; /* The start time for TK derivation */
+ u8 nonce[20]; /* Response re-use preventer */
+ u32 enctype; /* Kerberos 5 encoding type */
+ u32 key_number; /* Current key number */
+ } rxgk;
};
+ rwlock_t security_use_lock; /* Security use/modification lock */
+ struct sk_buff *tx_response; /* Response packet to be transmitted */
+
unsigned long flags;
unsigned long events;
unsigned long idle_timestamp; /* Time at which last became idle */
@@ -692,6 +740,7 @@ struct rxrpc_call {
u32 call_id; /* call ID on connection */
u32 cid; /* connection ID plus channel index */
u32 security_level; /* Security level selected */
+ u32 security_enctype; /* Security-specific encoding type (or 0) */
int debug_id; /* debug ID for printks */
unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
unsigned short rx_pkt_len; /* Current recvmsg packet len */
@@ -867,6 +916,8 @@ struct rxrpc_txbuf {
unsigned short len; /* Amount of data in buffer */
unsigned short space; /* Remaining data space */
unsigned short offset; /* Offset of fill point */
+ unsigned short crypto_header; /* Size of crypto header */
+ unsigned short sec_header; /* Size of security header */
unsigned short pkt_len; /* Size of packet content */
unsigned short alloc_size; /* Amount of bufferage allocated */
unsigned int flags;
@@ -1001,7 +1052,9 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
struct rxrpc_call_params *, gfp_t,
- unsigned int);
+ unsigned int)
+ __releases(&rx->sk.sk_lock)
+ __acquires(&call->user_mutex);
void rxrpc_start_call_timer(struct rxrpc_call *call);
void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
@@ -1198,8 +1251,11 @@ void rxrpc_error_report(struct sock *);
bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
s32 abort_code, int err);
int rxrpc_io_thread(void *data);
+void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb);
static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
{
+ if (!local->io_thread)
+ return;
wake_up_process(READ_ONCE(local->io_thread));
}
@@ -1289,8 +1345,16 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
}
/*
+ * out_of_band.c
+ */
+void rxrpc_notify_socket_oob(struct rxrpc_call *call, struct sk_buff *skb);
+void rxrpc_add_pending_oob(struct rxrpc_sock *rx, struct sk_buff *skb);
+int rxrpc_sendmsg_oob(struct rxrpc_sock *rx, struct msghdr *msg, size_t len);
+
+/*
* output.c
*/
+ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len);
void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason,
rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why);
void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call);
@@ -1299,6 +1363,7 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req
void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
+void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *skb);
/*
* peer_event.c
@@ -1363,6 +1428,11 @@ ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans);
void rxrpc_call_init_rtt(struct rxrpc_call *call);
/*
+ * rxgk.c
+ */
+extern const struct rxrpc_security rxgk_yfs;
+
+/*
* rxkad.c
*/
#ifdef CONFIG_RXKAD
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index e685034ce4f7..a4b363b47cca 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -34,7 +34,6 @@ static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call,
static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
struct rxrpc_backlog *b,
rxrpc_notify_rx_t notify_rx,
- rxrpc_user_attach_call_t user_attach_call,
unsigned long user_call_ID, gfp_t gfp,
unsigned int debug_id)
{
@@ -123,9 +122,10 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
call->user_call_ID = user_call_ID;
call->notify_rx = notify_rx;
- if (user_attach_call) {
+ if (rx->app_ops &&
+ rx->app_ops->user_attach_call) {
rxrpc_get_call(call, rxrpc_call_get_kernel_service);
- user_attach_call(call, user_call_ID);
+ rx->app_ops->user_attach_call(call, user_call_ID);
}
rxrpc_get_call(call, rxrpc_call_get_userid);
@@ -219,9 +219,10 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_call *call = b->call_backlog[tail];
rcu_assign_pointer(call->socket, rx);
- if (rx->discard_new_call) {
+ if (rx->app_ops &&
+ rx->app_ops->discard_new_call) {
_debug("discard %lx", call->user_call_ID);
- rx->discard_new_call(call, call->user_call_ID);
+ rx->app_ops->discard_new_call(call, call->user_call_ID);
if (call->notify_rx)
call->notify_rx = rxrpc_dummy_notify;
rxrpc_put_call(call, rxrpc_call_put_kernel);
@@ -387,8 +388,9 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
rxrpc_incoming_call(rx, call, skb);
conn = call->conn;
- if (rx->notify_new_call)
- rx->notify_new_call(&rx->sk, call, call->user_call_ID);
+ if (rx->app_ops &&
+ rx->app_ops->notify_new_call)
+ rx->app_ops->notify_new_call(&rx->sk, call, call->user_call_ID);
spin_lock(&conn->state_lock);
if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
@@ -440,8 +442,7 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID)
if (rx->sk.sk_state == RXRPC_CLOSE)
return -ESHUTDOWN;
- return rxrpc_service_prealloc_one(rx, b, NULL, NULL, user_call_ID,
- GFP_KERNEL,
+ return rxrpc_service_prealloc_one(rx, b, NULL, user_call_ID, GFP_KERNEL,
atomic_inc_return(&rxrpc_debug_id));
}
@@ -449,20 +450,18 @@ int rxrpc_user_charge_accept(struct rxrpc_sock *rx, unsigned long user_call_ID)
* rxrpc_kernel_charge_accept - Charge up socket with preallocated calls
* @sock: The socket on which to preallocate
* @notify_rx: Event notification function for the call
- * @user_attach_call: Func to attach call to user_call_ID
* @user_call_ID: The tag to attach to the preallocated call
* @gfp: The allocation conditions.
* @debug_id: The tracing debug ID.
*
- * Charge up the socket with preallocated calls, each with a user ID. A
- * function should be provided to effect the attachment from the user's side.
- * The user is given a ref to hold on the call.
+ * Charge up the socket with preallocated calls, each with a user ID. The
+ * ->user_attach_call() callback function should be provided to effect the
+ * attachment from the user's side. The user is given a ref to hold on the
+ * call.
*
* Note that the call may become connected before this function returns.
*/
-int rxrpc_kernel_charge_accept(struct socket *sock,
- rxrpc_notify_rx_t notify_rx,
- rxrpc_user_attach_call_t user_attach_call,
+int rxrpc_kernel_charge_accept(struct socket *sock, rxrpc_notify_rx_t notify_rx,
unsigned long user_call_ID, gfp_t gfp,
unsigned int debug_id)
{
@@ -472,8 +471,7 @@ int rxrpc_kernel_charge_accept(struct socket *sock,
if (sock->sk->sk_state == RXRPC_CLOSE)
return -ESHUTDOWN;
- return rxrpc_service_prealloc_one(rx, b, notify_rx,
- user_attach_call, user_call_ID,
+ return rxrpc_service_prealloc_one(rx, b, notify_rx, user_call_ID,
gfp, debug_id);
}
EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
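With the per-function notification pointers folded into a single const ops table, kernel services now publish all their callbacks at once through rxrpc_kernel_set_notifications(). A sketch of a service installing the table: the field names are the ones dereferenced in this patch (notify_new_call, discard_new_call, user_attach_call, plus notify_oob from the OOB code), the my_* implementations are hypothetical, and the authoritative struct layout is in include/net/af_rxrpc.h:

    /* Sketch: one ops table instead of separate registration calls. */
    static const struct rxrpc_kernel_ops my_rxrpc_ops = {
            .notify_new_call  = my_notify_new_call,
            .discard_new_call = my_discard_new_call,
            .user_attach_call = my_attach_call,
            .notify_oob       = my_notify_oob,
    };

    rxrpc_kernel_set_notifications(sock, &my_rxrpc_ops);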
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index fce58be65e7c..e9e8f0ef3fd5 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -145,8 +145,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
INIT_LIST_HEAD(&call->attend_link);
- skb_queue_head_init(&call->rx_queue);
skb_queue_head_init(&call->recvmsg_queue);
+ skb_queue_head_init(&call->rx_queue);
skb_queue_head_init(&call->rx_oos_queue);
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->notify_lock);
@@ -322,7 +322,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_call_params *p,
gfp_t gfp,
unsigned int debug_id)
- __releases(&rx->sk.sk_lock.slock)
+ __releases(&rx->sk.sk_lock)
__acquires(&call->user_mutex)
{
struct rxrpc_call *call, *xcall;
@@ -760,3 +760,23 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
atomic_dec(&rxnet->nr_calls);
wait_var_event(&rxnet->nr_calls, !atomic_read(&rxnet->nr_calls));
}
+
+/**
+ * rxrpc_kernel_query_call_security - Query call's security parameters
+ * @call: The call to query
+ * @_service_id: Where to return the service ID
+ * @_enctype: Where to return the "encoding type"
+ *
+ * This queries the security parameters of a call, setting *@_service_id and
+ * *@_enctype and returning the security class.
+ *
+ * Return: The security class protocol number.
+ */
+u8 rxrpc_kernel_query_call_security(struct rxrpc_call *call,
+ u16 *_service_id, u32 *_enctype)
+{
+ *_service_id = call->dest_srx.srx_service;
+ *_enctype = call->security_enctype;
+ return call->security_ix;
+}
+EXPORT_SYMBOL(rxrpc_kernel_query_call_security);
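Per the kernel-doc above, the accessor returns the security class number and fills in the service ID and encoding type through the out parameters. A hypothetical caller:

    /* Sketch: inspecting a call's negotiated security. */
    u16 service_id;
    u32 enctype;
    u8 sec = rxrpc_kernel_query_call_security(call, &service_id, &enctype);

    pr_info("service %u: security class %u, enctype %u\n",
            service_id, sec, enctype);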
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 4d9c5e21ba78..232b6986da83 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -19,7 +19,7 @@
/*
* Set the completion state on an aborted connection.
*/
-static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb,
+static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn,
s32 abort_code, int err,
enum rxrpc_call_completion compl)
{
@@ -49,12 +49,20 @@ static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff
int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
s32 abort_code, int err, enum rxrpc_abort_reason why)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- if (rxrpc_set_conn_aborted(conn, skb, abort_code, err,
+ u32 cid = conn->proto.cid, call = 0, seq = 0;
+
+ if (skb) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ cid = sp->hdr.cid;
+ call = sp->hdr.callNumber;
+ seq = sp->hdr.seq;
+ }
+
+ if (rxrpc_set_conn_aborted(conn, abort_code, err,
RXRPC_CALL_LOCALLY_ABORTED)) {
- trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber,
- sp->hdr.seq, abort_code, err);
+ trace_rxrpc_abort(0, why, cid, call, seq, abort_code, err);
rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort);
}
return -EPROTO;
@@ -67,7 +75,7 @@ static void rxrpc_input_conn_abort(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
trace_rxrpc_rx_conn_abort(conn, skb);
- rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
+ rxrpc_set_conn_aborted(conn, skb->priority, -ECONNABORTED,
RXRPC_CALL_REMOTELY_ABORTED);
}
@@ -248,7 +256,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_CHALLENGE:
- return conn->security->respond_to_challenge(conn, skb);
+ ret = conn->security->respond_to_challenge(conn, skb);
+ sp->chall.conn = NULL;
+ rxrpc_put_connection(conn, rxrpc_conn_put_challenge_input);
+ return ret;
case RXRPC_PACKET_TYPE_RESPONSE:
ret = conn->security->verify_response(conn, skb);
@@ -270,7 +281,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
* we've already received the packet, put it on the
* front of the queue.
*/
- sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured);
+ sp->poke_conn = rxrpc_get_connection(
+ conn, rxrpc_conn_get_poke_secured);
skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
skb_queue_head(&conn->local->rx_queue, skb);
@@ -392,6 +404,61 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
}
/*
+ * Post a CHALLENGE packet to the socket of one of a connection's calls so that
+ * it can get application data to include in the packet, possibly querying
+ * userspace.
+ */
+static bool rxrpc_post_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_call *call = NULL;
+ struct rxrpc_sock *rx;
+ bool respond = false;
+
+ sp->chall.conn =
+ rxrpc_get_connection(conn, rxrpc_conn_get_challenge_input);
+
+ if (!conn->security->challenge_to_recvmsg) {
+ rxrpc_post_packet_to_conn(conn, skb);
+ return true;
+ }
+
+ rcu_read_lock();
+
+ for (int i = 0; i < ARRAY_SIZE(conn->channels); i++) {
+ if (conn->channels[i].call) {
+ call = conn->channels[i].call;
+ rx = rcu_dereference(call->socket);
+ if (!rx) {
+ call = NULL;
+ continue;
+ }
+
+ respond = true;
+ if (test_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags))
+ break;
+ call = NULL;
+ }
+ }
+
+ if (!respond) {
+ rcu_read_unlock();
+ rxrpc_put_connection(conn, rxrpc_conn_put_challenge_input);
+ sp->chall.conn = NULL;
+ return false;
+ }
+
+ if (call)
+ rxrpc_notify_socket_oob(call, skb);
+ rcu_read_unlock();
+
+ if (!call)
+ rxrpc_post_packet_to_conn(conn, skb);
+ return true;
+}
+
+/*
* Input a connection-level packet.
*/
bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
@@ -411,6 +478,16 @@ bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
return true;
case RXRPC_PACKET_TYPE_CHALLENGE:
+ rxrpc_see_skb(skb, rxrpc_skb_see_oob_challenge);
+ if (rxrpc_is_conn_aborted(conn)) {
+ if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED)
+ rxrpc_send_conn_abort(conn);
+ return true;
+ }
+ if (!conn->security->validate_challenge(conn, skb))
+ return false;
+ return rxrpc_post_challenge(conn, skb);
+
case RXRPC_PACKET_TYPE_RESPONSE:
if (rxrpc_is_conn_aborted(conn)) {
if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED)
@@ -436,6 +513,19 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
rxrpc_abort_calls(conn);
+ if (conn->tx_response) {
+ struct sk_buff *skb;
+
+ spin_lock_irq(&conn->local->lock);
+ skb = conn->tx_response;
+ conn->tx_response = NULL;
+ spin_unlock_irq(&conn->local->lock);
+
+ if (conn->state != RXRPC_CONN_ABORTED)
+ rxrpc_send_response(conn, skb);
+ rxrpc_free_skb(skb, rxrpc_skb_put_response);
+ }
+
if (skb) {
switch (skb->mark) {
case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
@@ -452,3 +542,31 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
rxrpc_process_delayed_final_acks(conn, false);
}
+
+/*
+ * Post a RESPONSE message to the I/O thread for transmission.
+ */
+void rxrpc_post_response(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_local *local = conn->local;
+ struct sk_buff *old;
+
+ _enter("%x", sp->resp.challenge_serial);
+
+ spin_lock_irq(&local->lock);
+ old = conn->tx_response;
+ if (old) {
+ struct rxrpc_skb_priv *osp = rxrpc_skb(skb);
+
+ /* Always go with the response to the most recent challenge. */
+ if (after(sp->resp.challenge_serial, osp->resp.challenge_serial))
+ conn->tx_response = old;
+ else
+ old = skb;
+ } else {
+ conn->tx_response = skb;
+ }
+ spin_unlock_irq(&local->lock);
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_response);
+}
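RESPONSE transmission is deferred to the I/O thread: rxrpc_post_response() parks the skb on conn->tx_response under the local endpoint lock and pokes the connection, and rxrpc_input_conn_event() later sends it via rxrpc_send_response() unless the connection has been aborted, freeing the skb either way. Competing responses are arbitrated by challenge serial with after(), which (as elsewhere in rxrpc) is the usual wrap-safe comparison, sketched here under that assumption:

    /* Sketch: wrap-safe "a is newer than b" over 32-bit serials. */
    static inline bool serial_after(u32 a, u32 b)
    {
            return (s32)(a - b) > 0;   /* signed difference handles wrap */
    }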
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 8ac22dde8b39..37340becb224 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -73,6 +73,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
skb_queue_head_init(&conn->rx_queue);
conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
+ rwlock_init(&conn->security_use_lock);
spin_lock_init(&conn->state_lock);
conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
conn->idle_timestamp = jiffies;
@@ -329,6 +330,7 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
}
rxrpc_purge_queue(&conn->rx_queue);
+ rxrpc_free_skb(conn->tx_response, rxrpc_skb_put_response);
rxrpc_kill_client_conn(conn);
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index e068f9b79d02..1f7c136d6d0e 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -42,13 +42,19 @@ static void none_free_call_crypto(struct rxrpc_call *call)
{
}
-static int none_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+static bool none_validate_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
{
return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
rxrpc_eproto_rxnull_challenge);
}
+static int none_sendmsg_respond_to_challenge(struct sk_buff *challenge,
+ struct msghdr *msg)
+{
+ return -EINVAL;
+}
+
static int none_verify_response(struct rxrpc_connection *conn,
struct sk_buff *skb)
{
@@ -82,7 +88,8 @@ const struct rxrpc_security rxrpc_no_security = {
.alloc_txbuf = none_alloc_txbuf,
.secure_packet = none_secure_packet,
.verify_packet = none_verify_packet,
- .respond_to_challenge = none_respond_to_challenge,
+ .validate_challenge = none_validate_challenge,
+ .sendmsg_respond_to_challenge = none_sendmsg_respond_to_challenge,
.verify_response = none_verify_response,
.clear = none_clear,
};
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 64f8d77b8731..27b650d30f4d 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -489,8 +489,8 @@ int rxrpc_io_thread(void *data)
rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
break;
case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
- rxrpc_input_conn_event(sp->conn, skb);
- rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke);
+ rxrpc_input_conn_event(sp->poke_conn, skb);
+ rxrpc_put_connection(sp->poke_conn, rxrpc_conn_put_poke);
rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured);
break;
default:
@@ -501,9 +501,11 @@ int rxrpc_io_thread(void *data)
}
/* Deal with connections that want immediate attention. */
- spin_lock_irq(&local->lock);
- list_splice_tail_init(&local->conn_attend_q, &conn_attend_q);
- spin_unlock_irq(&local->lock);
+ if (!list_empty_careful(&local->conn_attend_q)) {
+ spin_lock_irq(&local->lock);
+ list_splice_tail_init(&local->conn_attend_q, &conn_attend_q);
+ spin_unlock_irq(&local->lock);
+ }
while ((conn = list_first_entry_or_null(&conn_attend_q,
struct rxrpc_connection,
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 33e8302a79e3..9fdc1f031c9d 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -129,6 +129,160 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep,
return 0;
}
+static u64 xdr_dec64(const __be32 *xdr)
+{
+ return (u64)ntohl(xdr[0]) << 32 | (u64)ntohl(xdr[1]);
+}
+
+static time64_t rxrpc_s64_to_time64(s64 time_in_100ns)
+{
+ bool neg = false;
+ u64 tmp = time_in_100ns;
+
+ if (time_in_100ns < 0) {
+ tmp = -time_in_100ns;
+ neg = true;
+ }
+ do_div(tmp, 10000000);
+ return neg ? -tmp : tmp;
+}
+
+/*
+ * Parse a YFS-RxGK type XDR format token
+ * - the caller guarantees we have at least 4 words
+ *
+ * struct token_rxgk {
+ * opr_time begintime;
+ * opr_time endtime;
+ * afs_int64 level;
+ * afs_int64 lifetime;
+ * afs_int64 bytelife;
+ * afs_int64 enctype;
+ * opaque key<>;
+ * opaque ticket<>;
+ * };
+ */
+static int rxrpc_preparse_xdr_yfs_rxgk(struct key_preparsed_payload *prep,
+ size_t datalen,
+ const __be32 *xdr, unsigned int toklen)
+{
+ struct rxrpc_key_token *token, **pptoken;
+ time64_t expiry;
+ size_t plen;
+ const __be32 *ticket, *key;
+ s64 tmp;
+ u32 tktlen, keylen;
+
+ _enter(",{%x,%x,%x,%x},%x",
+ ntohl(xdr[0]), ntohl(xdr[1]), ntohl(xdr[2]), ntohl(xdr[3]),
+ toklen);
+
+ if (6 * 2 + 2 > toklen / 4)
+ goto reject;
+
+ key = xdr + (6 * 2 + 1);
+ keylen = ntohl(key[-1]);
+ _debug("keylen: %x", keylen);
+ keylen = round_up(keylen, 4);
+ if ((6 * 2 + 2) * 4 + keylen > toklen)
+ goto reject;
+
+ ticket = xdr + (6 * 2 + 1 + (keylen / 4) + 1);
+ tktlen = ntohl(ticket[-1]);
+ _debug("tktlen: %x", tktlen);
+ tktlen = round_up(tktlen, 4);
+ if ((6 * 2 + 2) * 4 + keylen + tktlen != toklen) {
+ kleave(" = -EKEYREJECTED [%x!=%x, %x,%x]",
+ (6 * 2 + 2) * 4 + keylen + tktlen, toklen,
+ keylen, tktlen);
+ goto reject;
+ }
+
+ plen = sizeof(*token) + sizeof(*token->rxgk) + tktlen + keylen;
+ prep->quotalen = datalen + plen;
+
+ plen -= sizeof(*token);
+ token = kzalloc(sizeof(*token), GFP_KERNEL);
+ if (!token)
+ goto nomem;
+
+ token->rxgk = kzalloc(sizeof(*token->rxgk) + keylen, GFP_KERNEL);
+ if (!token->rxgk)
+ goto nomem_token;
+
+ token->security_index = RXRPC_SECURITY_YFS_RXGK;
+ token->rxgk->begintime = xdr_dec64(xdr + 0 * 2);
+ token->rxgk->endtime = xdr_dec64(xdr + 1 * 2);
+ token->rxgk->level = tmp = xdr_dec64(xdr + 2 * 2);
+ if (tmp < -1LL || tmp > RXRPC_SECURITY_ENCRYPT)
+ goto reject_token;
+ token->rxgk->lifetime = xdr_dec64(xdr + 3 * 2);
+ token->rxgk->bytelife = xdr_dec64(xdr + 4 * 2);
+ token->rxgk->enctype = tmp = xdr_dec64(xdr + 5 * 2);
+ if (tmp < 0 || tmp > UINT_MAX)
+ goto reject_token;
+ token->rxgk->key.len = ntohl(key[-1]);
+ token->rxgk->key.data = token->rxgk->_key;
+ token->rxgk->ticket.len = ntohl(ticket[-1]);
+
+ if (token->rxgk->endtime != 0) {
+ expiry = rxrpc_s64_to_time64(token->rxgk->endtime);
+ if (expiry < 0)
+ goto expired;
+ if (expiry < prep->expiry)
+ prep->expiry = expiry;
+ }
+
+ memcpy(token->rxgk->key.data, key, token->rxgk->key.len);
+
+ /* Pad the ticket so that we can use it directly in XDR */
+ token->rxgk->ticket.data = kzalloc(round_up(token->rxgk->ticket.len, 4),
+ GFP_KERNEL);
+ if (!token->rxgk->ticket.data)
+ goto nomem_yrxgk;
+ memcpy(token->rxgk->ticket.data, ticket, token->rxgk->ticket.len);
+
+ _debug("SCIX: %u", token->security_index);
+ _debug("EXPY: %llx", token->rxgk->endtime);
+ _debug("LIFE: %llx", token->rxgk->lifetime);
+ _debug("BYTE: %llx", token->rxgk->bytelife);
+ _debug("ENC : %u", token->rxgk->enctype);
+ _debug("LEVL: %u", token->rxgk->level);
+ _debug("KLEN: %u", token->rxgk->key.len);
+ _debug("TLEN: %u", token->rxgk->ticket.len);
+ _debug("KEY0: %*phN", token->rxgk->key.len, token->rxgk->key.data);
+ _debug("TICK: %*phN",
+ min_t(u32, token->rxgk->ticket.len, 32), token->rxgk->ticket.data);
+
+ /* count the number of tokens attached */
+ prep->payload.data[1] = (void *)((unsigned long)prep->payload.data[1] + 1);
+
+ /* attach the data */
+ for (pptoken = (struct rxrpc_key_token **)&prep->payload.data[0];
+ *pptoken;
+ pptoken = &(*pptoken)->next)
+ continue;
+ *pptoken = token;
+
+ _leave(" = 0");
+ return 0;
+
+nomem_yrxgk:
+ kfree(token->rxgk);
+nomem_token:
+ kfree(token);
+nomem:
+ return -ENOMEM;
+reject_token:
+ kfree(token);
+reject:
+ return -EKEYREJECTED;
+expired:
+ kfree(token->rxgk);
+ kfree(token);
+ return -EKEYEXPIRED;
+}
+
/*
* attempt to parse the data as the XDR format
* - the caller guarantees we have more than 7 words
@@ -228,6 +382,9 @@ static int rxrpc_preparse_xdr(struct key_preparsed_payload *prep)
case RXRPC_SECURITY_RXKAD:
ret2 = rxrpc_preparse_xdr_rxkad(prep, datalen, token, toklen);
break;
+ case RXRPC_SECURITY_YFS_RXGK:
+ ret2 = rxrpc_preparse_xdr_yfs_rxgk(prep, datalen, token, toklen);
+ break;
default:
ret2 = -EPROTONOSUPPORT;
break;
@@ -390,6 +547,10 @@ static void rxrpc_free_token_list(struct rxrpc_key_token *token)
case RXRPC_SECURITY_RXKAD:
kfree(token->kad);
break;
+ case RXRPC_SECURITY_YFS_RXGK:
+ kfree(token->rxgk->ticket.data);
+ kfree(token->rxgk);
+ break;
default:
pr_err("Unknown token type %x on rxrpc key\n",
token->security_index);
@@ -433,6 +594,9 @@ static void rxrpc_describe(const struct key *key, struct seq_file *m)
case RXRPC_SECURITY_RXKAD:
seq_puts(m, "ka");
break;
+ case RXRPC_SECURITY_YFS_RXGK:
+ seq_puts(m, "ygk");
+ break;
default: /* we have a ticket we can't encode */
seq_printf(m, "%u", token->security_index);
break;
@@ -531,6 +695,8 @@ EXPORT_SYMBOL(rxrpc_get_server_data_key);
*
* Generate a null RxRPC key that can be used to indicate anonymous security is
* required for a particular domain.
+ *
+ * Return: The new key or a negative error code.
*/
struct key *rxrpc_get_null_key(const char *keyname)
{
@@ -595,6 +761,13 @@ static long rxrpc_read(const struct key *key,
toksize += RND(token->kad->ticket_len);
break;
+ case RXRPC_SECURITY_YFS_RXGK:
+ toksize += 6 * 8 + 2 * 4;
+ if (!token->no_leak_key)
+ toksize += RND(token->rxgk->key.len);
+ toksize += RND(token->rxgk->ticket.len);
+ break;
+
default: /* we have a ticket we can't encode */
pr_err("Unsupported key token type (%u)\n",
token->security_index);
@@ -674,6 +847,20 @@ static long rxrpc_read(const struct key *key,
ENCODE_DATA(token->kad->ticket_len, token->kad->ticket);
break;
+ case RXRPC_SECURITY_YFS_RXGK:
+ ENCODE64(token->rxgk->begintime);
+ ENCODE64(token->rxgk->endtime);
+ ENCODE64(token->rxgk->level);
+ ENCODE64(token->rxgk->lifetime);
+ ENCODE64(token->rxgk->bytelife);
+ ENCODE64(token->rxgk->enctype);
+ if (token->no_leak_key)
+ ENCODE(0);
+ else
+ ENCODE_DATA(token->rxgk->key.len, token->rxgk->key.data);
+ ENCODE_DATA(token->rxgk->ticket.len, token->rxgk->ticket.data);
+ break;
+
default:
pr_err("Unsupported key token type (%u)\n",
token->security_index);
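The YFS-RxGK token parsed above is six fixed 64-bit XDR fields followed by two XDR opaques (key, then ticket). That makes the minimum size 6 * 2 data words plus one length word per opaque, which is the `6 * 2 + 2 > toklen / 4` guard (56 bytes), and the final equality test insists that the rounded-up key and ticket account for every remaining byte. XDR opaques carry a 4-byte length with payload padded to a 4-byte boundary, as in this sketch:

    /* Sketch: stepping over an XDR opaque<> (length word + padded data). */
    static const __be32 *xdr_skip_opaque(const __be32 *xdr, u32 *_len)
    {
            u32 len = ntohl(*xdr++);            /* length in bytes */

            *_len = len;
            return xdr + round_up(len, 4) / 4;  /* skip padded payload */
    }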
diff --git a/net/rxrpc/oob.c b/net/rxrpc/oob.c
new file mode 100644
index 000000000000..05ca9c1faa57
--- /dev/null
+++ b/net/rxrpc/oob.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Out of band message handling (e.g. challenge-response)
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/export.h>
+#include <linux/sched/signal.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+enum rxrpc_oob_command {
+ RXRPC_OOB_CMD_UNSET,
+ RXRPC_OOB_CMD_RESPOND,
+} __mode(byte);
+
+struct rxrpc_oob_params {
+ u64 oob_id; /* ID number of message if reply */
+ s32 abort_code;
+ enum rxrpc_oob_command command;
+ bool have_oob_id:1;
+};
+
+/*
+ * Post an out-of-band message for attention by the socket or kernel service
+ * associated with a reference call.
+ */
+void rxrpc_notify_socket_oob(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_sock *rx;
+ struct sock *sk;
+
+ rcu_read_lock();
+
+ rx = rcu_dereference(call->socket);
+ if (rx) {
+ sk = &rx->sk;
+ spin_lock_irq(&rx->recvmsg_lock);
+
+ if (sk->sk_state < RXRPC_CLOSE) {
+ skb->skb_mstamp_ns = rx->oob_id_counter++;
+ rxrpc_get_skb(skb, rxrpc_skb_get_post_oob);
+ skb_queue_tail(&rx->recvmsg_oobq, skb);
+
+ trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial);
+ if (rx->app_ops)
+ rx->app_ops->notify_oob(sk, skb);
+ }
+
+ spin_unlock_irq(&rx->recvmsg_lock);
+ if (!rx->app_ops && !sock_flag(sk, SOCK_DEAD))
+ sk->sk_data_ready(sk);
+ }
+
+ rcu_read_unlock();
+}
+
+/*
+ * Locate the OOB message to respond to by its ID.
+ */
+static struct sk_buff *rxrpc_find_pending_oob(struct rxrpc_sock *rx, u64 oob_id)
+{
+ struct rb_node *p;
+ struct sk_buff *skb;
+
+ p = rx->pending_oobq.rb_node;
+ while (p) {
+ skb = rb_entry(p, struct sk_buff, rbnode);
+
+ if (oob_id < skb->skb_mstamp_ns)
+ p = p->rb_left;
+ else if (oob_id > skb->skb_mstamp_ns)
+ p = p->rb_right;
+ else
+ return skb;
+ }
+
+ return NULL;
+}
+
+/*
+ * Add an OOB message into the pending-response set. We always assign the next
+ * value from a 64-bit counter to the oob_id, so just assume we're always going
+ * to be on the right-hand edge of the tree and that the counter won't wrap.
+ * The tree is also given a ref to the message.
+ */
+void rxrpc_add_pending_oob(struct rxrpc_sock *rx, struct sk_buff *skb)
+{
+ struct rb_node **pp = &rx->pending_oobq.rb_node, *p = NULL;
+
+ while (*pp) {
+ p = *pp;
+ pp = &(*pp)->rb_right;
+ }
+
+ rb_link_node(&skb->rbnode, p, pp);
+ rb_insert_color(&skb->rbnode, &rx->pending_oobq);
+}
+
+/*
+ * Extract control messages from the sendmsg() control buffer.
+ */
+static int rxrpc_sendmsg_oob_cmsg(struct msghdr *msg, struct rxrpc_oob_params *p)
+{
+ struct cmsghdr *cmsg;
+ int len;
+
+ if (msg->msg_controllen == 0)
+ return -EINVAL;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ len = cmsg->cmsg_len - sizeof(struct cmsghdr);
+ _debug("CMSG %d, %d, %d",
+ cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+ if (cmsg->cmsg_level != SOL_RXRPC)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case RXRPC_OOB_ID:
+ if (len != sizeof(p->oob_id) || p->have_oob_id)
+ return -EINVAL;
+ memcpy(&p->oob_id, CMSG_DATA(cmsg), sizeof(p->oob_id));
+ p->have_oob_id = true;
+ break;
+ case RXRPC_RESPOND:
+ if (p->command != RXRPC_OOB_CMD_UNSET)
+ return -EINVAL;
+ p->command = RXRPC_OOB_CMD_RESPOND;
+ break;
+ case RXRPC_ABORT:
+ if (len != sizeof(p->abort_code) || p->abort_code)
+ return -EINVAL;
+ memcpy(&p->abort_code, CMSG_DATA(cmsg), sizeof(p->abort_code));
+ if (p->abort_code == 0)
+ return -EINVAL;
+ break;
+ case RXRPC_RESP_RXGK_APPDATA:
+ if (p->command != RXRPC_OOB_CMD_RESPOND)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ switch (p->command) {
+ case RXRPC_OOB_CMD_RESPOND:
+ if (!p->have_oob_id)
+ return -EBADSLT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
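+
+/* Illustrative userspace sequence (a sketch; fd and oob_id are assumed to
+ * come from the application and from the RXRPC_OOB_ID control message
+ * delivered by recvmsg()): to accept a challenge, echo the ID back with
+ * RXRPC_RESPOND (optionally adding RXRPC_RESP_RXGK_APPDATA); to refuse it,
+ * use RXRPC_ABORT instead:
+ *
+ *	char buf[CMSG_SPACE(sizeof(uint64_t)) + CMSG_SPACE(0)];
+ *	struct msghdr msg = { .msg_control = buf, .msg_controllen = sizeof(buf) };
+ *	struct cmsghdr *c = CMSG_FIRSTHDR(&msg);
+ *
+ *	c->cmsg_level = SOL_RXRPC;
+ *	c->cmsg_type = RXRPC_OOB_ID;
+ *	c->cmsg_len = CMSG_LEN(sizeof(uint64_t));
+ *	memcpy(CMSG_DATA(c), &oob_id, sizeof(uint64_t));
+ *	c = CMSG_NXTHDR(&msg, c);
+ *	c->cmsg_level = SOL_RXRPC;
+ *	c->cmsg_type = RXRPC_RESPOND;
+ *	c->cmsg_len = CMSG_LEN(0);
+ *	sendmsg(fd, &msg, 0);
+ */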
+
+/*
+ * Allow userspace to respond to an OOB using sendmsg().
+ */
+static int rxrpc_respond_to_oob(struct rxrpc_sock *rx,
+ struct rxrpc_oob_params *p,
+ struct msghdr *msg)
+{
+ struct rxrpc_connection *conn;
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ int ret;
+
+ skb = rxrpc_find_pending_oob(rx, p->oob_id);
+ if (skb)
+ rb_erase(&skb->rbnode, &rx->pending_oobq);
+ release_sock(&rx->sk);
+ if (!skb)
+ return -EBADSLT;
+
+ sp = rxrpc_skb(skb);
+
+ switch (p->command) {
+ case RXRPC_OOB_CMD_RESPOND:
+ ret = -EPROTO;
+ if (skb->mark != RXRPC_OOB_CHALLENGE)
+ break;
+ conn = sp->chall.conn;
+ ret = -EOPNOTSUPP;
+ if (!conn->security->sendmsg_respond_to_challenge)
+ break;
+ if (p->abort_code) {
+ rxrpc_abort_conn(conn, NULL, p->abort_code, -ECONNABORTED,
+ rxrpc_abort_response_sendmsg);
+ ret = 0;
+ } else {
+ ret = conn->security->sendmsg_respond_to_challenge(skb, msg);
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ rxrpc_free_skb(skb, rxrpc_skb_put_oob);
+ return ret;
+}
+
+/*
+ * Send an out-of-band message or respond to a received out-of-band message.
+ * - caller gives us the socket lock
+ * - the socket may be either a client socket or a server socket
+ */
+int rxrpc_sendmsg_oob(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+{
+ struct rxrpc_oob_params p = {};
+ int ret;
+
+ _enter("");
+
+ ret = rxrpc_sendmsg_oob_cmsg(msg, &p);
+ if (ret < 0)
+ goto error_release_sock;
+
+ if (p.have_oob_id)
+ return rxrpc_respond_to_oob(rx, &p, msg);
+
+ release_sock(&rx->sk);
+
+ switch (p.command) {
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+
+error_release_sock:
+ release_sock(&rx->sk);
+ return ret;
+}
+
+/**
+ * rxrpc_kernel_query_oob - Query the parameters of an out-of-band message
+ * @oob: The message to query
+ * @_peer: Where to return the peer record
+ * @_peer_appdata: Where to return the application data attached to the peer record
+ *
+ * Extract useful parameters from an out-of-band message. The source peer
+ * parameters are returned through the argument list and the message type is
+ * returned.
+ *
+ * Return:
+ * * %RXRPC_OOB_CHALLENGE - Challenge wanting a response.
+ */
+enum rxrpc_oob_type rxrpc_kernel_query_oob(struct sk_buff *oob,
+ struct rxrpc_peer **_peer,
+ unsigned long *_peer_appdata)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(oob);
+ enum rxrpc_oob_type type = oob->mark;
+
+ switch (type) {
+ case RXRPC_OOB_CHALLENGE:
+ *_peer = sp->chall.conn->peer;
+ *_peer_appdata = sp->chall.conn->peer->app_data;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ *_peer = NULL;
+ *_peer_appdata = 0;
+ break;
+ }
+
+ return type;
+}
+EXPORT_SYMBOL(rxrpc_kernel_query_oob);
+
+/**
+ * rxrpc_kernel_dequeue_oob - Dequeue and return the front OOB message
+ * @sock: The socket to query
+ * @_type: Where to return the message type
+ *
+ * Dequeue the front OOB message, if there is one, and return it and
+ * its type.
+ *
+ * Return: The sk_buff representing the OOB message or %NULL if the queue was
+ * empty.
+ */
+struct sk_buff *rxrpc_kernel_dequeue_oob(struct socket *sock,
+ enum rxrpc_oob_type *_type)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct sk_buff *oob;
+
+ oob = skb_dequeue(&rx->recvmsg_oobq);
+ if (oob)
+ *_type = oob->mark;
+ return oob;
+}
+EXPORT_SYMBOL(rxrpc_kernel_dequeue_oob);
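+
+/* A minimal sketch of how a kernel service that doesn't supply a notify_oob
+ * hook might drain the queue (sock is the service's AF_RXRPC socket; the
+ * response step depends on the security class in use):
+ *
+ *	enum rxrpc_oob_type type;
+ *	struct rxrpc_peer *peer;
+ *	unsigned long appdata;
+ *	struct sk_buff *oob;
+ *
+ *	while ((oob = rxrpc_kernel_dequeue_oob(sock, &type))) {
+ *		if (type == RXRPC_OOB_CHALLENGE) {
+ *			rxrpc_kernel_query_oob(oob, &peer, &appdata);
+ *			// ... respond to or reject the challenge here ...
+ *		}
+ *		rxrpc_kernel_free_oob(oob);
+ *	}
+ */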
+
+/**
+ * rxrpc_kernel_free_oob - Free an out-of-band message
+ * @oob: The OOB message to free
+ *
+ * Free an OOB message along with any resources it holds.
+ */
+void rxrpc_kernel_free_oob(struct sk_buff *oob)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(oob);
+
+ switch (oob->mark) {
+ case RXRPC_OOB_CHALLENGE:
+ rxrpc_put_connection(sp->chall.conn, rxrpc_conn_put_oob);
+ break;
+ }
+
+ rxrpc_free_skb(oob, rxrpc_skb_put_purge_oob);
+}
+EXPORT_SYMBOL(rxrpc_kernel_free_oob);
+
+/**
+ * rxrpc_kernel_query_challenge - Query the parameters of a challenge
+ * @challenge: The challenge to query
+ * @_peer: Where to return the peer record
+ * @_peer_appdata: Where to return the application data attached to the peer record
+ * @_service_id: Where to return the connection service ID
+ * @_security_index: Where to return the connection security index
+ *
+ * Extract useful parameters from a CHALLENGE message.
+ */
+void rxrpc_kernel_query_challenge(struct sk_buff *challenge,
+ struct rxrpc_peer **_peer,
+ unsigned long *_peer_appdata,
+ u16 *_service_id, u8 *_security_index)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(challenge);
+
+ *_peer = sp->chall.conn->peer;
+ *_peer_appdata = sp->chall.conn->peer->app_data;
+ *_service_id = sp->hdr.serviceId;
+ *_security_index = sp->hdr.securityIndex;
+}
+EXPORT_SYMBOL(rxrpc_kernel_query_challenge);
+
+/**
+ * rxrpc_kernel_reject_challenge - Allow a kernel service to reject a challenge
+ * @challenge: The challenge to be rejected
+ * @abort_code: The abort code to stick into the ABORT packet
+ * @error: Local error value
+ * @why: Indication as to why the challenge is being rejected
+ *
+ * Allow a kernel service to reject a challenge by aborting the connection if
+ * it's still in an abortable state. The error is returned so this function
+ * can be used with a return statement.
+ *
+ * Return: The %error parameter.
+ */
+int rxrpc_kernel_reject_challenge(struct sk_buff *challenge, u32 abort_code,
+ int error, enum rxrpc_abort_reason why)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(challenge);
+
+ _enter("{%x},%d,%d,%u", sp->hdr.serial, abort_code, error, why);
+
+ rxrpc_abort_conn(sp->chall.conn, NULL, abort_code, error, why);
+ return error;
+}
+EXPORT_SYMBOL(rxrpc_kernel_reject_challenge);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 95905b85a8d7..0af19bcdc80a 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -18,7 +18,7 @@
extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len);
-static ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len)
+ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len)
{
struct sockaddr *sa = msg->msg_name;
struct sock *sk = socket->sk;
@@ -916,3 +916,61 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
peer->last_tx_at = ktime_get_seconds();
_leave("");
}
+
+/*
+ * Send a RESPONSE message.
+ */
+void rxrpc_send_response(struct rxrpc_connection *conn, struct sk_buff *response)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(response);
+ struct scatterlist sg[16];
+ struct bio_vec bvec[16];
+ struct msghdr msg;
+ size_t len = sp->resp.len;
+ __be32 wserial;
+ u32 serial = 0;
+ int ret, nr_sg;
+
+ _enter("C=%x,%x", conn->debug_id, sp->resp.challenge_serial);
+
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ ret = skb_to_sgvec(response, sg, 0, len);
+ if (ret < 0)
+ goto fail;
+ nr_sg = ret;
+
+ for (int i = 0; i < nr_sg; i++)
+ bvec_set_page(&bvec[i], sg_page(&sg[i]), sg[i].length, sg[i].offset);
+
+ iov_iter_bvec(&msg.msg_iter, WRITE, bvec, nr_sg, len);
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = MSG_SPLICE_PAGES;
+
+ serial = rxrpc_get_next_serials(conn, 1);
+ wserial = htonl(serial);
+
+ trace_rxrpc_tx_response(conn, serial, sp);
+
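+ /* The serial number is only allocated at transmission time, so patch
+ * it into the wire header that was already serialised into the skb.
+ */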
+ ret = skb_store_bits(response, offsetof(struct rxrpc_wire_header, serial),
+ &wserial, sizeof(wserial));
+ if (ret < 0)
+ goto fail;
+
+ rxrpc_local_dont_fragment(conn->local, false);
+
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ if (ret < 0)
+ goto fail;
+
+ conn->peer->last_tx_at = ktime_get_seconds();
+ return;
+
+fail:
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_point_response);
+ kleave(" = %d", ret);
+}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 71b6e07bf161..e2f35e6c04d6 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -475,6 +475,8 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
* @call: The call to query
*
* Get a record for the remote peer in a call.
+ *
+ * Return: The call's peer record.
*/
struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call)
{
@@ -486,7 +488,9 @@ EXPORT_SYMBOL(rxrpc_kernel_get_call_peer);
* rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT
* @peer: The peer to query
*
- * Get the call's peer smoothed RTT in uS or UINT_MAX if we have no samples.
+ * Get the call's peer smoothed RTT.
+ *
+ * Return: The RTT in uS or %UINT_MAX if we have no samples.
*/
unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer)
{
@@ -499,7 +503,10 @@ EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
* @peer: The peer to query
*
* Get a pointer to the address from a peer record. The caller is responsible
- * for making sure that the address is not deallocated.
+ * for making sure that the address is not deallocated. A fake address will be
+ * substituted if %peer is NULL.
+ *
+ * Return: The rxrpc address record or a fake record.
*/
const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer)
{
@@ -512,7 +519,10 @@ EXPORT_SYMBOL(rxrpc_kernel_remote_srx);
* @peer: The peer to query
*
* Get a pointer to the transport address from a peer record. The caller is
- * responsible for making sure that the address is not deallocated.
+ * responsible for making sure that the address is not deallocated. A fake
+ * address will be substituted if %peer is NULL.
+ *
+ * Return: The transport address record or a fake record.
*/
const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer)
{
@@ -527,7 +537,9 @@ EXPORT_SYMBOL(rxrpc_kernel_remote_addr);
* @app_data: The data to set
*
* Set the app-specific data on a peer. AF_RXRPC makes no effort to retain
- * anything the data might refer to. The previous app_data is returned.
+ * anything the data might refer to.
+ *
+ * Return: The previous app_data.
*/
unsigned long rxrpc_kernel_set_peer_data(struct rxrpc_peer *peer, unsigned long app_data)
{
@@ -540,6 +552,8 @@ EXPORT_SYMBOL(rxrpc_kernel_set_peer_data);
* @peer: The peer to query
*
* Retrieve the app-specific data from a peer.
+ *
+ * Return: The peer's app data.
*/
unsigned long rxrpc_kernel_get_peer_data(const struct rxrpc_peer *peer)
{
diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h
index 42f70e4636f8..f8bfec12bc7e 100644
--- a/net/rxrpc/protocol.h
+++ b/net/rxrpc/protocol.h
@@ -181,4 +181,24 @@ struct rxkad_response {
__be32 ticket_len; /* Kerberos ticket length */
} __packed;
+
+/*
+ * GSSAPI security type-4 and type-6 data header.
+ */
+struct rxgk_header {
+ __be32 epoch;
+ __be32 cid;
+ __be32 call_number;
+ __be32 seq;
+ __be32 sec_index;
+ __be32 data_len;
+} __packed;
+
+/*
+ * GSSAPI security type-4 and type-6 response packet header.
+ */
+struct rxgk_response {
+ __be64 start_time;
+ __be32 token_len;
+} __packed;
+
#endif /* _LINUX_RXRPC_PACKET_H */
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 32cd5f1d541d..86a27fb55a1c 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -155,6 +155,82 @@ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
}
/*
+ * Transcribe a call's user ID to a control message.
+ */
+static int rxrpc_recvmsg_user_id(struct rxrpc_call *call, struct msghdr *msg,
+ int flags)
+{
+ if (!test_bit(RXRPC_CALL_HAS_USERID, &call->flags))
+ return 0;
+
+ if (flags & MSG_CMSG_COMPAT) {
+ unsigned int id32 = call->user_call_ID;
+
+ return put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ sizeof(unsigned int), &id32);
+ } else {
+ unsigned long idl = call->user_call_ID;
+
+ return put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ sizeof(unsigned long), &idl);
+ }
+}
+
+/*
+ * Deal with a CHALLENGE packet.
+ */
+static int rxrpc_recvmsg_challenge(struct socket *sock, struct msghdr *msg,
+ struct sk_buff *challenge, unsigned int flags)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(challenge);
+ struct rxrpc_connection *conn = sp->chall.conn;
+
+ return conn->security->challenge_to_recvmsg(conn, challenge, msg);
+}
+
+/*
+ * Process OOB packets. Called with the socket locked.
+ */
+static int rxrpc_recvmsg_oob(struct socket *sock, struct msghdr *msg,
+ unsigned int flags)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct sk_buff *skb;
+ bool need_response = false;
+ int ret;
+
+ skb = skb_peek(&rx->recvmsg_oobq);
+ if (!skb)
+ return -EAGAIN;
+ rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg);
+
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_OOB_ID, sizeof(u64),
+ &skb->skb_mstamp_ns);
+ if (ret < 0)
+ return ret;
+
+ switch ((enum rxrpc_oob_type)skb->mark) {
+ case RXRPC_OOB_CHALLENGE:
+ need_response = true;
+ ret = rxrpc_recvmsg_challenge(sock, msg, skb, flags);
+ break;
+ default:
+ WARN_ONCE(1, "recvmsg() can't process unknown OOB type %u\n",
+ skb->mark);
+ ret = -EIO;
+ break;
+ }
+
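+ /* Unless we're just peeking, remove the message from the queue; if it
+ * still needs a response, park it on the pending queue where sendmsg()
+ * can find it by its ID, otherwise discard it.
+ */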
+ if (!(flags & MSG_PEEK))
+ skb_unlink(skb, &rx->recvmsg_oobq);
+ if (need_response)
+ rxrpc_add_pending_oob(rx, skb);
+ else
+ rxrpc_free_skb(skb, rxrpc_skb_put_oob);
+ return ret;
+}
+
+/*
* Deliver messages to a call. This keeps processing packets until the buffer
* is filled and we find either more DATA (returns 0) or the end of the DATA
* (returns 1). If more packets are required, it returns -EAGAIN and if the
@@ -165,6 +241,7 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
size_t len, int flags, size_t *_offset)
{
struct rxrpc_skb_priv *sp;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
struct sk_buff *skb;
rxrpc_seq_t seq = 0;
size_t remain;
@@ -207,7 +284,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
sp->offset, sp->len, ret2);
if (ret2 < 0) {
- kdebug("verify = %d", ret2);
ret = ret2;
goto out;
}
@@ -255,6 +331,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (!(flags & MSG_PEEK))
rxrpc_rotate_rx_window(call);
+
+ if (!rx->app_ops &&
+ !skb_queue_empty_lockless(&rx->recvmsg_oobq)) {
+ trace_rxrpc_recvdata(call, rxrpc_recvmsg_oobq, seq,
+ rx_pkt_offset, rx_pkt_len, ret);
+ break;
+ }
}
out:
@@ -262,6 +345,7 @@ out:
call->rx_pkt_offset = rx_pkt_offset;
call->rx_pkt_len = rx_pkt_len;
}
+
done:
trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq,
rx_pkt_offset, rx_pkt_len, ret);
@@ -301,6 +385,7 @@ try_again:
/* Return immediately if a client socket has no outstanding calls */
if (RB_EMPTY_ROOT(&rx->calls) &&
list_empty(&rx->recvmsg_q) &&
+ skb_queue_empty_lockless(&rx->recvmsg_oobq) &&
rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
release_sock(&rx->sk);
return -EAGAIN;
@@ -322,7 +407,8 @@ try_again:
if (ret)
goto wait_error;
- if (list_empty(&rx->recvmsg_q)) {
+ if (list_empty(&rx->recvmsg_q) &&
+ skb_queue_empty_lockless(&rx->recvmsg_oobq)) {
if (signal_pending(current))
goto wait_interrupted;
trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0);
@@ -332,6 +418,15 @@ try_again:
goto try_again;
}
+ /* Deal with OOB messages before we consider getting normal data. */
+ if (!skb_queue_empty_lockless(&rx->recvmsg_oobq)) {
+ ret = rxrpc_recvmsg_oob(sock, msg, flags);
+ release_sock(&rx->sk);
+ if (ret == -EAGAIN)
+ goto try_again;
+ goto error_no_call;
+ }
+
/* Find the next call and dequeue it if we're not just peeking. If we
* do dequeue it, that comes with a ref that we will need to release.
* We also want to weed out calls that got requeued whilst we were
@@ -342,7 +437,8 @@ try_again:
call = list_entry(l, struct rxrpc_call, recvmsg_link);
if (!rxrpc_call_is_complete(call) &&
- skb_queue_empty(&call->recvmsg_queue)) {
+ skb_queue_empty(&call->recvmsg_queue) &&
+ skb_queue_empty(&rx->recvmsg_oobq)) {
list_del_init(&call->recvmsg_link);
spin_unlock_irq(&rx->recvmsg_lock);
release_sock(&rx->sk);
@@ -377,21 +473,9 @@ try_again:
if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
BUG();
- if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
- if (flags & MSG_CMSG_COMPAT) {
- unsigned int id32 = call->user_call_ID;
-
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
- sizeof(unsigned int), &id32);
- } else {
- unsigned long idl = call->user_call_ID;
-
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
- sizeof(unsigned long), &idl);
- }
- if (ret < 0)
- goto error_unlock_call;
- }
+ ret = rxrpc_recvmsg_user_id(call, msg, flags);
+ if (ret < 0)
+ goto error_unlock_call;
if (msg->msg_name && call->peer) {
size_t len = sizeof(call->dest_srx);
@@ -477,14 +561,14 @@ wait_error:
* @_service: Where to store the actual service ID (may be upgraded)
*
* Allow a kernel service to receive data and pick up information about the
- * state of a call. Returns 0 if got what was asked for and there's more
- * available, 1 if we got what was asked for and we're at the end of the data
- * and -EAGAIN if we need more data.
+ * state of a call. Note that *@_abort should also be initialised to %0.
*
- * Note that we may return -EAGAIN to drain empty packets at the end of the
- * data, even if we've already copied over the requested data.
+ * Note that we may return %-EAGAIN to drain empty packets at the end
+ * of the data, even if we've already copied over the requested data.
*
- * *_abort should also be initialised to 0.
+ * Return: %0 if we got what was asked for and there's more available, %1
+ * if we got what was asked for and we're at the end of the data and
+ * %-EAGAIN if we need more data.
*/
int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
struct iov_iter *iter, size_t *_len,
diff --git a/net/rxrpc/rxgk.c b/net/rxrpc/rxgk.c
new file mode 100644
index 000000000000..ba8bc201b8d3
--- /dev/null
+++ b/net/rxrpc/rxgk.c
@@ -0,0 +1,1367 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* GSSAPI-based RxRPC security
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/key-type.h>
+#include "ar-internal.h"
+#include "rxgk_common.h"
+
+/*
+ * Parse the information from a server key
+ */
+static int rxgk_preparse_server_key(struct key_preparsed_payload *prep)
+{
+ const struct krb5_enctype *krb5;
+ struct krb5_buffer *server_key = (void *)&prep->payload.data[2];
+ unsigned int service, sec_class, kvno, enctype;
+ int n = 0;
+
+ _enter("%zu", prep->datalen);
+
+ if (sscanf(prep->orig_description, "%u:%u:%u:%u%n",
+ &service, &sec_class, &kvno, &enctype, &n) != 4)
+ return -EINVAL;
+
+ if (prep->orig_description[n])
+ return -EINVAL;
+
+ krb5 = crypto_krb5_find_enctype(enctype);
+ if (!krb5)
+ return -ENOPKG;
+
+ prep->payload.data[0] = (struct krb5_enctype *)krb5;
+
+ if (prep->datalen != krb5->key_len)
+ return -EKEYREJECTED;
+
+ server_key->len = prep->datalen;
+ server_key->data = kmemdup(prep->data, prep->datalen, GFP_KERNEL);
+ if (!server_key->data)
+ return -ENOMEM;
+
+ _leave(" = 0");
+ return 0;
+}
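+
+/* The key description takes the form "<service>:<security index>:<kvno>:
+ * <enctype>". For instance, "52:6:1:17" might denote service 52, security
+ * index 6, key version 1 and Kerberos enctype 17 (aes128-cts-hmac-sha1-96);
+ * the values here are purely illustrative.
+ */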
+
+static void rxgk_free_server_key(union key_payload *payload)
+{
+ struct krb5_buffer *server_key = (void *)&payload->data[2];
+
+ kfree_sensitive(server_key->data);
+}
+
+static void rxgk_free_preparse_server_key(struct key_preparsed_payload *prep)
+{
+ rxgk_free_server_key(&prep->payload);
+}
+
+static void rxgk_destroy_server_key(struct key *key)
+{
+ rxgk_free_server_key(&key->payload);
+}
+
+static void rxgk_describe_server_key(const struct key *key, struct seq_file *m)
+{
+ const struct krb5_enctype *krb5 = key->payload.data[0];
+
+ if (krb5)
+ seq_printf(m, ": %s", krb5->name);
+}
+
+/*
+ * Handle rekeying the connection when we see our limits overrun or when the
+ * far side decided to rekey.
+ *
+ * Returns a ref on the context if successful or -ESTALE if the key is out of
+ * date.
+ */
+static struct rxgk_context *rxgk_rekey(struct rxrpc_connection *conn,
+ const u16 *specific_key_number)
+{
+ struct rxgk_context *gk, *dead = NULL;
+ unsigned int key_number, current_key, mask = ARRAY_SIZE(conn->rxgk.keys) - 1;
+ bool crank = false;
+
+ _enter("%d", specific_key_number ? *specific_key_number : -1);
+
+ mutex_lock(&conn->security_lock);
+
+ current_key = conn->rxgk.key_number;
+ if (!specific_key_number) {
+ key_number = current_key;
+ } else {
+ if (*specific_key_number == (u16)current_key)
+ key_number = current_key;
+ else if (*specific_key_number == (u16)(current_key - 1))
+ key_number = current_key - 1;
+ else if (*specific_key_number == (u16)(current_key + 1))
+ goto crank_window;
+ else
+ goto bad_key;
+ }
+
+ gk = conn->rxgk.keys[key_number & mask];
+ if (!gk)
+ goto generate_key;
+ if (!specific_key_number &&
+ test_bit(RXGK_TK_NEEDS_REKEY, &gk->flags))
+ goto crank_window;
+
+grab:
+ refcount_inc(&gk->usage);
+ mutex_unlock(&conn->security_lock);
+ rxgk_put(dead);
+ return gk;
+
+crank_window:
+ trace_rxrpc_rxgk_rekey(conn, current_key,
+ specific_key_number ? *specific_key_number : -1);
+ if (current_key == UINT_MAX)
+ goto bad_key;
+ if (current_key + 1 == UINT_MAX)
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+
+ key_number = current_key + 1;
+ if (WARN_ON(conn->rxgk.keys[key_number & mask]))
+ goto bad_key;
+ crank = true;
+
+generate_key:
+ gk = conn->rxgk.keys[current_key & mask];
+ gk = rxgk_generate_transport_key(conn, gk->key, key_number, GFP_NOFS);
+ if (IS_ERR(gk)) {
+ mutex_unlock(&conn->security_lock);
+ return gk;
+ }
+
+ write_lock(&conn->security_use_lock);
+ if (crank) {
+ current_key++;
+ conn->rxgk.key_number = current_key;
+ dead = conn->rxgk.keys[(current_key - 2) & mask];
+ conn->rxgk.keys[(current_key - 2) & mask] = NULL;
+ }
+ conn->rxgk.keys[current_key & mask] = gk;
+ write_unlock(&conn->security_use_lock);
+ goto grab;
+
+bad_key:
+ mutex_unlock(&conn->security_lock);
+ return ERR_PTR(-ESTALE);
+}
+
+/*
+ * Get the specified keying context.
+ *
+ * Returns a ref on the context if successful or -ESTALE if the key is out of
+ * date.
+ */
+static struct rxgk_context *rxgk_get_key(struct rxrpc_connection *conn,
+ const u16 *specific_key_number)
+{
+ struct rxgk_context *gk;
+ unsigned int key_number, current_key, mask = ARRAY_SIZE(conn->rxgk.keys) - 1;
+
+ _enter("{%u},%d",
+ conn->rxgk.key_number, specific_key_number ? *specific_key_number : -1);
+
+ read_lock(&conn->security_use_lock);
+
+ current_key = conn->rxgk.key_number;
+ if (!specific_key_number) {
+ key_number = current_key;
+ } else {
+ /* Only the bottom 16 bits of the key number are exposed in the
+ * header, so we try and keep the upper 16 bits in step. The
+ * whole 32 bits are used to generate the TK.
+ */
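+ /* For example, if the current key number is 0x1fffe, a header value
+ * of 0xfffd selects key 0x1fffd, 0xfffe selects key 0x1fffe and
+ * 0xffff cranks the window on to key 0x1ffff; any other value is
+ * rejected as stale.
+ */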
+ if (*specific_key_number == (u16)current_key)
+ key_number = current_key;
+ else if (*specific_key_number == (u16)(current_key - 1))
+ key_number = current_key - 1;
+ else if (*specific_key_number == (u16)(current_key + 1))
+ goto rekey;
+ else
+ goto bad_key;
+ }
+
+ gk = conn->rxgk.keys[key_number & mask];
+ if (!gk)
+ goto slow_path;
+ if (!specific_key_number &&
+ key_number < UINT_MAX) {
+ if (time_after(jiffies, gk->expiry) ||
+ gk->bytes_remaining < 0) {
+ set_bit(RXGK_TK_NEEDS_REKEY, &gk->flags);
+ goto slow_path;
+ }
+
+ if (test_bit(RXGK_TK_NEEDS_REKEY, &gk->flags))
+ goto slow_path;
+ }
+
+ refcount_inc(&gk->usage);
+ read_unlock(&conn->security_use_lock);
+ return gk;
+
+rekey:
+ _debug("rekey");
+ if (current_key == UINT_MAX)
+ goto bad_key;
+ gk = conn->rxgk.keys[current_key & mask];
+ if (gk)
+ set_bit(RXGK_TK_NEEDS_REKEY, &gk->flags);
+slow_path:
+ read_unlock(&conn->security_use_lock);
+ return rxgk_rekey(conn, specific_key_number);
+bad_key:
+ read_unlock(&conn->security_use_lock);
+ return ERR_PTR(-ESTALE);
+}
+
+/*
+ * initialise connection security
+ */
+static int rxgk_init_connection_security(struct rxrpc_connection *conn,
+ struct rxrpc_key_token *token)
+{
+ struct rxgk_context *gk;
+ int ret;
+
+ _enter("{%d,%u},{%x}",
+ conn->debug_id, conn->rxgk.key_number, key_serial(conn->key));
+
+ conn->security_ix = token->security_index;
+ conn->security_level = token->rxgk->level;
+
+ if (rxrpc_conn_is_client(conn)) {
+ /* rxgkTime is expressed in units of 100ns */
+ conn->rxgk.start_time = ktime_get();
+ do_div(conn->rxgk.start_time, 100);
+ }
+
+ gk = rxgk_generate_transport_key(conn, token->rxgk, conn->rxgk.key_number,
+ GFP_NOFS);
+ if (IS_ERR(gk))
+ return PTR_ERR(gk);
+ conn->rxgk.enctype = gk->krb5->etype;
+ conn->rxgk.keys[gk->key_number & 3] = gk;
+
+ switch (conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ case RXRPC_SECURITY_AUTH:
+ case RXRPC_SECURITY_ENCRYPT:
+ break;
+ default:
+ ret = -EKEYREJECTED;
+ goto error;
+ }
+
+ ret = 0;
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Clean up the crypto on a call.
+ */
+static void rxgk_free_call_crypto(struct rxrpc_call *call)
+{
+}
+
+/*
+ * Allocate a transmission buffer, working out how much of the packet is
+ * left for data once the security wrapping has been accounted for.
+ */
+static struct rxrpc_txbuf *rxgk_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp)
+{
+ enum krb5_crypto_mode mode;
+ struct rxgk_context *gk;
+ struct rxrpc_txbuf *txb;
+ size_t shdr, alloc, limit, part, offset, gap;
+
+ switch (call->conn->security_level) {
+ default:
+ alloc = umin(remain, RXRPC_JUMBO_DATALEN);
+ return rxrpc_alloc_data_txbuf(call, alloc, 1, gfp);
+ case RXRPC_SECURITY_AUTH:
+ shdr = 0;
+ mode = KRB5_CHECKSUM_MODE;
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ shdr = sizeof(struct rxgk_header);
+ mode = KRB5_ENCRYPT_MODE;
+ break;
+ }
+
+ gk = rxgk_get_key(call->conn, NULL);
+ if (IS_ERR(gk))
+ return NULL;
+
+ /* Work out the maximum amount of data that will fit. */
+ alloc = RXRPC_JUMBO_DATALEN;
+ limit = crypto_krb5_how_much_data(gk->krb5, mode, &alloc, &offset);
+
+ if (remain < limit - shdr) {
+ part = remain;
+ alloc = crypto_krb5_how_much_buffer(gk->krb5, mode,
+ shdr + part, &offset);
+ gap = 0;
+ } else {
+ part = limit - shdr;
+ gap = RXRPC_JUMBO_DATALEN - alloc;
+ alloc = RXRPC_JUMBO_DATALEN;
+ }
+
+ rxgk_put(gk);
+
+ txb = rxrpc_alloc_data_txbuf(call, alloc, 16, gfp);
+ if (!txb)
+ return NULL;
+
+ txb->crypto_header = offset;
+ txb->sec_header = shdr;
+ txb->offset += offset + shdr;
+ txb->space = part;
+
+ /* Clear excess space in the packet */
+ if (gap)
+ memset(txb->data + alloc - gap, 0, gap);
+ return txb;
+}
+
+/*
+ * Integrity mode (sign a packet - level 1 security)
+ */
+static int rxgk_secure_packet_integrity(const struct rxrpc_call *call,
+ struct rxgk_context *gk,
+ struct rxrpc_txbuf *txb)
+{
+ struct rxgk_header *hdr;
+ struct scatterlist sg[1];
+ struct krb5_buffer metadata;
+ int ret = -ENOMEM;
+
+ _enter("");
+
+ hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
+ if (!hdr)
+ goto error_gk;
+
+ hdr->epoch = htonl(call->conn->proto.epoch);
+ hdr->cid = htonl(call->cid);
+ hdr->call_number = htonl(call->call_id);
+ hdr->seq = htonl(txb->seq);
+ hdr->sec_index = htonl(call->security_ix);
+ hdr->data_len = htonl(txb->len);
+ metadata.len = sizeof(*hdr);
+ metadata.data = hdr;
+
+ sg_init_table(sg, 1);
+ sg_set_buf(&sg[0], txb->data, txb->alloc_size);
+
+ ret = crypto_krb5_get_mic(gk->krb5, gk->tx_Kc, &metadata,
+ sg, 1, txb->alloc_size,
+ txb->crypto_header, txb->sec_header + txb->len);
+ if (ret >= 0) {
+ txb->pkt_len = ret;
+ if (txb->alloc_size == RXRPC_JUMBO_DATALEN)
+ txb->jumboable = true;
+ gk->bytes_remaining -= ret;
+ }
+ kfree(hdr);
+error_gk:
+ rxgk_put(gk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * wholly encrypt a packet (level 2 security)
+ */
+static int rxgk_secure_packet_encrypted(const struct rxrpc_call *call,
+ struct rxgk_context *gk,
+ struct rxrpc_txbuf *txb)
+{
+ struct rxgk_header *hdr;
+ struct scatterlist sg[1];
+ int ret;
+
+ _enter("%x", txb->len);
+
+ /* Insert the header into the buffer. */
+ hdr = txb->data + txb->crypto_header;
+ hdr->epoch = htonl(call->conn->proto.epoch);
+ hdr->cid = htonl(call->cid);
+ hdr->call_number = htonl(call->call_id);
+ hdr->seq = htonl(txb->seq);
+ hdr->sec_index = htonl(call->security_ix);
+ hdr->data_len = htonl(txb->len);
+
+ sg_init_table(sg, 1);
+ sg_set_buf(&sg[0], txb->data, txb->alloc_size);
+
+ ret = crypto_krb5_encrypt(gk->krb5, gk->tx_enc,
+ sg, 1, txb->alloc_size,
+ txb->crypto_header, txb->sec_header + txb->len,
+ false);
+ if (ret >= 0) {
+ txb->pkt_len = ret;
+ if (txb->alloc_size == RXRPC_JUMBO_DATALEN)
+ txb->jumboable = true;
+ gk->bytes_remaining -= ret;
+ }
+
+ rxgk_put(gk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Apply the security wrapping to an outgoing packet.
+ */
+static int rxgk_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
+{
+ struct rxgk_context *gk;
+ int ret;
+
+ _enter("{%d{%x}},{#%u},%u,",
+ call->debug_id, key_serial(call->conn->key), txb->seq, txb->len);
+
+ gk = rxgk_get_key(call->conn, NULL);
+ if (IS_ERR(gk))
+ return PTR_ERR(gk) == -ESTALE ? -EKEYREJECTED : PTR_ERR(gk);
+
+ ret = key_validate(call->conn->key);
+ if (ret < 0) {
+ rxgk_put(gk);
+ return ret;
+ }
+
+ call->security_enctype = gk->krb5->etype;
+ txb->cksum = htons(gk->key_number);
+
+ switch (call->conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ rxgk_put(gk);
+ txb->pkt_len = txb->len;
+ return 0;
+ case RXRPC_SECURITY_AUTH:
+ return rxgk_secure_packet_integrity(call, gk, txb);
+ case RXRPC_SECURITY_ENCRYPT:
+ return rxgk_secure_packet_encrypted(call, gk, txb);
+ default:
+ rxgk_put(gk);
+ return -EPERM;
+ }
+}
+
+/*
+ * Integrity mode (check the signature on a packet - level 1 security)
+ */
+static int rxgk_verify_packet_integrity(struct rxrpc_call *call,
+ struct rxgk_context *gk,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxgk_header *hdr;
+ struct krb5_buffer metadata;
+ unsigned int offset = sp->offset, len = sp->len;
+ size_t data_offset = 0, data_len = len;
+ u32 ac;
+ int ret = -ENOMEM;
+
+ _enter("");
+
+ crypto_krb5_where_is_the_data(gk->krb5, KRB5_CHECKSUM_MODE,
+ &data_offset, &data_len);
+
+ hdr = kzalloc(sizeof(*hdr), GFP_NOFS);
+ if (!hdr)
+ return -ENOMEM;
+
+ hdr->epoch = htonl(call->conn->proto.epoch);
+ hdr->cid = htonl(call->cid);
+ hdr->call_number = htonl(call->call_id);
+ hdr->seq = htonl(sp->hdr.seq);
+ hdr->sec_index = htonl(call->security_ix);
+ hdr->data_len = htonl(data_len);
+
+ metadata.len = sizeof(*hdr);
+ metadata.data = hdr;
+ ret = rxgk_verify_mic_skb(gk->krb5, gk->rx_Kc, &metadata,
+ skb, &offset, &len, &ac);
+ kfree(hdr);
+ if (ret == -EPROTO) {
+ rxrpc_abort_eproto(call, skb, ac,
+ rxgk_abort_1_verify_mic_eproto);
+ } else {
+ sp->offset = offset;
+ sp->len = len;
+ }
+
+ rxgk_put(gk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Decrypt an encrypted packet (level 2 security).
+ */
+static int rxgk_verify_packet_encrypted(struct rxrpc_call *call,
+ struct rxgk_context *gk,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxgk_header hdr;
+ unsigned int offset = sp->offset, len = sp->len;
+ int ret;
+ u32 ac;
+
+ _enter("");
+
+ ret = rxgk_decrypt_skb(gk->krb5, gk->rx_enc, skb, &offset, &len, &ac);
+ if (ret == -EPROTO)
+ rxrpc_abort_eproto(call, skb, ac, rxgk_abort_2_decrypt_eproto);
+ if (ret < 0)
+ goto error;
+
+ if (len < sizeof(hdr)) {
+ ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
+ rxgk_abort_2_short_header);
+ goto error;
+ }
+
+ /* Extract the header from the skb */
+ ret = skb_copy_bits(skb, offset, &hdr, sizeof(hdr));
+ if (ret < 0) {
+ ret = rxrpc_abort_eproto(call, skb, RXGK_PACKETSHORT,
+ rxgk_abort_2_short_encdata);
+ goto error;
+ }
+ offset += sizeof(hdr);
+ len -= sizeof(hdr);
+
+ if (ntohl(hdr.epoch) != call->conn->proto.epoch ||
+ ntohl(hdr.cid) != call->cid ||
+ ntohl(hdr.call_number) != call->call_id ||
+ ntohl(hdr.seq) != sp->hdr.seq ||
+ ntohl(hdr.sec_index) != call->security_ix ||
+ ntohl(hdr.data_len) > len) {
+ ret = rxrpc_abort_eproto(call, skb, RXGK_SEALEDINCON,
+ rxgk_abort_2_short_data);
+ goto error;
+ }
+
+ sp->offset = offset;
+ sp->len = ntohl(hdr.data_len);
+ ret = 0;
+error:
+ rxgk_put(gk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Verify the security on a received packet or subpacket (if part of a
+ * jumbo packet).
+ */
+static int rxgk_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxgk_context *gk;
+ u16 key_number = sp->hdr.cksum;
+
+ _enter("{%d{%x}},{#%u}",
+ call->debug_id, key_serial(call->conn->key), sp->hdr.seq);
+
+ gk = rxgk_get_key(call->conn, &key_number);
+ if (IS_ERR(gk)) {
+ switch (PTR_ERR(gk)) {
+ case -ESTALE:
+ return rxrpc_abort_eproto(call, skb, RXGK_BADKEYNO,
+ rxgk_abort_bad_key_number);
+ default:
+ return PTR_ERR(gk);
+ }
+ }
+
+ call->security_enctype = gk->krb5->etype;
+ switch (call->conn->security_level) {
+ case RXRPC_SECURITY_PLAIN:
+ return 0;
+ case RXRPC_SECURITY_AUTH:
+ return rxgk_verify_packet_integrity(call, gk, skb);
+ case RXRPC_SECURITY_ENCRYPT:
+ return rxgk_verify_packet_encrypted(call, gk, skb);
+ default:
+ rxgk_put(gk);
+ return -ENOANO;
+ }
+}
+
+/*
+ * Allocate memory to hold a challenge or a response packet. We're not running
+ * in the io_thread, so we can't use ->tx_alloc.
+ */
+static struct page *rxgk_alloc_packet(size_t total_len)
+{
+ gfp_t gfp = GFP_NOFS;
+ int order;
+
+ order = get_order(total_len);
+ if (order > 0)
+ gfp |= __GFP_COMP;
+ return alloc_pages(gfp, order);
+}
+
+/*
+ * Issue a challenge.
+ */
+static int rxgk_issue_challenge(struct rxrpc_connection *conn)
+{
+ struct rxrpc_wire_header *whdr;
+ struct bio_vec bvec[1];
+ struct msghdr msg;
+ struct page *page;
+ size_t len = sizeof(*whdr) + sizeof(conn->rxgk.nonce);
+ u32 serial;
+ int ret;
+
+ _enter("{%d}", conn->debug_id);
+
+ get_random_bytes(&conn->rxgk.nonce, sizeof(conn->rxgk.nonce));
+
+ /* We can't use conn->tx_alloc without a lock */
+ page = rxgk_alloc_packet(sizeof(*whdr) + sizeof(conn->rxgk.nonce));
+ if (!page)
+ return -ENOMEM;
+
+ bvec_set_page(&bvec[0], page, len, 0);
+ iov_iter_bvec(&msg.msg_iter, WRITE, bvec, 1, len);
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = MSG_SPLICE_PAGES;
+
+ whdr = page_address(page);
+ whdr->epoch = htonl(conn->proto.epoch);
+ whdr->cid = htonl(conn->proto.cid);
+ whdr->callNumber = 0;
+ whdr->seq = 0;
+ whdr->type = RXRPC_PACKET_TYPE_CHALLENGE;
+ whdr->flags = conn->out_clientflag;
+ whdr->userStatus = 0;
+ whdr->securityIndex = conn->security_ix;
+ whdr->_rsvd = 0;
+ whdr->serviceId = htons(conn->service_id);
+
+ memcpy(whdr + 1, conn->rxgk.nonce, sizeof(conn->rxgk.nonce));
+
+ serial = rxrpc_get_next_serials(conn, 1);
+ whdr->serial = htonl(serial);
+
+ trace_rxrpc_tx_challenge(conn, serial, 0, *(u32 *)&conn->rxgk.nonce);
+
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ if (ret > 0)
+ conn->peer->last_tx_at = ktime_get_seconds();
+ __free_page(page);
+
+ if (ret < 0) {
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_point_rxgk_challenge);
+ return -EAGAIN;
+ }
+
+ trace_rxrpc_tx_packet(conn->debug_id, whdr,
+ rxrpc_tx_point_rxgk_challenge);
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * Validate a challenge packet.
+ */
+static bool rxgk_validate_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ u8 nonce[20];
+
+ if (!conn->key) {
+ rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxgk_abort_chall_no_key);
+ return false;
+ }
+
+ if (key_validate(conn->key) < 0) {
+ rxrpc_abort_conn(conn, skb, RXGK_EXPIRED, -EPROTO,
+ rxgk_abort_chall_key_expired);
+ return false;
+ }
+
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ nonce, sizeof(nonce)) < 0) {
+ rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO,
+ rxgk_abort_chall_short);
+ return false;
+ }
+
+ trace_rxrpc_rx_challenge(conn, sp->hdr.serial, 0, *(u32 *)nonce, 0);
+ return true;
+}
+
+/**
+ * rxgk_kernel_query_challenge - Query RxGK-specific challenge parameters
+ * @challenge: The challenge packet to query
+ *
+ * Return: The Kerberos 5 encryption type for the challenged connection.
+ */
+u32 rxgk_kernel_query_challenge(struct sk_buff *challenge)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(challenge);
+
+ return sp->chall.conn->rxgk.enctype;
+}
+EXPORT_SYMBOL(rxgk_kernel_query_challenge);
+
+/*
+ * Fill out the control message to pass to userspace to inform about the
+ * challenge.
+ */
+static int rxgk_challenge_to_recvmsg(struct rxrpc_connection *conn,
+ struct sk_buff *challenge,
+ struct msghdr *msg)
+{
+ struct rxgk_challenge chall;
+
+ chall.base.service_id = conn->service_id;
+ chall.base.security_index = conn->security_ix;
+ chall.enctype = conn->rxgk.enctype;
+
+ return put_cmsg(msg, SOL_RXRPC, RXRPC_CHALLENGED, sizeof(chall), &chall);
+}
+
+/*
+ * Insert the requisite amount of XDR padding for the length given.
+ */
+static int rxgk_pad_out(struct sk_buff *response, size_t len, size_t offset)
+{
+ __be32 zero = 0;
+ size_t pad = xdr_round_up(len) - len;
+ int ret;
+
+ if (!pad)
+ return 0;
+
+ ret = skb_store_bits(response, offset, &zero, pad);
+ if (ret < 0)
+ return ret;
+ return pad;
+}
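+
+/* For example, a 5-byte appdata item is followed by three zero bytes, as
+ * xdr_round_up(5) is 8, so that the next XDR element starts on a four-byte
+ * boundary.
+ */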
+
+/*
+ * Insert the header into the response.
+ */
+static noinline ssize_t rxgk_insert_response_header(struct rxrpc_connection *conn,
+ struct rxgk_context *gk,
+ struct sk_buff *response,
+ size_t offset)
+{
+ struct rxrpc_skb_priv *rsp = rxrpc_skb(response);
+
+ struct {
+ struct rxrpc_wire_header whdr;
+ __be32 start_time_msw;
+ __be32 start_time_lsw;
+ __be32 ticket_len;
+ } h;
+ int ret;
+
+ rsp->resp.kvno = gk->key_number;
+ rsp->resp.version = gk->krb5->etype;
+
+ h.whdr.epoch = htonl(conn->proto.epoch);
+ h.whdr.cid = htonl(conn->proto.cid);
+ h.whdr.callNumber = 0;
+ h.whdr.serial = 0;
+ h.whdr.seq = 0;
+ h.whdr.type = RXRPC_PACKET_TYPE_RESPONSE;
+ h.whdr.flags = conn->out_clientflag;
+ h.whdr.userStatus = 0;
+ h.whdr.securityIndex = conn->security_ix;
+ h.whdr.cksum = htons(gk->key_number);
+ h.whdr.serviceId = htons(conn->service_id);
+ h.start_time_msw = htonl(upper_32_bits(conn->rxgk.start_time));
+ h.start_time_lsw = htonl(lower_32_bits(conn->rxgk.start_time));
+ h.ticket_len = htonl(gk->key->ticket.len);
+
+ ret = skb_store_bits(response, offset, &h, sizeof(h));
+ return ret < 0 ? ret : sizeof(h);
+}
+
+/*
+ * Construct the authenticator to go in the response packet
+ *
+ * struct RXGK_Authenticator {
+ * opaque nonce[20];
+ * opaque appdata<>;
+ * RXGK_Level level;
+ * unsigned int epoch;
+ * unsigned int cid;
+ * unsigned int call_numbers<>;
+ * };
+ */
+static ssize_t rxgk_construct_authenticator(struct rxrpc_connection *conn,
+ struct sk_buff *challenge,
+ const struct krb5_buffer *appdata,
+ struct sk_buff *response,
+ size_t offset)
+{
+ struct {
+ u8 nonce[20];
+ __be32 appdata_len;
+ } a;
+ struct {
+ __be32 level;
+ __be32 epoch;
+ __be32 cid;
+ __be32 call_numbers_count;
+ __be32 call_numbers[4];
+ } b;
+ int ret;
+
+ ret = skb_copy_bits(challenge, sizeof(struct rxrpc_wire_header),
+ a.nonce, sizeof(a.nonce));
+ if (ret < 0)
+ return -EPROTO;
+
+ a.appdata_len = htonl(appdata->len);
+
+ ret = skb_store_bits(response, offset, &a, sizeof(a));
+ if (ret < 0)
+ return ret;
+ offset += sizeof(a);
+
+ if (appdata->len) {
+ ret = skb_store_bits(response, offset, appdata->data, appdata->len);
+ if (ret < 0)
+ return ret;
+ offset += appdata->len;
+
+ ret = rxgk_pad_out(response, appdata->len, offset);
+ if (ret < 0)
+ return ret;
+ offset += ret;
+ }
+
+ b.level = htonl(conn->security_level);
+ b.epoch = htonl(conn->proto.epoch);
+ b.cid = htonl(conn->proto.cid);
+ b.call_numbers_count = htonl(4);
+ b.call_numbers[0] = htonl(conn->channels[0].call_counter);
+ b.call_numbers[1] = htonl(conn->channels[1].call_counter);
+ b.call_numbers[2] = htonl(conn->channels[2].call_counter);
+ b.call_numbers[3] = htonl(conn->channels[3].call_counter);
+
+ ret = skb_store_bits(response, offset, &b, sizeof(b));
+ if (ret < 0)
+ return ret;
+ return sizeof(a) + xdr_round_up(appdata->len) + sizeof(b);
+}
+
+static ssize_t rxgk_encrypt_authenticator(struct rxrpc_connection *conn,
+ struct rxgk_context *gk,
+ struct sk_buff *response,
+ size_t offset,
+ size_t alloc_len,
+ size_t auth_offset,
+ size_t auth_len)
+{
+ struct scatterlist sg[16];
+ int nr_sg;
+
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ nr_sg = skb_to_sgvec(response, sg, offset, alloc_len);
+ if (unlikely(nr_sg < 0))
+ return nr_sg;
+ return crypto_krb5_encrypt(gk->krb5, gk->resp_enc, sg, nr_sg, alloc_len,
+ auth_offset, auth_len, false);
+}
+
+/*
+ * Construct the response.
+ *
+ * struct RXGK_Response {
+ * rxgkTime start_time;
+ * RXGK_Data token;
+ * opaque authenticator<RXGK_MAXAUTHENTICATOR>
+ * };
+ */
+static int rxgk_construct_response(struct rxrpc_connection *conn,
+ struct sk_buff *challenge,
+ struct krb5_buffer *appdata)
+{
+ struct rxrpc_skb_priv *csp, *rsp;
+ struct rxgk_context *gk;
+ struct sk_buff *response;
+ size_t len, auth_len, authx_len, offset, auth_offset, authx_offset;
+ __be32 tmp;
+ int ret;
+
+ gk = rxgk_get_key(conn, NULL);
+ if (IS_ERR(gk))
+ return PTR_ERR(gk);
+
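+ /* The authenticator is a 20-byte nonce, a 4-byte appdata length plus
+ * the XDR-padded appdata itself, 12 bytes of level/epoch/cid and then
+ * a call-number count plus four entries ((1 + 4) * 4 bytes).
+ */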
+ auth_len = 20 + (4 + xdr_round_up(appdata->len)) + 12 + (1 + 4) * 4;
+ authx_len = crypto_krb5_how_much_buffer(gk->krb5, KRB5_ENCRYPT_MODE,
+ auth_len, &auth_offset);
+ len = sizeof(struct rxrpc_wire_header) +
+ 8 + (4 + xdr_round_up(gk->key->ticket.len)) + (4 + authx_len);
+
+ response = alloc_skb_with_frags(0, len, 0, &ret, GFP_NOFS);
+ if (!response)
+ goto error;
+ rxrpc_new_skb(response, rxrpc_skb_new_response_rxgk);
+ response->len = len;
+ response->data_len = len;
+
+ ret = rxgk_insert_response_header(conn, gk, response, 0);
+ if (ret < 0)
+ goto error;
+ offset = ret;
+
+ ret = skb_store_bits(response, offset, gk->key->ticket.data, gk->key->ticket.len);
+ if (ret < 0)
+ goto error;
+ offset += gk->key->ticket.len;
+ ret = rxgk_pad_out(response, gk->key->ticket.len, offset);
+ if (ret < 0)
+ goto error;
+
+ authx_offset = offset + ret + 4; /* Leave a gap for the length. */
+
+ ret = rxgk_construct_authenticator(conn, challenge, appdata, response,
+ authx_offset + auth_offset);
+ if (ret < 0)
+ goto error;
+ auth_len = ret;
+
+ ret = rxgk_encrypt_authenticator(conn, gk, response,
+ authx_offset, authx_len,
+ auth_offset, auth_len);
+ if (ret < 0)
+ goto error;
+ authx_len = ret;
+
+ tmp = htonl(authx_len);
+ ret = skb_store_bits(response, authx_offset - 4, &tmp, 4);
+ if (ret < 0)
+ goto error;
+
+ ret = rxgk_pad_out(response, authx_len, authx_offset + authx_len);
+ if (ret < 0)
+ goto error;
+ len = authx_offset + authx_len + ret;
+
+ if (len != response->len) {
+ response->len = len;
+ response->data_len = len;
+ }
+
+ csp = rxrpc_skb(challenge);
+ rsp = rxrpc_skb(response);
+ rsp->resp.len = len;
+ rsp->resp.challenge_serial = csp->hdr.serial;
+ rxrpc_post_response(conn, response);
+ response = NULL;
+ ret = 0;
+
+error:
+ rxrpc_free_skb(response, rxrpc_skb_put_response);
+ rxgk_put(gk);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Respond to a challenge packet.
+ */
+static int rxgk_respond_to_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *challenge,
+ struct krb5_buffer *appdata)
+{
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+ if (key_validate(conn->key) < 0)
+ return rxrpc_abort_conn(conn, NULL, RXGK_EXPIRED, -EPROTO,
+ rxgk_abort_chall_key_expired);
+
+ return rxgk_construct_response(conn, challenge, appdata);
+}
+
+static int rxgk_respond_to_challenge_no_appdata(struct rxrpc_connection *conn,
+ struct sk_buff *challenge)
+{
+ struct krb5_buffer appdata = {};
+
+ return rxgk_respond_to_challenge(conn, challenge, &appdata);
+}
+
+/**
+ * rxgk_kernel_respond_to_challenge - Respond to a challenge with appdata
+ * @challenge: The challenge to respond to
+ * @appdata: The application data to include in the RESPONSE authenticator
+ *
+ * Allow a kernel application to respond to a CHALLENGE with application data
+ * to be included in the RxGK RESPONSE Authenticator.
+ *
+ * Return: %0 if successful and a negative error code otherwise.
+ */
+int rxgk_kernel_respond_to_challenge(struct sk_buff *challenge,
+ struct krb5_buffer *appdata)
+{
+ struct rxrpc_skb_priv *csp = rxrpc_skb(challenge);
+
+ return rxgk_respond_to_challenge(csp->chall.conn, challenge, appdata);
+}
+EXPORT_SYMBOL(rxgk_kernel_respond_to_challenge);
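+
+/* For instance, a kernel service's challenge handler might do the following
+ * (a sketch; our_blob is hypothetical application-defined data to be bound
+ * into the RESPONSE authenticator):
+ *
+ *	struct krb5_buffer appdata = {
+ *		.len = sizeof(our_blob),
+ *		.data = our_blob,
+ *	};
+ *
+ *	ret = rxgk_kernel_respond_to_challenge(challenge, &appdata);
+ */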
+
+/*
+ * Parse the sendmsg() control messages and respond to the challenge. We need
+ * to see if there's any appdata to fish out.
+ */
+static int rxgk_sendmsg_respond_to_challenge(struct sk_buff *challenge,
+ struct msghdr *msg)
+{
+ struct krb5_buffer appdata = {};
+ struct cmsghdr *cmsg;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (cmsg->cmsg_level != SOL_RXRPC ||
+ cmsg->cmsg_type != RXRPC_RESP_RXGK_APPDATA)
+ continue;
+ if (appdata.data)
+ return -EINVAL;
+ appdata.data = CMSG_DATA(cmsg);
+ appdata.len = cmsg->cmsg_len - sizeof(struct cmsghdr);
+ }
+
+ return rxgk_kernel_respond_to_challenge(challenge, &appdata);
+}
+
+/*
+ * Verify the authenticator.
+ *
+ * struct RXGK_Authenticator {
+ * opaque nonce[20];
+ * opaque appdata<>;
+ * RXGK_Level level;
+ * unsigned int epoch;
+ * unsigned int cid;
+ * unsigned int call_numbers<>;
+ * };
+ */
+static int rxgk_do_verify_authenticator(struct rxrpc_connection *conn,
+ const struct krb5_enctype *krb5,
+ struct sk_buff *skb,
+ __be32 *p, __be32 *end)
+{
+ u32 app_len, call_count, level, epoch, cid, i;
+
+ _enter("");
+
+ if (memcmp(p, conn->rxgk.nonce, 20) != 0)
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_bad_nonce);
+ p += 20 / sizeof(__be32);
+
+ app_len = ntohl(*p++);
+ if (app_len > (end - p) * sizeof(__be32))
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_short_applen);
+
+ p += xdr_round_up(app_len) / sizeof(__be32);
+ if (end - p < 4)
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_short_applen);
+
+ level = ntohl(*p++);
+ epoch = ntohl(*p++);
+ cid = ntohl(*p++);
+ call_count = ntohl(*p++);
+
+ if (level != conn->security_level ||
+ epoch != conn->proto.epoch ||
+ cid != conn->proto.cid ||
+ call_count > 4)
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_bad_param);
+
+ if (end - p < call_count)
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_short_call_list);
+
+ for (i = 0; i < call_count; i++) {
+ u32 call_id = ntohl(*p++);
+
+ if (call_id > INT_MAX)
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_bad_callid);
+
+ if (call_id < conn->channels[i].call_counter)
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_call_ctr);
+
+ if (call_id > conn->channels[i].call_counter) {
+ if (conn->channels[i].call)
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_call_state);
+
+ conn->channels[i].call_counter = call_id;
+ }
+ }
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * Extract the authenticator and verify it.
+ */
+static int rxgk_verify_authenticator(struct rxrpc_connection *conn,
+ const struct krb5_enctype *krb5,
+ struct sk_buff *skb,
+ unsigned int auth_offset, unsigned int auth_len)
+{
+ void *auth;
+ __be32 *p;
+ int ret;
+
+ auth = kmalloc(auth_len, GFP_NOFS);
+ if (!auth)
+ return -ENOMEM;
+
+ ret = skb_copy_bits(skb, auth_offset, auth, auth_len);
+ if (ret < 0) {
+ ret = rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EPROTO,
+ rxgk_abort_resp_short_auth);
+ goto error;
+ }
+
+ p = auth;
+ ret = rxgk_do_verify_authenticator(conn, krb5, skb, p,
+ p + auth_len / sizeof(__be32));
+error:
+ kfree(auth);
+ return ret;
+}
+
+/*
+ * Verify a response.
+ *
+ * struct RXGK_Response {
+ * rxgkTime start_time;
+ * RXGK_Data token;
+ * opaque authenticator<RXGK_MAXAUTHENTICATOR>
+ * };
+ */
+static int rxgk_verify_response(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ const struct krb5_enctype *krb5;
+ struct rxrpc_key_token *token;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxgk_response rhdr;
+ struct rxgk_context *gk;
+ struct key *key = NULL;
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
+ unsigned int len = skb->len - sizeof(struct rxrpc_wire_header);
+ unsigned int token_offset, token_len;
+ unsigned int auth_offset, auth_len;
+ __be32 xauth_len;
+ int ret, ec;
+
+ _enter("{%d}", conn->debug_id);
+
+ /* Parse the RXGK_Response object */
+ if (sizeof(rhdr) + sizeof(__be32) > len)
+ goto short_packet;
+
+ if (skb_copy_bits(skb, offset, &rhdr, sizeof(rhdr)) < 0)
+ goto short_packet;
+ offset += sizeof(rhdr);
+ len -= sizeof(rhdr);
+
+ token_offset = offset;
+ token_len = ntohl(rhdr.token_len);
+ if (xdr_round_up(token_len) + sizeof(__be32) > len)
+ goto short_packet;
+
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, 0, sp->hdr.cksum, token_len);
+
+ offset += xdr_round_up(token_len);
+ len -= xdr_round_up(token_len);
+
+ if (skb_copy_bits(skb, offset, &xauth_len, sizeof(xauth_len)) < 0)
+ goto short_packet;
+ offset += sizeof(xauth_len);
+ len -= sizeof(xauth_len);
+
+ auth_offset = offset;
+ auth_len = ntohl(xauth_len);
+ if (auth_len > len)
+ goto short_packet;
+ if (auth_len & 3)
+ goto inconsistent;
+ if (auth_len < 20 + 9 * 4)
+ goto auth_too_short;
+
+ /* We need to extract and decrypt the token and instantiate a session
+ * key for it. This bit, however, is application-specific. If
+ * possible, we use a default parser, but we might end up bumping this
+ * to the app to deal with - which might mean a round trip to
+ * userspace.
+ */
+ ret = rxgk_extract_token(conn, skb, token_offset, token_len, &key);
+ if (ret < 0)
+ goto out;
+
+ /* We now have a key instantiated from the decrypted ticket. We can
+ * pass this to the application so that they can parse the ticket
+ * content and we can use the session key it contains to derive the
+ * keys we need.
+ *
+ * Note that we have to switch enctype at this point as the enctype of
+ * the ticket doesn't necessarily match that of the transport.
+ */
+ token = key->payload.data[0];
+ conn->security_level = token->rxgk->level;
+ conn->rxgk.start_time = be64_to_cpu(rhdr.start_time);
+
+ gk = rxgk_generate_transport_key(conn, token->rxgk, sp->hdr.cksum, GFP_NOFS);
+ if (IS_ERR(gk)) {
+ ret = PTR_ERR(gk);
+ goto cant_get_token;
+ }
+
+ krb5 = gk->krb5;
+
+ trace_rxrpc_rx_response(conn, sp->hdr.serial, krb5->etype, sp->hdr.cksum, token_len);
+
+ /* Decrypt, parse and verify the authenticator. */
+ ret = rxgk_decrypt_skb(krb5, gk->resp_enc, skb,
+ &auth_offset, &auth_len, &ec);
+ if (ret < 0) {
+ rxrpc_abort_conn(conn, skb, RXGK_SEALEDINCON, ret,
+ rxgk_abort_resp_auth_dec);
+ goto out;
+ }
+
+ ret = rxgk_verify_authenticator(conn, krb5, skb, auth_offset, auth_len);
+ if (ret < 0)
+ goto out;
+
+ conn->key = key;
+ key = NULL;
+ ret = 0;
+out:
+ key_put(key);
+ _leave(" = %d", ret);
+ return ret;
+
+inconsistent:
+ ret = rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
+ rxgk_abort_resp_xdr_align);
+ goto out;
+auth_too_short:
+ ret = rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO,
+ rxgk_abort_resp_short_auth);
+ goto out;
+short_packet:
+ ret = rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO,
+ rxgk_abort_resp_short_packet);
+ goto out;
+
+cant_get_token:
+ switch (ret) {
+ case -ENOMEM:
+ goto temporary_error;
+ case -EINVAL:
+ ret = rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EKEYREJECTED,
+ rxgk_abort_resp_internal_error);
+ goto out;
+ case -ENOPKG:
+ ret = rxrpc_abort_conn(conn, skb, KRB5_PROG_KEYTYPE_NOSUPP,
+ -EKEYREJECTED, rxgk_abort_resp_nopkg);
+ goto out;
+ }
+
+temporary_error:
+ /* Ignore the response packet if we got a temporary error such as
+ * ENOMEM. We just want to send the challenge again. Note that we
+ * also come out this way if the ticket decryption fails.
+ */
+ goto out;
+}
+
+/*
+ * clear the connection security
+ */
+static void rxgk_clear(struct rxrpc_connection *conn)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(conn->rxgk.keys); i++)
+ rxgk_put(conn->rxgk.keys[i]);
+}
+
+/*
+ * Initialise the RxGK security service.
+ */
+static int rxgk_init(void)
+{
+ return 0;
+}
+
+/*
+ * Clean up the RxGK security service.
+ */
+static void rxgk_exit(void)
+{
+}
+
+/*
+ * RxRPC YFS GSSAPI-based security
+ */
+const struct rxrpc_security rxgk_yfs = {
+ .name = "yfs-rxgk",
+ .security_index = RXRPC_SECURITY_YFS_RXGK,
+ .no_key_abort = RXGK_NOTAUTH,
+ .init = rxgk_init,
+ .exit = rxgk_exit,
+ .preparse_server_key = rxgk_preparse_server_key,
+ .free_preparse_server_key = rxgk_free_preparse_server_key,
+ .destroy_server_key = rxgk_destroy_server_key,
+ .describe_server_key = rxgk_describe_server_key,
+ .init_connection_security = rxgk_init_connection_security,
+ .alloc_txbuf = rxgk_alloc_txbuf,
+ .secure_packet = rxgk_secure_packet,
+ .verify_packet = rxgk_verify_packet,
+ .free_call_crypto = rxgk_free_call_crypto,
+ .issue_challenge = rxgk_issue_challenge,
+ .validate_challenge = rxgk_validate_challenge,
+ .challenge_to_recvmsg = rxgk_challenge_to_recvmsg,
+ .sendmsg_respond_to_challenge = rxgk_sendmsg_respond_to_challenge,
+ .respond_to_challenge = rxgk_respond_to_challenge_no_appdata,
+ .verify_response = rxgk_verify_response,
+ .clear = rxgk_clear,
+ .default_decode_ticket = rxgk_yfs_decode_ticket,
+};
diff --git a/net/rxrpc/rxgk_app.c b/net/rxrpc/rxgk_app.c
new file mode 100644
index 000000000000..6206a84395b8
--- /dev/null
+++ b/net/rxrpc/rxgk_app.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Application-specific bits for GSSAPI-based RxRPC security
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/key-type.h>
+#include "ar-internal.h"
+#include "rxgk_common.h"
+
+/*
+ * Decode a default-style YFS ticket in a response and turn it into an
+ * rxrpc-type key.
+ *
+ * struct rxgk_key {
+ * afs_uint32 enctype;
+ * opaque key<>;
+ * };
+ *
+ * struct RXGK_AuthName {
+ * afs_int32 kind;
+ * opaque data<AUTHDATAMAX>;
+ * opaque display<AUTHPRINTABLEMAX>;
+ * };
+ *
+ * struct RXGK_Token {
+ * rxgk_key K0;
+ * RXGK_Level level;
+ * rxgkTime starttime;
+ * afs_int32 lifetime;
+ * afs_int32 bytelife;
+ * rxgkTime expirationtime;
+ * struct RXGK_AuthName identities<>;
+ * };
+ */
+int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int ticket_offset, unsigned int ticket_len,
+ struct key **_key)
+{
+ struct rxrpc_key_token *token;
+ const struct cred *cred = current_cred(); // TODO - use socket creds
+ struct key *key;
+ size_t pre_ticket_len, payload_len;
+ unsigned int klen, enctype;
+ void *payload, *ticket;
+ __be32 *t, *p, *q, tmp[2];
+ int ret;
+
+ _enter("");
+
+ /* Get the session key length */
+ ret = skb_copy_bits(skb, ticket_offset, tmp, sizeof(tmp));
+ if (ret < 0)
+ return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
+ rxgk_abort_resp_short_yfs_klen);
+ enctype = ntohl(tmp[0]);
+ klen = ntohl(tmp[1]);
+
+ if (klen > ticket_len - 10 * sizeof(__be32))
+ return rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
+ rxgk_abort_resp_short_yfs_key);
+
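+ /* The XDR key payload is laid out as a five-word form header, a
+ * fourteen-word token body, the XDR-padded session key, a ticket
+ * length word and then the XDR-padded ticket itself.
+ */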
+ pre_ticket_len = ((5 + 14) * sizeof(__be32) +
+ xdr_round_up(klen) +
+ sizeof(__be32));
+ payload_len = pre_ticket_len + xdr_round_up(ticket_len);
+
+ payload = kzalloc(payload_len, GFP_NOFS);
+ if (!payload)
+ return -ENOMEM;
+
+ /* We need to fill out the XDR form for a key payload that we can pass
+ * to add_key(). Start by copying in the ticket so that we can parse
+ * it.
+ */
+ ticket = payload + pre_ticket_len;
+ ret = skb_copy_bits(skb, ticket_offset, ticket, ticket_len);
+ if (ret < 0) {
+ ret = rxrpc_abort_conn(conn, skb, RXGK_INCONSISTENCY, -EPROTO,
+ rxgk_abort_resp_short_yfs_tkt);
+ goto error;
+ }
+
+ /* Fill out the form header. */
+ p = payload;
+ p[0] = htonl(0); /* Flags */
+ p[1] = htonl(1); /* len(cellname) */
+ p[2] = htonl(0x20000000); /* Cellname " " */
+ p[3] = htonl(1); /* #tokens */
+ p[4] = htonl(15 * sizeof(__be32) + xdr_round_up(klen) +
+ xdr_round_up(ticket_len)); /* Token len */
+
+ /* Now fill in the body. Most of this we can just scrape directly from
+ * the ticket.
+ */
+ t = ticket + sizeof(__be32) * 2 + xdr_round_up(klen);
+ q = payload + 5 * sizeof(__be32);
+ q[0] = htonl(RXRPC_SECURITY_YFS_RXGK);
+ q[1] = t[1]; /* begintime - msw */
+ q[2] = t[2]; /* - lsw */
+ q[3] = t[5]; /* endtime - msw */
+ q[4] = t[6]; /* - lsw */
+ q[5] = 0; /* level - msw */
+ q[6] = t[0]; /* - lsw */
+ q[7] = 0; /* lifetime - msw */
+ q[8] = t[3]; /* - lsw */
+ q[9] = 0; /* bytelife - msw */
+ q[10] = t[4]; /* - lsw */
+ q[11] = 0; /* enctype - msw */
+ q[12] = htonl(enctype); /* - lsw */
+ q[13] = htonl(klen); /* Key length */
+
+ q += 14;
+
+ memcpy(q, ticket + sizeof(__be32) * 2, klen);
+ q += xdr_round_up(klen) / 4;
+ q[0] = htonl(ticket_len);
+ q++;
+ if (WARN_ON((unsigned long)q != (unsigned long)ticket)) {
+ ret = -EIO;
+ goto error;
+ }
+
+ /* Ticket read in with skb_copy_bits above */
+ q += xdr_round_up(ticket_len) / 4;
+ if (WARN_ON((unsigned long)q - (unsigned long)payload != payload_len)) {
+ ret = -EIO;
+ goto error;
+ }
+
+ /* Now turn that into a key. */
+ key = key_alloc(&key_type_rxrpc, "x",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred, // TODO: Use socket owner
+ KEY_USR_VIEW,
+ KEY_ALLOC_NOT_IN_QUOTA, NULL);
+ if (IS_ERR(key)) {
+ _leave(" = %ld [alloc]", PTR_ERR(key));
+ ret = PTR_ERR(key);
+ goto error;
+ }
+
+ _debug("key %d", key_serial(key));
+
+ ret = key_instantiate_and_link(key, payload, payload_len, NULL, NULL);
+ if (ret < 0)
+ goto error_key;
+
+ token = key->payload.data[0];
+ token->no_leak_key = true;
+ *_key = key;
+ key = NULL;
+ ret = 0;
+ goto error;
+
+error_key:
+ key_put(key);
+error:
+ kfree_sensitive(payload);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Extract the token and set up a session key from the details.
+ *
+ * struct RXGK_TokenContainer {
+ * afs_int32 kvno;
+ * afs_int32 enctype;
+ * opaque encrypted_token<>;
+ * };
+ *
+ * [tools.ietf.org/html/draft-wilkinson-afs3-rxgk-afs-08 sec 6.1]
+ */
+int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int token_offset, unsigned int token_len,
+ struct key **_key)
+{
+ const struct krb5_enctype *krb5;
+ const struct krb5_buffer *server_secret;
+ struct crypto_aead *token_enc = NULL;
+ struct key *server_key;
+ unsigned int ticket_offset, ticket_len;
+ u32 kvno, enctype;
+ int ret, ec;
+
+ struct {
+ __be32 kvno;
+ __be32 enctype;
+ __be32 token_len;
+ } container;
+
+ /* Decode the RXGK_TokenContainer object. This tells us which server
+ * key we should be using. We can then fetch the key, get the secret
+ * and set up the crypto to extract the token.
+ */
+ if (skb_copy_bits(skb, token_offset, &container, sizeof(container)) < 0)
+ return rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO,
+ rxgk_abort_resp_tok_short);
+
+ kvno = ntohl(container.kvno);
+ enctype = ntohl(container.enctype);
+ ticket_len = ntohl(container.token_len);
+ ticket_offset = token_offset + sizeof(container);
+
+ if (xdr_round_up(ticket_len) > token_len - 3 * 4)
+ return rxrpc_abort_conn(conn, skb, RXGK_PACKETSHORT, -EPROTO,
+ rxgk_abort_resp_tok_short);
+
+ _debug("KVNO %u", kvno);
+ _debug("ENC %u", enctype);
+ _debug("TLEN %u", ticket_len);
+
+ server_key = rxrpc_look_up_server_security(conn, skb, kvno, enctype);
+ if (IS_ERR(server_key))
+ goto cant_get_server_key;
+
+ down_read(&server_key->sem);
+ server_secret = (const void *)&server_key->payload.data[2];
+ ret = rxgk_set_up_token_cipher(server_secret, &token_enc, enctype, &krb5, GFP_NOFS);
+ up_read(&server_key->sem);
+ key_put(server_key);
+ if (ret < 0)
+ goto cant_get_token;
+
+ /* We can now decrypt and parse the token/ticket. This allows us to
+ * gain access to K0, from which we can derive the transport key and
+ * thence decode the authenticator.
+ */
+ ret = rxgk_decrypt_skb(krb5, token_enc, skb,
+ &ticket_offset, &ticket_len, &ec);
+ crypto_free_aead(token_enc);
+ token_enc = NULL;
+ if (ret < 0)
+ return rxrpc_abort_conn(conn, skb, ec, ret,
+ rxgk_abort_resp_tok_dec);
+
+ ret = conn->security->default_decode_ticket(conn, skb, ticket_offset,
+ ticket_len, _key);
+ if (ret < 0)
+ goto cant_get_token;
+
+ _leave(" = 0");
+ return ret;
+
+cant_get_server_key:
+ ret = PTR_ERR(server_key);
+ switch (ret) {
+ case -ENOMEM:
+ goto temporary_error;
+ case -ENOKEY:
+ case -EKEYREJECTED:
+ case -EKEYEXPIRED:
+ case -EKEYREVOKED:
+ case -EPERM:
+ return rxrpc_abort_conn(conn, skb, RXGK_BADKEYNO, -EKEYREJECTED,
+ rxgk_abort_resp_tok_nokey);
+ default:
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EKEYREJECTED,
+ rxgk_abort_resp_tok_keyerr);
+ }
+
+cant_get_token:
+ switch (ret) {
+ case -ENOMEM:
+ goto temporary_error;
+ case -EINVAL:
+ return rxrpc_abort_conn(conn, skb, RXGK_NOTAUTH, -EKEYREJECTED,
+ rxgk_abort_resp_tok_internal_error);
+ case -ENOPKG:
+ return rxrpc_abort_conn(conn, skb, KRB5_PROG_KEYTYPE_NOSUPP,
+ -EKEYREJECTED, rxgk_abort_resp_tok_nopkg);
+ }
+
+temporary_error:
+ /* Ignore the response packet if we got a temporary error such as
+ * ENOMEM. We just want to send the challenge again. Note that we
+ * also come out this way if the ticket decryption fails.
+ */
+ return ret;
+}
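
The RXGK_TokenContainer header parsed above is three big-endian XDR words followed by the XDR-padded encrypted token. A minimal userspace sketch of the decode (editorial, not part of the patch; the struct name and sample bytes are illustrative):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Mirror of the three network-order words rxgk_extract_token() reads. */
struct tok_container {
	uint32_t kvno;		/* key version number */
	uint32_t enctype;	/* Kerberos enctype of the wrapping key */
	uint32_t token_len;	/* length of encrypted_token<> */
};

int main(void)
{
	/* A 12-byte container header as it might appear on the wire. */
	const uint8_t wire[12] = { 0,0,0,2,  0,0,0,18,  0,0,0,40 };
	struct tok_container c;

	memcpy(&c, wire, sizeof(c));
	printf("kvno=%u enctype=%u token_len=%u\n",
	       ntohl(c.kvno), ntohl(c.enctype), ntohl(c.token_len));
	/* The encrypted token follows, XDR-padded to a 4-byte boundary. */
	return 0;
}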
diff --git a/net/rxrpc/rxgk_common.h b/net/rxrpc/rxgk_common.h
new file mode 100644
index 000000000000..7370a5655985
--- /dev/null
+++ b/net/rxrpc/rxgk_common.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Common bits for GSSAPI-based RxRPC security.
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <crypto/krb5.h>
+#include <crypto/skcipher.h>
+#include <crypto/hash.h>
+
+/*
+ * Per-key number context. This is replaced when the connection is rekeyed.
+ */
+struct rxgk_context {
+ refcount_t usage;
+ unsigned int key_number; /* Rekeying number (goes in the rx header) */
+ unsigned long flags;
+#define RXGK_TK_NEEDS_REKEY 0 /* Set if this needs rekeying */
+ unsigned long expiry; /* Expiration time of this key */
+ long long bytes_remaining; /* Remaining Tx lifetime of this key */
+ const struct krb5_enctype *krb5; /* RxGK encryption type */
+ const struct rxgk_key *key;
+
+ /* We need up to 7 keys derived from the transport key, but we don't
+ * actually need the transport key. Each key is derived by
+ * DK(TK,constant).
+ */
+ struct crypto_aead *tx_enc; /* Transmission key */
+ struct crypto_aead *rx_enc; /* Reception key */
+ struct crypto_shash *tx_Kc; /* Transmission checksum key */
+ struct crypto_shash *rx_Kc; /* Reception checksum key */
+ struct crypto_aead *resp_enc; /* Response packet enc key */
+};
+
+#define xdr_round_up(x) (round_up((x), sizeof(__be32)))
+#define xdr_object_len(x) (4 + xdr_round_up(x))
+
+/*
+ * rxgk_app.c
+ */
+int rxgk_yfs_decode_ticket(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int ticket_offset, unsigned int ticket_len,
+ struct key **_key);
+int rxgk_extract_token(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int token_offset, unsigned int token_len,
+ struct key **_key);
+
+/*
+ * rxgk_kdf.c
+ */
+void rxgk_put(struct rxgk_context *gk);
+struct rxgk_context *rxgk_generate_transport_key(struct rxrpc_connection *conn,
+ const struct rxgk_key *key,
+ unsigned int key_number,
+ gfp_t gfp);
+int rxgk_set_up_token_cipher(const struct krb5_buffer *server_key,
+ struct crypto_aead **token_key,
+ unsigned int enctype,
+ const struct krb5_enctype **_krb5,
+ gfp_t gfp);
+
+/*
+ * Apply decryption and checksumming functions to part of an skbuff. The
+ * offset and length are updated to reflect the actual content of the encrypted
+ * region.
+ */
+static inline
+int rxgk_decrypt_skb(const struct krb5_enctype *krb5,
+ struct crypto_aead *aead,
+ struct sk_buff *skb,
+ unsigned int *_offset, unsigned int *_len,
+ int *_error_code)
+{
+ struct scatterlist sg[16];
+ size_t offset = 0, len = *_len;
+ int nr_sg, ret;
+
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ nr_sg = skb_to_sgvec(skb, sg, *_offset, len);
+ if (unlikely(nr_sg < 0))
+ return nr_sg;
+
+ ret = crypto_krb5_decrypt(krb5, aead, sg, nr_sg,
+ &offset, &len);
+ switch (ret) {
+ case 0:
+ *_offset += offset;
+ *_len = len;
+ break;
+ case -EPROTO:
+ case -EBADMSG:
+ *_error_code = RXGK_SEALEDINCON;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+/*
+ * Check the MIC on a region of an skbuff. The offset and length are updated
+ * to reflect the actual content of the secure region.
+ */
+static inline
+int rxgk_verify_mic_skb(const struct krb5_enctype *krb5,
+ struct crypto_shash *shash,
+ const struct krb5_buffer *metadata,
+ struct sk_buff *skb,
+ unsigned int *_offset, unsigned int *_len,
+ u32 *_error_code)
+{
+ struct scatterlist sg[16];
+ size_t offset = 0, len = *_len;
+ int nr_sg, ret;
+
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ nr_sg = skb_to_sgvec(skb, sg, *_offset, len);
+ if (unlikely(nr_sg < 0))
+ return nr_sg;
+
+ ret = crypto_krb5_verify_mic(krb5, shash, metadata, sg, nr_sg,
+ &offset, &len);
+ switch (ret) {
+ case 0:
+ *_offset += offset;
+ *_len = len;
+ break;
+ case -EPROTO:
+ case -EBADMSG:
+ *_error_code = RXGK_SEALEDINCON;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
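
The two macros above encode the XDR rules that opaque data is padded to a 32-bit boundary and that a counted object carries a 4-byte length word in front. A standalone demonstration (editorial; the macro below reproduces the arithmetic of the kernel definition without round_up()):

#include <stdio.h>

#define xdr_round_up(x)   (((x) + 3) & ~3u)	/* pad to a 4-byte boundary */
#define xdr_object_len(x) (4 + xdr_round_up(x))	/* length word + padded data */

int main(void)
{
	for (unsigned int len = 0; len <= 9; len++)
		printf("len %u -> padded %u, object %u\n",
		       len, xdr_round_up(len), xdr_object_len(len));
	return 0;
}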
diff --git a/net/rxrpc/rxgk_kdf.c b/net/rxrpc/rxgk_kdf.c
new file mode 100644
index 000000000000..b4db5aa30e5b
--- /dev/null
+++ b/net/rxrpc/rxgk_kdf.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* RxGK transport key derivation.
+ *
+ * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/key-type.h>
+#include <linux/slab.h>
+#include <keys/rxrpc-type.h>
+#include "ar-internal.h"
+#include "rxgk_common.h"
+
+#define round16(x) (((x) + 15) & ~15)
+
+/*
+ * Constants used to derive the keys and HMACs actually used to secure packets.
+ */
+#define RXGK_CLIENT_ENC_PACKET 1026U // 0x402
+#define RXGK_CLIENT_MIC_PACKET 1027U // 0x403
+#define RXGK_SERVER_ENC_PACKET 1028U // 0x404
+#define RXGK_SERVER_MIC_PACKET 1029U // 0x405
+#define RXGK_CLIENT_ENC_RESPONSE 1030U // 0x406
+#define RXGK_SERVER_ENC_TOKEN 1036U // 0x40c
+
+static void rxgk_free(struct rxgk_context *gk)
+{
+ if (gk->tx_Kc)
+ crypto_free_shash(gk->tx_Kc);
+ if (gk->rx_Kc)
+ crypto_free_shash(gk->rx_Kc);
+ if (gk->tx_enc)
+ crypto_free_aead(gk->tx_enc);
+ if (gk->rx_enc)
+ crypto_free_aead(gk->rx_enc);
+ if (gk->resp_enc)
+ crypto_free_aead(gk->resp_enc);
+ kfree(gk);
+}
+
+void rxgk_put(struct rxgk_context *gk)
+{
+ if (gk && refcount_dec_and_test(&gk->usage))
+ rxgk_free(gk);
+}
+
+/*
+ * Transport key derivation function.
+ *
+ * TK = random-to-key(PRF+(K0, L,
+ * epoch || cid || start_time || key_number))
+ * [tools.ietf.org/html/draft-wilkinson-afs3-rxgk-11 sec 8.3]
+ */
+static int rxgk_derive_transport_key(struct rxrpc_connection *conn,
+ struct rxgk_context *gk,
+ const struct rxgk_key *rxgk,
+ struct krb5_buffer *TK,
+ gfp_t gfp)
+{
+ const struct krb5_enctype *krb5 = gk->krb5;
+ struct krb5_buffer conn_info;
+ unsigned int L = krb5->key_bytes;
+ __be32 *info;
+ u8 *buffer;
+ int ret;
+
+ _enter("");
+
+ conn_info.len = sizeof(__be32) * 5;
+
+ buffer = kzalloc(round16(conn_info.len), gfp);
+ if (!buffer)
+ return -ENOMEM;
+
+ conn_info.data = buffer;
+
+ info = (__be32 *)conn_info.data;
+ info[0] = htonl(conn->proto.epoch);
+ info[1] = htonl(conn->proto.cid);
+ info[2] = htonl(conn->rxgk.start_time >> 32);
+ info[3] = htonl(conn->rxgk.start_time >> 0);
+ info[4] = htonl(gk->key_number);
+
+ ret = crypto_krb5_calc_PRFplus(krb5, &rxgk->key, L, &conn_info, TK, gfp);
+ kfree_sensitive(buffer);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Set up the ciphers for the usage keys.
+ */
+static int rxgk_set_up_ciphers(struct rxrpc_connection *conn,
+ struct rxgk_context *gk,
+ const struct rxgk_key *rxgk,
+ gfp_t gfp)
+{
+ const struct krb5_enctype *krb5 = gk->krb5;
+ struct crypto_shash *shash;
+ struct crypto_aead *aead;
+ struct krb5_buffer TK;
+ bool service = rxrpc_conn_is_service(conn);
+ int ret;
+ u8 *buffer;
+
+ buffer = kzalloc(krb5->key_bytes, gfp);
+ if (!buffer)
+ return -ENOMEM;
+
+ TK.len = krb5->key_bytes;
+ TK.data = buffer;
+
+ ret = rxgk_derive_transport_key(conn, gk, rxgk, &TK, gfp);
+ if (ret < 0)
+ goto out;
+
+ aead = crypto_krb5_prepare_encryption(krb5, &TK, RXGK_CLIENT_ENC_RESPONSE, gfp);
+ if (IS_ERR(aead))
+ goto aead_error;
+ gk->resp_enc = aead;
+
+ if (crypto_aead_blocksize(gk->resp_enc) != krb5->block_len ||
+ crypto_aead_authsize(gk->resp_enc) != krb5->cksum_len) {
+ pr_notice("algo inconsistent with krb5 table %u!=%u or %u!=%u\n",
+ crypto_aead_blocksize(gk->resp_enc), krb5->block_len,
+ crypto_aead_authsize(gk->resp_enc), krb5->cksum_len);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (service) {
+ switch (conn->security_level) {
+ case RXRPC_SECURITY_AUTH:
+ shash = crypto_krb5_prepare_checksum(
+ krb5, &TK, RXGK_SERVER_MIC_PACKET, gfp);
+ if (IS_ERR(shash))
+ goto hash_error;
+ gk->tx_Kc = shash;
+ shash = crypto_krb5_prepare_checksum(
+ krb5, &TK, RXGK_CLIENT_MIC_PACKET, gfp);
+ if (IS_ERR(shash))
+ goto hash_error;
+ gk->rx_Kc = shash;
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ aead = crypto_krb5_prepare_encryption(
+ krb5, &TK, RXGK_SERVER_ENC_PACKET, gfp);
+ if (IS_ERR(aead))
+ goto aead_error;
+ gk->tx_enc = aead;
+ aead = crypto_krb5_prepare_encryption(
+ krb5, &TK, RXGK_CLIENT_ENC_PACKET, gfp);
+ if (IS_ERR(aead))
+ goto aead_error;
+ gk->rx_enc = aead;
+ break;
+ }
+ } else {
+ switch (conn->security_level) {
+ case RXRPC_SECURITY_AUTH:
+ shash = crypto_krb5_prepare_checksum(
+ krb5, &TK, RXGK_CLIENT_MIC_PACKET, gfp);
+ if (IS_ERR(shash))
+ goto hash_error;
+ gk->tx_Kc = shash;
+ shash = crypto_krb5_prepare_checksum(
+ krb5, &TK, RXGK_SERVER_MIC_PACKET, gfp);
+ if (IS_ERR(shash))
+ goto hash_error;
+ gk->rx_Kc = shash;
+ break;
+ case RXRPC_SECURITY_ENCRYPT:
+ aead = crypto_krb5_prepare_encryption(
+ krb5, &TK, RXGK_CLIENT_ENC_PACKET, gfp);
+ if (IS_ERR(aead))
+ goto aead_error;
+ gk->tx_enc = aead;
+ aead = crypto_krb5_prepare_encryption(
+ krb5, &TK, RXGK_SERVER_ENC_PACKET, gfp);
+ if (IS_ERR(aead))
+ goto aead_error;
+ gk->rx_enc = aead;
+ break;
+ }
+ }
+
+ ret = 0;
+out:
+ kfree_sensitive(buffer);
+ return ret;
+aead_error:
+ ret = PTR_ERR(aead);
+ goto out;
+hash_error:
+ ret = PTR_ERR(shash);
+ goto out;
+}
+
+/*
+ * Derive a transport key for a connection and then derive a bunch of usage
+ * keys from it and set up ciphers using them.
+ */
+struct rxgk_context *rxgk_generate_transport_key(struct rxrpc_connection *conn,
+ const struct rxgk_key *key,
+ unsigned int key_number,
+ gfp_t gfp)
+{
+ struct rxgk_context *gk;
+ unsigned long lifetime;
+ int ret = -ENOPKG;
+
+ _enter("");
+
+ gk = kzalloc(sizeof(*gk), gfp);
+ if (!gk)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&gk->usage, 1);
+ gk->key = key;
+ gk->key_number = key_number;
+
+ gk->krb5 = crypto_krb5_find_enctype(key->enctype);
+ if (!gk->krb5)
+ goto err_tk;
+
+ ret = rxgk_set_up_ciphers(conn, gk, key, gfp);
+ if (ret)
+ goto err_tk;
+
+ /* Set the remaining number of bytes encrypted with this key that may
+ * be transmitted before rekeying. Note that the spec has been
+ * interpreted differently on this point...
+ */
+ switch (key->bytelife) {
+ case 0:
+ case 63:
+ gk->bytes_remaining = LLONG_MAX;
+ break;
+ case 1 ... 62:
+ gk->bytes_remaining = 1LL << key->bytelife;
+ break;
+ default:
+ gk->bytes_remaining = key->bytelife;
+ break;
+ }
+
+ /* Set the time after which rekeying must occur */
+ if (key->lifetime) {
+ lifetime = min_t(u64, key->lifetime, INT_MAX / HZ);
+ lifetime *= HZ;
+ } else {
+ lifetime = MAX_JIFFY_OFFSET;
+ }
+ gk->expiry = jiffies + lifetime;
+ return gk;
+
+err_tk:
+ rxgk_put(gk);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
+}
+
+/*
+ * Use the server secret key to set up the ciphers that will be used to extract
+ * the token from a response packet.
+ */
+int rxgk_set_up_token_cipher(const struct krb5_buffer *server_key,
+ struct crypto_aead **token_aead,
+ unsigned int enctype,
+ const struct krb5_enctype **_krb5,
+ gfp_t gfp)
+{
+ const struct krb5_enctype *krb5;
+ struct crypto_aead *aead;
+
+ krb5 = crypto_krb5_find_enctype(enctype);
+ if (!krb5)
+ return -ENOPKG;
+
+ aead = crypto_krb5_prepare_encryption(krb5, server_key, RXGK_SERVER_ENC_TOKEN, gfp);
+ if (IS_ERR(aead))
+ return PTR_ERR(aead);
+
+ *_krb5 = krb5;
+ *token_aead = aead;
+ return 0;
+}
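
The PRF+ input assembled by rxgk_derive_transport_key() is five big-endian words: epoch || cid || start_time (split across two words) || key_number. A userspace sketch of just the packing step (editorial; pack_tk_input() and the sample values are invented):

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Pack epoch || cid || start_time || key_number as five big-endian
 * 32-bit words, matching the conn_info buffer built above.
 */
static void pack_tk_input(uint8_t out[20], uint32_t epoch, uint32_t cid,
			  uint64_t start_time, uint32_t key_number)
{
	uint32_t w[5];

	w[0] = htonl(epoch);
	w[1] = htonl(cid);
	w[2] = htonl((uint32_t)(start_time >> 32));
	w[3] = htonl((uint32_t)start_time);
	w[4] = htonl(key_number);
	memcpy(out, w, sizeof(w));
}

int main(void)
{
	uint8_t buf[20];

	pack_tk_input(buf, 0x12345678, 0x9abcdef0, 0x0000017f00000001ULL, 1);
	for (int i = 0; i < 20; i++)
		printf("%02x%s", buf[i], (i % 4 == 3) ? " " : "");
	printf("\n");
	return 0;
}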
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 6cb37b0eb77f..3657c0661cdc 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -177,8 +177,10 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem
if (!txb)
return NULL;
- txb->offset += shdr;
- txb->space = part;
+ txb->crypto_header = 0;
+ txb->sec_header = shdr;
+ txb->offset += shdr;
+ txb->space = part;
return txb;
}
@@ -683,6 +685,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
+ trace_rxrpc_tx_challenge(conn, serial, 0, conn->rxkad.nonce);
+
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
@@ -698,62 +702,6 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
}
/*
- * send a Kerberos security response
- */
-static int rxkad_send_response(struct rxrpc_connection *conn,
- struct rxrpc_host_header *hdr,
- struct rxkad_response *resp,
- const struct rxkad_key *s2)
-{
- struct rxrpc_wire_header whdr;
- struct msghdr msg;
- struct kvec iov[3];
- size_t len;
- u32 serial;
- int ret;
-
- _enter("");
-
- msg.msg_name = &conn->peer->srx.transport;
- msg.msg_namelen = conn->peer->srx.transport_len;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- memset(&whdr, 0, sizeof(whdr));
- whdr.epoch = htonl(hdr->epoch);
- whdr.cid = htonl(hdr->cid);
- whdr.type = RXRPC_PACKET_TYPE_RESPONSE;
- whdr.flags = conn->out_clientflag;
- whdr.securityIndex = hdr->securityIndex;
- whdr.serviceId = htons(hdr->serviceId);
-
- iov[0].iov_base = &whdr;
- iov[0].iov_len = sizeof(whdr);
- iov[1].iov_base = resp;
- iov[1].iov_len = sizeof(*resp);
- iov[2].iov_base = (void *)s2->ticket;
- iov[2].iov_len = s2->ticket_len;
-
- len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
-
- serial = rxrpc_get_next_serial(conn);
- whdr.serial = htonl(serial);
-
- rxrpc_local_dont_fragment(conn->local, false);
- ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
- if (ret < 0) {
- trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
- rxrpc_tx_point_rxkad_response);
- return -EAGAIN;
- }
-
- conn->peer->last_tx_at = ktime_get_seconds();
- _leave(" = 0");
- return 0;
-}
-
-/*
* calculate the response checksum
*/
static void rxkad_calc_response_checksum(struct rxkad_response *response)
@@ -772,12 +720,21 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response)
* encrypt the response packet
*/
static int rxkad_encrypt_response(struct rxrpc_connection *conn,
- struct rxkad_response *resp,
+ struct sk_buff *response,
const struct rxkad_key *s2)
{
struct skcipher_request *req;
struct rxrpc_crypt iv;
struct scatterlist sg[1];
+ size_t encsize = sizeof(((struct rxkad_response *)0)->encrypted);
+ int ret;
+
+ sg_init_table(sg, ARRAY_SIZE(sg));
+ ret = skb_to_sgvec(response, sg,
+ sizeof(struct rxrpc_wire_header) +
+ offsetof(struct rxkad_response, encrypted), encsize);
+ if (ret < 0)
+ return ret;
req = skcipher_request_alloc(&conn->rxkad.cipher->base, GFP_NOFS);
if (!req)
@@ -786,89 +743,206 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn,
/* continue encrypting from where we left off */
memcpy(&iv, s2->session_key, sizeof(iv));
- sg_init_table(sg, 1);
- sg_set_buf(sg, &resp->encrypted, sizeof(resp->encrypted));
skcipher_request_set_sync_tfm(req, conn->rxkad.cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x);
- crypto_skcipher_encrypt(req);
+ skcipher_request_set_crypt(req, sg, sg, encsize, iv.x);
+ ret = crypto_skcipher_encrypt(req);
skcipher_request_free(req);
- return 0;
+ return ret;
}
/*
- * respond to a challenge packet
+ * Validate a challenge packet.
*/
-static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb)
+static bool rxkad_validate_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
{
- const struct rxrpc_key_token *token;
struct rxkad_challenge challenge;
- struct rxkad_response *resp;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- u32 version, nonce, min_level;
- int ret = -EPROTO;
+ u32 version, min_level;
+ int ret;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
- if (!conn->key)
- return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
- rxkad_abort_chall_no_key);
+ if (!conn->key) {
+ rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxkad_abort_chall_no_key);
+ return false;
+ }
ret = key_validate(conn->key);
- if (ret < 0)
- return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
- rxkad_abort_chall_key_expired);
+ if (ret < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_chall_key_expired);
+ return false;
+ }
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- &challenge, sizeof(challenge)) < 0)
- return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
- rxkad_abort_chall_short);
+ &challenge, sizeof(challenge)) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_chall_short);
+ return false;
+ }
version = ntohl(challenge.version);
- nonce = ntohl(challenge.nonce);
+ sp->chall.rxkad_nonce = ntohl(challenge.nonce);
min_level = ntohl(challenge.min_level);
- trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level);
+ trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version,
+ sp->chall.rxkad_nonce, min_level);
+
+ if (version != RXKAD_VERSION) {
+ rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_chall_version);
+ return false;
+ }
- if (version != RXKAD_VERSION)
- return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
- rxkad_abort_chall_version);
+ if (conn->security_level < min_level) {
+ rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES,
+ rxkad_abort_chall_level);
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Insert the header into the response.
+ */
+static noinline
+int rxkad_insert_response_header(struct rxrpc_connection *conn,
+ const struct rxrpc_key_token *token,
+ struct sk_buff *challenge,
+ struct sk_buff *response,
+ size_t *offset)
+{
+ struct rxrpc_skb_priv *csp = rxrpc_skb(challenge);
+ struct {
+ struct rxrpc_wire_header whdr;
+ struct rxkad_response resp;
+ } h;
+ int ret;
- if (conn->security_level < min_level)
- return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES,
- rxkad_abort_chall_level);
+ h.whdr.epoch = htonl(conn->proto.epoch);
+ h.whdr.cid = htonl(conn->proto.cid);
+ h.whdr.callNumber = 0;
+ h.whdr.serial = 0;
+ h.whdr.seq = 0;
+ h.whdr.type = RXRPC_PACKET_TYPE_RESPONSE;
+ h.whdr.flags = conn->out_clientflag;
+ h.whdr.userStatus = 0;
+ h.whdr.securityIndex = conn->security_ix;
+ h.whdr.cksum = 0;
+ h.whdr.serviceId = htons(conn->service_id);
+ h.resp.version = htonl(RXKAD_VERSION);
+ h.resp.__pad = 0;
+ h.resp.encrypted.epoch = htonl(conn->proto.epoch);
+ h.resp.encrypted.cid = htonl(conn->proto.cid);
+ h.resp.encrypted.checksum = 0;
+ h.resp.encrypted.securityIndex = htonl(conn->security_ix);
+ h.resp.encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
+ h.resp.encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
+ h.resp.encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
+ h.resp.encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
+ h.resp.encrypted.inc_nonce = htonl(csp->chall.rxkad_nonce + 1);
+ h.resp.encrypted.level = htonl(conn->security_level);
+ h.resp.kvno = htonl(token->kad->kvno);
+ h.resp.ticket_len = htonl(token->kad->ticket_len);
+
+ rxkad_calc_response_checksum(&h.resp);
+
+ ret = skb_store_bits(response, *offset, &h, sizeof(h));
+ *offset += sizeof(h);
+ return ret;
+}
+
+/*
+ * respond to a challenge packet
+ */
+static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
+ struct sk_buff *challenge)
+{
+ const struct rxrpc_key_token *token;
+ struct rxrpc_skb_priv *csp, *rsp;
+ struct sk_buff *response;
+ size_t len, offset = 0;
+ int ret = -EPROTO;
+
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
+
+ ret = key_validate(conn->key);
+ if (ret < 0)
+ return rxrpc_abort_conn(conn, challenge, RXKADEXPIRED, ret,
+ rxkad_abort_chall_key_expired);
token = conn->key->payload.data[0];
/* build the response packet */
- resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS);
- if (!resp)
- return -ENOMEM;
+ len = sizeof(struct rxrpc_wire_header) +
+ sizeof(struct rxkad_response) +
+ token->kad->ticket_len;
+
+ response = alloc_skb_with_frags(0, len, 0, &ret, GFP_NOFS);
+ if (!response)
+ goto error;
+ rxrpc_new_skb(response, rxrpc_skb_new_response_rxkad);
+ response->len = len;
+ response->data_len = len;
+
+ offset = 0;
+ ret = rxkad_insert_response_header(conn, token, challenge, response,
+ &offset);
+ if (ret < 0)
+ goto error;
+
+ ret = rxkad_encrypt_response(conn, response, token->kad);
+ if (ret < 0)
+ goto error;
+
+ ret = skb_store_bits(response, offset, token->kad->ticket,
+ token->kad->ticket_len);
+ if (ret < 0)
+ goto error;
- resp->version = htonl(RXKAD_VERSION);
- resp->encrypted.epoch = htonl(conn->proto.epoch);
- resp->encrypted.cid = htonl(conn->proto.cid);
- resp->encrypted.securityIndex = htonl(conn->security_ix);
- resp->encrypted.inc_nonce = htonl(nonce + 1);
- resp->encrypted.level = htonl(conn->security_level);
- resp->kvno = htonl(token->kad->kvno);
- resp->ticket_len = htonl(token->kad->ticket_len);
- resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter);
- resp->encrypted.call_id[1] = htonl(conn->channels[1].call_counter);
- resp->encrypted.call_id[2] = htonl(conn->channels[2].call_counter);
- resp->encrypted.call_id[3] = htonl(conn->channels[3].call_counter);
-
- /* calculate the response checksum and then do the encryption */
- rxkad_calc_response_checksum(resp);
- ret = rxkad_encrypt_response(conn, resp, token->kad);
- if (ret == 0)
- ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
- kfree(resp);
+ csp = rxrpc_skb(challenge);
+ rsp = rxrpc_skb(response);
+ rsp->resp.len = len;
+ rsp->resp.challenge_serial = csp->hdr.serial;
+ rxrpc_post_response(conn, response);
+ response = NULL;
+ ret = 0;
+
+error:
+ rxrpc_free_skb(response, rxrpc_skb_put_response);
return ret;
}
/*
+ * RxKAD only supports an automatic response as there's nothing to manage
+ * that isn't already in the key.
+ */
+static int rxkad_sendmsg_respond_to_challenge(struct sk_buff *challenge,
+ struct msghdr *msg)
+{
+ return -EINVAL;
+}
+
+/**
+ * rxkad_kernel_respond_to_challenge - Respond to a challenge
+ * @challenge: The challenge to respond to
+ *
+ * Allow a kernel application to respond to a CHALLENGE.
+ *
+ * Return: %0 if successful and a negative error code otherwise.
+ */
+int rxkad_kernel_respond_to_challenge(struct sk_buff *challenge)
+{
+ struct rxrpc_skb_priv *csp = rxrpc_skb(challenge);
+
+ return rxkad_respond_to_challenge(csp->chall.conn, challenge);
+}
+EXPORT_SYMBOL(rxkad_kernel_respond_to_challenge);
+
+/*
* decrypt the kerberos IV ticket in the response
*/
static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
@@ -1276,6 +1350,8 @@ const struct rxrpc_security rxkad = {
.verify_packet = rxkad_verify_packet,
.free_call_crypto = rxkad_free_call_crypto,
.issue_challenge = rxkad_issue_challenge,
+ .validate_challenge = rxkad_validate_challenge,
+ .sendmsg_respond_to_challenge = rxkad_sendmsg_respond_to_challenge,
.respond_to_challenge = rxkad_respond_to_challenge,
.verify_response = rxkad_verify_response,
.clear = rxkad_clear,
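
The RESPONSE is now built in an skb as wire header, then response body, then the raw ticket, with only the encrypted section passed through the cipher. A mock of the body layout, reconstructed from the assignments in rxkad_insert_response_header() above (editorial; the authoritative definition is struct rxkad_response in the rxrpc headers):

#include <stdint.h>
#include <stdio.h>

struct mock_rxkad_response {
	uint32_t version;		/* RXKAD_VERSION */
	uint32_t pad;			/* __pad in the real struct */
	struct {
		uint32_t epoch;
		uint32_t cid;
		uint32_t checksum;	/* set by rxkad_calc_response_checksum() */
		uint32_t securityIndex;
		uint32_t call_id[4];	/* per-channel call counters */
		uint32_t inc_nonce;	/* challenge nonce + 1 */
		uint32_t level;		/* negotiated security level */
	} encrypted;			/* the only part that gets encrypted */
	uint32_t kvno;
	uint32_t ticket_len;		/* the raw ticket follows the struct */
};

int main(void)
{
	printf("body %zu bytes, encrypted section %zu bytes\n",
	       sizeof(struct mock_rxkad_response),
	       sizeof(((struct mock_rxkad_response *)0)->encrypted));
	return 0;
}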
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index e848a4777b8c..0377301156b0 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -8,6 +8,7 @@
#define pr_fmt(fmt) "rxperf: " fmt
#include <linux/module.h>
#include <linux/slab.h>
+#include <crypto/krb5.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
@@ -136,6 +137,12 @@ static void rxperf_notify_end_reply_tx(struct sock *sock,
RXPERF_CALL_SV_AWAIT_ACK);
}
+static const struct rxrpc_kernel_ops rxperf_rxrpc_callback_ops = {
+ .notify_new_call = rxperf_rx_new_call,
+ .discard_new_call = rxperf_rx_discard_new_call,
+ .user_attach_call = rxperf_rx_attach,
+};
+
/*
* Charge the incoming call preallocation.
*/
@@ -161,7 +168,6 @@ static void rxperf_charge_preallocation(struct work_struct *work)
if (rxrpc_kernel_charge_accept(rxperf_socket,
rxperf_notify_rx,
- rxperf_rx_attach,
(unsigned long)call,
GFP_KERNEL,
call->debug_id) < 0)
@@ -209,8 +215,7 @@ static int rxperf_open_socket(void)
if (ret < 0)
goto error_2;
- rxrpc_kernel_new_call_notification(socket, rxperf_rx_new_call,
- rxperf_rx_discard_new_call);
+ rxrpc_kernel_set_notifications(socket, &rxperf_rxrpc_callback_ops);
ret = kernel_listen(socket, INT_MAX);
if (ret < 0)
@@ -546,9 +551,9 @@ static int rxperf_process_call(struct rxperf_call *call)
}
/*
- * Add a key to the security keyring.
+ * Add an rxkad key to the security keyring.
*/
-static int rxperf_add_key(struct key *keyring)
+static int rxperf_add_rxkad_key(struct key *keyring)
{
key_ref_t kref;
int ret;
@@ -574,6 +579,47 @@ static int rxperf_add_key(struct key *keyring)
return ret;
}
+#ifdef CONFIG_RXGK
+/*
+ * Add a yfs-rxgk key to the security keyring.
+ */
+static int rxperf_add_yfs_rxgk_key(struct key *keyring, u32 enctype)
+{
+ const struct krb5_enctype *krb5 = crypto_krb5_find_enctype(enctype);
+ key_ref_t kref;
+ char name[64];
+ int ret;
+ u8 key[32];
+
+ if (!krb5 || krb5->key_len > sizeof(key))
+ return 0;
+
+ /* The key is just { 0, 1, 2, 3, 4, ... } */
+ for (int i = 0; i < krb5->key_len; i++)
+ key[i] = i;
+
+ sprintf(name, "%u:6:1:%u", RX_PERF_SERVICE, enctype);
+
+ kref = key_create_or_update(make_key_ref(keyring, true),
+ "rxrpc_s", name,
+ key, krb5->key_len,
+ KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH |
+ KEY_USR_VIEW,
+ KEY_ALLOC_NOT_IN_QUOTA);
+
+ if (IS_ERR(kref)) {
+ pr_err("Can't allocate rxperf server key: %ld\n", PTR_ERR(kref));
+ return PTR_ERR(kref);
+ }
+
+ ret = key_link(keyring, key_ref_to_ptr(kref));
+ if (ret < 0)
+ pr_err("Can't link rxperf server key: %d\n", ret);
+ key_ref_put(kref);
+ return ret;
+}
+#endif
+
/*
* Initialise the rxperf server.
*/
@@ -603,9 +649,29 @@ static int __init rxperf_init(void)
goto error_keyring;
}
rxperf_sec_keyring = keyring;
- ret = rxperf_add_key(keyring);
+ ret = rxperf_add_rxkad_key(keyring);
+ if (ret < 0)
+ goto error_key;
+#ifdef CONFIG_RXGK
+ ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_AES128_CTS_HMAC_SHA1_96);
+ if (ret < 0)
+ goto error_key;
+ ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_AES256_CTS_HMAC_SHA1_96);
+ if (ret < 0)
+ goto error_key;
+ ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_AES128_CTS_HMAC_SHA256_128);
+ if (ret < 0)
+ goto error_key;
+ ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_AES256_CTS_HMAC_SHA384_192);
+ if (ret < 0)
+ goto error_key;
+ ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_CAMELLIA128_CTS_CMAC);
+ if (ret < 0)
+ goto error_key;
+ ret = rxperf_add_yfs_rxgk_key(keyring, KRB5_ENCTYPE_CAMELLIA256_CTS_CMAC);
if (ret < 0)
goto error_key;
+#endif
ret = rxperf_open_socket();
if (ret < 0)
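
The "rxrpc_s" key description built by rxperf_add_yfs_rxgk_key() follows the format "<service>:<security-index>:<key-version>:<enctype>"; the 6 appears to select the YFS-RxGK security class and the 1 is the key version. A trivial sketch of the construction (editorial; the service number shown is a stand-in, not the real RX_PERF_SERVICE value):

#include <stdio.h>

int main(void)
{
	unsigned int service = 147;	/* stand-in for RX_PERF_SERVICE */
	unsigned int enctype = 18;	/* aes256-cts-hmac-sha1-96 */
	char name[64];

	snprintf(name, sizeof(name), "%u:6:1:%u", service, enctype);
	printf("%s\n", name);		/* e.g. "147:6:1:18" */
	return 0;
}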
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 9784adc8f275..078d91a6b77f 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -20,6 +20,9 @@ static const struct rxrpc_security *rxrpc_security_types[] = {
#ifdef CONFIG_RXKAD
[RXRPC_SECURITY_RXKAD] = &rxkad,
#endif
+#ifdef CONFIG_RXGK
+ [RXRPC_SECURITY_YFS_RXGK] = &rxgk_yfs,
+#endif
};
int __init rxrpc_init_security(void)
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 84dc6c94f23b..ebbb78b842de 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -607,7 +607,7 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg, struct rxrpc_send_params *p)
static struct rxrpc_call *
rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
struct rxrpc_send_params *p)
- __releases(&rx->sk.sk_lock.slock)
+ __releases(&rx->sk.sk_lock)
__acquires(&call->user_mutex)
{
struct rxrpc_conn_parameters cp;
@@ -657,7 +657,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
* - the socket may be either a client socket or a server socket
*/
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
- __releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_call *call;
bool dropped_lock = false;
@@ -759,14 +758,21 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
if (rxrpc_call_is_complete(call)) {
/* it's too late for this call */
ret = -ESHUTDOWN;
- } else if (p.command == RXRPC_CMD_SEND_ABORT) {
+ goto out_put_unlock;
+ }
+
+ switch (p.command) {
+ case RXRPC_CMD_SEND_ABORT:
rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED,
rxrpc_abort_call_sendmsg);
ret = 0;
- } else if (p.command != RXRPC_CMD_SEND_DATA) {
- ret = -EINVAL;
- } else {
+ break;
+ case RXRPC_CMD_SEND_DATA:
ret = rxrpc_send_data(rx, call, msg, len, NULL, &dropped_lock);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
}
out_put_unlock:
@@ -794,6 +800,8 @@ error_release_sock:
* appropriate to sending data. No control data should be supplied in @msg,
* nor should an address be supplied. MSG_MORE should be flagged if there's
* more data to come, otherwise this data will end the transmission phase.
+ *
+ * Return: %0 if successful and a negative error code otherwise.
*/
int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, size_t len,
@@ -829,8 +837,9 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data);
* @error: Local error value
* @why: Indication as to why.
*
- * Allow a kernel service to abort a call, if it's still in an abortable state
- * and return true if the call was aborted, false if it was already complete.
+ * Allow a kernel service to abort a call if it's still in an abortable state.
+ *
+ * Return: %true if the call was aborted, %false if it was already complete.
*/
bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
u32 abort_code, int error, enum rxrpc_abort_reason why)
diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c
index e51940589ee5..36b05fd842a7 100644
--- a/net/rxrpc/server_key.c
+++ b/net/rxrpc/server_key.c
@@ -152,6 +152,8 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen)
*
* Set the server security keyring on an rxrpc socket. This is used to provide
* the encryption keys for a kernel service.
+ *
+ * Return: %0 if successful and a negative error code otherwise.
*/
int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring)
{
@@ -169,3 +171,43 @@ int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring)
return ret;
}
EXPORT_SYMBOL(rxrpc_sock_set_security_keyring);
+
+/**
+ * rxrpc_sock_set_manage_response - Set the manage-response flag for a kernel service
+ * @sk: The socket to set the flag on
+ * @set: True to set, false to clear the flag
+ *
+ * Set the flag on an rxrpc socket to say that the caller wants to manage the
+ * RESPONSE packet and the user-defined data it may contain. Setting this
+ * means that recvmsg() will return messages with RXRPC_CHALLENGED in the
+ * control message buffer containing information about the challenge.
+ *
+ * The user should respond to the challenge by passing RXRPC_RESPOND or
+ * RXRPC_RESPOND_ABORT control messages with sendmsg() to the same call.
+ * Supplementary control messages, such as RXRPC_RESP_RXGK_APPDATA, may be
+ * included to indicate the parts the user wants to supply.
+ *
+ * The server will be passed the response data with a RXRPC_RESPONDED control
+ * message when it gets the first data from each call.
+ *
+ * Note that this is only honoured by security classes that need auxiliary data
+ * (e.g. RxGK). Those that don't offer the facility (e.g. RxKAD) respond
+ * without consulting userspace.
+ *
+ * Return: The previous setting.
+ */
+int rxrpc_sock_set_manage_response(struct sock *sk, bool set)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ int ret;
+
+ lock_sock(sk);
+ ret = !!test_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags);
+ if (set)
+ set_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags);
+ else
+ clear_bit(RXRPC_SOCK_MANAGE_RESPONSE, &rx->flags);
+ release_sock(sk);
+ return ret;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_manage_response);
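
A hypothetical kernel-service caller might combine this with the keyring setter as follows (editorial sketch; my_service_setup() is invented and error handling is minimal):

#include <net/af_rxrpc.h>

static int my_service_setup(struct socket *sock, struct key *keyring)
{
	int ret;

	ret = rxrpc_sock_set_security_keyring(sock->sk, keyring);
	if (ret < 0)
		return ret;

	/* Ask to see CHALLENGEs via recvmsg() so that appdata can be
	 * attached to the RESPONSE; the return is the previous setting.
	 */
	rxrpc_sock_set_manage_response(sock->sk, true);
	return 0;
}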
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 839790043256..057e20cef375 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1461,17 +1461,29 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {};
- struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
+ struct nlattr *tb[TCA_ACT_MAX_PRIO + 2];
struct tc_action *act;
size_t sz = 0;
int err;
int i;
- err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
+ err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO + 1, nla, NULL,
extack);
if (err < 0)
return err;
+ /* The nested attributes are parsed as types, but they are really an
+ * array of actions. So we parse one more than we can handle, and return
+ * an error if the last one is set (as that indicates that the request
+ * contained more than the maximum number of actions).
+ */
+ if (tb[TCA_ACT_MAX_PRIO + 1]) {
+ NL_SET_ERR_MSG_FMT(extack,
+ "Only %d actions supported per filter",
+ TCA_ACT_MAX_PRIO);
+ return -EINVAL;
+ }
+
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
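
The idiom above generalises beyond netlink: parse into one slot more than the supported maximum and treat the extra slot being occupied as "too many entries". A self-contained sketch with plain arrays standing in for attributes (editorial; names are invented):

#include <stdio.h>

#define MAX_PRIO 4	/* stand-in for TCA_ACT_MAX_PRIO */

static int parse_items(const int *in, int n_in, int tb[MAX_PRIO + 2])
{
	for (int i = 1; i <= MAX_PRIO + 1; i++)
		tb[i] = (i <= n_in) ? in[i - 1] : 0;
	if (tb[MAX_PRIO + 1])
		return -1;	/* more entries than we can handle */
	return 0;
}

int main(void)
{
	int four[] = {1, 2, 3, 4}, five[] = {1, 2, 3, 4, 5};
	int tb[MAX_PRIO + 2];

	printf("4 items -> %d\n", parse_items(four, 4, tb));	/* 0 */
	printf("5 items -> %d\n", parse_items(five, 5, tb));	/* -1 */
	return 0;
}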
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 18be6ff4c3db..3ee44d731700 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2228,7 +2228,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
break;
- strncpy(if_name, data, TIPC_MAX_IF_NAME);
+ strscpy(if_name, data, TIPC_MAX_IF_NAME);
/* Update own tolerance if peer indicates a non-zero value */
if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
diff --git a/net/tipc/node.c b/net/tipc/node.c
index ccf5e427f43e..cb43f2016a70 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1581,7 +1581,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
tipc_node_read_lock(node);
link = node->links[bearer_id].link;
if (link) {
- strncpy(linkname, tipc_link_name(link), len);
+ strscpy(linkname, tipc_link_name(link), len);
err = 0;
}
tipc_node_read_unlock(node);
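
For context on the two TIPC conversions above: strncpy() leaves the destination unterminated when the source fills the buffer, while strscpy() always terminates and reports truncation. A userspace approximation of the difference (editorial; my_strscpy() only mimics the kernel helper's behaviour, returning -1 instead of -E2BIG):

#include <stdio.h>
#include <string.h>

static long my_strscpy(char *dst, const char *src, size_t size)
{
	size_t len = strnlen(src, size);

	if (!size)
		return -1;
	if (len == size) {			/* source doesn't fit: truncate */
		len = size - 1;
		memcpy(dst, src, len);
		dst[len] = '\0';
		return -1;
	}
	memcpy(dst, src, len + 1);		/* fits, NUL included */
	return (long)len;
}

int main(void)
{
	char a[4], b[4];

	strncpy(a, "linklink", sizeof(a));	/* a is NOT terminated */
	my_strscpy(b, "linklink", sizeof(b));	/* b == "lin", terminated */
	printf("%.4s / %s\n", a, b);
	return 0;
}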
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f78a2492826f..2ab20821d6bb 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -950,13 +950,6 @@ static void unix_close(struct sock *sk, long timeout)
*/
}
-static void unix_unhash(struct sock *sk)
-{
- /* Nothing to do here, unix socket does not need a ->unhash().
- * This is merely for sockmap.
- */
-}
-
static bool unix_bpf_bypass_getsockopt(int level, int optname)
{
if (level == SOL_SOCKET) {
@@ -987,7 +980,6 @@ struct proto unix_stream_proto = {
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
.close = unix_close,
- .unhash = unix_unhash,
.bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = unix_stream_bpf_update_proto,
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 25a76c5ce0f1..cbf2129e808b 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -266,13 +266,17 @@ int xp_assign_dev_shared(struct xsk_buff_pool *pool, struct xdp_sock *umem_xs,
void xp_clear_dev(struct xsk_buff_pool *pool)
{
+ struct net_device *netdev = pool->netdev;
+
if (!pool->netdev)
return;
+ netdev_lock_ops(netdev);
xp_disable_drv_zc(pool);
xsk_clear_pool_at_qid(pool->netdev, pool->queue_id);
- dev_put(pool->netdev);
pool->netdev = NULL;
+ netdev_unlock_ops(netdev);
+ dev_put(netdev);
}
static void xp_release_deferred(struct work_struct *work)
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 622445f041d3..cb1e12740c87 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -952,32 +952,28 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = {
.get_link_net = xfrmi_get_link_net,
};
-static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list,
- struct list_head *dev_to_kill)
+static void __net_exit xfrmi_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- struct net *net;
+ struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
+ struct xfrm_if __rcu **xip;
+ struct xfrm_if *xi;
+ int i;
- ASSERT_RTNL();
- list_for_each_entry(net, net_exit_list, exit_list) {
- struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id);
- struct xfrm_if __rcu **xip;
- struct xfrm_if *xi;
- int i;
-
- for (i = 0; i < XFRMI_HASH_SIZE; i++) {
- for (xip = &xfrmn->xfrmi[i];
- (xi = rtnl_dereference(*xip)) != NULL;
- xip = &xi->next)
- unregister_netdevice_queue(xi->dev, dev_to_kill);
- }
- xi = rtnl_dereference(xfrmn->collect_md_xfrmi);
- if (xi)
+ for (i = 0; i < XFRMI_HASH_SIZE; i++) {
+ for (xip = &xfrmn->xfrmi[i];
+ (xi = rtnl_net_dereference(net, *xip)) != NULL;
+ xip = &xi->next)
unregister_netdevice_queue(xi->dev, dev_to_kill);
}
+
+ xi = rtnl_net_dereference(net, xfrmn->collect_md_xfrmi);
+ if (xi)
+ unregister_netdevice_queue(xi->dev, dev_to_kill);
}
static struct pernet_operations xfrmi_net_ops = {
- .exit_batch_rtnl = xfrmi_exit_batch_rtnl,
+ .exit_rtnl = xfrmi_exit_rtnl,
.id = &xfrmi_net_id,
.size = sizeof(struct xfrmi_net),
};
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index b7997541f7ee..f93d9145ab8a 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -31,7 +31,6 @@ static inline int proto_ports_offset(__u64 proto)
switch (proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
- case IPPROTO_DCCP:
case IPPROTO_ESP:
case IPPROTO_SCTP:
case IPPROTO_UDPLITE:
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 3d22bf863eec..298c8f8d7719 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -4804,7 +4804,7 @@ sub process {
}
# do not use BUG() or variants
- if ($line =~ /\b(?!AA_|BUILD_|DCCP_|IDA_|KVM_|RWLOCK_|snd_|SPIN_)(?:[a-zA-Z_]*_)?BUG(?:_ON)?(?:_[A-Z_]+)?\s*\(/) {
+ if ($line =~ /\b(?!AA_|BUILD_|IDA_|KVM_|RWLOCK_|snd_|SPIN_)(?:[a-zA-Z_]*_)?BUG(?:_ON)?(?:_[A-Z_]+)?\s*\(/) {
my $msg_level = \&WARN;
$msg_level = \&CHK if ($file);
&{$msg_level}("AVOID_BUG",
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 1b942b4908a2..7d623b00495c 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -24,7 +24,6 @@
#include <net/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
-#include <linux/dccp.h>
#include <linux/sctp.h>
#include <linux/lsm_audit.h>
#include <linux/security.h>
@@ -68,13 +67,6 @@ int ipv4_skb_to_auditdata(struct sk_buff *skb,
ad->u.net->dport = uh->dest;
break;
}
- case IPPROTO_DCCP: {
- struct dccp_hdr *dh = dccp_hdr(skb);
-
- ad->u.net->sport = dh->dccph_sport;
- ad->u.net->dport = dh->dccph_dport;
- break;
- }
case IPPROTO_SCTP: {
struct sctphdr *sh = sctp_hdr(skb);
@@ -140,17 +132,6 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb,
ad->u.net->dport = uh->dest;
break;
}
- case IPPROTO_DCCP: {
- struct dccp_hdr _dccph, *dh;
-
- dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
- if (dh == NULL)
- break;
-
- ad->u.net->sport = dh->dccph_sport;
- ad->u.net->dport = dh->dccph_dport;
- break;
- }
case IPPROTO_SCTP: {
struct sctphdr _sctph, *sh;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index e7a7dcab81db..b2695785610a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -65,7 +65,6 @@
#include <net/netlink.h>
#include <linux/tcp.h>
#include <linux/udp.h>
-#include <linux/dccp.h>
#include <linux/sctp.h>
#include <net/sctp/structs.h>
#include <linux/quota.h>
@@ -1191,8 +1190,6 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
return SECCLASS_ICMP_SOCKET;
else
return SECCLASS_RAWIP_SOCKET;
- case SOCK_DCCP:
- return SECCLASS_DCCP_SOCKET;
default:
return SECCLASS_RAWIP_SOCKET;
}
@@ -4392,22 +4389,6 @@ static int selinux_parse_skb_ipv4(struct sk_buff *skb,
break;
}
- case IPPROTO_DCCP: {
- struct dccp_hdr _dccph, *dh;
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- offset += ihlen;
- dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
- if (dh == NULL)
- break;
-
- ad->u.net->sport = dh->dccph_sport;
- ad->u.net->dport = dh->dccph_dport;
- break;
- }
-
#if IS_ENABLED(CONFIG_IP_SCTP)
case IPPROTO_SCTP: {
struct sctphdr _sctph, *sh;
@@ -4486,18 +4467,6 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb,
break;
}
- case IPPROTO_DCCP: {
- struct dccp_hdr _dccph, *dh;
-
- dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
- if (dh == NULL)
- break;
-
- ad->u.net->sport = dh->dccph_sport;
- ad->u.net->dport = dh->dccph_dport;
- break;
- }
-
#if IS_ENABLED(CONFIG_IP_SCTP)
case IPPROTO_SCTP: {
struct sctphdr _sctph, *sh;
@@ -4849,10 +4818,6 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
node_perm = UDP_SOCKET__NODE_BIND;
break;
- case SECCLASS_DCCP_SOCKET:
- node_perm = DCCP_SOCKET__NODE_BIND;
- break;
-
case SECCLASS_SCTP_SOCKET:
node_perm = SCTP_SOCKET__NODE_BIND;
break;
@@ -4908,11 +4873,10 @@ static int selinux_socket_connect_helper(struct socket *sock,
return 0;
/*
- * If a TCP, DCCP or SCTP socket, check name_connect permission
+ * If a TCP or SCTP socket, check name_connect permission
* for the port.
*/
if (sksec->sclass == SECCLASS_TCP_SOCKET ||
- sksec->sclass == SECCLASS_DCCP_SOCKET ||
sksec->sclass == SECCLASS_SCTP_SOCKET) {
struct common_audit_data ad;
struct lsm_network_audit net = {0,};
@@ -4957,9 +4921,6 @@ static int selinux_socket_connect_helper(struct socket *sock,
case SECCLASS_TCP_SOCKET:
perm = TCP_SOCKET__NAME_CONNECT;
break;
- case SECCLASS_DCCP_SOCKET:
- perm = DCCP_SOCKET__NAME_CONNECT;
- break;
case SECCLASS_SCTP_SOCKET:
perm = SCTP_SOCKET__NAME_CONNECT;
break;
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 04a9b480885e..5665aa5e7853 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -127,8 +127,6 @@ const struct security_class_mapping secclass_map[] = {
{ "key",
{ "view", "read", "write", "search", "link", "setattr", "create",
NULL } },
- { "dccp_socket",
- { COMMON_SOCK_PERMS, "node_bind", "name_connect", NULL } },
{ "memprotect", { "mmap_zero", NULL } },
{ "peer", { "recv", NULL } },
{ "capability2", { COMMON_CAP2_PERMS, NULL } },
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 3a95986b134f..2c0b07f9fbbd 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -98,7 +98,6 @@ static const struct nlmsg_perm nlmsg_route_perms[] = {
static const struct nlmsg_perm nlmsg_tcpdiag_perms[] = {
{ TCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
- { DCCPDIAG_GETSOCK, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
{ SOCK_DIAG_BY_FAMILY, NETLINK_TCPDIAG_SOCKET__NLMSG_READ },
{ SOCK_DESTROY, NETLINK_TCPDIAG_SOCKET__NLMSG_WRITE },
};
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 99833168604e..fc340a6f0dde 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -24,7 +24,6 @@
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
-#include <linux/dccp.h>
#include <linux/icmpv6.h>
#include <linux/slab.h>
#include <linux/mutex.h>
@@ -4061,7 +4060,6 @@ static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr_in6 *sip)
__be16 frag_off;
struct tcphdr _tcph, *th;
struct udphdr _udph, *uh;
- struct dccp_hdr _dccph, *dh;
sip->sin6_port = 0;
@@ -4090,11 +4088,6 @@ static int smk_skb_to_addr_ipv6(struct sk_buff *skb, struct sockaddr_in6 *sip)
if (uh != NULL)
sip->sin6_port = uh->source;
break;
- case IPPROTO_DCCP:
- dh = skb_header_pointer(skb, offset, sizeof(_dccph), &_dccph);
- if (dh != NULL)
- sip->sin6_port = dh->dccph_sport;
- break;
}
return proto;
}
@@ -4216,7 +4209,7 @@ static int smack_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb)
case PF_INET6:
proto = smk_skb_to_addr_ipv6(skb, &sadd);
if (proto != IPPROTO_UDP && proto != IPPROTO_UDPLITE &&
- proto != IPPROTO_TCP && proto != IPPROTO_DCCP)
+ proto != IPPROTO_TCP)
break;
#ifdef SMACK_IPV6_SECMARK_LABELING
skp = smack_from_skb(skb);
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index f3269ce39e5b..385783489f84 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -29,4 +29,6 @@ CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h)
+CFLAGS_rt-route:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h)
CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h)
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 21f9e299dc75..6603ad8d4ce1 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -25,7 +25,7 @@ SPECS_DIR:=../../../../Documentation/netlink/specs
GENS_PATHS=$(shell grep -nrI --files-without-match \
'protocol: netlink' \
$(SPECS_DIR))
-GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS})
+GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS}) rt-addr rt-route
SRCS=$(patsubst %,%-user.c,${GENS})
HDRS=$(patsubst %,%-user.h,${GENS})
OBJS=$(patsubst %,%-user.o,${GENS})
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 3c09a7bbfba5..634eb16548b9 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -94,6 +94,9 @@ struct ynl_ntf_base_type {
unsigned char data[] __attribute__((aligned(8)));
};
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id);
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id);
+
struct nlmsghdr *
ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
struct nlmsghdr *
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index ce32cb35007d..70f899a54007 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -451,14 +451,14 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
return nlh;
}
-void ynl_msg_start_req(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id)
{
- ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK);
+ return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK);
}
-void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
{
- ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+ return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
}
struct nlmsghdr *
@@ -663,6 +663,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
struct sockaddr_nl addr;
struct ynl_sock *ys;
socklen_t addrlen;
+ int sock_type;
int one = 1;
ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE);
@@ -675,7 +676,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE];
ys->ntf_last_next = &ys->ntf_first;
- ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ sock_type = yf->is_classic ? yf->classic_id : NETLINK_GENERIC;
+
+ ys->socket = socket(AF_NETLINK, SOCK_RAW, sock_type);
if (ys->socket < 0) {
__perr(yse, "failed to create a netlink socket");
goto err_free_sock;
@@ -708,8 +711,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
ys->portid = addr.nl_pid;
ys->seq = random();
-
- if (ynl_sock_read_family(ys, yf->name)) {
+ if (yf->is_classic) {
+ ys->family_id = yf->classic_id;
+ } else if (ynl_sock_read_family(ys, yf->name)) {
if (yse)
memcpy(yse, &ys->err, sizeof(*yse));
goto err_close_sock;
@@ -791,13 +795,21 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
struct ynl_parse_arg yarg = { .ys = ys, };
const struct ynl_ntf_info *info;
struct ynl_ntf_base_type *rsp;
- struct genlmsghdr *gehdr;
+ __u32 cmd;
int ret;
- gehdr = ynl_nlmsg_data(nlh);
- if (gehdr->cmd >= ys->family->ntf_info_size)
+ if (ys->family->is_classic) {
+ cmd = nlh->nlmsg_type;
+ } else {
+ struct genlmsghdr *gehdr;
+
+ gehdr = ynl_nlmsg_data(nlh);
+ cmd = gehdr->cmd;
+ }
+
+ if (cmd >= ys->family->ntf_info_size)
return YNL_PARSE_CB_ERROR;
- info = &ys->family->ntf_info[gehdr->cmd];
+ info = &ys->family->ntf_info[cmd];
if (!info->cb)
return YNL_PARSE_CB_ERROR;
@@ -811,7 +823,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
goto err_free;
rsp->family = nlh->nlmsg_type;
- rsp->cmd = gehdr->cmd;
+ rsp->cmd = cmd;
*ys->ntf_last_next = rsp;
ys->ntf_last_next = &rsp->next;
@@ -863,17 +875,22 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
static int
ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
{
- struct genlmsghdr *gehdr;
+ if (ys->family->is_classic) {
+ if (nlh->nlmsg_type != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
+ } else {
+ struct genlmsghdr *gehdr;
- if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
- yerr(ys, YNL_ERROR_INV_RESP,
- "Kernel responded with truncated message");
- return -1;
- }
+ if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
+ yerr(ys, YNL_ERROR_INV_RESP,
+ "Kernel responded with truncated message");
+ return -1;
+ }
- gehdr = ynl_nlmsg_data(nlh);
- if (gehdr->cmd != rsp_cmd)
- return ynl_ntf_parse(ys, nlh);
+ gehdr = ynl_nlmsg_data(nlh);
+ if (gehdr->cmd != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
+ }
return 0;
}
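
The split above boils down to where the command lives: classic (raw) netlink encodes it in nlmsg_type itself, while generic netlink nests it in genlmsghdr.cmd behind the netlink header. A compact sketch of the distinction (editorial; the constants are illustrative):

#include <linux/genetlink.h>
#include <linux/netlink.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

static uint32_t msg_cmd(const struct nlmsghdr *nlh, bool is_classic)
{
	struct genlmsghdr gehdr;

	if (is_classic)
		return nlh->nlmsg_type;		/* e.g. RTM_NEWADDR */

	/* genetlink: the command sits just past the netlink header */
	memcpy(&gehdr, (const char *)nlh + NLMSG_HDRLEN, sizeof(gehdr));
	return gehdr.cmd;
}

int main(void)
{
	struct {
		struct nlmsghdr nlh;
		struct genlmsghdr ge;
	} msg = {
		.nlh.nlmsg_type = 20,	/* classic message type */
		.ge.cmd = 3,		/* genetlink command */
	};

	return msg_cmd(&msg.nlh, true) == 20 &&
	       msg_cmd(&msg.nlh, false) == 3 ? 0 : 1;
}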
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
index 6cd570b283ea..59256e258130 100644
--- a/tools/net/ynl/lib/ynl.h
+++ b/tools/net/ynl/lib/ynl.h
@@ -2,6 +2,7 @@
#ifndef __YNL_C_H
#define __YNL_C_H 1
+#include <stdbool.h>
#include <stddef.h>
#include <linux/genetlink.h>
#include <linux/types.h>
@@ -48,6 +49,8 @@ struct ynl_family {
/* private: */
const char *name;
size_t hdr_len;
+ bool is_classic;
+ __u16 classic_id;
const struct ynl_ntf_info *ntf_info;
unsigned int ntf_info_size;
};
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index 30c0a34b2784..0d930c17f963 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -993,9 +993,6 @@ class Family(SpecFamily):
def resolve(self):
self.resolve_up(super())
- if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}:
- raise Exception("Codegen only supported for genetlink")
-
self.c_name = c_lower(self.ident_name)
if 'name-prefix' in self.yaml['operations']:
self.op_prefix = c_upper(self.yaml['operations']['name-prefix'])
@@ -1042,6 +1039,9 @@ class Family(SpecFamily):
def new_operation(self, elem, req_value, rsp_value):
return Operation(self, elem, req_value, rsp_value)
+ def is_classic(self):
+ return self.proto == 'netlink-raw'
+
def _mark_notify(self):
for op in self.msgs.values():
if 'notify' in op:
@@ -1240,6 +1240,7 @@ class RenderInfo:
# 'do' and 'dump' response parsing is identical
self.type_consistent = True
+ self.type_oneside = False
if op_mode != 'do' and 'dump' in op:
if 'do' in op:
if ('reply' in op['do']) != ('reply' in op["dump"]):
@@ -1247,7 +1248,8 @@ class RenderInfo:
elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]:
self.type_consistent = False
else:
- self.type_consistent = False
+ self.type_consistent = True
+ self.type_oneside = True
self.attr_set = attr_set
if not self.attr_set:
@@ -1275,6 +1277,9 @@ class RenderInfo:
if op_mode == 'event':
self.struct['reply'] = Struct(family, self.attr_set, type_list=op['event']['attributes'])
+ def type_empty(self, key):
+ return len(self.struct[key].attr_list) == 0 and self.fixed_hdr is None
+
class CodeWriter:
def __init__(self, nlib, out_file=None, overwrite=True):
@@ -1541,7 +1546,9 @@ def op_prefix(ri, direction, deref=False):
suffix += f"{direction_to_suffix[direction]}"
else:
if direction == 'request':
- suffix += '_req_dump'
+ suffix += '_req'
+ if not ri.type_oneside:
+ suffix += '_dump'
else:
if ri.type_consistent:
if deref:
@@ -1747,7 +1754,10 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p(f'dst->{arg} = {arg};')
if ri.fixed_hdr:
- ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
+ if ri.family.is_classic():
+ ri.cw.p('hdr = ynl_nlmsg_data(nlh);')
+ else:
+ ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));")
for anest in sorted(all_multi):
aspec = struct[anest]
@@ -1899,7 +1909,10 @@ def print_req(ri):
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
- ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+ if ri.family.is_classic():
+ ri.cw.p(f"nlh = ynl_msg_start_req(ys, {ri.op.enum_name});")
+ else:
+ ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
if 'reply' in ri.op[ri.op_mode]:
@@ -1968,7 +1981,10 @@ def print_dump(ri):
else:
ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
ri.cw.nl()
- ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+ if ri.family.is_classic():
+ ri.cw.p(f"nlh = ynl_msg_start_dump(ys, {ri.op.enum_name});")
+ else:
+ ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
if ri.fixed_hdr:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
@@ -2028,7 +2044,7 @@ def _print_type(ri, direction, struct):
if not direction and ri.type_name_conflict:
suffix += '_'
- if ri.op_mode == 'dump':
+ if ri.op_mode == 'dump' and not ri.type_oneside:
suffix += '_dump'
ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}")
@@ -2079,7 +2095,7 @@ def print_type_helpers(ri, direction, deref=False):
def print_req_type_helpers(ri):
- if len(ri.struct["request"].attr_list) == 0:
+ if ri.type_empty("request"):
return
print_alloc_wrapper(ri, "request")
print_type_helpers(ri, "request")
@@ -2102,7 +2118,7 @@ def print_parse_prototype(ri, direction, terminate=True):
def print_req_type(ri):
- if len(ri.struct["request"].attr_list) == 0:
+ if ri.type_empty("request"):
return
print_type(ri, "request")
@@ -2755,7 +2771,7 @@ def render_user_family(family, cw, prototype):
return
if family.ntfs:
- cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ")
+ cw.block_start(line=f"static const struct ynl_ntf_info {family.c_name}_ntf_info[] = ")
for ntf_op_name, ntf_op in family.ntfs.items():
if 'notify' in ntf_op:
op = family.ops[ntf_op['notify']]
@@ -2775,13 +2791,18 @@ def render_user_family(family, cw, prototype):
cw.block_start(f'{symbol} = ')
cw.p(f'.name\t\t= "{family.c_name}",')
- if family.fixed_header:
+ if family.is_classic():
+ cw.p(f'.is_classic\t= true,')
+ cw.p(f'.classic_id\t= {family.get("protonum")},')
+ if family.is_classic():
+ cw.p(f'.hdr_len\t= sizeof(struct {c_lower(family.fixed_header)}),')
+ elif family.fixed_header:
cw.p(f'.hdr_len\t= sizeof(struct genlmsghdr) + sizeof(struct {c_lower(family.fixed_header)}),')
else:
cw.p('.hdr_len\t= sizeof(struct genlmsghdr),')
if family.ntfs:
- cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
- cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),")
+ cw.p(f".ntf_info\t= {family.c_name}_ntf_info,")
+ cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family.c_name}_ntf_info),")
cw.block_end(line=';')
@@ -3010,7 +3031,7 @@ def main():
ri = RenderInfo(cw, parsed, args.mode, op, 'dump')
print_req_type(ri)
print_req_type_helpers(ri)
- if not ri.type_consistent:
+ if not ri.type_consistent or ri.type_oneside:
print_rsp_type(ri)
print_wrapped_type(ri)
print_dump_prototype(ri)
@@ -3088,7 +3109,7 @@ def main():
if 'dump' in op:
cw.p(f"/* {op.enum_name} - dump */")
ri = RenderInfo(cw, parsed, args.mode, op, "dump")
- if not ri.type_consistent:
+ if not ri.type_consistent or ri.type_oneside:
parse_rsp_msg(ri, deref=True)
print_req_free(ri)
print_dump_type_free(ri)
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index dda6686257a7..7f9781cf532f 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -2,4 +2,6 @@ ethtool
devlink
netdev
ovs
-page-pool
\ No newline at end of file
+page-pool
+rt-addr
+rt-route
diff --git a/tools/net/ynl/samples/rt-addr.c b/tools/net/ynl/samples/rt-addr.c
new file mode 100644
index 000000000000..0f4851b4ec57
--- /dev/null
+++ b/tools/net/ynl/samples/rt-addr.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-addr-user.h"
+
+static void rt_addr_print(struct rt_addr_getaddr_rsp *a)
+{
+ char ifname[IF_NAMESIZE];
+ char addr_str[64];
+ const char *addr;
+ const char *name;
+
+ name = if_indextoname(a->_hdr.ifa_index, ifname);
+ if (name)
+ printf("%16s: ", name);
+
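+ /* the attribute length tells the family apart: 4 bytes IPv4, 16 IPv6 */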
+ switch (a->_present.address_len) {
+ case 4:
+ addr = inet_ntop(AF_INET, a->address,
+ addr_str, sizeof(addr_str));
+ break;
+ case 16:
+ addr = inet_ntop(AF_INET6, a->address,
+ addr_str, sizeof(addr_str));
+ break;
+ default:
+ addr = NULL;
+ break;
+ }
+ if (addr)
+ printf("%s", addr);
+ else
+ printf("[%d]", a->_present.address_len);
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_addr_getaddr_list *rsp;
+ struct rt_addr_getaddr_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_addr_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = rt_addr_getaddr_req_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = rt_addr_getaddr_dump(ys, req);
+ rt_addr_getaddr_req_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no addresses reported\n");
+ ynl_dump_foreach(rsp, addr)
+ rt_addr_print(addr);
+ rt_addr_getaddr_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/rt-route.c b/tools/net/ynl/samples/rt-route.c
new file mode 100644
index 000000000000..9d9c868f8873
--- /dev/null
+++ b/tools/net/ynl/samples/rt-route.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-route-user.h"
+
+static void rt_route_print(struct rt_route_getroute_rsp *r)
+{
+ char ifname[IF_NAMESIZE];
+ char route_str[64];
+ const char *route;
+ const char *name;
+
+ /* ignore routes in the local table */
+ if (r->_hdr.rtm_table == RT_TABLE_LOCAL)
+ return;
+
+ if (r->_present.oif) {
+ name = if_indextoname(r->oif, ifname);
+ if (name)
+ printf("oif: %-16s ", name);
+ }
+
+ if (r->_present.dst_len) {
+ route = inet_ntop(r->_hdr.rtm_family, r->dst,
+ route_str, sizeof(route_str));
+ printf("dst: %s/%d", route, r->_hdr.rtm_dst_len);
+ }
+
+ if (r->_present.gateway_len) {
+ route = inet_ntop(r->_hdr.rtm_family, r->gateway,
+ route_str, sizeof(route_str));
+ printf("gateway: %s ", route);
+ }
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_route_getroute_req_dump *req;
+ struct rt_route_getroute_list *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_route_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
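+ /* getroute's do and dump requests differ, hence the generated _req_dump type */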
+ req = rt_route_getroute_req_dump_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = rt_route_getroute_dump(ys, req);
+ rt_route_getroute_req_dump_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no routeesses reported\n");
+ ynl_dump_foreach(rsp, route)
+ rt_route_print(route);
+ rt_route_getroute_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c77c8c8e3d9b..61bb8bf1b507 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -71,6 +71,7 @@ TARGETS += net/hsr
TARGETS += net/mptcp
TARGETS += net/netfilter
TARGETS += net/openvswitch
+TARGETS += net/ovpn
TARGETS += net/packetdrill
TARGETS += net/rds
TARGETS += net/tcp_ao
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 130d532b7e67..3cfef5153823 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -33,7 +33,6 @@ CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
-CONFIG_IP_DCCP=m
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES=m
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 8986c584cb37..6329ae805abf 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -39,12 +39,12 @@ class EthtoolFamily(YnlFamily):
class RtnlFamily(YnlFamily):
def __init__(self, recv_size=0):
- super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+ super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(),
schema='', recv_size=recv_size)
class RtnlAddrFamily(YnlFamily):
def __init__(self, recv_size=0):
- super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(),
+ super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(),
schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index c83a8b47bbdf..ac1349c4b9e5 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -180,13 +180,26 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ /* glibc supports MPTCP in getaddrinfo() only since v2.42.
+ * With older versions, resolve with IPPROTO_TCP here,
+ * then create the socket with TCP/MPTCP.
+ * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+ */
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -292,7 +305,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -356,7 +369,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
int infd, struct wstate *winfo)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index 284286c524cf..37d5015ad08c 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -185,9 +185,10 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
}
}
-static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
+static void recv_nlmsg(int fd)
{
char rcv_buff[8192];
+ struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff;
struct sockaddr_nl rcv_nladdr = {
.nl_family = AF_NETLINK
};
@@ -204,7 +205,6 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
int len;
len = recvmsg(fd, &rcv_msg, 0);
- nlh = (struct nlmsghdr *)rcv_buff;
while (NLMSG_OK(nlh, len)) {
if (nlh->nlmsg_type == NLMSG_DONE) {
@@ -225,7 +225,6 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
static void get_mptcpinfo(__u32 token)
{
- struct nlmsghdr *nlh = NULL;
int fd;
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
@@ -233,7 +232,7 @@ static void get_mptcpinfo(__u32 token)
die_perror("Netlink socket");
send_query(fd, token);
- recv_nlmsg(fd, nlh);
+ recv_nlmsg(fd);
close(fd);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index befa66f5a366..b8af65373b3a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -62,6 +62,7 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset join_syn_rej
unset join_csum_ns1
unset join_csum_ns2
unset join_fail_nr
@@ -1403,6 +1404,7 @@ chk_join_nr()
local syn_nr=$1
local syn_ack_nr=$2
local ack_nr=$3
+ local syn_rej=${join_syn_rej:-0}
local csum_ns1=${join_csum_ns1:-0}
local csum_ns2=${join_csum_ns2:-0}
local fail_nr=${join_fail_nr:-0}
@@ -1468,6 +1470,15 @@ chk_join_nr()
fail_test "got $count JOIN[s] ack HMAC failure expected 0"
fi
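+ # the MPJoinRejected counter may be missing on older kernels; skip the check then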
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_rej" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn rejected"
+ fail_test "got $count JOIN[s] syn rejected expected $syn_rej"
+ fi
+
print_results "join Rx" ${rc}
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
@@ -1963,7 +1974,8 @@ subflows_tests()
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# subflow
@@ -1992,7 +2004,8 @@ subflows_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 2 1
+ join_syn_rej=1 \
+ chk_join_nr 2 2 1
fi
# single subflow, dev
@@ -3061,7 +3074,8 @@ syncookies_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 1 1
+ join_syn_rej=1 \
+ chk_join_nr 2 1 1
fi
# test signal address with cookies
@@ -3545,7 +3559,8 @@ userspace_tests()
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# userspace pm type does not send join
@@ -3568,7 +3583,8 @@ userspace_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
chk_prio_nr 0 0 0 0
fi
diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore
new file mode 100644
index 000000000000..ee44c081ca7c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
new file mode 100644
index 000000000000..2d102878cb6d
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES)
+VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null)
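+# fall back to the standard libnl3 install locations when pkg-config is unavailable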
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS = -I/usr/include/libnl3
+endif
+CFLAGS += $(VAR_CFLAGS)
+
+
+LDLIBS = -lmbedtls -lmbedcrypto
+VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS = -lnl-genl-3 -lnl-3
+endif
+LDLIBS += $(VAR_LDLIBS)
+
+
+TEST_FILES = common.sh
+
+TEST_PROGS = test.sh \
+ test-chachapoly.sh \
+ test-tcp.sh \
+ test-float.sh \
+ test-close-socket.sh \
+ test-close-socket-tcp.sh
+
+TEST_GEN_FILES := ovpn-cli
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh
new file mode 100644
index 000000000000..7502292a1ee0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/common.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt}
+TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt}
+OVPN_CLI=${OVPN_CLI:-./ovpn-cli}
+ALG=${ALG:-aes}
+PROTO=${PROTO:-UDP}
+FLOAT=${FLOAT:-0}
+
+create_ns() {
+ ip netns add peer${1}
+}
+
+setup_ns() {
+ MODE="P2P"
+
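+ # peer 0 acts as the multi-peer server: wire one veth pair from it to every client netns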
+ if [ ${1} -eq 0 ]; then
+ MODE="MP"
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p}
+
+ ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p}
+ ip -n peer0 link set veth${p} up
+
+ ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} link set veth${p} up
+ done
+ fi
+
+ ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE
+ ip -n peer${1} addr add ${2} dev tun${1}
+ ip -n peer${1} link set tun${1} up
+}
+
+add_peer() {
+ if [ "${PROTO}" == "UDP" ]; then
+ if [ ${1} -eq 0 ]; then
+ ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE}
+
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \
+ data64.key
+ done
+ else
+ ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} 1 10.10.${1}.1 1
+ ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \
+ data64.key
+ fi
+ else
+ if [ ${1} -eq 0 ]; then
+ (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && {
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \
+ ${ALG} 0 data64.key
+ done
+ }) &
+ sleep 5
+ else
+ ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \
+ data64.key
+ fi
+ fi
+}
+
+cleanup() {
+ # some ovpn-cli processes sleep in background so they need manual poking
+ killall $(basename ${OVPN_CLI}) 2>/dev/null || true
+
+ # netns peer0 is deleted without erasing ifaces first
+ for p in $(seq 1 10); do
+ ip -n peer${p} link set tun${p} down 2>/dev/null || true
+ ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true
+ done
+ for p in $(seq 1 10); do
+ ip -n peer0 link del veth${p} 2>/dev/null || true
+ done
+ for p in $(seq 0 10); do
+ ip netns del peer${p} 2>/dev/null || true
+ done
+}
+
+if [ "${PROTO}" == "UDP" ]; then
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')}
+else
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')}
+fi
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
new file mode 100644
index 000000000000..71946ba9fa17
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/config
@@ -0,0 +1,10 @@
+CONFIG_NET=y
+CONFIG_INET=y
+CONFIG_STREAM_PARSER=y
+CONFIG_NET_UDP_TUNNEL=y
+CONFIG_DST_CACHE=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_OVPN=m
diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key
new file mode 100644
index 000000000000..a99e88c4e290
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/data64.key
@@ -0,0 +1,5 @@
+jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B
+ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9
+uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6
+KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE
+BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w==
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
new file mode 100644
index 000000000000..69e41fc07fbc
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -0,0 +1,2376 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel accelerator
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#include <linux/ovpn.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#include <netlink/socket.h>
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <mbedtls/base64.h>
+#include <mbedtls/error.h>
+
+#include <sys/socket.h>
+
+/* defines to make checkpatch happy */
+#define strscpy strncpy
+#define __always_unused __attribute__((__unused__))
+
+/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we
+ * have to explicitly do it to prevent the kernel from failing upon
+ * parsing of the message
+ */
+#define nla_nest_start(_msg, _type) \
+ nla_nest_start(_msg, (_type) | NLA_F_NESTED)
+
+/* libnl < 3.11.0 does not implement nla_get_uint() */
+uint64_t ovpn_nla_get_uint(struct nlattr *attr)
+{
+ if (nla_len(attr) == sizeof(uint32_t))
+ return nla_get_u32(attr);
+ else
+ return nla_get_u64(attr);
+}
+
+typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg);
+
+enum ovpn_key_direction {
+ KEY_DIR_IN = 0,
+ KEY_DIR_OUT,
+};
+
+#define KEY_LEN (256 / 8)
+#define NONCE_LEN 8
+
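+/* all-ones 24-bit value meaning "no peer specified"; makes get_peer dump all peers */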
+#define PEER_ID_UNDEF 0x00FFFFFF
+#define MAX_PEERS 10
+
+struct nl_ctx {
+ struct nl_sock *nl_sock;
+ struct nl_msg *nl_msg;
+ struct nl_cb *nl_cb;
+
+ int ovpn_dco_id;
+};
+
+enum ovpn_cmd {
+ CMD_INVALID,
+ CMD_NEW_IFACE,
+ CMD_DEL_IFACE,
+ CMD_LISTEN,
+ CMD_CONNECT,
+ CMD_NEW_PEER,
+ CMD_NEW_MULTI_PEER,
+ CMD_SET_PEER,
+ CMD_DEL_PEER,
+ CMD_GET_PEER,
+ CMD_NEW_KEY,
+ CMD_DEL_KEY,
+ CMD_GET_KEY,
+ CMD_SWAP_KEYS,
+ CMD_LISTEN_MCAST,
+};
+
+struct ovpn_ctx {
+ enum ovpn_cmd cmd;
+
+ __u8 key_enc[KEY_LEN];
+ __u8 key_dec[KEY_LEN];
+ __u8 nonce[NONCE_LEN];
+
+ enum ovpn_cipher_alg cipher;
+
+ sa_family_t sa_family;
+
+ unsigned long peer_id;
+ unsigned long lport;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } remote;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } peer_ip;
+
+ bool peer_ip_set;
+
+ unsigned int ifindex;
+ char ifname[IFNAMSIZ];
+ enum ovpn_mode mode;
+ bool mode_set;
+
+ int socket;
+ int cli_sockets[MAX_PEERS];
+
+ __u32 keepalive_interval;
+ __u32 keepalive_timeout;
+
+ enum ovpn_key_direction key_dir;
+ enum ovpn_key_slot key_slot;
+ int key_id;
+
+ const char *peers_file;
+};
+
+static int ovpn_nl_recvmsgs(struct nl_ctx *ctx)
+{
+ int ret;
+
+ ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb);
+
+ switch (ret) {
+ case -NLE_INTR:
+ fprintf(stderr,
+ "netlink received interrupt due to signal - ignoring\n");
+ break;
+ case -NLE_NOMEM:
+ fprintf(stderr, "netlink out of memory error\n");
+ break;
+ case -NLE_AGAIN:
+ fprintf(stderr,
+ "netlink reports blocking read - aborting wait\n");
+ break;
+ default:
+ if (ret)
+ fprintf(stderr, "netlink reports error (%d): %s\n",
+ ret, nl_geterror(-ret));
+ break;
+ }
+
+ return ret;
+}
+
+static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd,
+ int flags)
+{
+ struct nl_ctx *ctx;
+ int err, ret;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->nl_sock = nl_socket_alloc();
+ if (!ctx->nl_sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192);
+
+ ret = genl_connect(ctx->nl_sock);
+ if (ret) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_sock;
+ }
+
+ /* enable Extended ACK for detailed error reporting */
+ err = 1;
+ setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK,
+ &err, sizeof(err));
+
+ ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME);
+ if (ctx->ovpn_dco_id < 0) {
+ fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n",
+ ctx->ovpn_dco_id);
+ goto err_sock;
+ }
+
+ ctx->nl_msg = nlmsg_alloc();
+ if (!ctx->nl_msg) {
+ fprintf(stderr, "cannot allocate netlink message\n");
+ goto err_sock;
+ }
+
+ ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!ctx->nl_cb) {
+ fprintf(stderr, "failed to allocate netlink callback\n");
+ goto err_msg;
+ }
+
+ nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb);
+
+ genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0);
+
+ if (ovpn->ifindex > 0)
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex);
+
+ return ctx;
+nla_put_failure:
+err_msg:
+ nlmsg_free(ctx->nl_msg);
+err_sock:
+ nl_socket_free(ctx->nl_sock);
+err_free:
+ free(ctx);
+ return NULL;
+}
+
+static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd)
+{
+ return nl_ctx_alloc_flags(ovpn, cmd, 0);
+}
+
+static void nl_ctx_free(struct nl_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ nl_socket_free(ctx->nl_sock);
+ nlmsg_free(ctx->nl_msg);
+ nl_cb_put(ctx->nl_cb);
+ free(ctx);
+}
+
+static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1;
+ struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1];
+ int len = nlh->nlmsg_len;
+ struct nlattr *attrs;
+ int *ret = arg;
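+ /* ack payload: outer nlmsghdr, the error code, plus the header of the failed request */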
+ int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh);
+
+ *ret = err->error;
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return NL_STOP;
+
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ ack_len += err->msg.nlmsg_len - sizeof(*nlh);
+
+ if (len <= ack_len)
+ return NL_STOP;
+
+ attrs = (void *)((uint8_t *)nlh + ack_len);
+ len -= ack_len;
+
+ nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL);
+ if (tb_msg[NLMSGERR_ATTR_MSG]) {
+ len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]),
+ nla_len(tb_msg[NLMSGERR_ATTR_MSG]));
+ fprintf(stderr, "kernel error: %*s\n", len,
+ (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) {
+ fprintf(stderr, "missing required nesting type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) {
+ fprintf(stderr, "missing required attribute type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE]));
+ }
+
+ return NL_STOP;
+}
+
+static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_SKIP;
+}
+
+static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_STOP;
+}
+
+static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb)
+{
+ int status = 1;
+
+ nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
+ &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status);
+
+ if (cb)
+ nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx);
+
+ nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg);
+
+ while (status == 1)
+ ovpn_nl_recvmsgs(ctx);
+
+ if (status < 0)
+ fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+ strerror(-status), status);
+
+ return status;
+}
+
+static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx)
+{
+ int idx_enc, idx_dec, ret = -1;
+ unsigned char *ckey = NULL;
+ __u8 *bkey = NULL;
+ size_t olen = 0;
+ long ckey_len;
+ FILE *fp;
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open: %s\n", file);
+ return -1;
+ }
+
+ /* get file size */
+ fseek(fp, 0L, SEEK_END);
+ ckey_len = ftell(fp);
+ rewind(fp);
+
+ /* if the file is longer, let's just read a portion */
+ if (ckey_len > 256)
+ ckey_len = 256;
+
+ ckey = malloc(ckey_len);
+ if (!ckey)
+ goto err;
+
+ ret = fread(ckey, 1, ckey_len, fp);
+ if (ret != ckey_len) {
+ fprintf(stderr,
+ "couldn't read enough data from key file: %dbytes read\n",
+ ret);
+ goto err;
+ }
+
+ olen = 0;
+ ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len);
+ if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ bkey = malloc(olen);
+ if (!bkey) {
+ fprintf(stderr, "cannot allocate binary key buffer\n");
+ goto err;
+ }
+
+ ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len);
+ if (ret) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ if (olen < 2 * KEY_LEN + NONCE_LEN) {
+ fprintf(stderr,
+ "not enough data in key file, found %zdB but needs %dB\n",
+ olen, 2 * KEY_LEN + NONCE_LEN);
+ goto err;
+ }
+
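+ /* decoded blob holds two cipher keys then the nonce tail; key_dir picks which key encrypts */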
+ switch (ctx->key_dir) {
+ case KEY_DIR_IN:
+ idx_enc = 0;
+ idx_dec = 1;
+ break;
+ case KEY_DIR_OUT:
+ idx_enc = 1;
+ idx_dec = 0;
+ break;
+ default:
+ goto err;
+ }
+
+ memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN);
+ memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN);
+ memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN);
+
+ ret = 0;
+
+err:
+ fclose(fp);
+ free(bkey);
+ free(ckey);
+
+ return ret;
+}
+
+static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx)
+{
+ if (strcmp(cipher, "aes") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ else if (strcmp(cipher, "chachapoly") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305;
+ else if (strcmp(cipher, "none") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_NONE;
+ else
+ return -ENOTSUP;
+
+ return 0;
+}
+
+static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx)
+{
+ int in_dir;
+
+ in_dir = strtoll(dir, NULL, 10);
+ switch (in_dir) {
+ case KEY_DIR_IN:
+ case KEY_DIR_OUT:
+ ctx->key_dir = in_dir;
+ break;
+ default:
+ fprintf(stderr,
+ "invalid key direction provided. Can be 0 or 1 only\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto)
+{
+ struct sockaddr_storage local_sock = { 0 };
+ struct sockaddr_in6 *in6;
+ struct sockaddr_in *in;
+ int ret, s, sock_type;
+ size_t sock_len;
+
+ if (proto == IPPROTO_UDP)
+ sock_type = SOCK_DGRAM;
+ else if (proto == IPPROTO_TCP)
+ sock_type = SOCK_STREAM;
+ else
+ return -EINVAL;
+
+ s = socket(family, sock_type, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (family) {
+ case AF_INET:
+ in = (struct sockaddr_in *)&local_sock;
+ in->sin_family = family;
+ in->sin_port = htons(ctx->lport);
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ sock_len = sizeof(*in);
+ break;
+ case AF_INET6:
+ in6 = (struct sockaddr_in6 *)&local_sock;
+ in6->sin6_family = family;
+ in6->sin6_port = htons(ctx->lport);
+ in6->sin6_addr = in6addr_any;
+ sock_len = sizeof(*in6);
+ break;
+ default:
+ goto err_socket;
+ }
+
+ int opt = 1;
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEADDR");
+ goto err_socket;
+ }
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEPORT");
+ goto err_socket;
+ }
+
+ if (family == AF_INET6) {
+ opt = 0;
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt,
+ sizeof(opt))) {
+ perror("failed to set IPV6_V6ONLY");
+ goto err_socket;
+ }
+ }
+
+ ret = bind(s, (struct sockaddr *)&local_sock, sock_len);
+ if (ret < 0) {
+ perror("cannot bind socket");
+ goto err_socket;
+ }
+
+ ctx->socket = s;
+ ctx->sa_family = family;
+ return 0;
+
+err_socket:
+ close(s);
+ return -1;
+}
+
+static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ return ovpn_socket(ctx, family, IPPROTO_UDP);
+}
+
+static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ int ret;
+
+ ret = ovpn_socket(ctx, family, IPPROTO_TCP);
+ if (ret < 0)
+ return ret;
+
+ ret = listen(ctx->socket, 10);
+ if (ret < 0) {
+ perror("listen");
+ close(ctx->socket);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_accept(struct ovpn_ctx *ctx)
+{
+ socklen_t socklen;
+ int ret;
+
+ socklen = sizeof(ctx->remote);
+ ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen);
+ if (ret < 0) {
+ perror("accept");
+ goto err;
+ }
+
+ fprintf(stderr, "Connection received!\n");
+
+ switch (socklen) {
+ case sizeof(struct sockaddr_in):
+ case sizeof(struct sockaddr_in6):
+ break;
+ default:
+ fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n");
+ close(ret);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ return ret;
+err:
+ close(ctx->socket);
+ return ret;
+}
+
+static int ovpn_connect(struct ovpn_ctx *ovpn)
+{
+ socklen_t socklen;
+ int s, ret;
+
+ s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ socklen = sizeof(struct sockaddr_in);
+ break;
+ case AF_INET6:
+ socklen = sizeof(struct sockaddr_in6);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen);
+ if (ret < 0) {
+ perror("connect");
+ goto err;
+ }
+
+ fprintf(stderr, "connected\n");
+
+ ovpn->socket = s;
+
+ return 0;
+err:
+ close(s);
+ return ret;
+}
+
+static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket);
+
+ if (!is_tcp) {
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4,
+ ovpn->remote.in4.sin_addr.s_addr);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in4.sin_port);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6,
+ sizeof(ovpn->remote.in6.sin6_addr),
+ &ovpn->remote.in6.sin6_addr);
+ NLA_PUT_U32(ctx->nl_msg,
+ OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+ ovpn->remote.in6.sin6_scope_id);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in6.sin6_port);
+ break;
+ default:
+ fprintf(stderr,
+ "Invalid family for remote socket address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ if (ovpn->peer_ip_set) {
+ switch (ovpn->peer_ip.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4,
+ ovpn->peer_ip.in4.sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6,
+ sizeof(struct in6_addr),
+ &ovpn->peer_ip.in6.sin6_addr);
+ break;
+ default:
+ fprintf(stderr, "Invalid family for peer address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_set_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+ ovpn->keepalive_interval);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+ ovpn->keepalive_timeout);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *pattrs[OVPN_A_PEER_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ __u16 rport = 0, lport = 0;
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_PEER]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]),
+ nla_len(attrs[OVPN_A_PEER]), NULL);
+
+ if (pattrs[OVPN_A_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_ID]));
+
+ if (pattrs[OVPN_A_PEER_SOCKET_NETNSID])
+ fprintf(stderr, "\tsocket NetNS ID: %d\n",
+ nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID]));
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV4]) {
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv4: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV6]) {
+ char buf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv6: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_LOCAL_PORT])
+ lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_PORT])
+ rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6];
+ char buf[INET6_ADDRSTRLEN];
+ int scope_id = -1;
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) {
+ void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID];
+
+ scope_id = nla_get_u32(p);
+ }
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport,
+ scope_id);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6];
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4];
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) {
+ void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4];
+
+ inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) {
+ void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL];
+
+ fprintf(stderr, "\tKeepalive interval: %u sec\n",
+ nla_get_u32(p));
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])
+ fprintf(stderr, "\tKeepalive timeout: %u sec\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_BYTES])
+ fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_BYTES])
+ fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS])
+ fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS])
+ fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_BYTES])
+ fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_BYTES])
+ fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS])
+ fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS])
+ fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS]));
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_peer(struct ovpn_ctx *ovpn)
+{
+ int flags = 0, ret = -1;
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+
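+ /* no peer ID specified: dump the whole peer list */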
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ flags = NLM_F_DUMP;
+
+ ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (ovpn->peer_id != PEER_ID_UNDEF) {
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+ }
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_new_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf, *key_dir;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_KEYCONF]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]),
+ nla_len(attrs[OVPN_A_KEYCONF]), NULL);
+
+ if (kattrs[OVPN_A_KEYCONF_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID]));
+ if (kattrs[OVPN_A_KEYCONF_SLOT]) {
+ fprintf(stderr, "\t- Slot: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ fprintf(stderr, "primary\n");
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ fprintf(stderr, "secondary\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT]));
+ break;
+ }
+ }
+ if (kattrs[OVPN_A_KEYCONF_KEY_ID])
+ fprintf(stderr, "\t- Key ID: %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID]));
+ if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) {
+ fprintf(stderr, "\t- Cipher: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) {
+ case OVPN_CIPHER_ALG_NONE:
+ fprintf(stderr, "none\n");
+ break;
+ case OVPN_CIPHER_ALG_AES_GCM:
+ fprintf(stderr, "aes-gcm\n");
+ break;
+ case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+ fprintf(stderr, "chacha20poly1305\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG]));
+ break;
+ }
+ }
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_key);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_swap_keys(struct ovpn_ctx *ovpn)
+{
+ struct nl_ctx *ctx;
+ struct nlattr *kc;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP);
+ if (!ctx)
+ return -ENOMEM;
+
+ kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, kc);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+/* Helper function to easily add attributes to an rtnl message */
+static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type,
+ const void *data, int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) {
+ fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -EMSGSIZE;
+ }
+
+ rta = nlmsg_tail(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+
+ if (!data)
+ memset(RTA_DATA(rta), 0, alen);
+ else
+ memcpy(RTA_DATA(rta), data, alen);
+
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+
+ return 0;
+}
+
+static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size,
+ int attr)
+{
+ struct rtattr *nest = nlmsg_tail(msg);
+
+ if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0)
+ return NULL;
+
+ return nest;
+}
+
+static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest)
+{
+ nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest;
+}
+
+#define RT_SNDBUF_SIZE (1024 * 2)
+#define RT_RCVBUF_SIZE (1024 * 4)
+
+/* Open RTNL socket */
+static int ovpn_rt_socket(void)
+{
+ int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd < 0) {
+ fprintf(stderr, "%s: cannot open netlink socket\n", __func__);
+ return fd;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf,
+ sizeof(sndbuf)) < 0) {
+ fprintf(stderr, "%s: SO_SNDBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+ sizeof(rcvbuf)) < 0) {
+ fprintf(stderr, "%s: SO_RCVBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+/* Bind socket to Netlink subsystem */
+static int ovpn_rt_bind(int fd, uint32_t groups)
+{
+ struct sockaddr_nl local = { 0 };
+ socklen_t addr_len;
+
+ local.nl_family = AF_NETLINK;
+ local.nl_groups = groups;
+
+ if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+ fprintf(stderr, "%s: cannot bind netlink socket: %d\n",
+ __func__, errno);
+ return -errno;
+ }
+
+ addr_len = sizeof(local);
+ if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) {
+ fprintf(stderr, "%s: cannot getsockname: %d\n", __func__,
+ errno);
+ return -errno;
+ }
+
+ if (addr_len != sizeof(local)) {
+ fprintf(stderr, "%s: wrong address length %d\n", __func__,
+ addr_len);
+ return -EINVAL;
+ }
+
+ if (local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "%s: wrong address family %d\n", __func__,
+ local.nl_family);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg);
+
+/* Send Netlink message and run callback on reply (if specified) */
+static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer,
+ unsigned int groups, ovpn_parse_reply_cb cb,
+ void *arg_cb)
+{
+ int len, rem_len, fd, ret, rcv_len;
+ struct sockaddr_nl nladdr = { 0 };
+ struct nlmsgerr *err;
+ struct nlmsghdr *h;
+ char buf[1024 * 16];
+ struct iovec iov = {
+ .iov_base = payload,
+ .iov_len = payload->nlmsg_len,
+ };
+ struct msghdr nlmsg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ nladdr.nl_family = AF_NETLINK;
+ nladdr.nl_pid = peer;
+ nladdr.nl_groups = groups;
+
+ payload->nlmsg_seq = time(NULL);
+
+ /* no need to send reply */
+ if (!cb)
+ payload->nlmsg_flags |= NLM_F_ACK;
+
+ fd = ovpn_rt_socket();
+ if (fd < 0) {
+ fprintf(stderr, "%s: can't open rtnl socket\n", __func__);
+ return -errno;
+ }
+
+ ret = ovpn_rt_bind(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: can't bind rtnl socket\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ ret = sendmsg(fd, &nlmsg, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ /* prepare buffer to store RTNL replies */
+ memset(buf, 0, sizeof(buf));
+ iov.iov_base = buf;
+
+ while (1) {
+ /*
+ * iov_len is modified by recvmsg(), therefore has to be initialized before
+ * using it again
+ */
+ iov.iov_len = sizeof(buf);
+ rcv_len = recvmsg(fd, &nlmsg, 0);
+ if (rcv_len < 0) {
+ if (errno == EINTR || errno == EAGAIN) {
+ fprintf(stderr, "%s: interrupted call\n",
+ __func__);
+ continue;
+ }
+ fprintf(stderr, "%s: rtnl: error on recvmsg()\n",
+ __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ if (rcv_len == 0) {
+ fprintf(stderr,
+ "%s: rtnl: socket reached unexpected EOF\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (nlmsg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "%s: sender address length: %u (expected %zu)\n",
+ __func__, nlmsg.msg_namelen, sizeof(nladdr));
+ ret = -EIO;
+ goto out;
+ }
+
+ h = (struct nlmsghdr *)buf;
+ while (rcv_len >= (int)sizeof(*h)) {
+ len = h->nlmsg_len;
+ rem_len = len - sizeof(*h);
+
+ if (rem_len < 0 || len > rcv_len) {
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: truncated message\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+ fprintf(stderr, "%s: malformed message: len=%d\n",
+ __func__, len);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_DONE) {
+ ret = 0;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ err = (struct nlmsgerr *)NLMSG_DATA(h);
+ if (rem_len < (int)sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "%s: ERROR truncated\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (err->error) {
+ fprintf(stderr, "%s: (%d) %s\n",
+ __func__, err->error,
+ strerror(-err->error));
+ ret = err->error;
+ goto out;
+ }
+
+ ret = 0;
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0)
+ ret = r;
+ }
+ goto out;
+ }
+
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0) {
+ ret = r;
+ goto out;
+ }
+ } else {
+ fprintf(stderr, "%s: RTNL: unexpected reply\n",
+ __func__);
+ }
+
+ rcv_len -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((uint8_t *)h +
+ NLMSG_ALIGN(len));
+ }
+
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: message truncated\n", __func__);
+ continue;
+ }
+
+ if (rcv_len) {
+ fprintf(stderr, "%s: rtnl: %d not parsed bytes\n",
+ __func__, rcv_len);
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ close(fd);
+
+ return ret;
+}
+
+struct ovpn_link_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[256];
+};
+
+static int ovpn_new_iface(struct ovpn_ctx *ovpn)
+{
+ struct rtattr *linkinfo, *data;
+ struct ovpn_link_req req = { 0 };
+ int ret = -1;
+
+ fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname,
+ ovpn->mode);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname,
+ strlen(ovpn->ifname) + 1) < 0)
+ goto err;
+
+ linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO);
+ if (!linkinfo)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME,
+ strlen(OVPN_FAMILY_NAME) + 1) < 0)
+ goto err;
+
+ if (ovpn->mode_set) {
+ data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA);
+ if (!data)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE,
+ &ovpn->mode, sizeof(uint8_t)) < 0)
+ goto err;
+
+ ovpn_nest_end(&req.n, data);
+ }
+
+ ovpn_nest_end(&req.n, linkinfo);
+
+ req.i.ifi_family = AF_PACKET;
+
+ ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+err:
+ return ret;
+}
+
+static int ovpn_del_iface(struct ovpn_ctx *ovpn)
+{
+ struct ovpn_link_req req = { 0 };
+
+ fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname,
+ ovpn->ifindex);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_DELLINK;
+
+ req.i.ifi_family = AF_PACKET;
+ req.i.ifi_index = ovpn->ifindex;
+
+ return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+static int nl_seq_check(struct nl_msg (*msg)__always_unused,
+ void (*arg)__always_unused)
+{
+ return NL_OK;
+}
+
+struct mcast_handler_args {
+ const char *group;
+ int id;
+};
+
+static int mcast_family_handler(struct nl_msg *msg, void *arg)
+{
+ struct mcast_handler_args *grp = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int rem_mcgrp;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return NL_SKIP;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+ struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+ nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+ nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+ if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+ continue;
+ if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+ grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+ continue;
+ grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+ break;
+ }
+
+ return NL_SKIP;
+}
+
+static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ int *ret = arg;
+
+ *ret = err->error;
+ return NL_STOP;
+}
+
+static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg)
+{
+ int *ret = arg;
+
+ *ret = 0;
+ return NL_STOP;
+}
+
+static int ovpn_handle_msg(struct nl_msg *msg, void *arg)
+{
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ struct nlmsghdr *nlh = nlmsg_hdr(msg);
+ char ifname[IF_NAMESIZE];
+ int *ret = arg;
+ __u32 ifindex;
+
+ fprintf(stderr, "received message from ovpn-dco\n");
+
+ *ret = -1;
+
+ if (!genlmsg_valid_hdr(nlh, 0)) {
+ fprintf(stderr, "invalid header\n");
+ return NL_STOP;
+ }
+
+ if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL)) {
+ fprintf(stderr, "received bogus data from ovpn-dco\n");
+ return NL_STOP;
+ }
+
+ if (!attrs[OVPN_A_IFINDEX]) {
+ fprintf(stderr, "no ifindex in this message\n");
+ return NL_STOP;
+ }
+
+ ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]);
+ if (!if_indextoname(ifindex, ifname)) {
+ fprintf(stderr, "cannot resolve ifname for ifindex: %u\n",
+ ifindex);
+ return NL_STOP;
+ }
+
+ switch (gnlh->cmd) {
+ case OVPN_CMD_PEER_DEL_NTF:
+ fprintf(stdout, "received CMD_PEER_DEL_NTF\n");
+ break;
+ case OVPN_CMD_KEY_SWAP_NTF:
+ fprintf(stdout, "received CMD_KEY_SWAP_NTF\n");
+ break;
+ default:
+ fprintf(stderr, "received unknown command: %d\n", gnlh->cmd);
+ return NL_STOP;
+ }
+
+ *ret = 0;
+ return NL_OK;
+}
+
+static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family,
+ const char *group)
+{
+ struct nl_msg *msg;
+ struct nl_cb *cb;
+ int ret, ctrlid;
+ struct mcast_handler_args grp = {
+ .group = group,
+ .id = -ENOENT,
+ };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return -ENOMEM;
+
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!cb) {
+ ret = -ENOMEM;
+ goto out_fail_cb;
+ }
+
+ ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+ genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+ ret = -ENOBUFS;
+ NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = nl_send_auto_complete(sock, msg);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ ret = 1;
+
+ nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret);
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp);
+
+ while (ret > 0)
+ nl_recvmsgs(sock, cb);
+
+ if (ret == 0)
+ ret = grp.id;
+ nla_put_failure:
+ nl_cb_put(cb);
+ out_fail_cb:
+ nlmsg_free(msg);
+ return ret;
+}
+
+static int ovpn_listen_mcast(void)
+{
+ struct nl_sock *sock;
+ struct nl_cb *cb;
+ int mcid, ret;
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(sock, 8192, 8192);
+
+ ret = genl_connect(sock);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_free;
+ }
+
+ mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS);
+ if (mcid < 0) {
+ fprintf(stderr, "cannot get mcast group: %s\n",
+ nl_geterror(mcid));
+ goto err_free;
+ }
+
+ ret = nl_socket_add_membership(sock, mcid);
+ if (ret) {
+ fprintf(stderr, "failed to join mcast group: %d\n", ret);
+ goto err_free;
+ }
+
+ ret = 1;
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret);
+ nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret);
+
+ while (ret == 1) {
+ int err = nl_recvmsgs(sock, cb);
+
+ if (err < 0) {
+ fprintf(stderr,
+ "cannot receive netlink message: (%d) %s\n",
+ err, nl_geterror(-err));
+ ret = -1;
+ break;
+ }
+ }
+
+ nl_cb_put(cb);
+err_free:
+ nl_socket_free(sock);
+ return ret;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr,
+ "Usage %s <command> <iface> [arguments..]\n",
+ cmd);
+ fprintf(stderr, "where <command> can be one of the following\n\n");
+
+ fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tmode:\n");
+ fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n");
+ fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n");
+
+ fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+
+ fprintf(stderr,
+ "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr, "\tlport: TCP port to listen on\n");
+	fprintf(stderr,
+		"\tpeers_file: file containing one peer per line. Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n");
+ fprintf(stderr,
+		"\tipv6: whether the socket should listen on the IPv6 wildcard address\n");
+
+ fprintf(stderr,
+		"* connect <iface> <peer_id> <raddr> <rport> [key_file]: connect to a peer of a TCP-based VPN session\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n");
+ fprintf(stderr, "\traddr: peer IP address to connect to\n");
+ fprintf(stderr, "\trport: peer TCP port to connect to\n");
+ fprintf(stderr,
+ "\tkey_file: file containing the symmetric key for encryption\n");
+
+ fprintf(stderr,
+ "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+	fprintf(stderr,
+		"\tpeer_id: peer ID to be used in data packets to/from this peer\n");
+	fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr, "\traddr: peer IP address\n");
+ fprintf(stderr, "\trport: peer UDP port\n");
+ fprintf(stderr, "\tvpnaddr: peer VPN IP\n");
+
+ fprintf(stderr,
+ "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeers_file: text file containing one peer per line. Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n");
+
+ fprintf(stderr,
+ "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr,
+ "\tkeepalive_interval: interval for sending ping messages\n");
+ fprintf(stderr,
+ "\tkeepalive_timeout: time after which a peer is timed out\n");
+
+ fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n");
+
+ fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n");
+
+ fprintf(stderr,
+ "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to configure the key for\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+ fprintf(stderr, "\tkey_id: an ID from 0 to 7\n");
+ fprintf(stderr,
+ "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n");
+ fprintf(stderr,
+		"\tkey_dir: key direction, must be 0 on one host and 1 on the other\n");
+ fprintf(stderr, "\tkey_file: file containing the pre-shared key\n");
+
+ fprintf(stderr,
+ "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n");
+
+ fprintf(stderr,
+		"* get_key <iface> <peer_id> <slot>: retrieve non-sensitive key data\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+
+ fprintf(stderr,
+ "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+
+ fprintf(stderr,
+		"* listen_mcast: listen for ovpn netlink multicast messages\n");
+}
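+
+/* Example session (hypothetical addresses; data64.key is the key file
+ * used by the selftest scripts):
+ *
+ *   ovpn-cli new_iface tun0 P2P
+ *   ovpn-cli new_peer tun0 1 1194 10.10.1.2 1194 5.5.5.2
+ *   ovpn-cli new_key tun0 1 1 0 aes 0 data64.key
+ *   ovpn-cli set_peer tun0 1 60 120
+ */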
+
+static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host,
+ const char *service, const char *vpnip)
+{
+ int ret;
+	struct addrinfo *result = NULL;
+ struct addrinfo hints = {
+ .ai_family = ovpn->sa_family,
+ .ai_socktype = SOCK_DGRAM,
+ .ai_protocol = IPPROTO_UDP
+ };
+
+ if (host) {
+ ret = getaddrinfo(host, service, &hints, &result);
+		if (ret)
+			return -1;
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+		memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen);
+		freeaddrinfo(result);
+		result = NULL;
+ }
+
+ if (vpnip) {
+ ret = getaddrinfo(vpnip, NULL, &hints, &result);
+		if (ret)
+			return -1;
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen);
+ ovpn->sa_family = result->ai_family;
+
+ ovpn->peer_ip_set = true;
+ }
+
+ ret = 0;
+out:
+	if (result)
+		freeaddrinfo(result);
+ return ret;
+}
+
+static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id,
+ const char *raddr, const char *rport,
+ const char *vpnip)
+{
+	errno = 0;
+	ovpn->peer_id = strtoul(peer_id, NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ return ovpn_parse_remote(ovpn, raddr, rport, vpnip);
+}
+
+static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn)
+{
+	int slot;
+
+	errno = 0;
+	slot = strtoul(arg, NULL, 10);
+
+ if (errno == ERANGE || slot < 1 || slot > 2) {
+ fprintf(stderr, "key slot out of range\n");
+ return -1;
+ }
+
+ switch (slot) {
+ case 1:
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ break;
+ case 2:
+ ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY;
+ break;
+ }
+
+ return 0;
+}
+
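+/* The TCP test messages use OpenVPN's stream framing: a 2-byte
+ * network-order length header followed by that many payload bytes
+ * (here a fixed 1000-byte dummy packet).
+ */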
+static int ovpn_send_tcp_data(int socket)
+{
+ uint16_t len = htons(1000);
+ uint8_t buf[1002];
+ int ret;
+
+ memcpy(buf, &len, sizeof(len));
+ memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len));
+
+ ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+	fprintf(stdout, "Sent %d bytes over TCP socket\n", ret);
+
+ return ret > 0 ? 0 : ret;
+}
+
+static int ovpn_recv_tcp_data(int socket)
+{
+ uint8_t buf[1002];
+ uint16_t len;
+ int ret;
+
+ ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ if (ret < 2) {
+ fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret);
+ return ret;
+ }
+
+ memcpy(&len, buf, sizeof(len));
+ len = ntohs(len);
+
+	fprintf(stdout, ">>>> Received %d bytes over TCP socket, header: %u\n",
+		ret, len);
+
+ return 0;
+}
+
+static enum ovpn_cmd ovpn_parse_cmd(const char *cmd)
+{
+ if (!strcmp(cmd, "new_iface"))
+ return CMD_NEW_IFACE;
+
+ if (!strcmp(cmd, "del_iface"))
+ return CMD_DEL_IFACE;
+
+ if (!strcmp(cmd, "listen"))
+ return CMD_LISTEN;
+
+ if (!strcmp(cmd, "connect"))
+ return CMD_CONNECT;
+
+ if (!strcmp(cmd, "new_peer"))
+ return CMD_NEW_PEER;
+
+ if (!strcmp(cmd, "new_multi_peer"))
+ return CMD_NEW_MULTI_PEER;
+
+ if (!strcmp(cmd, "set_peer"))
+ return CMD_SET_PEER;
+
+ if (!strcmp(cmd, "del_peer"))
+ return CMD_DEL_PEER;
+
+ if (!strcmp(cmd, "get_peer"))
+ return CMD_GET_PEER;
+
+ if (!strcmp(cmd, "new_key"))
+ return CMD_NEW_KEY;
+
+ if (!strcmp(cmd, "del_key"))
+ return CMD_DEL_KEY;
+
+ if (!strcmp(cmd, "get_key"))
+ return CMD_GET_KEY;
+
+ if (!strcmp(cmd, "swap_keys"))
+ return CMD_SWAP_KEYS;
+
+ if (!strcmp(cmd, "listen_mcast"))
+ return CMD_LISTEN_MCAST;
+
+ return CMD_INVALID;
+}
+
+/* Send the process to the background and wait for a signal.
+ *
+ * This helper is called at the end of commands that create
+ * sockets, so that the sockets stay alive along with the
+ * process that created them.
+ *
+ * A signal is expected to be delivered in order to terminate
+ * the waiting process.
+ */
+static void ovpn_waitbg(void)
+{
+ daemon(1, 1);
+ pause();
+}
+
+static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
+{
+ char peer_id[10], vpnip[INET6_ADDRSTRLEN], raddr[128], rport[10];
+ int n, ret;
+ FILE *fp;
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ ret = ovpn_new_iface(ovpn);
+ break;
+ case CMD_DEL_IFACE:
+ ret = ovpn_del_iface(ovpn);
+ break;
+ case CMD_LISTEN:
+ ret = ovpn_listen(ovpn, ovpn->sa_family);
+ if (ret < 0) {
+ fprintf(stderr, "cannot listen on TCP socket\n");
+ return ret;
+ }
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ int num_peers = 0;
+
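+		/* one peer per line: "<peer_id> <vpnaddr>" */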
+		while ((n = fscanf(fp, "%9s %45s\n", peer_id, vpnip)) == 2) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ if (num_peers == MAX_PEERS) {
+ fprintf(stderr, "max peers reached!\n");
+ return -E2BIG;
+ }
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.sa_family = ovpn->sa_family;
+
+ peer_ctx.socket = ovpn_accept(ovpn);
+ if (peer_ctx.socket < 0) {
+ fprintf(stderr, "cannot accept connection!\n");
+ return -1;
+ }
+
+ /* store peer sockets to test TCP I/O */
+ ovpn->cli_sockets[num_peers] = peer_ctx.socket;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL,
+ NULL, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, true);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s\n",
+ peer_id, vpnip);
+ return ret;
+ }
+ num_peers++;
+ }
+
+ for (int i = 0; i < num_peers; i++) {
+ ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]);
+ if (ret < 0)
+ break;
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_CONNECT:
+ ret = ovpn_connect(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect TCP socket\n");
+ return ret;
+ }
+
+ ret = ovpn_new_peer(ovpn, true);
+ if (ret < 0) {
+ fprintf(stderr, "cannot add peer to VPN\n");
+ close(ovpn->socket);
+ return ret;
+ }
+
+ if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) {
+ ret = ovpn_new_key(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot set key\n");
+ return ret;
+ }
+ }
+
+ ret = ovpn_send_tcp_data(ovpn->socket);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ ret = ovpn_new_peer(ovpn, false);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_MULTI_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
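+		/* one peer per line: "<peer_id> <raddr> <rport> <vpnaddr>" */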
+		while ((n = fscanf(fp, "%9s %127s %9s %45s\n", peer_id, raddr,
+				   rport, vpnip)) == 4) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.socket = ovpn->socket;
+ peer_ctx.sa_family = AF_UNSPEC;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr,
+ rport, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, false);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s %s %s\n",
+ peer_id, raddr, rport, vpnip);
+ return ret;
+ }
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_SET_PEER:
+ ret = ovpn_set_peer(ovpn);
+ break;
+ case CMD_DEL_PEER:
+ ret = ovpn_del_peer(ovpn);
+ break;
+ case CMD_GET_PEER:
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ fprintf(stderr, "List of peers connected to: %s\n",
+ ovpn->ifname);
+
+ ret = ovpn_get_peer(ovpn);
+ break;
+ case CMD_NEW_KEY:
+ ret = ovpn_new_key(ovpn);
+ break;
+ case CMD_DEL_KEY:
+ ret = ovpn_del_key(ovpn);
+ break;
+ case CMD_GET_KEY:
+ ret = ovpn_get_key(ovpn);
+ break;
+ case CMD_SWAP_KEYS:
+ ret = ovpn_swap_keys(ovpn);
+ break;
+ case CMD_LISTEN_MCAST:
+ ret = ovpn_listen_mcast();
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return ret;
+}
+
+static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
+{
+ int ret;
+
+ /* no args required for LISTEN_MCAST */
+ if (ovpn->cmd == CMD_LISTEN_MCAST)
+ return 0;
+
+ /* all commands need an ifname */
+ if (argc < 3)
+ return -EINVAL;
+
+ strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1);
+ ovpn->ifname[IFNAMSIZ - 1] = '\0';
+
+	/* all commands except NEW_IFACE need an ifindex */
+ if (ovpn->cmd != CMD_NEW_IFACE) {
+ ovpn->ifindex = if_nametoindex(ovpn->ifname);
+ if (!ovpn->ifindex) {
+ fprintf(stderr, "cannot find interface: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ if (argc < 4)
+ break;
+
+ if (!strcmp(argv[3], "P2P")) {
+ ovpn->mode = OVPN_MODE_P2P;
+ } else if (!strcmp(argv[3], "MP")) {
+ ovpn->mode = OVPN_MODE_MP;
+ } else {
+ fprintf(stderr, "Cannot parse iface mode: %s\n",
+ argv[3]);
+ return -1;
+ }
+ ovpn->mode_set = true;
+ break;
+ case CMD_DEL_IFACE:
+ break;
+ case CMD_LISTEN:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+
+ if (argc > 5 && !strcmp(argv[5], "ipv6"))
+ ovpn->sa_family = AF_INET6;
+ break;
+ case CMD_CONNECT:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->sa_family = AF_INET;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5],
+ NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Cannot parse remote peer data\n");
+ return -1;
+ }
+
+ if (argc > 6) {
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ ovpn->key_id = 0;
+ ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ ovpn->key_dir = KEY_DIR_OUT;
+
+ ret = ovpn_parse_key(argv[6], ovpn);
+ if (ret)
+ return -1;
+ }
+ break;
+ case CMD_NEW_PEER:
+ if (argc < 7)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ const char *vpnip = (argc > 7) ? argv[7] : NULL;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6],
+ vpnip);
+ if (ret < 0)
+ return -1;
+ break;
+ case CMD_NEW_MULTI_PEER:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+ break;
+ case CMD_SET_PEER:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_interval = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+				"keepalive timeout value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_DEL_PEER:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_GET_PEER:
+ ovpn->peer_id = PEER_ID_UNDEF;
+ if (argc > 3) {
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ }
+ break;
+ case CMD_NEW_KEY:
+ if (argc < 9)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return -1;
+
+ ovpn->key_id = strtoul(argv[5], NULL, 10);
+		if (errno == ERANGE || ovpn->key_id > 7) {
+ fprintf(stderr, "key ID out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_cipher(argv[6], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key_direction(argv[7], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key(argv[8], ovpn);
+ if (ret)
+ return -1;
+ break;
+ case CMD_DEL_KEY:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+		/* slot is optional; PRIMARY if omitted */
+		ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+		if (argc > 4) {
+			ret = ovpn_parse_key_slot(argv[4], ovpn);
+			if (ret)
+				return ret;
+		}
+		break;
+ case CMD_GET_KEY:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_SWAP_KEYS:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_LISTEN_MCAST:
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct ovpn_ctx ovpn;
+ int ret;
+
+ if (argc < 2) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ memset(&ovpn, 0, sizeof(ovpn));
+ ovpn.sa_family = AF_INET;
+ ovpn.cipher = OVPN_CIPHER_ALG_NONE;
+
+ ovpn.cmd = ovpn_parse_cmd(argv[1]);
+ if (ovpn.cmd == CMD_INVALID) {
+ fprintf(stderr, "Error: unknown command.\n\n");
+ usage(argv[0]);
+ return -1;
+ }
+
+ ret = ovpn_parse_cmd_args(&ovpn, argc, argv);
+ if (ret < 0) {
+ fprintf(stderr, "Error: invalid arguments.\n\n");
+ if (ret == -EINVAL)
+ usage(argv[0]);
+ return ret;
+ }
+
+ ret = ovpn_run_cmd(&ovpn);
+ if (ret)
+ fprintf(stderr, "Cannot execute command: %s (%d)\n",
+ strerror(-ret), ret);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt
new file mode 100644
index 000000000000..d753eebe8716
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt
@@ -0,0 +1,5 @@
+1 5.5.5.2
+2 5.5.5.3
+3 5.5.5.4
+4 5.5.5.5
+5 5.5.5.6
diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
new file mode 100755
index 000000000000..32504079a2b8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+ALG="chachapoly"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
new file mode 100755
index 000000000000..093d44772ffd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test-close-socket.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
new file mode 100755
index 000000000000..5e48a8b67928
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
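+# Basic data-path run (ping + iperf3) that stops right after the
+# traffic phase; cleanup then closes the transport sockets while the
+# peers are still configured.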
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh
new file mode 100755
index 000000000000..ba5d725e18b0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-float.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+FLOAT="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh
new file mode 100755
index 000000000000..ba3f1f315a34
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
new file mode 100755
index 000000000000..7b62897b0240
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
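+# End-to-end ovpn exercise: create a server plus NUM_PEERS client
+# namespaces, push ping and iperf3 traffic, rotate data-channel keys,
+# then walk through the peer and key query/teardown paths.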
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+if [ "$FLOAT" == "1" ]; then
+ # make clients float..
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p}
+ done
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1
+ done
+fi
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+echo "Adding secondary key and then swap:"
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p}
+done
+
+sleep 1
+
+echo "Querying all peers:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0
+ip netns exec peer1 ${OVPN_CLI} get_peer tun1
+
+echo "Querying peer 1:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1
+
+echo "Querying non-existent peer 10:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true
+
+echo "Deleting peer 1:"
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1
+ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1
+
+echo "Querying keys:"
+for p in $(seq 2 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2
+done
+
+echo "Deleting peer while sending traffic:"
+(ip netns exec peer2 ping -qf -w 4 5.5.5.1)&
+sleep 2
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2
+# following command fails in TCP mode
+# (both ends get conn reset when one peer disconnects)
+ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true
+
+echo "Deleting keys:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2
+done
+
+echo "Setting timeout to 3s MP:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0
+done
+# wait for peers to timeout
+sleep 5
+
+echo "Setting timeout to 3s P2P:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3
+done
+sleep 5
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt
new file mode 100644
index 000000000000..32f14bd9347a
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/udp_peers.txt
@@ -0,0 +1,5 @@
+1 10.10.1.2 1 5.5.5.2
+2 10.10.2.2 1 5.5.5.3
+3 10.10.3.2 1 5.5.5.4
+4 10.10.4.2 1 5.5.5.5
+5 10.10.5.2 1 5.5.5.6
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index b8475cb29be7..1c43401a1c80 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -9,7 +9,6 @@
#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
-#include <linux/dccp.h>
#include <linux/in.h>
#include <linux/unistd.h>
#include <stdbool.h>
@@ -21,10 +20,6 @@
#include <sys/socket.h>
#include <unistd.h>
-#ifndef SOL_DCCP
-#define SOL_DCCP 269
-#endif
-
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
@@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
error(1, errno, "tcp: failed to listen on receive port");
- else if (proto == SOCK_DCCP) {
- if (setsockopt(rcv_fds[i], SOL_DCCP,
- DCCP_SOCKOPT_SERVICE,
- &(int) {htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
- if (listen(rcv_fds[i], 10))
- error(1, errno, "dccp: failed to listen on receive port");
- }
}
}
@@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto)
if (fd < 0)
error(1, errno, "failed to create send socket");
- if (proto == SOCK_DCCP &&
- setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
- &(int){htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
if (bind(fd, saddr, sz))
error(1, errno, "failed to bind send socket");
@@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto)
if (i < 0)
error(1, errno, "epoll_wait failed");
- if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+ if (proto == SOCK_STREAM) {
fd = accept(ev.data.fd, NULL, NULL);
if (fd < 0)
error(1, errno, "failed to accept");
@@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto,
static void test_proto(int proto, const char *proto_str)
{
- if (proto == SOCK_DCCP) {
- int test_fd;
-
- test_fd = socket(AF_INET, proto, 0);
- if (test_fd < 0) {
- if (errno == ESOCKTNOSUPPORT) {
- fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
- return;
- } else
- error(1, errno, "failed to create a DCCP socket");
- }
- close(test_fd);
- }
-
fprintf(stderr, "%s IPv4 ... ", proto_str);
run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
@@ -271,7 +238,6 @@ int main(void)
{
test_proto(SOCK_DGRAM, "UDP");
test_proto(SOCK_STREAM, "TCP");
- test_proto(SOCK_DCCP, "DCCP");
fprintf(stderr, "SUCCESS\n");
return 0;
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 02b986c9c247..9067197c9055 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -51,7 +51,9 @@ ret=0
# All tests in this script. Can be overridden with -t option.
TESTS="
neigh_suppress_arp
+ neigh_suppress_uc_arp
neigh_suppress_ns
+ neigh_suppress_uc_ns
neigh_vlan_suppress_arp
neigh_vlan_suppress_ns
"
@@ -388,6 +390,52 @@ neigh_suppress_arp()
neigh_suppress_arp_common $vid $sip $tip
}
+neigh_suppress_uc_arp_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local tip=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast ARP, per-port ARP suppression - VLAN $vid"
+ echo "-----------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_arp()
+{
+ local vid=10
+ local sip=192.0.2.1
+ local tip=192.0.2.2
+
+ neigh_suppress_uc_arp_common $vid $sip $tip
+
+ vid=20
+ sip=192.0.2.17
+ tip=192.0.2.18
+ neigh_suppress_uc_arp_common $vid $sip $tip
+}
+
neigh_suppress_ns_common()
{
local vid=$1; shift
@@ -494,6 +542,78 @@ neigh_suppress_ns()
neigh_suppress_ns_common $vid $saddr $daddr $maddr
}
+icmpv6_header_get()
+{
+ local csum=$1; shift
+ local tip=$1; shift
+ local type
+ local p
+
+ # Type 135 (Neighbor Solicitation), hex format
+ type="87"
+ p=$(:
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"$csum:"$( : ICMPv6.checksum
+ )"00:00:00:00:"$( : Reserved
+ )"$tip:"$( : Target Address
+ )
+ echo $p
+}
+
+neigh_suppress_uc_ns_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local full_dip=$1; shift
+ local csum=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast NS, per-port NS suppression - VLAN $vid"
+ echo "---------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_ns()
+{
+ local vid=10
+ local saddr=2001:db8:1::1
+ local daddr=2001:db8:1::2
+ local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02
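+	# checksum precomputed for the fixed NS payload built by
+	# icmpv6_header_get()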
+ local csum="ef:79"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+
+ vid=20
+ saddr=2001:db8:2::1
+ daddr=2001:db8:2::2
+ full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02
+ csum="ef:76"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+}
+
neigh_vlan_suppress_arp()
{
local vid1=10
@@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then
exit $ksft_skip
fi
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index cddff1772e10..589b18ed758a 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -31,6 +31,10 @@ try_modprobe act_skbedit
try_modprobe act_skbmod
try_modprobe act_tunnel_key
try_modprobe act_vlan
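+# the ife action and its metadata modules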
+try_modprobe act_ife
+try_modprobe act_meta_mark
+try_modprobe act_meta_skbtcindex
+try_modprobe act_meta_skbprio
try_modprobe cls_basic
try_modprobe cls_bpf
try_modprobe cls_cgroup