aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--.mailmap4
-rw-r--r--CREDITS12
-rw-r--r--Documentation/Makefile2
-rw-r--r--Documentation/admin-guide/mm/transhuge.rst2
-rw-r--r--Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml14
-rw-r--r--Documentation/devicetree/bindings/net/asix,ax88178.yaml4
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml32
-rw-r--r--Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.yaml50
-rw-r--r--Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml58
-rw-r--r--Documentation/devicetree/bindings/net/can/atmel-can.txt15
-rw-r--r--Documentation/devicetree/bindings/net/can/bosch,c_can.yaml10
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml18
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml6
-rw-r--r--Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/can/tcan4x5x.txt48
-rw-r--r--Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml199
-rw-r--r--Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/qcom,ipa.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/stm32-dwmac.yaml94
-rw-r--r--Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml10
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml20
-rw-r--r--Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml38
-rw-r--r--Documentation/devicetree/bindings/net/wireless/marvell,sd8787.yaml19
-rw-r--r--Documentation/netlink/specs/rt_link.yaml93
-rw-r--r--Documentation/networking/device_drivers/ethernet/intel/i40e.rst12
-rw-r--r--Documentation/networking/devlink/mlx5.rst3
-rw-r--r--Documentation/networking/diagnostic/twisted_pair_layer1_diagnostics.rst39
-rw-r--r--Documentation/networking/ethtool-netlink.rst1
-rw-r--r--Documentation/networking/multi-pf-netdev.rst4
-rw-r--r--Documentation/networking/napi.rst4
-rw-r--r--Documentation/networking/netdevices.rst10
-rw-r--r--Documentation/networking/netlink_spec/readme.txt2
-rw-r--r--Documentation/networking/xfrm_device.rst3
-rw-r--r--Documentation/userspace-api/netlink/intro-specs.rst8
-rw-r--r--MAINTAINERS28
-rw-r--r--Makefile2
-rw-r--r--drivers/bluetooth/btmtk.c7
-rw-r--r--drivers/bluetooth/btnxpuart.c1
-rw-r--r--drivers/cdrom/cdrom.c2
-rw-r--r--drivers/clk/imx/clk-imx8mp-audiomix.c3
-rw-r--r--drivers/clk/thead/clk-th1520-ap.c13
-rw-r--r--drivers/md/dm-ebs-target.c2
-rw-r--r--drivers/md/dm-thin.c5
-rw-r--r--drivers/md/dm-verity-fec.c59
-rw-r--r--drivers/md/persistent-data/dm-array.c19
-rw-r--r--drivers/net/can/dev/dev.c2
-rw-r--r--drivers/net/can/grcan.c3
-rw-r--r--drivers/net/can/kvaser_pciefd.c81
-rw-r--r--drivers/net/can/m_can/m_can.c22
-rw-r--r--drivers/net/can/m_can/m_can.h1
-rw-r--r--drivers/net/can/m_can/tcan4x5x-core.c30
-rw-r--r--drivers/net/can/m_can/tcan4x5x.h2
-rw-r--r--drivers/net/can/sun4i_can.c2
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c3
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c133
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c38
-rw-r--r--drivers/net/dsa/b53/b53_common.c7
-rw-r--r--drivers/net/dsa/b53/b53_priv.h1
-rw-r--r--drivers/net/dsa/bcm_sf2.c1
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c15
-rw-r--r--drivers/net/dsa/mt7530.c13
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c8
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c3
-rw-r--r--drivers/net/dsa/qca/qca8k-common.c7
-rw-r--r--drivers/net/dsa/qca/qca8k.h1
-rw-r--r--drivers/net/ethernet/amd/pds_core/devlink.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c38
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c8
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c64
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c14
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c96
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c45
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c9
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c41
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c9
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c120
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h7
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.c10
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c458
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb.c13
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb.h1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c231
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h40
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h11
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c53
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dpll.c35
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ptp_consts.h4
-rw-r--r--drivers/net/ethernet/intel/igb/Makefile2
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h58
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c270
-rw-r--r--drivers/net/ethernet/intel/igb/igb_xsk.c562
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_base.c6
-rw-r--r--drivers/net/ethernet/intel/igc/igc_hw.h5
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c118
-rw-r--r--drivers/net/ethernet/intel/igc/igc_nvm.c50
-rw-r--r--drivers/net/ethernet/intel/igc/igc_nvm.h2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_xdp.c8
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c23
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf.h1
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c23
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/mbx.c12
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c4
-rw-r--r--drivers/net/ethernet/mediatek/airoha_eth.c521
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c37
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h62
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c159
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c54
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c95
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c1377
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h80
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c450
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.h73
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c216
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c143
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c22
-rw-r--r--drivers/net/ethernet/meta/fbnic/Makefile1
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic.h5
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_fw.h7
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_hwmon.c81
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_mac.c22
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_mac.h7
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_pci.c3
-rw-r--r--drivers/net/ethernet/meta/fbnic/fbnic_txrx.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169.h1
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c30
-rw-r--r--drivers/net/ethernet/realtek/r8169_phy_config.c23
-rw-r--r--drivers/net/ethernet/realtek/rtase/rtase_main.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h18
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c25
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c124
-rw-r--r--drivers/net/ethernet/ti/am65-cpsw-nuss.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw_new.c2
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c24
-rw-r--r--drivers/net/ieee802154/ca8210.c6
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c6
-rw-r--r--drivers/net/mctp/mctp-i3c.c4
-rw-r--r--drivers/net/netconsole.c2
-rw-r--r--drivers/net/netdevsim/netdev.c259
-rw-r--r--drivers/net/netdevsim/netdevsim.h5
-rw-r--r--drivers/net/netkit.c66
-rw-r--r--drivers/net/pcs/pcs-lynx.c13
-rw-r--r--drivers/net/pcs/pcs-mtk-lynxi.c4
-rw-r--r--drivers/net/pcs/pcs-xpcs.c5
-rw-r--r--drivers/net/phy/Kconfig4
-rw-r--r--drivers/net/phy/dp83822.c277
-rw-r--r--drivers/net/phy/dp83td510.c112
-rw-r--r--drivers/net/phy/dp83tg720.c161
-rw-r--r--drivers/net/phy/micrel.c2
-rw-r--r--drivers/net/phy/phy.c90
-rw-r--r--drivers/net/phy/phy_device.c2
-rw-r--r--drivers/net/phy/phylink.c11
-rw-r--r--drivers/nfc/st21nfca/dep.c18
-rw-r--r--drivers/nvme/host/nvme.h5
-rw-r--r--drivers/nvme/host/pci.c9
-rw-r--r--drivers/nvme/host/tcp.c18
-rw-r--r--drivers/nvme/target/admin-cmd.c9
-rw-r--r--drivers/nvme/target/configfs.c23
-rw-r--r--drivers/nvme/target/core.c108
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c2
-rw-r--r--drivers/nvme/target/nvmet.h7
-rw-r--r--drivers/nvme/target/pr.c8
-rw-r--r--drivers/ufs/core/ufshcd-priv.h6
-rw-r--r--drivers/ufs/core/ufshcd.c10
-rw-r--r--drivers/ufs/host/ufs-qcom.c31
-rw-r--r--drivers/ufs/host/ufs-qcom.h5
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c17
-rw-r--r--drivers/watchdog/stm32_iwdg.c2
-rw-r--r--fs/9p/vfs_addr.c6
-rw-r--r--fs/afs/write.c5
-rw-r--r--fs/btrfs/ioctl.c122
-rw-r--r--fs/btrfs/scrub.c4
-rw-r--r--fs/btrfs/zlib.c4
-rw-r--r--fs/btrfs/zoned.c5
-rw-r--r--fs/cachefiles/daemon.c14
-rw-r--r--fs/cachefiles/internal.h3
-rw-r--r--fs/cachefiles/security.c6
-rw-r--r--fs/exfat/dir.c3
-rw-r--r--fs/exfat/fatent.c10
-rw-r--r--fs/exfat/file.c6
-rw-r--r--fs/exfat/namei.c4
-rw-r--r--fs/file.c1
-rw-r--r--fs/fuse/dir.c2
-rw-r--r--fs/hfs/super.c4
-rw-r--r--fs/iomap/buffered-io.c66
-rw-r--r--fs/jbd2/commit.c4
-rw-r--r--fs/jbd2/revoke.c2
-rw-r--r--fs/namespace.c10
-rw-r--r--fs/netfs/buffered_read.c28
-rw-r--r--fs/netfs/direct_write.c1
-rw-r--r--fs/netfs/read_collect.c33
-rw-r--r--fs/netfs/read_pgpriv2.c4
-rw-r--r--fs/netfs/read_retry.c8
-rw-r--r--fs/netfs/write_collect.c14
-rw-r--r--fs/netfs/write_issue.c2
-rw-r--r--fs/nfs/fscache.c9
-rw-r--r--fs/notify/fdinfo.c4
-rw-r--r--fs/ocfs2/quota_global.c2
-rw-r--r--fs/ocfs2/quota_local.c1
-rw-r--r--fs/overlayfs/copy_up.c16
-rw-r--r--fs/overlayfs/export.c49
-rw-r--r--fs/overlayfs/namei.c4
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/qnx6/inode.c11
-rw-r--r--fs/smb/client/cifssmb.c13
-rw-r--r--fs/smb/client/smb2pdu.c9
-rw-r--r--include/linux/ethtool.h23
-rw-r--r--include/linux/if_vlan.h13
-rw-r--r--include/linux/io_uring/cmd.h10
-rw-r--r--include/linux/iomap.h2
-rw-r--r--include/linux/memfd.h14
-rw-r--r--include/linux/mlx5/driver.h1
-rw-r--r--include/linux/mlx5/mlx5_ifc.h1
-rw-r--r--include/linux/mm.h59
-rw-r--r--include/linux/mm_types.h30
-rw-r--r--include/linux/netdevice.h6
-rw-r--r--include/linux/netfs.h7
-rw-r--r--include/linux/pcs/pcs-xpcs.h1
-rw-r--r--include/linux/percpu-defs.h5
-rw-r--r--include/linux/phy.h58
-rw-r--r--include/linux/phylib_stubs.h42
-rw-r--r--include/linux/phylink.h3
-rw-r--r--include/linux/rtnetlink.h6
-rw-r--r--include/net/addrconf.h3
-rw-r--r--include/net/ax25.h10
-rw-r--r--include/net/dsa.h2
-rw-r--r--include/net/inet_connection_sock.h2
-rw-r--r--include/net/inet_sock.h6
-rw-r--r--include/net/xfrm.h44
-rw-r--r--include/uapi/linux/ethtool.h2
-rw-r--r--include/uapi/linux/ethtool_netlink.h14
-rw-r--r--include/uapi/linux/if_link.h2
-rw-r--r--include/uapi/linux/in.h2
-rw-r--r--include/uapi/linux/ip.h16
-rw-r--r--include/uapi/linux/ipsec.h3
-rw-r--r--include/uapi/linux/rtnetlink.h8
-rw-r--r--include/uapi/linux/snmp.h2
-rw-r--r--include/uapi/linux/xfrm.h9
-rw-r--r--include/ufs/ufshcd.h2
-rw-r--r--io_uring/io_uring.c2
-rw-r--r--io_uring/kbuf.c4
-rw-r--r--io_uring/net.c1
-rw-r--r--io_uring/opdef.c3
-rw-r--r--io_uring/rw.c2
-rw-r--r--io_uring/timeout.c45
-rw-r--r--io_uring/uring_cmd.c23
-rw-r--r--io_uring/uring_cmd.h4
-rwxr-xr-xkernel/gen_kheaders.sh1
-rw-r--r--kernel/kcov.c2
-rw-r--r--kernel/sched/ext.c4
-rw-r--r--kernel/workqueue.c23
-rw-r--r--lib/maple_tree.c1
-rw-r--r--mm/damon/core.c10
-rw-r--r--mm/filemap.c9
-rw-r--r--mm/hugetlb.c16
-rw-r--r--mm/internal.h6
-rw-r--r--mm/khugepaged.c3
-rw-r--r--mm/kmemleak.c2
-rw-r--r--mm/list_lru.c2
-rw-r--r--mm/memfd.c2
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/readahead.c6
-rw-r--r--mm/shmem.c7
-rw-r--r--mm/util.c7
-rw-r--r--mm/vmscan.c9
-rw-r--r--mm/zswap.c19
-rw-r--r--net/802/psnap.c4
-rw-r--r--net/ax25/af_ax25.c12
-rw-r--r--net/ax25/ax25_dev.c4
-rw-r--r--net/ax25/ax25_ip.c3
-rw-r--r--net/ax25/ax25_out.c22
-rw-r--r--net/ax25/ax25_route.c2
-rw-r--r--net/bluetooth/hci_sync.c11
-rw-r--r--net/bluetooth/mgmt.c38
-rw-r--r--net/bluetooth/rfcomm/tty.c4
-rw-r--r--net/bridge/br_arp_nd_proxy.c2
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/core/dev.c186
-rw-r--r--net/core/dev.h4
-rw-r--r--net/core/link_watch.c10
-rw-r--r--net/core/netdev-genl.c18
-rw-r--r--net/core/netdev_rx_queue.c1
-rw-r--r--net/core/rtnetlink.c11
-rw-r--r--net/dsa/user.c8
-rw-r--r--net/ethtool/common.c5
-rw-r--r--net/ethtool/linkstate.c26
-rw-r--r--net/ethtool/netlink.h1
-rw-r--r--net/ethtool/stats.c55
-rw-r--r--net/ethtool/strset.c5
-rw-r--r--net/hsr/hsr_main.h1
-rw-r--r--net/hsr/hsr_slave.c5
-rw-r--r--net/ipv4/esp4.c3
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/addrconf.c6
-rw-r--r--net/ipv6/anycast.c35
-rw-r--r--net/ipv6/esp6.c3
-rw-r--r--net/mac802154/iface.c4
-rw-r--r--net/mptcp/ctrl.c17
-rw-r--r--net/mptcp/protocol.c8
-rw-r--r--net/netfilter/nf_conntrack_core.c5
-rw-r--r--net/netfilter/nf_tables_api.c15
-rw-r--r--net/netfilter/nft_xfrm.c3
-rw-r--r--net/rds/tcp.c39
-rw-r--r--net/sched/cls_flow.c3
-rw-r--r--net/sched/sch_cake.c140
-rw-r--r--net/sched/sch_generic.c55
-rw-r--r--net/sctp/protocol.c10
-rw-r--r--net/sctp/sysctl.c14
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/smc/smc_core.c2
-rw-r--r--net/smc/smc_rx.c39
-rw-r--r--net/smc/smc_rx.h8
-rw-r--r--net/socket.c2
-rw-r--r--net/tls/tls_main.c4
-rw-r--r--net/tls/tls_sw.c2
-rw-r--r--net/xfrm/Kconfig16
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/trace_iptfs.h218
-rw-r--r--net/xfrm/xfrm_compat.c10
-rw-r--r--net/xfrm/xfrm_device.c4
-rw-r--r--net/xfrm/xfrm_input.c27
-rw-r--r--net/xfrm/xfrm_iptfs.c2764
-rw-r--r--net/xfrm/xfrm_output.c6
-rw-r--r--net/xfrm/xfrm_policy.c26
-rw-r--r--net/xfrm/xfrm_proc.c2
-rw-r--r--net/xfrm/xfrm_replay.c1
-rw-r--r--net/xfrm/xfrm_state.c84
-rw-r--r--net/xfrm/xfrm_user.c77
-rw-r--r--rust/kernel/workqueue.rs18
-rwxr-xr-xscripts/mksysmap4
-rw-r--r--scripts/mod/file2alias.c36
-rw-r--r--scripts/mod/modpost.c24
-rw-r--r--scripts/mod/modpost.h14
-rw-r--r--scripts/package/PKGBUILD2
-rw-r--r--scripts/sorttable.h5
-rw-r--r--security/selinux/avc.c61
-rw-r--r--security/selinux/hooks.c6
-rw-r--r--security/selinux/include/avc.h5
-rw-r--r--security/selinux/include/security.h3
-rw-r--r--security/selinux/ss/services.c28
-rw-r--r--tools/include/uapi/linux/if_link.h2
-rw-r--r--tools/net/ynl/Makefile29
-rw-r--r--tools/net/ynl/generated/.gitignore1
-rw-r--r--tools/net/ynl/generated/Makefile51
-rw-r--r--tools/net/ynl/lib/.gitignore1
-rw-r--r--tools/net/ynl/lib/Makefile1
-rw-r--r--tools/net/ynl/pyproject.toml24
-rw-r--r--tools/net/ynl/pyynl/.gitignore2
-rw-r--r--tools/net/ynl/pyynl/__init__.py0
-rwxr-xr-xtools/net/ynl/pyynl/cli.py (renamed from tools/net/ynl/cli.py)45
-rwxr-xr-xtools/net/ynl/pyynl/ethtool.py (renamed from tools/net/ynl/ethtool.py)7
-rw-r--r--tools/net/ynl/pyynl/lib/__init__.py (renamed from tools/net/ynl/lib/__init__.py)0
-rw-r--r--tools/net/ynl/pyynl/lib/nlspec.py (renamed from tools/net/ynl/lib/nlspec.py)5
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py (renamed from tools/net/ynl/lib/ynl.py)74
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py (renamed from tools/net/ynl/ynl-gen-c.py)38
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py (renamed from tools/net/ynl/ynl-gen-rst.py)0
-rwxr-xr-xtools/net/ynl/ynl-regen.sh2
-rw-r--r--tools/sched_ext/include/scx/common.bpf.h6
-rw-r--r--tools/sched_ext/scx_central.c2
-rw-r--r--tools/testing/selftests/bpf/Makefile1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c87
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c4
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh58
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c3
-rw-r--r--tools/testing/selftests/drivers/net/Makefile2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile2
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan.sh99
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh96
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config1
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py1
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh225
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh218
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_overflow.sh67
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py94
-rw-r--r--tools/testing/selftests/kselftest/ktap_helpers.sh15
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c43
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py5
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py4
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py19
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh23
-rw-r--r--tools/testing/selftests/net/ynl.mk3
-rw-r--r--tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c2
-rw-r--r--tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c4
-rw-r--r--tools/testing/selftests/sched_ext/dsp_local_on.bpf.c7
-rw-r--r--tools/testing/selftests/sched_ext/dsp_local_on.c5
-rw-r--r--tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c2
-rw-r--r--tools/testing/selftests/sched_ext/exit.bpf.c4
-rw-r--r--tools/testing/selftests/sched_ext/maximal.bpf.c8
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c2
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c2
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c2
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c2
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c4
-rw-r--r--tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c8
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/flow.json4
436 files changed, 12117 insertions, 3967 deletions
diff --git a/.mailmap b/.mailmap
index 7efe43237ca8..9b620deda3fc 100644
--- a/.mailmap
+++ b/.mailmap
@@ -435,7 +435,7 @@ Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
-Mathieu Othacehe <m.othacehe@gmail.com> <othacehe@gnu.org>
+Mathieu Othacehe <othacehe@gnu.org> <m.othacehe@gmail.com>
Mat Martineau <martineau@kernel.org> <mathew.j.martineau@linux.intel.com>
Mat Martineau <martineau@kernel.org> <mathewm@codeaurora.org>
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
@@ -529,6 +529,8 @@ Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
Oleksij Rempel <o.rempel@pengutronix.de>
Oleksij Rempel <o.rempel@pengutronix.de> <ore@pengutronix.de>
+Oliver Hartkopp <socketcan@hartkopp.net> <oliver.hartkopp@volkswagen.de>
+Oliver Hartkopp <socketcan@hartkopp.net> <oliver@hartkopp.net>
Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
Ondřej Jirman <megi@xff.cz> <megous@megous.com>
Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org>
diff --git a/CREDITS b/CREDITS
index b1777b53c63a..cda68f04d5f1 100644
--- a/CREDITS
+++ b/CREDITS
@@ -20,6 +20,10 @@ N: Thomas Abraham
E: thomas.ab@samsung.com
D: Samsung pin controller driver
+N: Jose Abreu
+E: jose.abreu@synopsys.com
+D: Synopsys DesignWare XPCS MDIO/PCS driver.
+
N: Dragos Acostachioaie
E: dragos@iname.com
W: http://www.arbornet.org/~dragos
@@ -1428,6 +1432,10 @@ S: 8124 Constitution Apt. 7
S: Sterling Heights, Michigan 48313
S: USA
+N: Andy Gospodarek
+E: andy@greyhouse.net
+D: Maintenance and contributions to the network interface bonding driver.
+
N: Wolfgang Grandegger
E: wg@grandegger.com
D: Controller Area Network (device drivers)
@@ -1812,6 +1820,10 @@ D: Author/maintainer of most DRM drivers (especially ATI, MGA)
D: Core DRM templates, general DRM and 3D-related hacking
S: No fixed address
+N: Woojung Huh
+E: woojung.huh@microchip.com
+D: Microchip LAN78XX USB Ethernet driver
+
N: Kenn Humborg
E: kenn@wombat.ie
D: Mods to loop device to support sparse backing files
diff --git a/Documentation/Makefile b/Documentation/Makefile
index fa71602ec961..52c6c5a3efa9 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -104,7 +104,7 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4)
YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst
YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec
YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs
-YNL_TOOL:=$(srctree)/tools/net/ynl/ynl-gen-rst.py
+YNL_TOOL:=$(srctree)/tools/net/ynl/pyynl/ynl_gen_rst.py
YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml))
YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP))
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 5034915f4e8e..8872203df088 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -436,7 +436,7 @@ AnonHugePmdMapped).
The number of file transparent huge pages mapped to userspace is available
by reading ShmemPmdMapped and ShmemHugePages fields in ``/proc/meminfo``.
To identify what applications are mapping file transparent huge pages, it
-is necessary to read ``/proc/PID/smaps`` and count the FileHugeMapped fields
+is necessary to read ``/proc/PID/smaps`` and count the FilePmdMapped fields
for each mapping.
Note that reading the smaps file is expensive and reading it
diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
index d1e2bca3c503..798a4c19f18c 100644
--- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
@@ -166,11 +166,11 @@ unevaluatedProperties: false
examples:
- |
ethmac: ethernet@c9410000 {
- compatible = "amlogic,meson-gxbb-dwmac", "snps,dwmac";
- reg = <0xc9410000 0x10000>, <0xc8834540 0x8>;
- interrupts = <8>;
- interrupt-names = "macirq";
- clocks = <&clk_eth>, <&clk_fclk_div2>, <&clk_mpll2>, <&clk_fclk_div2>;
- clock-names = "stmmaceth", "clkin0", "clkin1", "timing-adjustment";
- phy-mode = "rgmii";
+ compatible = "amlogic,meson-gxbb-dwmac", "snps,dwmac";
+ reg = <0xc9410000 0x10000>, <0xc8834540 0x8>;
+ interrupts = <8>;
+ interrupt-names = "macirq";
+ clocks = <&clk_eth>, <&clk_fclk_div2>, <&clk_mpll2>, <&clk_fclk_div2>;
+ clock-names = "stmmaceth", "clkin0", "clkin1", "timing-adjustment";
+ phy-mode = "rgmii";
};
diff --git a/Documentation/devicetree/bindings/net/asix,ax88178.yaml b/Documentation/devicetree/bindings/net/asix,ax88178.yaml
index 768504ccbf74..03341b7438d5 100644
--- a/Documentation/devicetree/bindings/net/asix,ax88178.yaml
+++ b/Documentation/devicetree/bindings/net/asix,ax88178.yaml
@@ -63,8 +63,8 @@ examples:
#size-cells = <0>;
ethernet@1 {
- compatible = "usbb95,772b";
- reg = <1>;
+ compatible = "usbb95,772b";
+ reg = <1>;
};
};
};
diff --git a/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml b/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
index 7c90a4390531..0e3fb4e42e3f 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
@@ -85,16 +85,16 @@ examples:
#size-cells = <1>;
mdio0: mdio@e14 {
- compatible = "brcm,genet-mdio-v4";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0xe14 0x8>;
+ compatible = "brcm,genet-mdio-v4";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xe14 0x8>;
- phy1: ethernet-phy@1 {
+ phy1: ethernet-phy@1 {
max-speed = <1000>;
reg = <1>;
compatible = "ethernet-phy-ieee802.3-c22";
- };
+ };
};
};
@@ -110,10 +110,10 @@ examples:
interrupts = <0x0 0x16 0x0>, <0x0 0x17 0x0>;
mdio1: mdio@e14 {
- compatible = "brcm,genet-mdio-v4";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0xe14 0x8>;
+ compatible = "brcm,genet-mdio-v4";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xe14 0x8>;
};
};
@@ -129,15 +129,15 @@ examples:
interrupts = <0x0 0x18 0x0>, <0x0 0x19 0x0>;
mdio2: mdio@e14 {
- compatible = "brcm,genet-mdio-v4";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0xe14 0x8>;
+ compatible = "brcm,genet-mdio-v4";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xe14 0x8>;
- phy0: ethernet-phy@0 {
+ phy0: ethernet-phy@0 {
max-speed = <1000>;
reg = <0>;
compatible = "ethernet-phy-ieee802.3-c22";
- };
+ };
};
};
diff --git a/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.yaml b/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.yaml
index af96b4fd89d5..3f27746d9a56 100644
--- a/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,mdio-mux-iproc.yaml
@@ -38,43 +38,43 @@ unevaluatedProperties: false
examples:
- |
- mdio_mux_iproc: mdio-mux@66020000 {
+ mdio-mux@66020000 {
compatible = "brcm,mdio-mux-iproc";
reg = <0x66020000 0x250>;
#address-cells = <1>;
#size-cells = <0>;
mdio@0 {
- reg = <0x0>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- pci_phy0: pci-phy@0 {
- compatible = "brcm,ns2-pcie-phy";
- reg = <0x0>;
- #phy-cells = <0>;
- };
+ reg = <0x0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pci-phy@0 {
+ compatible = "brcm,ns2-pcie-phy";
+ reg = <0x0>;
+ #phy-cells = <0>;
+ };
};
mdio@7 {
- reg = <0x7>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- pci_phy1: pci-phy@0 {
- compatible = "brcm,ns2-pcie-phy";
- reg = <0x0>;
- #phy-cells = <0>;
- };
+ reg = <0x7>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pci-phy@0 {
+ compatible = "brcm,ns2-pcie-phy";
+ reg = <0x0>;
+ #phy-cells = <0>;
+ };
};
mdio@10 {
- reg = <0x10>;
- #address-cells = <1>;
- #size-cells = <0>;
+ reg = <0x10>;
+ #address-cells = <1>;
+ #size-cells = <0>;
- gphy0: eth-phy@10 {
- reg = <0x10>;
- };
+ eth-phy@10 {
+ reg = <0x10>;
+ };
};
};
diff --git a/Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml b/Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml
new file mode 100644
index 000000000000..c818c01a718b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/can/atmel,at91sam9263-can.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip AT91 CAN Controller
+
+maintainers:
+ - Nicolas Ferre <nicolas.ferre@microchip.com>
+
+allOf:
+ - $ref: can-controller.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - atmel,at91sam9263-can
+ - atmel,at91sam9x5-can
+ - items:
+ - enum:
+ - microchip,sam9x60-can
+ - const: atmel,at91sam9x5-can
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: can_clk
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/at91.h>
+ can@f000c000 {
+ compatible = "atmel,at91sam9263-can";
+ reg = <0xf000c000 0x300>;
+ interrupts = <30 IRQ_TYPE_LEVEL_HIGH 3>;
+ clocks = <&pmc PMC_TYPE_PERIPHERAL 12>;
+ clock-names = "can_clk";
+ };
diff --git a/Documentation/devicetree/bindings/net/can/atmel-can.txt b/Documentation/devicetree/bindings/net/can/atmel-can.txt
deleted file mode 100644
index 218a3b3eb27e..000000000000
--- a/Documentation/devicetree/bindings/net/can/atmel-can.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-* AT91 CAN *
-
-Required properties:
- - compatible: Should be "atmel,at91sam9263-can", "atmel,at91sam9x5-can" or
- "microchip,sam9x60-can"
- - reg: Should contain CAN controller registers location and length
- - interrupts: Should contain IRQ line for the CAN controller
-
-Example:
-
- can0: can@f000c000 {
- compatible = "atmel,at91sam9x5-can";
- reg = <0xf000c000 0x300>;
- interrupts = <40 4 5>
- };
diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
index 4d7d67ee175a..ff1b59a0294e 100644
--- a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
+++ b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
@@ -99,11 +99,11 @@ examples:
#include <dt-bindings/reset/altr,rst-mgr.h>
can@ffc00000 {
- compatible = "bosch,d_can";
- reg = <0xffc00000 0x1000>;
- interrupts = <0 131 4>, <0 132 4>, <0 133 4>, <0 134 4>;
- clocks = <&can0_clk>;
- resets = <&rst CAN0_RESET>;
+ compatible = "bosch,d_can";
+ reg = <0xffc00000 0x1000>;
+ interrupts = <0 131 4>, <0 132 4>, <0 133 4>, <0 134 4>;
+ clocks = <&can0_clk>;
+ resets = <&rst CAN0_RESET>;
};
- |
can@0 {
diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml
index db446dde6842..e0ec53bc10c6 100644
--- a/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml
+++ b/Documentation/devicetree/bindings/net/can/microchip,mcp2510.yaml
@@ -56,15 +56,15 @@ examples:
#size-cells = <0>;
can@1 {
- compatible = "microchip,mcp2515";
- reg = <1>;
- clocks = <&clk24m>;
- interrupt-parent = <&gpio4>;
- interrupts = <13 IRQ_TYPE_LEVEL_LOW>;
- vdd-supply = <&reg5v0>;
- xceiver-supply = <&reg5v0>;
- gpio-controller;
- #gpio-cells = <2>;
+ compatible = "microchip,mcp2515";
+ reg = <1>;
+ clocks = <&clk24m>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <13 IRQ_TYPE_LEVEL_LOW>;
+ vdd-supply = <&reg5v0>;
+ xceiver-supply = <&reg5v0>;
+ gpio-controller;
+ #gpio-cells = <2>;
};
};
diff --git a/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml b/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml
index 01e4d4a54df6..1219c5cb601f 100644
--- a/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml
+++ b/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml
@@ -15,7 +15,11 @@ allOf:
properties:
compatible:
- const: microchip,mpfs-can
+ oneOf:
+ - items:
+ - const: microchip,pic64gx-can
+ - const: microchip,mpfs-can
+ - const: microchip,mpfs-can
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml b/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml
index de1d4298893b..c7510b00954a 100644
--- a/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml
+++ b/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml
@@ -63,7 +63,7 @@ properties:
maxItems: 1
st,gcan:
- $ref: /schemas/types.yaml#/definitions/phandle-array
+ $ref: /schemas/types.yaml#/definitions/phandle
description:
The phandle to the gcan node which allows to access the 512-bytes
SRAM memory shared by the two bxCAN cells (CAN1 primary and CAN2
diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
deleted file mode 100644
index 20c0572c9853..000000000000
--- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-Texas Instruments TCAN4x5x CAN Controller
-================================================
-
-This file provides device node information for the TCAN4x5x interface contains.
-
-Required properties:
- - compatible:
- "ti,tcan4552", "ti,tcan4x5x"
- "ti,tcan4553", "ti,tcan4x5x" or
- "ti,tcan4x5x"
- - reg: 0
- - #address-cells: 1
- - #size-cells: 0
- - spi-max-frequency: Maximum frequency of the SPI bus the chip can
- operate at should be less than or equal to 18 MHz.
- - interrupt-parent: the phandle to the interrupt controller which provides
- the interrupt.
- - interrupts: interrupt specification for data-ready.
-
-See Documentation/devicetree/bindings/net/can/bosch,m_can.yaml for additional
-required property details.
-
-Optional properties:
- - reset-gpios: Hardwired output GPIO. If not defined then software
- reset.
- - device-state-gpios: Input GPIO that indicates if the device is in
- a sleep state or if the device is active. Not
- available with tcan4552/4553.
- - device-wake-gpios: Wake up GPIO to wake up the TCAN device. Not
- available with tcan4552/4553.
- - wakeup-source: Leave the chip running when suspended, and configure
- the RX interrupt to wake up the device.
-
-Example:
-tcan4x5x: tcan4x5x@0 {
- compatible = "ti,tcan4x5x";
- reg = <0>;
- #address-cells = <1>;
- #size-cells = <1>;
- spi-max-frequency = <10000000>;
- bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>;
- interrupt-parent = <&gpio1>;
- interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
- device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
- device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
- reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
- wakeup-source;
-};
diff --git a/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml b/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml
new file mode 100644
index 000000000000..384e15da2713
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/can/ti,tcan4x5x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments TCAN4x5x CAN Controller
+
+maintainers:
+ - Marc Kleine-Budde <mkl@pengutronix.de>
+
+properties:
+ compatible:
+ oneOf:
+ - items:
+ - enum:
+ - ti,tcan4552
+ - ti,tcan4553
+ - const: ti,tcan4x5x
+ - const: ti,tcan4x5x
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+ description: The GPIO parent interrupt.
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: cclk
+
+ reset-gpios:
+ description: Hardwired output GPIO. If not defined then software reset.
+ maxItems: 1
+
+ device-state-gpios:
+ description:
+ Input GPIO that indicates if the device is in a sleep state or if the
+ device is active. Not available with tcan4552/4553.
+ maxItems: 1
+
+ device-wake-gpios:
+ description:
+ Wake up GPIO to wake up the TCAN device.
+ Not available with tcan4552/4553.
+ maxItems: 1
+
+ bosch,mram-cfg:
+ description: |
+ Message RAM configuration data.
+ Multiple M_CAN instances can share the same Message RAM
+ and each element(e.g Rx FIFO or Tx Buffer and etc) number
+ in Message RAM is also configurable, so this property is
+ telling driver how the shared or private Message RAM are
+ used by this M_CAN controller.
+
+ The format should be as follows:
+ <offset sidf_elems xidf_elems rxf0_elems rxf1_elems rxb_elems txe_elems txb_elems>
+ The 'offset' is an address offset of the Message RAM where
+ the following elements start from. This is usually set to
+ 0x0 if you're using a private Message RAM. The remain cells
+ are used to specify how many elements are used for each FIFO/Buffer.
+
+ M_CAN includes the following elements according to user manual:
+ 11-bit Filter 0-128 elements / 0-128 words
+ 29-bit Filter 0-64 elements / 0-128 words
+ Rx FIFO 0 0-64 elements / 0-1152 words
+ Rx FIFO 1 0-64 elements / 0-1152 words
+ Rx Buffers 0-64 elements / 0-1152 words
+ Tx Event FIFO 0-32 elements / 0-64 words
+ Tx Buffers 0-32 elements / 0-576 words
+
+ Please refer to 2.4.1 Message RAM Configuration in Bosch
+ M_CAN user manual for details.
+ $ref: /schemas/types.yaml#/definitions/int32-array
+ items:
+ - description: The 'offset' is an address offset of the Message RAM where
+ the following elements start from. This is usually set to 0x0 if
+ you're using a private Message RAM.
+ default: 0
+ - description: 11-bit Filter 0-128 elements / 0-128 words
+ minimum: 0
+ maximum: 128
+ - description: 29-bit Filter 0-64 elements / 0-128 words
+ minimum: 0
+ maximum: 64
+ - description: Rx FIFO 0 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Rx FIFO 1 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Rx Buffers 0-64 elements / 0-1152 words
+ minimum: 0
+ maximum: 64
+ - description: Tx Event FIFO 0-32 elements / 0-64 words
+ minimum: 0
+ maximum: 32
+ - description: Tx Buffers 0-32 elements / 0-576 words
+ minimum: 0
+ maximum: 32
+ minItems: 1
+
+ spi-max-frequency:
+ description:
+ Must be half or less of "clocks" frequency.
+ maximum: 18000000
+
+ ti,nwkrq-voltage-vio:
+ type: boolean
+ description:
+ nWKRQ Pin GPO buffer voltage configuration.
+ Set nWKRQ to use VIO voltage rail.
+ When not set nWKRQ will use internal voltage rail.
+
+ wakeup-source:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Enable CAN remote wakeup.
+
+allOf:
+ - $ref: can-controller.yaml#
+ - $ref: /schemas/spi/spi-peripheral-props.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - ti,tcan4552
+ - ti,tcan4553
+ then:
+ properties:
+ device-state-gpios: false
+ device-wake-gpios: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+ - bosch,mram-cfg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ can@0 {
+ compatible = "ti,tcan4x5x";
+ reg = <0>;
+ clocks = <&can0_osc>;
+ clock-names = "cclk";
+ pinctrl-names = "default";
+ pinctrl-0 = <&can0_pins>;
+ spi-max-frequency = <10000000>;
+ bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
+ device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
+ device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
+ reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
+ ti,nwkrq-voltage-vio;
+ wakeup-source;
+ };
+ };
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ can@0 {
+ compatible = "ti,tcan4552", "ti,tcan4x5x";
+ reg = <0>;
+ clocks = <&can0_osc>;
+ clock-names = "cclk";
+ pinctrl-names = "default";
+ pinctrl-0 = <&can0_pins>;
+ spi-max-frequency = <10000000>;
+ bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <14 IRQ_TYPE_LEVEL_LOW>;
+ reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
+ wakeup-source;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml b/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
index a12cda8aa764..cd09560e0aea 100644
--- a/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
+++ b/Documentation/devicetree/bindings/net/pse-pd/pse-controller.yaml
@@ -81,7 +81,7 @@ properties:
List of phandles, each pointing to the power supply for the
corresponding pairset named in 'pairset-names'. This property
aligns with IEEE 802.3-2022, Section 33.2.3 and 145.2.4.
- PSE Pinout Alternatives (as per IEEE 802.3-2022 Table 145\u20133)
+ PSE Pinout Alternatives (as per IEEE 802.3-2022 Table 145-3)
|-----------|---------------|---------------|---------------|---------------|
| Conductor | Alternative A | Alternative A | Alternative B | Alternative B |
| | (MDI-X) | (MDI) | (X) | (S) |
diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 53cae71d9957..1a46d80a66e8 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -239,7 +239,7 @@ examples:
qcom,gsi-loader = "self";
memory-region = <&ipa_fw_mem>;
- firmware-name = "qcom/sc7180-trogdor/modem/modem.mdt";
+ firmware-name = "qcom/sc7180-trogdor/modem/modem.mbn";
iommus = <&apps_smmu 0x440 0x0>,
<&apps_smmu 0x442 0x0>;
diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
index bf23838fe6e8..85cea9966a27 100644
--- a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
@@ -154,56 +154,56 @@ examples:
#include <dt-bindings/interrupt-controller/arm-gic.h>
#include <dt-bindings/clock/stm32mp1-clks.h>
//Example 1
- ethernet0: ethernet@5800a000 {
- compatible = "st,stm32mp1-dwmac", "snps,dwmac-4.20a";
- reg = <0x5800a000 0x2000>;
- reg-names = "stmmaceth";
- interrupts = <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "macirq";
- clock-names = "stmmaceth",
- "mac-clk-tx",
- "mac-clk-rx",
- "ethstp",
- "eth-ck";
- clocks = <&rcc ETHMAC>,
- <&rcc ETHTX>,
- <&rcc ETHRX>,
- <&rcc ETHSTP>,
- <&rcc ETHCK_K>;
- st,syscon = <&syscfg 0x4>;
- snps,pbl = <2>;
- snps,axi-config = <&stmmac_axi_config_0>;
- snps,tso;
- phy-mode = "rgmii";
- };
+ ethernet0: ethernet@5800a000 {
+ compatible = "st,stm32mp1-dwmac", "snps,dwmac-4.20a";
+ reg = <0x5800a000 0x2000>;
+ reg-names = "stmmaceth";
+ interrupts = <GIC_SPI 61 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ clock-names = "stmmaceth",
+ "mac-clk-tx",
+ "mac-clk-rx",
+ "ethstp",
+ "eth-ck";
+ clocks = <&rcc ETHMAC>,
+ <&rcc ETHTX>,
+ <&rcc ETHRX>,
+ <&rcc ETHSTP>,
+ <&rcc ETHCK_K>;
+ st,syscon = <&syscfg 0x4>;
+ snps,pbl = <2>;
+ snps,axi-config = <&stmmac_axi_config_0>;
+ snps,tso;
+ phy-mode = "rgmii";
+ };
- |
//Example 2 (MCU example)
- ethernet1: ethernet@40028000 {
- compatible = "st,stm32-dwmac", "snps,dwmac-3.50a";
- reg = <0x40028000 0x8000>;
- reg-names = "stmmaceth";
- interrupts = <0 61 0>, <0 62 0>;
- interrupt-names = "macirq", "eth_wake_irq";
- clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx";
- clocks = <&rcc 0 25>, <&rcc 0 26>, <&rcc 0 27>;
- st,syscon = <&syscfg 0x4>;
- snps,pbl = <8>;
- snps,mixed-burst;
- phy-mode = "mii";
- };
+ ethernet1: ethernet@40028000 {
+ compatible = "st,stm32-dwmac", "snps,dwmac-3.50a";
+ reg = <0x40028000 0x8000>;
+ reg-names = "stmmaceth";
+ interrupts = <0 61 0>, <0 62 0>;
+ interrupt-names = "macirq", "eth_wake_irq";
+ clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx";
+ clocks = <&rcc 0 25>, <&rcc 0 26>, <&rcc 0 27>;
+ st,syscon = <&syscfg 0x4>;
+ snps,pbl = <8>;
+ snps,mixed-burst;
+ phy-mode = "mii";
+ };
- |
//Example 3
- ethernet2: ethernet@40027000 {
- compatible = "st,stm32-dwmac", "snps,dwmac-4.10a";
- reg = <0x40028000 0x8000>;
- reg-names = "stmmaceth";
- interrupts = <61>;
- interrupt-names = "macirq";
- clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx";
- clocks = <&rcc 62>, <&rcc 61>, <&rcc 60>;
- st,syscon = <&syscfg 0x4>;
- snps,pbl = <8>;
- phy-mode = "mii";
- };
+ ethernet2: ethernet@40027000 {
+ compatible = "st,stm32-dwmac", "snps,dwmac-4.10a";
+ reg = <0x40028000 0x8000>;
+ reg-names = "stmmaceth";
+ interrupts = <61>;
+ interrupt-names = "macirq";
+ clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx";
+ clocks = <&rcc 62>, <&rcc 61>, <&rcc 60>;
+ st,syscon = <&syscfg 0x4>;
+ snps,pbl = <8>;
+ phy-mode = "mii";
+ };
diff --git a/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml b/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
index 53604fab0b73..08119b6880ee 100644
--- a/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
@@ -72,9 +72,9 @@ unevaluatedProperties: false
examples:
- |
davinci_mdio: mdio@4a101000 {
- compatible = "ti,davinci_mdio";
- #address-cells = <1>;
- #size-cells = <0>;
- reg = <0x4a101000 0x1000>;
- bus_freq = <1000000>;
+ compatible = "ti,davinci_mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x4a101000 0x1000>;
+ bus_freq = <1000000>;
};
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 02b6d32003cc..b11894fbaec4 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -302,16 +302,16 @@ examples:
ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
};
};
- };
- cpts@3d000 {
- compatible = "ti,am65-cpts";
- reg = <0x0 0x3d000 0x0 0x400>;
- clocks = <&k3_clks 18 2>;
- clock-names = "cpts";
- interrupts-extended = <&gic500 GIC_SPI 858 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "cpts";
- ti,cpts-ext-ts-inputs = <4>;
- ti,cpts-periodic-outputs = <2>;
+ cpts@3d000 {
+ compatible = "ti,am65-cpts";
+ reg = <0x0 0x3d000 0x0 0x400>;
+ clocks = <&k3_clks 18 2>;
+ clock-names = "cpts";
+ interrupts-extended = <&gic500 GIC_SPI 858 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "cpts";
+ ti,cpts-ext-ts-inputs = <4>;
+ ti,cpts-periodic-outputs = <2>;
+ };
};
};
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
index 3888692275ad..3572749147fb 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
@@ -131,23 +131,23 @@ examples:
#include <dt-bindings/interrupt-controller/arm-gic.h>
cpts@310d0000 {
- compatible = "ti,am65-cpts";
- reg = <0x310d0000 0x400>;
- reg-names = "cpts";
- clocks = <&main_cpts_mux>;
- clock-names = "cpts";
- interrupts-extended = <&k3_irq 163 0 IRQ_TYPE_LEVEL_HIGH>;
- interrupt-names = "cpts";
- ti,cpts-periodic-outputs = <6>;
- ti,cpts-ext-ts-inputs = <8>;
-
- main_cpts_mux: refclk-mux {
- #clock-cells = <0>;
- clocks = <&k3_clks 118 5>, <&k3_clks 118 11>,
- <&k3_clks 157 91>, <&k3_clks 157 77>,
- <&k3_clks 157 102>, <&k3_clks 157 80>,
- <&k3_clks 120 3>, <&k3_clks 121 3>;
- assigned-clocks = <&main_cpts_mux>;
- assigned-clock-parents = <&k3_clks 118 11>;
- };
+ compatible = "ti,am65-cpts";
+ reg = <0x310d0000 0x400>;
+ reg-names = "cpts";
+ clocks = <&main_cpts_mux>;
+ clock-names = "cpts";
+ interrupts-extended = <&k3_irq 163 0 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "cpts";
+ ti,cpts-periodic-outputs = <6>;
+ ti,cpts-ext-ts-inputs = <8>;
+
+ main_cpts_mux: refclk-mux {
+ #clock-cells = <0>;
+ clocks = <&k3_clks 118 5>, <&k3_clks 118 11>,
+ <&k3_clks 157 91>, <&k3_clks 157 77>,
+ <&k3_clks 157 102>, <&k3_clks 157 80>,
+ <&k3_clks 120 3>, <&k3_clks 121 3>;
+ assigned-clocks = <&main_cpts_mux>;
+ assigned-clock-parents = <&k3_clks 118 11>;
+ };
};
diff --git a/Documentation/devicetree/bindings/net/wireless/marvell,sd8787.yaml b/Documentation/devicetree/bindings/net/wireless/marvell,sd8787.yaml
index 1715b22e0dcf..930b700b73d0 100644
--- a/Documentation/devicetree/bindings/net/wireless/marvell,sd8787.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/marvell,sd8787.yaml
@@ -79,15 +79,14 @@ examples:
#include <dt-bindings/interrupt-controller/irq.h>
mmc {
- #address-cells = <1>;
- #size-cells = <0>;
-
- wifi@1 {
- compatible = "marvell,sd8897";
- reg = <1>;
- interrupt-parent = <&pio>;
- interrupts = <38 IRQ_TYPE_LEVEL_LOW>;
- marvell,wakeup-pin = <3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ wifi@1 {
+ compatible = "marvell,sd8897";
+ reg = <1>;
+ interrupt-parent = <&pio>;
+ interrupts = <38 IRQ_TYPE_LEVEL_LOW>;
+ marvell,wakeup-pin = <3>;
};
};
-
diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml
index 96465376d6fe..0d492500c7e5 100644
--- a/Documentation/netlink/specs/rt_link.yaml
+++ b/Documentation/netlink/specs/rt_link.yaml
@@ -1826,6 +1826,48 @@ attribute-sets:
name: erspan-hwid
type: u16
-
+ name: linkinfo-vti-attrs
+ name-prefix: ifla-vti-
+ attributes:
+ -
+ name: link
+ type: u32
+ -
+ name: ikey
+ type: u32
+ -
+ name: okey
+ type: u32
+ -
+ name: local
+ type: binary
+ display-hint: ipv4
+ -
+ name: remote
+ type: binary
+ display-hint: ipv4
+ -
+ name: fwmark
+ type: u32
+ -
+ name: linkinfo-vti6-attrs
+ subset-of: linkinfo-vti-attrs
+ attributes:
+ -
+ name: link
+ -
+ name: ikey
+ -
+ name: okey
+ -
+ name: local
+ display-hint: ipv6
+ -
+ name: remote
+ display-hint: ipv6
+ -
+ name: fwmark
+ -
name: linkinfo-geneve-attrs
name-prefix: ifla-geneve-
attributes:
@@ -1942,6 +1984,42 @@ attribute-sets:
name: fwmark
type: u32
-
+ name: linkinfo-ip6tnl-attrs
+ subset-of: linkinfo-iptun-attrs
+ attributes:
+ -
+ name: link
+ -
+ name: local
+ display-hint: ipv6
+ -
+ name: remote
+ display-hint: ipv6
+ -
+ name: ttl
+ -
+ name: encap-limit
+ -
+ name: flowinfo
+ -
+ name: flags
+ # ip6tnl unlike ipip and sit has 32b flags
+ type: u32
+ -
+ name: proto
+ -
+ name: encap-type
+ -
+ name: encap-flags
+ -
+ name: encap-sport
+ -
+ name: encap-dport
+ -
+ name: collect-metadata
+ -
+ name: fwmark
+ -
name: linkinfo-tun-attrs
name-prefix: ifla-tun-
attributes:
@@ -2169,6 +2247,12 @@ attribute-sets:
name: peer-scrub
type: u32
enum: netkit-scrub
+ -
+ name: headroom
+ type: u16
+ -
+ name: tailroom
+ type: u16
sub-messages:
-
@@ -2196,6 +2280,9 @@ sub-messages:
value: ipip
attribute-set: linkinfo-iptun-attrs
-
+ value: ip6tnl
+ attribute-set: linkinfo-ip6tnl-attrs
+ -
value: sit
attribute-set: linkinfo-iptun-attrs
-
@@ -2208,6 +2295,12 @@ sub-messages:
value: vrf
attribute-set: linkinfo-vrf-attrs
-
+ value: vti
+ attribute-set: linkinfo-vti-attrs
+ -
+ value: vti6
+ attribute-set: linkinfo-vti6-attrs
+ -
value: netkit
attribute-set: linkinfo-netkit-attrs
-
diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
index 4fbaa1a2d674..53d9d5829d69 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -299,6 +299,18 @@ Use ethtool to view and set link-down-on-close, as follows::
ethtool --show-priv-flags ethX
ethtool --set-priv-flags ethX link-down-on-close [on|off]
+Setting the mdd-auto-reset-vf Private Flag
+------------------------------------------
+
+When the mdd-auto-reset-vf private flag is set to "on", the problematic VF will
+be automatically reset if a malformed descriptor is detected. If the flag is
+set to "off", the problematic VF will be disabled.
+
+Use ethtool to view and set mdd-auto-reset-vf, as follows::
+
+ ethtool --show-priv-flags ethX
+ ethtool --set-priv-flags ethX mdd-auto-reset-vf [on|off]
+
Viewing Link Messages
---------------------
Link messages will not be displayed to the console if the distribution is
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 456985407475..41618538fc70 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -53,6 +53,9 @@ parameters.
* ``smfs`` Software managed flow steering. In SMFS mode, the HW
steering entities are created and manage through the driver without
firmware intervention.
+ * ``hmfs`` Hardware managed flow steering. In HMFS mode, the driver
+ is configuring steering rules directly to the HW using Work Queues with
+ a special new type of WQE (Work Queue Element).
SMFS mode is faster and provides better rule insertion rate compared to
default DMFS mode.
diff --git a/Documentation/networking/diagnostic/twisted_pair_layer1_diagnostics.rst b/Documentation/networking/diagnostic/twisted_pair_layer1_diagnostics.rst
index c9be5cc7e113..079e17effadf 100644
--- a/Documentation/networking/diagnostic/twisted_pair_layer1_diagnostics.rst
+++ b/Documentation/networking/diagnostic/twisted_pair_layer1_diagnostics.rst
@@ -713,17 +713,23 @@ driver supports reporting such events.
- **Monitor Error Counters**:
- - While some NIC drivers and PHYs provide error counters, there is no unified
- set of PHY-specific counters across all hardware. Additionally, not all
- PHYs provide useful information related to errors like CRC errors, frame
- drops, or link flaps. Therefore, this step is dependent on the specific
- hardware and driver support.
-
- - **Next Steps**: Use `ethtool -S <interface>` to check if your driver
- provides useful error counters. In some cases, counters may provide
- information about errors like link flaps or physical layer problems (e.g.,
- excessive CRC errors), but results can vary significantly depending on the
- PHY.
+ - Use `ethtool -S <interface> --all-groups` to retrieve standardized interface
+ statistics if the driver supports the unified interface:
+
+ - **Command:** `ethtool -S <interface> --all-groups`
+
+ - **Example Output (if supported)**:
+
+ .. code-block:: bash
+
+ phydev-RxFrames: 100391
+ phydev-RxErrors: 0
+ phydev-TxFrames: 9
+ phydev-TxErrors: 0
+
+ - If the unified interface is not supported, use `ethtool -S <interface>` to
+ retrieve MAC and PHY counters. Note that non-standardized PHY counter names
+ vary by driver and must be interpreted accordingly:
- **Command:** `ethtool -S <interface>`
@@ -740,6 +746,17 @@ driver supports reporting such events.
condition) or kernel log messages (e.g., link up/down events) to further
diagnose the issue.
+ - **Compare Counters**:
+
+ - Compare the egress and ingress frame counts reported by the PHY and MAC.
+
+ - A small difference may occur due to sampling rate differences between the
+ MAC and PHY drivers, or if the PHY and MAC are not always fully
+ synchronized in their UP or DOWN states.
+
+ - Significant discrepancies indicate potential issues in the data path
+ between the MAC and PHY.
+
When All Else Fails...
~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index a7ba6368a4d5..da846f1d998e 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1616,6 +1616,7 @@ the ``ETHTOOL_A_STATS_GROUPS`` bitset. Currently defined values are:
ETHTOOL_STATS_ETH_PHY eth-phy Basic IEEE 802.3 PHY statistics (30.3.2.1.*)
ETHTOOL_STATS_ETH_CTRL eth-ctrl Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*)
ETHTOOL_STATS_RMON rmon RMON (RFC 2819) statistics
+ ETHTOOL_STATS_PHY phy Additional PHY statistics, not defined by IEEE
====================== ======== ===============================================
Each group should have a corresponding ``ETHTOOL_A_STATS_GRP`` in the reply.
diff --git a/Documentation/networking/multi-pf-netdev.rst b/Documentation/networking/multi-pf-netdev.rst
index 2cd25d81aaa7..2f5a5bb3ca9a 100644
--- a/Documentation/networking/multi-pf-netdev.rst
+++ b/Documentation/networking/multi-pf-netdev.rst
@@ -89,7 +89,7 @@ Observability
=============
The relation between PF, irq, napi, and queue can be observed via netlink spec::
- $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml --dump queue-get --json='{"ifindex": 13}'
+ $ ./tools/net/ynl/pyynl/cli.py --spec Documentation/netlink/specs/netdev.yaml --dump queue-get --json='{"ifindex": 13}'
[{'id': 0, 'ifindex': 13, 'napi-id': 539, 'type': 'rx'},
{'id': 1, 'ifindex': 13, 'napi-id': 540, 'type': 'rx'},
{'id': 2, 'ifindex': 13, 'napi-id': 541, 'type': 'rx'},
@@ -101,7 +101,7 @@ The relation between PF, irq, napi, and queue can be observed via netlink spec::
{'id': 3, 'ifindex': 13, 'napi-id': 542, 'type': 'tx'},
{'id': 4, 'ifindex': 13, 'napi-id': 543, 'type': 'tx'}]
- $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml --dump napi-get --json='{"ifindex": 13}'
+ $ ./tools/net/ynl/pyynl/cli.py --spec Documentation/netlink/specs/netdev.yaml --dump napi-get --json='{"ifindex": 13}'
[{'id': 543, 'ifindex': 13, 'irq': 42},
{'id': 542, 'ifindex': 13, 'irq': 41},
{'id': 541, 'ifindex': 13, 'irq': 40},
diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst
index 02720dd71a76..6083210ab2a4 100644
--- a/Documentation/networking/napi.rst
+++ b/Documentation/networking/napi.rst
@@ -199,13 +199,13 @@ parameters mentioned above use hyphens instead of underscores:
Per-NAPI configuration can be done programmatically in a user application
or by using a script included in the kernel source tree:
-``tools/net/ynl/cli.py``.
+``tools/net/ynl/pyynl/cli.py``.
For example, using the script:
.. code-block:: bash
- $ kernel-source/tools/net/ynl/cli.py \
+ $ kernel-source/tools/net/ynl/pyynl/cli.py \
--spec Documentation/netlink/specs/netdev.yaml \
--do napi-set \
--json='{"id": 345,
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
index 857c9784f87e..1d37038e9fbe 100644
--- a/Documentation/networking/netdevices.rst
+++ b/Documentation/networking/netdevices.rst
@@ -297,3 +297,13 @@ napi->poll:
Context:
softirq
will be called with interrupts disabled by netconsole.
+
+NETDEV_INTERNAL symbol namespace
+================================
+
+Symbols exported as NETDEV_INTERNAL can only be used in networking
+core and drivers which exclusively flow via the main networking list and trees.
+Note that the inverse is not true, most symbols outside of NETDEV_INTERNAL
+are not expected to be used by random code outside netdev either.
+Symbols may lack the designation because they predate the namespaces,
+or simply due to an oversight.
diff --git a/Documentation/networking/netlink_spec/readme.txt b/Documentation/networking/netlink_spec/readme.txt
index 6763f99d216c..030b44aca4e6 100644
--- a/Documentation/networking/netlink_spec/readme.txt
+++ b/Documentation/networking/netlink_spec/readme.txt
@@ -1,4 +1,4 @@
SPDX-License-Identifier: GPL-2.0
This file is populated during the build of the documentation (htmldocs) by the
-tools/net/ynl/ynl-gen-rst.py script.
+tools/net/ynl/pyynl/ynl_gen_rst.py script.
diff --git a/Documentation/networking/xfrm_device.rst b/Documentation/networking/xfrm_device.rst
index bfea9d8579ed..66f6e9a9b59a 100644
--- a/Documentation/networking/xfrm_device.rst
+++ b/Documentation/networking/xfrm_device.rst
@@ -169,7 +169,8 @@ the stack in xfrm_input().
hand the packet to napi_gro_receive() as usual
-In ESN mode, xdo_dev_state_advance_esn() is called from xfrm_replay_advance_esn().
+In ESN mode, xdo_dev_state_advance_esn() is called from
+xfrm_replay_advance_esn() for RX, and xfrm_replay_overflow_offload_esn for TX.
Driver will check packet seq number and update HW ESN state machine if needed.
Packet offload mode:
diff --git a/Documentation/userspace-api/netlink/intro-specs.rst b/Documentation/userspace-api/netlink/intro-specs.rst
index bada89699455..a4435ae4628d 100644
--- a/Documentation/userspace-api/netlink/intro-specs.rst
+++ b/Documentation/userspace-api/netlink/intro-specs.rst
@@ -15,7 +15,7 @@ developing Netlink related code. The tool is implemented in Python
and can use a YAML specification to issue Netlink requests
to the kernel. Only Generic Netlink is supported.
-The tool is located at ``tools/net/ynl/cli.py``. It accepts
+The tool is located at ``tools/net/ynl/pyynl/cli.py``. It accepts
a handul of arguments, the most important ones are:
- ``--spec`` - point to the spec file
@@ -27,7 +27,7 @@ YAML specs can be found under ``Documentation/netlink/specs/``.
Example use::
- $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/ethtool.yaml \
+ $ ./tools/net/ynl/pyynl/cli.py --spec Documentation/netlink/specs/ethtool.yaml \
--do rings-get \
--json '{"header":{"dev-index": 18}}'
{'header': {'dev-index': 18, 'dev-name': 'eni1np1'},
@@ -75,7 +75,7 @@ the two marker lines like above to a file, add that file to git,
and run the regeneration tool. Grep the tree for ``YNL-GEN``
to see other examples.
-The code generation itself is performed by ``tools/net/ynl/ynl-gen-c.py``
+The code generation itself is performed by ``tools/net/ynl/pyynl/ynl_gen_c.py``
but it takes a few arguments so calling it directly for each file
quickly becomes tedious.
@@ -84,7 +84,7 @@ YNL lib
``tools/net/ynl/lib/`` contains an implementation of a C library
(based on libmnl) which integrates with code generated by
-``tools/net/ynl/ynl-gen-c.py`` to create easy to use netlink wrappers.
+``tools/net/ynl/pyynl/ynl_gen_c.py`` to create easy to use netlink wrappers.
YNL basics
----------
diff --git a/MAINTAINERS b/MAINTAINERS
index a685c551faf0..797e94ba74f7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -949,7 +949,6 @@ AMAZON ETHERNET DRIVERS
M: Shay Agroskin <shayagr@amazon.com>
M: Arthur Kiyanovski <akiyano@amazon.com>
R: David Arinzon <darinzon@amazon.com>
-R: Noam Dagan <ndagan@amazon.com>
R: Saeed Bishara <saeedb@amazon.com>
L: netdev@vger.kernel.org
S: Supported
@@ -2690,7 +2689,6 @@ N: at91
N: atmel
ARM/Microchip Sparx5 SoC support
-M: Lars Povlsen <lars.povlsen@microchip.com>
M: Steen Hegelund <Steen.Hegelund@microchip.com>
M: Daniel Machon <daniel.machon@microchip.com>
M: UNGLinuxDriver@microchip.com
@@ -4065,7 +4063,6 @@ F: net/bluetooth/
BONDING DRIVER
M: Jay Vosburgh <jv@jvosburgh.net>
-M: Andy Gospodarek <andy@greyhouse.net>
L: netdev@vger.kernel.org
S: Maintained
F: Documentation/networking/bonding.rst
@@ -5118,6 +5115,7 @@ F: include/uapi/linux/can/gw.h
F: include/uapi/linux/can/isotp.h
F: include/uapi/linux/can/raw.h
F: net/can/
+F: net/sched/em_canid.c
CAN-J1939 NETWORK LAYER
M: Robin van der Gracht <robin@protonic.nl>
@@ -14574,7 +14572,6 @@ F: drivers/dma/mediatek/
MEDIATEK ETHERNET DRIVER
M: Felix Fietkau <nbd@nbd.name>
M: Sean Wang <sean.wang@mediatek.com>
-M: Mark Lee <Mark-MC.Lee@mediatek.com>
M: Lorenzo Bianconi <lorenzo@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
@@ -14764,7 +14761,7 @@ F: drivers/memory/mtk-smi.c
F: include/soc/mediatek/smi.h
MEDIATEK SWITCH DRIVER
-M: Arınç ÜNAL <arinc.unal@arinc9.com>
+M: Chester A. Unal <chester.a.unal@arinc9.com>
M: Daniel Golle <daniel@makrotopia.org>
M: DENG Qingfang <dqfext@gmail.com>
M: Sean Wang <sean.wang@mediatek.com>
@@ -16174,7 +16171,8 @@ M: Breno Leitao <leitao@debian.org>
S: Maintained
F: Documentation/networking/netconsole.rst
F: drivers/net/netconsole.c
-F: tools/testing/selftests/drivers/net/netcons_basic.sh
+F: tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
+F: tools/testing/selftests/drivers/net/netcons\*
NETDEVSIM
M: Jakub Kicinski <kuba@kernel.org>
@@ -18469,7 +18467,7 @@ F: Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml
F: drivers/pinctrl/mediatek/
PIN CONTROLLER - MEDIATEK MIPS
-M: Arınç ÜNAL <arinc.unal@arinc9.com>
+M: Chester A. Unal <chester.a.unal@arinc9.com>
M: Sergio Paracuellos <sergio.paracuellos@gmail.com>
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
L: linux-mips@vger.kernel.org
@@ -19513,7 +19511,7 @@ S: Maintained
F: arch/mips/ralink
RALINK MT7621 MIPS ARCHITECTURE
-M: Arınç ÜNAL <arinc.unal@arinc9.com>
+M: Chester A. Unal <chester.a.unal@arinc9.com>
M: Sergio Paracuellos <sergio.paracuellos@gmail.com>
L: linux-mips@vger.kernel.org
S: Maintained
@@ -20916,6 +20914,8 @@ F: kernel/sched/
SCHEDULER - SCHED_EXT
R: Tejun Heo <tj@kernel.org>
R: David Vernet <void@manifault.com>
+R: Andrea Righi <arighi@nvidia.com>
+R: Changwoo Min <changwoo@igalia.com>
L: linux-kernel@vger.kernel.org
S: Maintained
W: https://github.com/sched-ext/scx
@@ -22510,11 +22510,8 @@ F: Documentation/devicetree/bindings/phy/st,stm32mp25-combophy.yaml
F: drivers/phy/st/phy-stm32-combophy.c
STMMAC ETHERNET DRIVER
-M: Alexandre Torgue <alexandre.torgue@foss.st.com>
-M: Jose Abreu <joabreu@synopsys.com>
L: netdev@vger.kernel.org
-S: Supported
-W: http://www.stlinux.com
+S: Orphan
F: Documentation/networking/device_drivers/ethernet/stmicro/
F: drivers/net/ethernet/stmicro/stmmac/
@@ -22746,9 +22743,8 @@ S: Supported
F: drivers/net/ethernet/synopsys/
SYNOPSYS DESIGNWARE ETHERNET XPCS DRIVER
-M: Jose Abreu <Jose.Abreu@synopsys.com>
L: netdev@vger.kernel.org
-S: Supported
+S: Orphan
F: drivers/net/pcs/pcs-xpcs.c
F: drivers/net/pcs/pcs-xpcs.h
F: include/linux/pcs/pcs-xpcs.h
@@ -23656,7 +23652,6 @@ F: tools/testing/selftests/timers/
TIPC NETWORK LAYER
M: Jon Maloy <jmaloy@redhat.com>
-M: Ying Xue <ying.xue@windriver.com>
L: netdev@vger.kernel.org (core kernel code)
L: tipc-discussion@lists.sourceforge.net (user apps, general discussion)
S: Maintained
@@ -24262,7 +24257,8 @@ F: Documentation/devicetree/bindings/usb/nxp,isp1760.yaml
F: drivers/usb/isp1760/*
USB LAN78XX ETHERNET DRIVER
-M: Woojung Huh <woojung.huh@microchip.com>
+M: Thangaraj Samynathan <Thangaraj.S@microchip.com>
+M: Rengarajan Sundararajan <Rengarajan.S@microchip.com>
M: UNGLinuxDriver@microchip.com
L: netdev@vger.kernel.org
S: Maintained
diff --git a/Makefile b/Makefile
index aae2d92f7827..b11454a2233a 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 13
SUBLEVEL = 0
-EXTRAVERSION = -rc5
+EXTRAVERSION = -rc6
NAME = Baby Opossum Posse
# *DOCUMENTATION*
diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c
index 7fd9d5ddce02..224eafc27dbe 100644
--- a/drivers/bluetooth/btmtk.c
+++ b/drivers/bluetooth/btmtk.c
@@ -1472,10 +1472,15 @@ EXPORT_SYMBOL_GPL(btmtk_usb_setup);
int btmtk_usb_shutdown(struct hci_dev *hdev)
{
+ struct btmtk_data *data = hci_get_priv(hdev);
struct btmtk_hci_wmt_params wmt_params;
u8 param = 0;
int err;
+ err = usb_autopm_get_interface(data->intf);
+ if (err < 0)
+ return err;
+
/* Disable the device */
wmt_params.op = BTMTK_WMT_FUNC_CTRL;
wmt_params.flag = 0;
@@ -1486,9 +1491,11 @@ int btmtk_usb_shutdown(struct hci_dev *hdev)
err = btmtk_usb_hci_wmt_sync(hdev, &wmt_params);
if (err < 0) {
bt_dev_err(hdev, "Failed to send wmt func ctrl (%d)", err);
+ usb_autopm_put_interface(data->intf);
return err;
}
+ usb_autopm_put_interface(data->intf);
return 0;
}
EXPORT_SYMBOL_GPL(btmtk_usb_shutdown);
diff --git a/drivers/bluetooth/btnxpuart.c b/drivers/bluetooth/btnxpuart.c
index 569f5b7d6e46..1230045d78a5 100644
--- a/drivers/bluetooth/btnxpuart.c
+++ b/drivers/bluetooth/btnxpuart.c
@@ -1381,6 +1381,7 @@ static void btnxpuart_tx_work(struct work_struct *work)
while ((skb = nxp_dequeue(nxpdev))) {
len = serdev_device_write_buf(serdev, skb->data, skb->len);
+ serdev_device_wait_until_sent(serdev, 0);
hdev->stat.byte_tx += len;
skb_pull(skb, len);
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index 6a99a459b80b..51745ed1bbab 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -1106,7 +1106,7 @@ int open_for_data(struct cdrom_device_info *cdi)
}
}
- cd_dbg(CD_OPEN, "all seems well, opening the devicen");
+ cd_dbg(CD_OPEN, "all seems well, opening the device\n");
/* all seems well, we can open the device */
ret = cdo->open(cdi, 0); /* open for data */
diff --git a/drivers/clk/imx/clk-imx8mp-audiomix.c b/drivers/clk/imx/clk-imx8mp-audiomix.c
index b2cb157703c5..c409fc7e0618 100644
--- a/drivers/clk/imx/clk-imx8mp-audiomix.c
+++ b/drivers/clk/imx/clk-imx8mp-audiomix.c
@@ -278,7 +278,8 @@ static int clk_imx8mp_audiomix_reset_controller_register(struct device *dev,
#else /* !CONFIG_RESET_CONTROLLER */
-static int clk_imx8mp_audiomix_reset_controller_register(struct clk_imx8mp_audiomix_priv *priv)
+static int clk_imx8mp_audiomix_reset_controller_register(struct device *dev,
+ struct clk_imx8mp_audiomix_priv *priv)
{
return 0;
}
diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c
index 17e32ae08720..1015fab95251 100644
--- a/drivers/clk/thead/clk-th1520-ap.c
+++ b/drivers/clk/thead/clk-th1520-ap.c
@@ -779,6 +779,13 @@ static struct ccu_div dpu1_clk = {
},
};
+static CLK_FIXED_FACTOR_HW(emmc_sdio_ref_clk, "emmc-sdio-ref",
+ &video_pll_clk.common.hw, 4, 1, 0);
+
+static const struct clk_parent_data emmc_sdio_ref_clk_pd[] = {
+ { .hw = &emmc_sdio_ref_clk.hw },
+};
+
static CCU_GATE(CLK_BROM, brom_clk, "brom", ahb2_cpusys_hclk_pd, 0x100, BIT(4), 0);
static CCU_GATE(CLK_BMU, bmu_clk, "bmu", axi4_cpusys2_aclk_pd, 0x100, BIT(5), 0);
static CCU_GATE(CLK_AON2CPU_A2X, aon2cpu_a2x_clk, "aon2cpu-a2x", axi4_cpusys2_aclk_pd,
@@ -798,7 +805,7 @@ static CCU_GATE(CLK_PERISYS_APB4_HCLK, perisys_apb4_hclk, "perisys-apb4-hclk", p
0x150, BIT(12), 0);
static CCU_GATE(CLK_NPU_AXI, npu_axi_clk, "npu-axi", axi_aclk_pd, 0x1c8, BIT(5), 0);
static CCU_GATE(CLK_CPU2VP, cpu2vp_clk, "cpu2vp", axi_aclk_pd, 0x1e0, BIT(13), 0);
-static CCU_GATE(CLK_EMMC_SDIO, emmc_sdio_clk, "emmc-sdio", video_pll_clk_pd, 0x204, BIT(30), 0);
+static CCU_GATE(CLK_EMMC_SDIO, emmc_sdio_clk, "emmc-sdio", emmc_sdio_ref_clk_pd, 0x204, BIT(30), 0);
static CCU_GATE(CLK_GMAC1, gmac1_clk, "gmac1", gmac_pll_clk_pd, 0x204, BIT(26), 0);
static CCU_GATE(CLK_PADCTRL1, padctrl1_clk, "padctrl1", perisys_apb_pclk_pd, 0x204, BIT(24), 0);
static CCU_GATE(CLK_DSMART, dsmart_clk, "dsmart", perisys_apb_pclk_pd, 0x204, BIT(23), 0);
@@ -1059,6 +1066,10 @@ static int th1520_clk_probe(struct platform_device *pdev)
return ret;
priv->hws[CLK_PLL_GMAC_100M] = &gmac_pll_clk_100m.hw;
+ ret = devm_clk_hw_register(dev, &emmc_sdio_ref_clk.hw);
+ if (ret)
+ return ret;
+
ret = devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, priv);
if (ret)
return ret;
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index ec5db1478b2f..18ae45dcbfb2 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -442,7 +442,7 @@ static int ebs_iterate_devices(struct dm_target *ti,
static struct target_type ebs_target = {
.name = "ebs",
.version = {1, 0, 1},
- .features = DM_TARGET_PASSES_INTEGRITY,
+ .features = 0,
.module = THIS_MODULE,
.ctr = ebs_ctr,
.dtr = ebs_dtr,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index bf0f9dddd146..05cf4e3f2bbe 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2332,10 +2332,9 @@ static struct thin_c *get_first_thin(struct pool *pool)
struct thin_c *tc = NULL;
rcu_read_lock();
- if (!list_empty(&pool->active_thins)) {
- tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+ tc = list_first_or_null_rcu(&pool->active_thins, struct thin_c, list);
+ if (tc)
thin_get(tc);
- }
rcu_read_unlock();
return tc;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 62b1a44b8dd2..e61855da6461 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -40,35 +40,23 @@ static inline u64 fec_interleave(struct dm_verity *v, u64 offset)
}
/*
- * Decode an RS block using Reed-Solomon.
- */
-static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
- u8 *data, u8 *fec, int neras)
-{
- int i;
- uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
-
- for (i = 0; i < v->fec->roots; i++)
- par[i] = fec[i];
-
- return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras,
- fio->erasures, 0, NULL);
-}
-
-/*
* Read error-correcting codes for the requested RS block. Returns a pointer
* to the data block. Caller is responsible for releasing buf.
*/
static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
- unsigned int *offset, struct dm_buffer **buf,
- unsigned short ioprio)
+ unsigned int *offset, unsigned int par_buf_offset,
+ struct dm_buffer **buf, unsigned short ioprio)
{
u64 position, block, rem;
u8 *res;
+ /* We have already part of parity bytes read, skip to the next block */
+ if (par_buf_offset)
+ index++;
+
position = (index + rsb) * v->fec->roots;
block = div64_u64_rem(position, v->fec->io_size, &rem);
- *offset = (unsigned int)rem;
+ *offset = par_buf_offset ? 0 : (unsigned int)rem;
res = dm_bufio_read_with_ioprio(v->fec->bufio, block, buf, ioprio);
if (IS_ERR(res)) {
@@ -128,11 +116,13 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
{
int r, corrected = 0, res;
struct dm_buffer *buf;
- unsigned int n, i, offset;
+ unsigned int n, i, j, offset, par_buf_offset = 0;
+ uint16_t par_buf[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
u8 *par, *block;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
- par = fec_read_parity(v, rsb, block_offset, &offset, &buf, bio_prio(bio));
+ par = fec_read_parity(v, rsb, block_offset, &offset,
+ par_buf_offset, &buf, bio_prio(bio));
if (IS_ERR(par))
return PTR_ERR(par);
@@ -142,7 +132,11 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
*/
fec_for_each_buffer_rs_block(fio, n, i) {
block = fec_buffer_rs_block(v, fio, n, i);
- res = fec_decode_rs8(v, fio, block, &par[offset], neras);
+ for (j = 0; j < v->fec->roots - par_buf_offset; j++)
+ par_buf[par_buf_offset + j] = par[offset + j];
+ /* Decode an RS block using Reed-Solomon */
+ res = decode_rs8(fio->rs, block, par_buf, v->fec->rsn,
+ NULL, neras, fio->erasures, 0, NULL);
if (res < 0) {
r = res;
goto error;
@@ -155,12 +149,22 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
if (block_offset >= 1 << v->data_dev_block_bits)
goto done;
- /* read the next block when we run out of parity bytes */
- offset += v->fec->roots;
+ /* Read the next block when we run out of parity bytes */
+ offset += (v->fec->roots - par_buf_offset);
+ /* Check if parity bytes are split between blocks */
+ if (offset < v->fec->io_size && (offset + v->fec->roots) > v->fec->io_size) {
+ par_buf_offset = v->fec->io_size - offset;
+ for (j = 0; j < par_buf_offset; j++)
+ par_buf[j] = par[offset + j];
+ offset += par_buf_offset;
+ } else
+ par_buf_offset = 0;
+
if (offset >= v->fec->io_size) {
dm_bufio_release(buf);
- par = fec_read_parity(v, rsb, block_offset, &offset, &buf, bio_prio(bio));
+ par = fec_read_parity(v, rsb, block_offset, &offset,
+ par_buf_offset, &buf, bio_prio(bio));
if (IS_ERR(par))
return PTR_ERR(par);
}
@@ -724,10 +728,7 @@ int verity_fec_ctr(struct dm_verity *v)
return -E2BIG;
}
- if ((f->roots << SECTOR_SHIFT) & ((1 << v->data_dev_block_bits) - 1))
- f->io_size = 1 << v->data_dev_block_bits;
- else
- f->io_size = v->fec->roots << SECTOR_SHIFT;
+ f->io_size = 1 << v->data_dev_block_bits;
f->bufio = dm_bufio_client_create(f->dev->bdev,
f->io_size,
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 157c9bd2fed7..8f8792e55806 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -917,23 +917,27 @@ static int load_ablock(struct dm_array_cursor *c)
if (c->block)
unlock_ablock(c->info, c->block);
- c->block = NULL;
- c->ab = NULL;
c->index = 0;
r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
if (r) {
DMERR("dm_btree_cursor_get_value failed");
- dm_btree_cursor_end(&c->cursor);
+ goto out;
} else {
r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
if (r) {
DMERR("get_ablock failed");
- dm_btree_cursor_end(&c->cursor);
+ goto out;
}
}
+ return 0;
+
+out:
+ dm_btree_cursor_end(&c->cursor);
+ c->block = NULL;
+ c->ab = NULL;
return r;
}
@@ -956,10 +960,10 @@ EXPORT_SYMBOL_GPL(dm_array_cursor_begin);
void dm_array_cursor_end(struct dm_array_cursor *c)
{
- if (c->block) {
+ if (c->block)
unlock_ablock(c->info, c->block);
- dm_btree_cursor_end(&c->cursor);
- }
+
+ dm_btree_cursor_end(&c->cursor);
}
EXPORT_SYMBOL_GPL(dm_array_cursor_end);
@@ -999,6 +1003,7 @@ int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count)
}
count -= remaining;
+ c->index += (remaining - 1);
r = dm_array_cursor_next(c);
} while (!r);
diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c
index 681643ab3780..5ec3170b896a 100644
--- a/drivers/net/can/dev/dev.c
+++ b/drivers/net/can/dev/dev.c
@@ -85,8 +85,6 @@ const char *can_get_state_str(const enum can_state state)
default:
return "<unknown>";
}
-
- return "<unknown>";
}
EXPORT_SYMBOL_GPL(can_get_state_str);
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index cdf0ec9fa7f3..21a61b86f67d 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -1073,9 +1073,10 @@ static int grcan_open(struct net_device *dev)
if (err)
goto exit_close_candev;
+ napi_enable(&priv->napi);
+
spin_lock_irqsave(&priv->lock, flags);
- napi_enable(&priv->napi);
grcan_start(dev);
if (!(priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY))
netif_start_queue(dev);
diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c
index fee012b57f33..fa04a7ced02b 100644
--- a/drivers/net/can/kvaser_pciefd.c
+++ b/drivers/net/can/kvaser_pciefd.c
@@ -999,7 +999,8 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
can->can.ctrlmode_supported = CAN_CTRLMODE_LISTENONLY |
CAN_CTRLMODE_FD |
CAN_CTRLMODE_FD_NON_ISO |
- CAN_CTRLMODE_CC_LEN8_DLC;
+ CAN_CTRLMODE_CC_LEN8_DLC |
+ CAN_CTRLMODE_BERR_REPORTING;
status = ioread32(can->reg_base + KVASER_PCIEFD_KCAN_STAT_REG);
if (!(status & KVASER_PCIEFD_KCAN_STAT_FD)) {
@@ -1234,11 +1235,15 @@ static int kvaser_pciefd_handle_data_packet(struct kvaser_pciefd *pcie,
}
static void kvaser_pciefd_change_state(struct kvaser_pciefd_can *can,
+ const struct can_berr_counter *bec,
struct can_frame *cf,
enum can_state new_state,
enum can_state tx_state,
enum can_state rx_state)
{
+ enum can_state old_state;
+
+ old_state = can->can.state;
can_change_state(can->can.dev, cf, tx_state, rx_state);
if (new_state == CAN_STATE_BUS_OFF) {
@@ -1254,6 +1259,18 @@ static void kvaser_pciefd_change_state(struct kvaser_pciefd_can *can,
can_bus_off(ndev);
}
}
+ if (old_state == CAN_STATE_BUS_OFF &&
+ new_state == CAN_STATE_ERROR_ACTIVE &&
+ can->can.restart_ms) {
+ can->can.can_stats.restarts++;
+ if (cf)
+ cf->can_id |= CAN_ERR_RESTARTED;
+ }
+ if (cf && new_state != CAN_STATE_BUS_OFF) {
+ cf->can_id |= CAN_ERR_CNT;
+ cf->data[6] = bec->txerr;
+ cf->data[7] = bec->rxerr;
+ }
}
static void kvaser_pciefd_packet_to_state(struct kvaser_pciefd_rx_packet *p,
@@ -1288,7 +1305,7 @@ static int kvaser_pciefd_rx_error_frame(struct kvaser_pciefd_can *can,
struct can_berr_counter bec;
enum can_state old_state, new_state, tx_state, rx_state;
struct net_device *ndev = can->can.dev;
- struct sk_buff *skb;
+ struct sk_buff *skb = NULL;
struct can_frame *cf = NULL;
old_state = can->can.state;
@@ -1297,16 +1314,10 @@ static int kvaser_pciefd_rx_error_frame(struct kvaser_pciefd_can *can,
bec.rxerr = FIELD_GET(KVASER_PCIEFD_SPACK_RXERR_MASK, p->header[0]);
kvaser_pciefd_packet_to_state(p, &bec, &new_state, &tx_state, &rx_state);
- skb = alloc_can_err_skb(ndev, &cf);
+ if (can->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
+ skb = alloc_can_err_skb(ndev, &cf);
if (new_state != old_state) {
- kvaser_pciefd_change_state(can, cf, new_state, tx_state, rx_state);
- if (old_state == CAN_STATE_BUS_OFF &&
- new_state == CAN_STATE_ERROR_ACTIVE &&
- can->can.restart_ms) {
- can->can.can_stats.restarts++;
- if (skb)
- cf->can_id |= CAN_ERR_RESTARTED;
- }
+ kvaser_pciefd_change_state(can, &bec, cf, new_state, tx_state, rx_state);
}
can->err_rep_cnt++;
@@ -1319,18 +1330,19 @@ static int kvaser_pciefd_rx_error_frame(struct kvaser_pciefd_can *can,
can->bec.txerr = bec.txerr;
can->bec.rxerr = bec.rxerr;
- if (!skb) {
- ndev->stats.rx_dropped++;
- return -ENOMEM;
+ if (can->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) {
+ if (!skb) {
+ netdev_warn(ndev, "No memory left for err_skb\n");
+ ndev->stats.rx_dropped++;
+ return -ENOMEM;
+ }
+ kvaser_pciefd_set_skb_timestamp(can->kv_pcie, skb, p->timestamp);
+ cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_CNT;
+ cf->data[6] = bec.txerr;
+ cf->data[7] = bec.rxerr;
+ netif_rx(skb);
}
- kvaser_pciefd_set_skb_timestamp(can->kv_pcie, skb, p->timestamp);
- cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_CNT;
- cf->data[6] = bec.txerr;
- cf->data[7] = bec.rxerr;
-
- netif_rx(skb);
-
return 0;
}
@@ -1359,6 +1371,7 @@ static int kvaser_pciefd_handle_status_resp(struct kvaser_pciefd_can *can,
{
struct can_berr_counter bec;
enum can_state old_state, new_state, tx_state, rx_state;
+ int ret = 0;
old_state = can->can.state;
@@ -1372,25 +1385,15 @@ static int kvaser_pciefd_handle_status_resp(struct kvaser_pciefd_can *can,
struct can_frame *cf;
skb = alloc_can_err_skb(ndev, &cf);
- if (!skb) {
+ kvaser_pciefd_change_state(can, &bec, cf, new_state, tx_state, rx_state);
+ if (skb) {
+ kvaser_pciefd_set_skb_timestamp(can->kv_pcie, skb, p->timestamp);
+ netif_rx(skb);
+ } else {
ndev->stats.rx_dropped++;
- return -ENOMEM;
+ netdev_warn(ndev, "No memory left for err_skb\n");
+ ret = -ENOMEM;
}
-
- kvaser_pciefd_change_state(can, cf, new_state, tx_state, rx_state);
- if (old_state == CAN_STATE_BUS_OFF &&
- new_state == CAN_STATE_ERROR_ACTIVE &&
- can->can.restart_ms) {
- can->can.can_stats.restarts++;
- cf->can_id |= CAN_ERR_RESTARTED;
- }
-
- kvaser_pciefd_set_skb_timestamp(can->kv_pcie, skb, p->timestamp);
-
- cf->data[6] = bec.txerr;
- cf->data[7] = bec.rxerr;
-
- netif_rx(skb);
}
can->bec.txerr = bec.txerr;
can->bec.rxerr = bec.rxerr;
@@ -1398,7 +1401,7 @@ static int kvaser_pciefd_handle_status_resp(struct kvaser_pciefd_can *can,
if (bec.txerr || bec.rxerr)
mod_timer(&can->bec_poll_timer, KVASER_PCIEFD_BEC_POLL_FREQ);
- return 0;
+ return ret;
}
static int kvaser_pciefd_handle_status_packet(struct kvaser_pciefd *pcie,
diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 97cd8bbf2e32..d025d4163fd1 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1785,6 +1785,13 @@ static void m_can_stop(struct net_device *dev)
/* set the state as STOPPED */
cdev->can.state = CAN_STATE_STOPPED;
+
+ if (cdev->ops->deinit) {
+ ret = cdev->ops->deinit(cdev);
+ if (ret)
+ netdev_err(dev, "failed to deinitialize: %pe\n",
+ ERR_PTR(ret));
+ }
}
static int m_can_close(struct net_device *dev)
@@ -2466,6 +2473,7 @@ int m_can_class_suspend(struct device *dev)
{
struct m_can_classdev *cdev = dev_get_drvdata(dev);
struct net_device *ndev = cdev->net;
+ int ret = 0;
if (netif_running(ndev)) {
netif_stop_queue(ndev);
@@ -2478,6 +2486,9 @@ int m_can_class_suspend(struct device *dev)
if (cdev->pm_wake_source) {
hrtimer_cancel(&cdev->hrtimer);
m_can_write(cdev, M_CAN_IE, IR_RF0N);
+
+ if (cdev->ops->deinit)
+ ret = cdev->ops->deinit(cdev);
} else {
m_can_stop(ndev);
}
@@ -2489,7 +2500,7 @@ int m_can_class_suspend(struct device *dev)
cdev->can.state = CAN_STATE_SLEEPING;
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(m_can_class_suspend);
@@ -2497,14 +2508,13 @@ int m_can_class_resume(struct device *dev)
{
struct m_can_classdev *cdev = dev_get_drvdata(dev);
struct net_device *ndev = cdev->net;
+ int ret = 0;
pinctrl_pm_select_default_state(dev);
cdev->can.state = CAN_STATE_ERROR_ACTIVE;
if (netif_running(ndev)) {
- int ret;
-
ret = m_can_clk_start(cdev);
if (ret)
return ret;
@@ -2517,6 +2527,10 @@ int m_can_class_resume(struct device *dev)
* again.
*/
cdev->active_interrupts |= IR_RF0N | IR_TEFN;
+
+ if (cdev->ops->init)
+ ret = cdev->ops->init(cdev);
+
m_can_write(cdev, M_CAN_IE, cdev->active_interrupts);
} else {
ret = m_can_start(ndev);
@@ -2530,7 +2544,7 @@ int m_can_class_resume(struct device *dev)
netif_start_queue(ndev);
}
- return 0;
+ return ret;
}
EXPORT_SYMBOL_GPL(m_can_class_resume);
diff --git a/drivers/net/can/m_can/m_can.h b/drivers/net/can/m_can/m_can.h
index ef39e8e527ab..bd4746c63af3 100644
--- a/drivers/net/can/m_can/m_can.h
+++ b/drivers/net/can/m_can/m_can.h
@@ -68,6 +68,7 @@ struct m_can_ops {
int (*write_fifo)(struct m_can_classdev *cdev, int addr_offset,
const void *val, size_t val_count);
int (*init)(struct m_can_classdev *cdev);
+ int (*deinit)(struct m_can_classdev *cdev);
};
struct m_can_tx_op {
diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c
index 2f73bf3abad8..e5c162f8c589 100644
--- a/drivers/net/can/m_can/tcan4x5x-core.c
+++ b/drivers/net/can/m_can/tcan4x5x-core.c
@@ -92,6 +92,8 @@
#define TCAN4X5X_MODE_STANDBY BIT(6)
#define TCAN4X5X_MODE_NORMAL BIT(7)
+#define TCAN4X5X_NWKRQ_VOLTAGE_VIO BIT(19)
+
#define TCAN4X5X_DISABLE_WAKE_MSK (BIT(31) | BIT(30))
#define TCAN4X5X_DISABLE_INH_MSK BIT(9)
@@ -267,9 +269,24 @@ static int tcan4x5x_init(struct m_can_classdev *cdev)
if (ret)
return ret;
+ if (tcan4x5x->nwkrq_voltage_vio) {
+ ret = regmap_set_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG,
+ TCAN4X5X_NWKRQ_VOLTAGE_VIO);
+ if (ret)
+ return ret;
+ }
+
return ret;
}
+static int tcan4x5x_deinit(struct m_can_classdev *cdev)
+{
+ struct tcan4x5x_priv *tcan4x5x = cdev_to_priv(cdev);
+
+ return regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG,
+ TCAN4X5X_MODE_SEL_MASK, TCAN4X5X_MODE_STANDBY);
+};
+
static int tcan4x5x_disable_wake(struct m_can_classdev *cdev)
{
struct tcan4x5x_priv *tcan4x5x = cdev_to_priv(cdev);
@@ -318,6 +335,14 @@ static const struct tcan4x5x_version_info
return &tcan4x5x_versions[TCAN4X5X];
}
+static void tcan4x5x_get_dt_data(struct m_can_classdev *cdev)
+{
+ struct tcan4x5x_priv *tcan4x5x = cdev_to_priv(cdev);
+
+ tcan4x5x->nwkrq_voltage_vio =
+ of_property_read_bool(cdev->dev->of_node, "ti,nwkrq-voltage-vio");
+}
+
static int tcan4x5x_get_gpios(struct m_can_classdev *cdev,
const struct tcan4x5x_version_info *version_info)
{
@@ -359,6 +384,7 @@ static int tcan4x5x_get_gpios(struct m_can_classdev *cdev,
static const struct m_can_ops tcan4x5x_ops = {
.init = tcan4x5x_init,
+ .deinit = tcan4x5x_deinit,
.read_reg = tcan4x5x_read_reg,
.write_reg = tcan4x5x_write_reg,
.write_fifo = tcan4x5x_write_fifo,
@@ -392,7 +418,7 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
priv->power = NULL;
}
- m_can_class_get_clocks(mcan_class);
+ mcan_class->cclk = devm_clk_get(mcan_class->dev, "cclk");
if (IS_ERR(mcan_class->cclk)) {
dev_err(&spi->dev, "no CAN clock source defined\n");
freq = TCAN4X5X_EXT_CLK_DEF;
@@ -453,6 +479,8 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
goto out_power;
}
+ tcan4x5x_get_dt_data(mcan_class);
+
tcan4x5x_check_wake(priv);
ret = tcan4x5x_write_tcan_reg(mcan_class, TCAN4X5X_INT_EN, 0);
diff --git a/drivers/net/can/m_can/tcan4x5x.h b/drivers/net/can/m_can/tcan4x5x.h
index e62c030d3e1e..203399d5e8cc 100644
--- a/drivers/net/can/m_can/tcan4x5x.h
+++ b/drivers/net/can/m_can/tcan4x5x.h
@@ -42,6 +42,8 @@ struct tcan4x5x_priv {
struct tcan4x5x_map_buf map_buf_rx;
struct tcan4x5x_map_buf map_buf_tx;
+
+ bool nwkrq_voltage_vio;
};
static inline void
diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c
index 4311c1f0eafd..6fcb301ef611 100644
--- a/drivers/net/can/sun4i_can.c
+++ b/drivers/net/can/sun4i_can.c
@@ -570,7 +570,7 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status)
else
state = CAN_STATE_ERROR_ACTIVE;
}
- if (skb && state != CAN_STATE_BUS_OFF) {
+ if (likely(skb) && state != CAN_STATE_BUS_OFF) {
cf->can_id |= CAN_ERR_CNT;
cf->data[6] = txerr;
cf->data[7] = rxerr;
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
index 7d12776ab63e..dcb0bcbe0565 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
@@ -818,7 +818,8 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel)
init_completion(&priv->stop_comp);
init_completion(&priv->flush_comp);
init_completion(&priv->get_busparams_comp);
- priv->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC;
+ priv->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC |
+ CAN_CTRLMODE_BERR_REPORTING;
priv->dev = dev;
priv->netdev = netdev;
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
index 3764b263add3..8e88b5917796 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
@@ -926,6 +926,42 @@ kvaser_usb_hydra_bus_status_to_can_state(const struct kvaser_usb_net_priv *priv,
}
}
+static void kvaser_usb_hydra_change_state(struct kvaser_usb_net_priv *priv,
+ const struct can_berr_counter *bec,
+ struct can_frame *cf,
+ enum can_state new_state)
+{
+ struct net_device *netdev = priv->netdev;
+ enum can_state old_state = priv->can.state;
+ enum can_state tx_state, rx_state;
+
+ tx_state = (bec->txerr >= bec->rxerr) ?
+ new_state : CAN_STATE_ERROR_ACTIVE;
+ rx_state = (bec->txerr <= bec->rxerr) ?
+ new_state : CAN_STATE_ERROR_ACTIVE;
+ can_change_state(netdev, cf, tx_state, rx_state);
+
+ if (new_state == CAN_STATE_BUS_OFF && old_state < CAN_STATE_BUS_OFF) {
+ if (priv->can.restart_ms == 0)
+ kvaser_usb_hydra_send_simple_cmd_async(priv, CMD_STOP_CHIP_REQ);
+
+ can_bus_off(netdev);
+ }
+
+ if (priv->can.restart_ms &&
+ old_state >= CAN_STATE_BUS_OFF &&
+ new_state < CAN_STATE_BUS_OFF) {
+ priv->can.can_stats.restarts++;
+ if (cf)
+ cf->can_id |= CAN_ERR_RESTARTED;
+ }
+ if (cf && new_state != CAN_STATE_BUS_OFF) {
+ cf->can_id |= CAN_ERR_CNT;
+ cf->data[6] = bec->txerr;
+ cf->data[7] = bec->rxerr;
+ }
+}
+
static void kvaser_usb_hydra_update_state(struct kvaser_usb_net_priv *priv,
u8 bus_status,
const struct can_berr_counter *bec)
@@ -951,41 +987,11 @@ static void kvaser_usb_hydra_update_state(struct kvaser_usb_net_priv *priv,
return;
skb = alloc_can_err_skb(netdev, &cf);
- if (skb) {
- enum can_state tx_state, rx_state;
-
- tx_state = (bec->txerr >= bec->rxerr) ?
- new_state : CAN_STATE_ERROR_ACTIVE;
- rx_state = (bec->txerr <= bec->rxerr) ?
- new_state : CAN_STATE_ERROR_ACTIVE;
- can_change_state(netdev, cf, tx_state, rx_state);
- }
-
- if (new_state == CAN_STATE_BUS_OFF && old_state < CAN_STATE_BUS_OFF) {
- if (!priv->can.restart_ms)
- kvaser_usb_hydra_send_simple_cmd_async
- (priv, CMD_STOP_CHIP_REQ);
-
- can_bus_off(netdev);
- }
-
- if (!skb) {
+ kvaser_usb_hydra_change_state(priv, bec, cf, new_state);
+ if (skb)
+ netif_rx(skb);
+ else
netdev_warn(netdev, "No memory left for err_skb\n");
- return;
- }
-
- if (priv->can.restart_ms &&
- old_state >= CAN_STATE_BUS_OFF &&
- new_state < CAN_STATE_BUS_OFF)
- priv->can.can_stats.restarts++;
-
- if (new_state != CAN_STATE_BUS_OFF) {
- cf->can_id |= CAN_ERR_CNT;
- cf->data[6] = bec->txerr;
- cf->data[7] = bec->rxerr;
- }
-
- netif_rx(skb);
}
static void kvaser_usb_hydra_state_event(const struct kvaser_usb *dev,
@@ -1078,9 +1084,8 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv,
{
struct net_device *netdev = priv->netdev;
struct net_device_stats *stats = &netdev->stats;
- struct can_frame *cf;
- struct sk_buff *skb;
- struct skb_shared_hwtstamps *shhwtstamps;
+ struct can_frame *cf = NULL;
+ struct sk_buff *skb = NULL;
struct can_berr_counter bec;
enum can_state new_state, old_state;
u8 bus_status;
@@ -1096,52 +1101,26 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv,
kvaser_usb_hydra_bus_status_to_can_state(priv, bus_status, &bec,
&new_state);
- skb = alloc_can_err_skb(netdev, &cf);
+ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
+ skb = alloc_can_err_skb(netdev, &cf);
+ if (new_state != old_state)
+ kvaser_usb_hydra_change_state(priv, &bec, cf, new_state);
- if (new_state != old_state) {
+ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) {
if (skb) {
- enum can_state tx_state, rx_state;
-
- tx_state = (bec.txerr >= bec.rxerr) ?
- new_state : CAN_STATE_ERROR_ACTIVE;
- rx_state = (bec.txerr <= bec.rxerr) ?
- new_state : CAN_STATE_ERROR_ACTIVE;
-
- can_change_state(netdev, cf, tx_state, rx_state);
-
- if (priv->can.restart_ms &&
- old_state >= CAN_STATE_BUS_OFF &&
- new_state < CAN_STATE_BUS_OFF)
- cf->can_id |= CAN_ERR_RESTARTED;
- }
-
- if (new_state == CAN_STATE_BUS_OFF) {
- if (!priv->can.restart_ms)
- kvaser_usb_hydra_send_simple_cmd_async
- (priv, CMD_STOP_CHIP_REQ);
+ struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
- can_bus_off(netdev);
+ shhwtstamps->hwtstamp = hwtstamp;
+ cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_CNT;
+ cf->data[6] = bec.txerr;
+ cf->data[7] = bec.rxerr;
+ netif_rx(skb);
+ } else {
+ stats->rx_dropped++;
+ netdev_warn(netdev, "No memory left for err_skb\n");
}
}
- if (!skb) {
- stats->rx_dropped++;
- netdev_warn(netdev, "No memory left for err_skb\n");
- return;
- }
-
- shhwtstamps = skb_hwtstamps(skb);
- shhwtstamps->hwtstamp = hwtstamp;
-
- cf->can_id |= CAN_ERR_BUSERROR;
- if (new_state != CAN_STATE_BUS_OFF) {
- cf->can_id |= CAN_ERR_CNT;
- cf->data[6] = bec.txerr;
- cf->data[7] = bec.rxerr;
- }
-
- netif_rx(skb);
-
priv->bec.txerr = bec.txerr;
priv->bec.rxerr = bec.rxerr;
}
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
index 6b9122ab1464..6a45adcc45bd 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
@@ -1120,10 +1120,8 @@ kvaser_usb_leaf_rx_error_update_can_state(struct kvaser_usb_net_priv *priv,
static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
const struct kvaser_usb_err_summary *es)
{
- struct can_frame *cf;
- struct can_frame tmp_cf = { .can_id = CAN_ERR_FLAG,
- .len = CAN_ERR_DLC };
- struct sk_buff *skb;
+ struct can_frame *cf = NULL;
+ struct sk_buff *skb = NULL;
struct net_device_stats *stats;
struct kvaser_usb_net_priv *priv;
struct kvaser_usb_net_leaf_priv *leaf;
@@ -1143,18 +1141,10 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
if (!netif_running(priv->netdev))
return;
- /* Update all of the CAN interface's state and error counters before
- * trying any memory allocation that can actually fail with -ENOMEM.
- *
- * We send a temporary stack-allocated error CAN frame to
- * can_change_state() for the very same reason.
- *
- * TODO: Split can_change_state() responsibility between updating the
- * CAN interface's state and counters, and the setting up of CAN error
- * frame ID and data to userspace. Remove stack allocation afterwards.
- */
old_state = priv->can.state;
- kvaser_usb_leaf_rx_error_update_can_state(priv, es, &tmp_cf);
+ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING)
+ skb = alloc_can_err_skb(priv->netdev, &cf);
+ kvaser_usb_leaf_rx_error_update_can_state(priv, es, cf);
new_state = priv->can.state;
/* If there are errors, request status updates periodically as we do
@@ -1168,13 +1158,6 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
schedule_delayed_work(&leaf->chip_state_req_work,
msecs_to_jiffies(500));
- skb = alloc_can_err_skb(priv->netdev, &cf);
- if (!skb) {
- stats->rx_dropped++;
- return;
- }
- memcpy(cf, &tmp_cf, sizeof(*cf));
-
if (new_state != old_state) {
if (es->status &
(M16C_STATE_BUS_OFF | M16C_STATE_BUS_RESET)) {
@@ -1187,11 +1170,20 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev,
if (priv->can.restart_ms &&
old_state == CAN_STATE_BUS_OFF &&
new_state < CAN_STATE_BUS_OFF) {
- cf->can_id |= CAN_ERR_RESTARTED;
+ if (cf)
+ cf->can_id |= CAN_ERR_RESTARTED;
netif_carrier_on(priv->netdev);
}
}
+ if (!skb) {
+ if (priv->can.ctrlmode & CAN_CTRLMODE_BERR_REPORTING) {
+ stats->rx_dropped++;
+ netdev_warn(priv->netdev, "No memory left for err_skb\n");
+ }
+ return;
+ }
+
switch (dev->driver_info->family) {
case KVASER_LEAF:
if (es->leaf.error_factor) {
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 0561b60f668f..79dc77835681 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2232,12 +2232,6 @@ bool b53_support_eee(struct dsa_switch *ds, int port)
}
EXPORT_SYMBOL(b53_support_eee);
-int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e)
-{
- return 0;
-}
-EXPORT_SYMBOL(b53_get_mac_eee);
-
int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e)
{
struct b53_device *dev = ds->priv;
@@ -2299,7 +2293,6 @@ static const struct dsa_switch_ops b53_switch_ops = {
.port_enable = b53_enable_port,
.port_disable = b53_disable_port,
.support_eee = b53_support_eee,
- .get_mac_eee = b53_get_mac_eee,
.set_mac_eee = b53_set_mac_eee,
.port_bridge_join = b53_br_join,
.port_bridge_leave = b53_br_leave,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 99e5cfc98ae8..9e9b5bc0c5d6 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -385,7 +385,6 @@ void b53_disable_port(struct dsa_switch *ds, int port);
void b53_brcm_hdr_setup(struct dsa_switch *ds, int port);
int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy);
bool b53_support_eee(struct dsa_switch *ds, int port);
-int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e);
int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e);
#endif
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index a53fb6191e6b..fa2bf3fa9019 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1233,7 +1233,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.port_enable = bcm_sf2_port_setup,
.port_disable = bcm_sf2_port_disable,
.support_eee = b53_support_eee,
- .get_mac_eee = b53_get_mac_eee,
.set_mac_eee = b53_set_mac_eee,
.port_bridge_join = b53_br_join,
.port_bridge_leave = b53_br_leave,
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index e3512e324572..89f0796894af 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -3489,20 +3489,6 @@ static bool ksz_support_eee(struct dsa_switch *ds, int port)
return false;
}
-static int ksz_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_keee *e)
-{
- /* There is no documented control of Tx LPI configuration. */
- e->tx_lpi_enabled = true;
-
- /* There is no documented control of Tx LPI timer. According to tests
- * Tx LPI timer seems to be set by default to minimal value.
- */
- e->tx_lpi_timer = 0;
-
- return 0;
-}
-
static int ksz_set_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_keee *e)
{
@@ -4672,7 +4658,6 @@ static const struct dsa_switch_ops ksz_switch_ops = {
.cls_flower_del = ksz_cls_flower_del,
.port_setup_tc = ksz_setup_tc,
.support_eee = ksz_support_eee,
- .get_mac_eee = ksz_get_mac_eee,
.set_mac_eee = ksz_set_mac_eee,
.port_get_default_prio = ksz_port_get_default_prio,
.port_set_default_prio = ksz_port_set_default_prio,
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 9605febd3573..d2d0f091e49e 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -3085,18 +3085,6 @@ mt753x_setup(struct dsa_switch *ds)
return ret;
}
-static int mt753x_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_keee *e)
-{
- struct mt7530_priv *priv = ds->priv;
- u32 eeecr = mt7530_read(priv, MT753X_PMEEECR_P(port));
-
- e->tx_lpi_enabled = !(eeecr & LPI_MODE_EN);
- e->tx_lpi_timer = LPI_THRESH_GET(eeecr);
-
- return 0;
-}
-
static int mt753x_set_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_keee *e)
{
@@ -3239,7 +3227,6 @@ const struct dsa_switch_ops mt7530_switch_ops = {
.port_mirror_del = mt753x_port_mirror_del,
.phylink_get_caps = mt753x_phylink_get_caps,
.support_eee = dsa_supports_eee,
- .get_mac_eee = mt753x_get_mac_eee,
.set_mac_eee = mt753x_set_mac_eee,
.conduit_state_change = mt753x_conduit_state_change,
.port_setup_tc = mt753x_setup_tc,
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 570c8642d387..35ae084af166 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -1513,13 +1513,6 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port,
mv88e6xxx_reg_unlock(chip);
}
-static int mv88e6xxx_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_keee *e)
-{
- /* Nothing to do on the port's MAC */
- return 0;
-}
-
static int mv88e6xxx_set_mac_eee(struct dsa_switch *ds, int port,
struct ethtool_keee *e)
{
@@ -7100,7 +7093,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.port_max_mtu = mv88e6xxx_get_max_mtu,
.port_change_mtu = mv88e6xxx_change_mtu,
.support_eee = dsa_supports_eee,
- .get_mac_eee = mv88e6xxx_get_mac_eee,
.set_mac_eee = mv88e6xxx_set_mac_eee,
.get_eeprom_len = mv88e6xxx_get_eeprom_len,
.get_eeprom = mv88e6xxx_get_eeprom,
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index 90e24bc00b99..750fc76a6e11 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -1019,7 +1019,7 @@ qca8k_setup_mdio_bus(struct qca8k_priv *priv)
of_get_phy_mode(port, &mode);
- if (of_property_read_bool(port, "phy-handle") &&
+ if (of_property_present(port, "phy-handle") &&
mode != PHY_INTERFACE_MODE_INTERNAL)
external_mdio_mask |= BIT(reg);
else
@@ -2017,7 +2017,6 @@ static const struct dsa_switch_ops qca8k_switch_ops = {
.get_sset_count = qca8k_get_sset_count,
.set_ageing_time = qca8k_set_ageing_time,
.support_eee = dsa_supports_eee,
- .get_mac_eee = qca8k_get_mac_eee,
.set_mac_eee = qca8k_set_mac_eee,
.port_enable = qca8k_port_enable,
.port_disable = qca8k_port_disable,
diff --git a/drivers/net/dsa/qca/qca8k-common.c b/drivers/net/dsa/qca/qca8k-common.c
index 560c74c4ac3d..13005f10edb7 100644
--- a/drivers/net/dsa/qca/qca8k-common.c
+++ b/drivers/net/dsa/qca/qca8k-common.c
@@ -557,13 +557,6 @@ exit:
return ret;
}
-int qca8k_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_keee *e)
-{
- /* Nothing to do on the port's MAC */
- return 0;
-}
-
static int qca8k_port_configure_learning(struct dsa_switch *ds, int port,
bool learning)
{
diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h
index 24962a395754..d046679265fa 100644
--- a/drivers/net/dsa/qca/qca8k.h
+++ b/drivers/net/dsa/qca/qca8k.h
@@ -520,7 +520,6 @@ int qca8k_get_sset_count(struct dsa_switch *ds, int port, int sset);
/* Common eee function */
int qca8k_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *eee);
-int qca8k_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e);
/* Common bridge function */
void qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state);
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index 2681889162a2..44971e71991f 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -118,7 +118,7 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
if (err && err != -EIO)
return err;
- listlen = fw_list.num_fw_slots;
+ listlen = min(fw_list.num_fw_slots, ARRAY_SIZE(fw_list.fw_names));
for (i = 0; i < listlen; i++) {
if (i < ARRAY_SIZE(fw_slotnames))
strscpy(buf, fw_slotnames[i], sizeof(buf));
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 46edea75e062..884d42db5554 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -2897,6 +2897,13 @@ static int bnxt_hwrm_handler(struct bnxt *bp, struct tx_cmp *txcmp)
return 0;
}
+static bool bnxt_vnic_is_active(struct bnxt *bp)
+{
+ struct bnxt_vnic_info *vnic = &bp->vnic_info[0];
+
+ return vnic->fw_vnic_id != INVALID_HW_RING_ID && vnic->mru > 0;
+}
+
static irqreturn_t bnxt_msix(int irq, void *dev_instance)
{
struct bnxt_napi *bnapi = dev_instance;
@@ -3164,7 +3171,7 @@ static int bnxt_poll(struct napi_struct *napi, int budget)
break;
}
}
- if (bp->flags & BNXT_FLAG_DIM) {
+ if ((bp->flags & BNXT_FLAG_DIM) && bnxt_vnic_is_active(bp)) {
struct dim_sample dim_sample = {};
dim_update_sample(cpr->event_ctr,
@@ -3295,7 +3302,7 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
poll_done:
cpr_rx = &cpr->cp_ring_arr[0];
if (cpr_rx->cp_ring_type == BNXT_NQ_HDL_TYPE_RX &&
- (bp->flags & BNXT_FLAG_DIM)) {
+ (bp->flags & BNXT_FLAG_DIM) && bnxt_vnic_is_active(bp)) {
struct dim_sample dim_sample = {};
dim_update_sample(cpr->event_ctr,
@@ -7266,6 +7273,26 @@ err_out:
return rc;
}
+static void bnxt_cancel_dim(struct bnxt *bp)
+{
+ int i;
+
+ /* DIM work is initialized in bnxt_enable_napi(). Proceed only
+ * if NAPI is enabled.
+ */
+ if (!bp->bnapi || test_bit(BNXT_STATE_NAPI_DISABLED, &bp->state))
+ return;
+
+ /* Make sure NAPI sees that the VNIC is disabled */
+ synchronize_net();
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+ struct bnxt_napi *bnapi = rxr->bnapi;
+
+ cancel_work_sync(&bnapi->cp_ring.dim.work);
+ }
+}
+
static int hwrm_ring_free_send_msg(struct bnxt *bp,
struct bnxt_ring_struct *ring,
u32 ring_type, int cmpl_ring_id)
@@ -7366,6 +7393,7 @@ static void bnxt_hwrm_ring_free(struct bnxt *bp, bool close_path)
}
}
+ bnxt_cancel_dim(bp);
for (i = 0; i < bp->rx_nr_rings; i++) {
bnxt_hwrm_rx_ring_free(bp, &bp->rx_ring[i], close_path);
bnxt_hwrm_rx_agg_ring_free(bp, &bp->rx_ring[i], close_path);
@@ -11330,8 +11358,6 @@ static void bnxt_disable_napi(struct bnxt *bp)
if (bnapi->in_reset)
cpr->sw_stats->rx.rx_resets++;
napi_disable(&bnapi->napi);
- if (bnapi->rx_ring)
- cancel_work_sync(&cpr->dim.work);
}
}
@@ -15613,8 +15639,10 @@ static int bnxt_queue_stop(struct net_device *dev, void *qmem, int idx)
bnxt_hwrm_vnic_update(bp, vnic,
VNIC_UPDATE_REQ_ENABLES_MRU_VALID);
}
-
+ /* Make sure NAPI sees that the VNIC is disabled */
+ synchronize_net();
rxr = &bp->rx_ring[idx];
+ cancel_work_sync(&rxr->bnapi->cp_ring.dim.work);
bnxt_hwrm_rx_ring_free(bp, rxr, false);
bnxt_hwrm_rx_agg_ring_free(bp, rxr, false);
rxr->rx_next_cons = 0;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 94c6a0928ca0..7c88b9f05c4c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -208,7 +208,7 @@ int bnxt_send_msg(struct bnxt_en_dev *edev,
rc = hwrm_req_replace(bp, req, fw_msg->msg, fw_msg->msg_len);
if (rc)
- return rc;
+ goto drop_req;
hwrm_req_timeout(bp, req, fw_msg->timeout);
resp = hwrm_req_hold(bp, req);
@@ -220,6 +220,7 @@ int bnxt_send_msg(struct bnxt_en_dev *edev,
memcpy(fw_msg->resp, resp, resp_len);
}
+drop_req:
hwrm_req_drop(bp, req);
return rc;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index bc3af0054406..2f0b3e389e62 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1799,7 +1799,10 @@ void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid,
struct adapter *adap = container_of(t, struct adapter, tids);
struct sk_buff *skb;
- WARN_ON(tid_out_of_range(&adap->tids, tid));
+ if (tid_out_of_range(&adap->tids, tid)) {
+ dev_err(adap->pdev_dev, "tid %d out of range\n", tid);
+ return;
+ }
if (t->tid_tab[tid - adap->tids.tid_base]) {
t->tid_tab[tid - adap->tids.tid_base] = NULL;
@@ -6559,6 +6562,9 @@ static void cxgb4_advance_esn_state(struct xfrm_state *x)
{
struct adapter *adap = netdev2adap(x->xso.dev);
+ if (x->xso.dir != XFRM_DEV_OFFLOAD_IN)
+ return;
+
if (!mutex_trylock(&uld_mutex)) {
dev_dbg(adap->pdev_dev,
"crypto uld critical resource is under use\n");
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 9913952ccb42..49f6cab01ed5 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -109,7 +109,7 @@ static struct enic_intr_mod_table mod_table[ENIC_MAX_COALESCE_TIMERS + 1] = {
static struct enic_intr_mod_range mod_range[ENIC_MAX_LINK_SPEEDS] = {
{0, 0}, /* 0 - 4 Gbps */
{0, 3}, /* 4 - 10 Gbps */
- {3, 6}, /* 10 - 40 Gbps */
+ {3, 6}, /* 10+ Gbps */
};
static void enic_init_affinity_hint(struct enic *enic)
@@ -428,6 +428,36 @@ static void enic_mtu_check(struct enic *enic)
}
}
+static void enic_set_rx_coal_setting(struct enic *enic)
+{
+ unsigned int speed;
+ int index = -1;
+ struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
+
+ /* 1. Read the link speed from fw
+ * 2. Pick the default range for the speed
+ * 3. Update it in enic->rx_coalesce_setting
+ */
+ speed = vnic_dev_port_speed(enic->vdev);
+ if (speed > ENIC_LINK_SPEED_10G)
+ index = ENIC_LINK_40G_INDEX;
+ else if (speed > ENIC_LINK_SPEED_4G)
+ index = ENIC_LINK_10G_INDEX;
+ else
+ index = ENIC_LINK_4G_INDEX;
+
+ rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
+ rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
+ rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;
+
+ /* Start with the value provided by UCSM */
+ for (index = 0; index < enic->rq_count; index++)
+ enic->cq[index].cur_rx_coal_timeval =
+ enic->config.intr_timer_usec;
+
+ rx_coal->use_adaptive_rx_coalesce = 1;
+}
+
static void enic_link_check(struct enic *enic)
{
int link_status = vnic_dev_link_status(enic->vdev);
@@ -436,6 +466,7 @@ static void enic_link_check(struct enic *enic)
if (link_status && !carrier_ok) {
netdev_info(enic->netdev, "Link UP\n");
netif_carrier_on(enic->netdev);
+ enic_set_rx_coal_setting(enic);
} else if (!link_status && carrier_ok) {
netdev_info(enic->netdev, "Link DOWN\n");
netif_carrier_off(enic->netdev);
@@ -1901,36 +1932,6 @@ static void enic_synchronize_irqs(struct enic *enic)
}
}
-static void enic_set_rx_coal_setting(struct enic *enic)
-{
- unsigned int speed;
- int index = -1;
- struct enic_rx_coal *rx_coal = &enic->rx_coalesce_setting;
-
- /* 1. Read the link speed from fw
- * 2. Pick the default range for the speed
- * 3. Update it in enic->rx_coalesce_setting
- */
- speed = vnic_dev_port_speed(enic->vdev);
- if (ENIC_LINK_SPEED_10G < speed)
- index = ENIC_LINK_40G_INDEX;
- else if (ENIC_LINK_SPEED_4G < speed)
- index = ENIC_LINK_10G_INDEX;
- else
- index = ENIC_LINK_4G_INDEX;
-
- rx_coal->small_pkt_range_start = mod_range[index].small_pkt_range_start;
- rx_coal->large_pkt_range_start = mod_range[index].large_pkt_range_start;
- rx_coal->range_end = ENIC_RX_COALESCE_RANGE_END;
-
- /* Start with the value provided by UCSM */
- for (index = 0; index < enic->rq_count; index++)
- enic->cq[index].cur_rx_coal_timeval =
- enic->config.intr_timer_usec;
-
- rx_coal->use_adaptive_rx_coalesce = 1;
-}
-
static int enic_dev_notify_set(struct enic *enic)
{
int err;
@@ -3063,7 +3064,6 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
timer_setup(&enic->notify_timer, enic_notify_timer, 0);
enic_rfs_flw_tbl_init(enic);
- enic_set_rx_coal_setting(enic);
INIT_WORK(&enic->reset, enic_reset);
INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset);
INIT_WORK(&enic->change_mtu_work, enic_change_mtu_work);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index f47f8177a93b..88510f822759 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -1434,7 +1434,7 @@ static void ugeth_activate(struct ucc_geth_private *ugeth)
/* allow to xmit again */
netif_tx_wake_all_queues(ugeth->ndev);
- __netdev_watchdog_up(ugeth->ndev);
+ netdev_watchdog_up(ugeth->ndev);
}
/* Initialize TBI PHY interface for communicating with the
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 8a8f6ab12a98..533e659b15b3 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -2241,14 +2241,18 @@ static void gve_service_task(struct work_struct *work)
static void gve_set_netdev_xdp_features(struct gve_priv *priv)
{
+ xdp_features_t xdp_features;
+
if (priv->queue_format == GVE_GQI_QPL_FORMAT) {
- priv->dev->xdp_features = NETDEV_XDP_ACT_BASIC;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_REDIRECT;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
- priv->dev->xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
+ xdp_features = NETDEV_XDP_ACT_BASIC;
+ xdp_features |= NETDEV_XDP_ACT_REDIRECT;
+ xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
+ xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY;
} else {
- priv->dev->xdp_features = 0;
+ xdp_features = 0;
}
+
+ xdp_set_features_flag(priv->dev, xdp_features);
}
static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 710a8f9f2248..12ba380eb701 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -916,9 +916,6 @@ struct hnae3_handle {
u8 netdev_flags;
struct dentry *hnae3_dbgfs;
- /* protects concurrent contention between debugfs commands */
- struct mutex dbgfs_lock;
- char **dbgfs_buf;
/* Network interface message level enabled bits */
u32 msg_enable;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 807eb3bbb11c..9bbece25552b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -1260,69 +1260,55 @@ static int hns3_dbg_read_cmd(struct hns3_dbg_data *dbg_data,
static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
size_t count, loff_t *ppos)
{
- struct hns3_dbg_data *dbg_data = filp->private_data;
+ char *buf = filp->private_data;
+
+ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+static int hns3_dbg_open(struct inode *inode, struct file *filp)
+{
+ struct hns3_dbg_data *dbg_data = inode->i_private;
struct hnae3_handle *handle = dbg_data->handle;
struct hns3_nic_priv *priv = handle->priv;
- ssize_t size = 0;
- char **save_buf;
- char *read_buf;
u32 index;
+ char *buf;
int ret;
+ if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
+ test_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
+ return -EBUSY;
+
ret = hns3_dbg_get_cmd_index(dbg_data, &index);
if (ret)
return ret;
- mutex_lock(&handle->dbgfs_lock);
- save_buf = &handle->dbgfs_buf[index];
-
- if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
- test_bit(HNS3_NIC_STATE_RESETTING, &priv->state)) {
- ret = -EBUSY;
- goto out;
- }
-
- if (*save_buf) {
- read_buf = *save_buf;
- } else {
- read_buf = kvzalloc(hns3_dbg_cmd[index].buf_len, GFP_KERNEL);
- if (!read_buf) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* save the buffer addr until the last read operation */
- *save_buf = read_buf;
-
- /* get data ready for the first time to read */
- ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd,
- read_buf, hns3_dbg_cmd[index].buf_len);
- if (ret)
- goto out;
- }
+ buf = kvzalloc(hns3_dbg_cmd[index].buf_len, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
- size = simple_read_from_buffer(buffer, count, ppos, read_buf,
- strlen(read_buf));
- if (size > 0) {
- mutex_unlock(&handle->dbgfs_lock);
- return size;
+ ret = hns3_dbg_read_cmd(dbg_data, hns3_dbg_cmd[index].cmd,
+ buf, hns3_dbg_cmd[index].buf_len);
+ if (ret) {
+ kvfree(buf);
+ return ret;
}
-out:
- /* free the buffer for the last read operation */
- if (*save_buf) {
- kvfree(*save_buf);
- *save_buf = NULL;
- }
+ filp->private_data = buf;
+ return 0;
+}
- mutex_unlock(&handle->dbgfs_lock);
- return ret;
+static int hns3_dbg_release(struct inode *inode, struct file *filp)
+{
+ kvfree(filp->private_data);
+ filp->private_data = NULL;
+ return 0;
}
static const struct file_operations hns3_dbg_fops = {
.owner = THIS_MODULE,
- .open = simple_open,
+ .open = hns3_dbg_open,
.read = hns3_dbg_read,
+ .release = hns3_dbg_release,
};
static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, u32 cmd)
@@ -1379,13 +1365,6 @@ int hns3_dbg_init(struct hnae3_handle *handle)
int ret;
u32 i;
- handle->dbgfs_buf = devm_kcalloc(&handle->pdev->dev,
- ARRAY_SIZE(hns3_dbg_cmd),
- sizeof(*handle->dbgfs_buf),
- GFP_KERNEL);
- if (!handle->dbgfs_buf)
- return -ENOMEM;
-
hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry =
debugfs_create_dir(name, hns3_dbgfs_root);
handle->hnae3_dbgfs = hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry;
@@ -1395,8 +1374,6 @@ int hns3_dbg_init(struct hnae3_handle *handle)
debugfs_create_dir(hns3_dbg_dentry[i].name,
handle->hnae3_dbgfs);
- mutex_init(&handle->dbgfs_lock);
-
for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++) {
if ((hns3_dbg_cmd[i].cmd == HNAE3_DBG_CMD_TM_NODES &&
ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2) ||
@@ -1425,24 +1402,13 @@ int hns3_dbg_init(struct hnae3_handle *handle)
out:
debugfs_remove_recursive(handle->hnae3_dbgfs);
handle->hnae3_dbgfs = NULL;
- mutex_destroy(&handle->dbgfs_lock);
return ret;
}
void hns3_dbg_uninit(struct hnae3_handle *handle)
{
- u32 i;
-
debugfs_remove_recursive(handle->hnae3_dbgfs);
handle->hnae3_dbgfs = NULL;
-
- for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
- if (handle->dbgfs_buf[i]) {
- kvfree(handle->dbgfs_buf[i]);
- handle->dbgfs_buf[i] = NULL;
- }
-
- mutex_destroy(&handle->dbgfs_lock);
}
void hns3_dbg_register_debugfs(const char *debugfs_dir_name)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 43377a7b2426..a7e3b22f641c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2452,7 +2452,6 @@ static int hns3_nic_set_features(struct net_device *netdev,
return ret;
}
- netdev->features = features;
return 0;
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 05942fa78b11..db7845009252 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6,6 +6,7 @@
#include <linux/etherdevice.h>
#include <linux/init.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
@@ -3574,6 +3575,17 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
return ret;
}
+static void hclge_set_reset_pending(struct hclge_dev *hdev,
+ enum hnae3_reset_type reset_type)
+{
+ /* When an incorrect reset type is executed, the get_reset_level
+ * function generates the HNAE3_NONE_RESET flag. As a result, this
+ * type do not need to pending.
+ */
+ if (reset_type != HNAE3_NONE_RESET)
+ set_bit(reset_type, &hdev->reset_pending);
+}
+
static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
{
u32 cmdq_src_reg, msix_src_reg, hw_err_src_reg;
@@ -3594,7 +3606,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
*/
if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & msix_src_reg) {
dev_info(&hdev->pdev->dev, "IMP reset interrupt\n");
- set_bit(HNAE3_IMP_RESET, &hdev->reset_pending);
+ hclge_set_reset_pending(hdev, HNAE3_IMP_RESET);
set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state);
*clearval = BIT(HCLGE_VECTOR0_IMPRESET_INT_B);
hdev->rst_stats.imp_rst_cnt++;
@@ -3604,7 +3616,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & msix_src_reg) {
dev_info(&hdev->pdev->dev, "global reset interrupt\n");
set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state);
- set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
+ hclge_set_reset_pending(hdev, HNAE3_GLOBAL_RESET);
*clearval = BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B);
hdev->rst_stats.global_rst_cnt++;
return HCLGE_VECTOR0_EVENT_RST;
@@ -3759,7 +3771,7 @@ static int hclge_misc_irq_init(struct hclge_dev *hdev)
snprintf(hdev->misc_vector.name, HNAE3_INT_NAME_LEN, "%s-misc-%s",
HCLGE_NAME, pci_name(hdev->pdev));
ret = request_irq(hdev->misc_vector.vector_irq, hclge_misc_irq_handle,
- 0, hdev->misc_vector.name, hdev);
+ IRQF_NO_AUTOEN, hdev->misc_vector.name, hdev);
if (ret) {
hclge_free_vector(hdev, 0);
dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n",
@@ -4052,7 +4064,7 @@ static void hclge_do_reset(struct hclge_dev *hdev)
case HNAE3_FUNC_RESET:
dev_info(&pdev->dev, "PF reset requested\n");
/* schedule again to check later */
- set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending);
+ hclge_set_reset_pending(hdev, HNAE3_FUNC_RESET);
hclge_reset_task_schedule(hdev);
break;
default:
@@ -4086,6 +4098,8 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
clear_bit(HNAE3_FLR_RESET, addr);
}
+ clear_bit(HNAE3_NONE_RESET, addr);
+
if (hdev->reset_type != HNAE3_NONE_RESET &&
rst_level < hdev->reset_type)
return HNAE3_NONE_RESET;
@@ -4227,7 +4241,7 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
return false;
} else if (hdev->rst_stats.reset_fail_cnt < MAX_RESET_FAIL_CNT) {
hdev->rst_stats.reset_fail_cnt++;
- set_bit(hdev->reset_type, &hdev->reset_pending);
+ hclge_set_reset_pending(hdev, hdev->reset_type);
dev_info(&hdev->pdev->dev,
"re-schedule reset task(%u)\n",
hdev->rst_stats.reset_fail_cnt);
@@ -4470,8 +4484,20 @@ static void hclge_reset_event(struct pci_dev *pdev, struct hnae3_handle *handle)
static void hclge_set_def_reset_request(struct hnae3_ae_dev *ae_dev,
enum hnae3_reset_type rst_type)
{
+#define HCLGE_SUPPORT_RESET_TYPE \
+ (BIT(HNAE3_FLR_RESET) | BIT(HNAE3_FUNC_RESET) | \
+ BIT(HNAE3_GLOBAL_RESET) | BIT(HNAE3_IMP_RESET))
+
struct hclge_dev *hdev = ae_dev->priv;
+ if (!(BIT(rst_type) & HCLGE_SUPPORT_RESET_TYPE)) {
+ /* To prevent reset triggered by hclge_reset_event */
+ set_bit(HNAE3_NONE_RESET, &hdev->default_reset_request);
+ dev_warn(&hdev->pdev->dev, "unsupported reset type %d\n",
+ rst_type);
+ return;
+ }
+
set_bit(rst_type, &hdev->default_reset_request);
}
@@ -11881,9 +11907,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
hclge_init_rxd_adv_layout(hdev);
- /* Enable MISC vector(vector0) */
- hclge_enable_vector(&hdev->misc_vector, true);
-
ret = hclge_init_wol(hdev);
if (ret)
dev_warn(&pdev->dev,
@@ -11896,6 +11919,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
hclge_state_init(hdev);
hdev->last_reset_time = jiffies;
+ /* Enable MISC vector(vector0) */
+ enable_irq(hdev->misc_vector.vector_irq);
+ hclge_enable_vector(&hdev->misc_vector, true);
+
dev_info(&hdev->pdev->dev, "%s driver initialization finished.\n",
HCLGE_DRIVER_NAME);
@@ -12301,7 +12328,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
/* Disable MISC vector(vector0) */
hclge_enable_vector(&hdev->misc_vector, false);
- synchronize_irq(hdev->misc_vector.vector_irq);
+ disable_irq(hdev->misc_vector.vector_irq);
/* Disable all hw interrupts */
hclge_config_mac_tnl_int(hdev, false);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
index 5505caea88e9..bab16c2191b2 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c
@@ -58,6 +58,9 @@ bool hclge_ptp_set_tx_info(struct hnae3_handle *handle, struct sk_buff *skb)
struct hclge_dev *hdev = vport->back;
struct hclge_ptp *ptp = hdev->ptp;
+ if (!ptp)
+ return false;
+
if (!test_bit(HCLGE_PTP_FLAG_TX_EN, &ptp->flags) ||
test_and_set_bit(HCLGE_STATE_PTP_TX_HANDLING, &hdev->state)) {
ptp->tx_skipped++;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c
index 43c1c18fa81f..8c057192aae6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_regs.c
@@ -510,9 +510,9 @@ out:
static int hclge_fetch_pf_reg(struct hclge_dev *hdev, void *data,
struct hnae3_knic_private_info *kinfo)
{
-#define HCLGE_RING_REG_OFFSET 0x200
#define HCLGE_RING_INT_REG_OFFSET 0x4
+ struct hnae3_queue *tqp;
int i, j, reg_num;
int data_num_sum;
u32 *reg = data;
@@ -533,10 +533,11 @@ static int hclge_fetch_pf_reg(struct hclge_dev *hdev, void *data,
reg_num = ARRAY_SIZE(ring_reg_addr_list);
for (j = 0; j < kinfo->num_tqps; j++) {
reg += hclge_reg_get_tlv(HCLGE_REG_TAG_RING, reg_num, reg);
+ tqp = kinfo->tqp[j];
for (i = 0; i < reg_num; i++)
- *reg++ = hclge_read_dev(&hdev->hw,
- ring_reg_addr_list[i] +
- HCLGE_RING_REG_OFFSET * j);
+ *reg++ = readl_relaxed(tqp->io_base -
+ HCLGE_TQP_REG_OFFSET +
+ ring_reg_addr_list[i]);
}
data_num_sum += (reg_num + HCLGE_REG_TLV_SPACE) * kinfo->num_tqps;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 2f6ffb88e700..163c6e59ea4c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1393,6 +1393,17 @@ static int hclgevf_notify_roce_client(struct hclgevf_dev *hdev,
return ret;
}
+static void hclgevf_set_reset_pending(struct hclgevf_dev *hdev,
+ enum hnae3_reset_type reset_type)
+{
+ /* When an incorrect reset type is executed, the get_reset_level
+ * function generates the HNAE3_NONE_RESET flag. As a result, this
+ * type do not need to pending.
+ */
+ if (reset_type != HNAE3_NONE_RESET)
+ set_bit(reset_type, &hdev->reset_pending);
+}
+
static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
{
#define HCLGEVF_RESET_WAIT_US 20000
@@ -1542,7 +1553,7 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
hdev->rst_stats.rst_fail_cnt);
if (hdev->rst_stats.rst_fail_cnt < HCLGEVF_RESET_MAX_FAIL_CNT)
- set_bit(hdev->reset_type, &hdev->reset_pending);
+ hclgevf_set_reset_pending(hdev, hdev->reset_type);
if (hclgevf_is_reset_pending(hdev)) {
set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
@@ -1662,6 +1673,8 @@ static enum hnae3_reset_type hclgevf_get_reset_level(unsigned long *addr)
clear_bit(HNAE3_FLR_RESET, addr);
}
+ clear_bit(HNAE3_NONE_RESET, addr);
+
return rst_level;
}
@@ -1671,14 +1684,15 @@ static void hclgevf_reset_event(struct pci_dev *pdev,
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(pdev);
struct hclgevf_dev *hdev = ae_dev->priv;
- dev_info(&hdev->pdev->dev, "received reset request from VF enet\n");
-
if (hdev->default_reset_request)
hdev->reset_level =
hclgevf_get_reset_level(&hdev->default_reset_request);
else
hdev->reset_level = HNAE3_VF_FUNC_RESET;
+ dev_info(&hdev->pdev->dev, "received reset request from VF enet, reset level is %d\n",
+ hdev->reset_level);
+
/* reset of this VF requested */
set_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state);
hclgevf_reset_task_schedule(hdev);
@@ -1689,8 +1703,20 @@ static void hclgevf_reset_event(struct pci_dev *pdev,
static void hclgevf_set_def_reset_request(struct hnae3_ae_dev *ae_dev,
enum hnae3_reset_type rst_type)
{
+#define HCLGEVF_SUPPORT_RESET_TYPE \
+ (BIT(HNAE3_VF_RESET) | BIT(HNAE3_VF_FUNC_RESET) | \
+ BIT(HNAE3_VF_PF_FUNC_RESET) | BIT(HNAE3_VF_FULL_RESET) | \
+ BIT(HNAE3_FLR_RESET) | BIT(HNAE3_VF_EXP_RESET))
+
struct hclgevf_dev *hdev = ae_dev->priv;
+ if (!(BIT(rst_type) & HCLGEVF_SUPPORT_RESET_TYPE)) {
+ /* To prevent reset triggered by hclge_reset_event */
+ set_bit(HNAE3_NONE_RESET, &hdev->default_reset_request);
+ dev_info(&hdev->pdev->dev, "unsupported reset type %d\n",
+ rst_type);
+ return;
+ }
set_bit(rst_type, &hdev->default_reset_request);
}
@@ -1847,14 +1873,14 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev)
*/
if (hdev->reset_attempts > HCLGEVF_MAX_RESET_ATTEMPTS_CNT) {
/* prepare for full reset of stack + pcie interface */
- set_bit(HNAE3_VF_FULL_RESET, &hdev->reset_pending);
+ hclgevf_set_reset_pending(hdev, HNAE3_VF_FULL_RESET);
/* "defer" schedule the reset task again */
set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
} else {
hdev->reset_attempts++;
- set_bit(hdev->reset_level, &hdev->reset_pending);
+ hclgevf_set_reset_pending(hdev, hdev->reset_level);
set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
}
hclgevf_reset_task_schedule(hdev);
@@ -1977,7 +2003,7 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING);
dev_info(&hdev->pdev->dev,
"receive reset interrupt 0x%x!\n", rst_ing_reg);
- set_bit(HNAE3_VF_RESET, &hdev->reset_pending);
+ hclgevf_set_reset_pending(hdev, HNAE3_VF_RESET);
set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
set_bit(HCLGE_COMM_STATE_CMD_DISABLE, &hdev->hw.hw.comm_state);
*clearval = ~(1U << HCLGEVF_VECTOR0_RST_INT_B);
@@ -2287,6 +2313,8 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state);
INIT_DELAYED_WORK(&hdev->service_task, hclgevf_service_task);
+ /* timer needs to be initialized before misc irq */
+ timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0);
mutex_init(&hdev->mbx_resp.mbx_mutex);
sema_init(&hdev->reset_sem, 1);
@@ -2986,7 +3014,6 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
HCLGEVF_DRIVER_NAME);
hclgevf_task_schedule(hdev, round_jiffies_relative(HZ));
- timer_setup(&hdev->reset_timer, hclgevf_reset_timer, 0);
return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c
index 6db415d8b917..7d9d9dbc7560 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_regs.c
@@ -123,10 +123,10 @@ int hclgevf_get_regs_len(struct hnae3_handle *handle)
void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version,
void *data)
{
-#define HCLGEVF_RING_REG_OFFSET 0x200
#define HCLGEVF_RING_INT_REG_OFFSET 0x4
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ struct hnae3_queue *tqp;
int i, j, reg_um;
u32 *reg = data;
@@ -147,10 +147,11 @@ void hclgevf_get_regs(struct hnae3_handle *handle, u32 *version,
reg_um = ARRAY_SIZE(ring_reg_addr_list);
for (j = 0; j < hdev->num_tqps; j++) {
reg += hclgevf_reg_get_tlv(HCLGEVF_REG_TAG_RING, reg_um, reg);
+ tqp = &hdev->htqp[j].q;
for (i = 0; i < reg_um; i++)
- *reg++ = hclgevf_read_dev(&hdev->hw,
- ring_reg_addr_list[i] +
- HCLGEVF_RING_REG_OFFSET * j);
+ *reg++ = readl_relaxed(tqp->io_base -
+ HCLGEVF_TQP_REG_OFFSET +
+ ring_reg_addr_list[i]);
}
reg_um = ARRAY_SIZE(tqp_intr_reg_addr_list);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 98861cc6df7c..b9dd7b719832 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1180,126 +1180,6 @@ s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
}
/**
- * fm10k_iov_msg_mac_vlan_pf - Message handler for MAC/VLAN request from VF
- * @hw: Pointer to hardware structure
- * @results: Pointer array to message, results[0] is pointer to message
- * @mbx: Pointer to mailbox information structure
- *
- * This function is a default handler for MAC/VLAN requests from the VF.
- * The assumption is that in this case it is acceptable to just directly
- * hand off the message from the VF to the underlying shared code.
- **/
-s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results,
- struct fm10k_mbx_info *mbx)
-{
- struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
- u8 mac[ETH_ALEN];
- u32 *result;
- int err = 0;
- bool set;
- u16 vlan;
- u32 vid;
-
- /* we shouldn't be updating rules on a disabled interface */
- if (!FM10K_VF_FLAG_ENABLED(vf_info))
- err = FM10K_ERR_PARAM;
-
- if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) {
- result = results[FM10K_MAC_VLAN_MSG_VLAN];
-
- /* record VLAN id requested */
- err = fm10k_tlv_attr_get_u32(result, &vid);
- if (err)
- return err;
-
- set = !(vid & FM10K_VLAN_CLEAR);
- vid &= ~FM10K_VLAN_CLEAR;
-
- /* if the length field has been set, this is a multi-bit
- * update request. For multi-bit requests, simply disallow
- * them when the pf_vid has been set. In this case, the PF
- * should have already cleared the VLAN_TABLE, and if we
- * allowed them, it could allow a rogue VF to receive traffic
- * on a VLAN it was not assigned. In the single-bit case, we
- * need to modify requests for VLAN 0 to use the default PF or
- * SW vid when assigned.
- */
-
- if (vid >> 16) {
- /* prevent multi-bit requests when PF has
- * administratively set the VLAN for this VF
- */
- if (vf_info->pf_vid)
- return FM10K_ERR_PARAM;
- } else {
- err = fm10k_iov_select_vid(vf_info, (u16)vid);
- if (err < 0)
- return err;
-
- vid = err;
- }
-
- /* update VSI info for VF in regards to VLAN table */
- err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
- }
-
- if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) {
- result = results[FM10K_MAC_VLAN_MSG_MAC];
-
- /* record unicast MAC address requested */
- err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
- if (err)
- return err;
-
- /* block attempts to set MAC for a locked device */
- if (is_valid_ether_addr(vf_info->mac) &&
- !ether_addr_equal(mac, vf_info->mac))
- return FM10K_ERR_PARAM;
-
- set = !(vlan & FM10K_VLAN_CLEAR);
- vlan &= ~FM10K_VLAN_CLEAR;
-
- err = fm10k_iov_select_vid(vf_info, vlan);
- if (err < 0)
- return err;
-
- vlan = (u16)err;
-
- /* notify switch of request for new unicast address */
- err = hw->mac.ops.update_uc_addr(hw, vf_info->glort,
- mac, vlan, set, 0);
- }
-
- if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) {
- result = results[FM10K_MAC_VLAN_MSG_MULTICAST];
-
- /* record multicast MAC address requested */
- err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
- if (err)
- return err;
-
- /* verify that the VF is allowed to request multicast */
- if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED))
- return FM10K_ERR_PARAM;
-
- set = !(vlan & FM10K_VLAN_CLEAR);
- vlan &= ~FM10K_VLAN_CLEAR;
-
- err = fm10k_iov_select_vid(vf_info, vlan);
- if (err < 0)
- return err;
-
- vlan = (u16)err;
-
- /* notify switch of request for new multicast address */
- err = hw->mac.ops.update_mc_addr(hw, vf_info->glort,
- mac, vlan, set);
- }
-
- return err;
-}
-
-/**
* fm10k_iov_supported_xcast_mode_pf - Determine best match for xcast mode
* @vf_info: VF info structure containing capability flags
* @mode: Requested xcast mode
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
index 8e814df709d2..ad3696893cb1 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -99,8 +99,6 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[];
s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid);
s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
-s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **,
- struct fm10k_mbx_info *);
s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *, u32 **,
struct fm10k_mbx_info *);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d4255c2706fa..c67963bfe14e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -88,6 +88,7 @@ enum i40e_state {
__I40E_SERVICE_SCHED,
__I40E_ADMINQ_EVENT_PENDING,
__I40E_MDD_EVENT_PENDING,
+ __I40E_MDD_VF_PRINT_PENDING,
__I40E_VFLR_EVENT_PENDING,
__I40E_RESET_RECOVERY_PENDING,
__I40E_TIMEOUT_RECOVERY_PENDING,
@@ -191,6 +192,7 @@ enum i40e_pf_flags {
*/
I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
I40E_FLAG_VF_VLAN_PRUNING_ENA,
+ I40E_FLAG_MDD_AUTO_RESET_VF,
I40E_PF_FLAGS_NBITS, /* must be last */
};
@@ -572,7 +574,7 @@ struct i40e_pf {
int num_alloc_vfs; /* actual number of VFs allocated */
u32 vf_aq_requests;
u32 arq_overflows; /* Not fatal, possibly indicative of problems */
-
+ struct ratelimit_state mdd_message_rate_limit;
/* DCBx/DCBNL capability for PF that indicates
* whether DCBx is managed by firmware or host
* based agent (LLDPAD). Also, indicates what
@@ -1189,7 +1191,6 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
struct i40e_fdir_filter *input, bool add);
void i40e_fdir_check_and_reenable(struct i40e_pf *pf);
u32 i40e_get_current_fd_count(struct i40e_pf *pf);
-u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf);
u32 i40e_get_current_atr_cnt(struct i40e_pf *pf);
u32 i40e_get_global_fd_count(struct i40e_pf *pf);
bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features);
@@ -1197,7 +1198,6 @@ void i40e_set_ethtool_ops(struct net_device *netdev);
struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
const u8 *macaddr, s16 vlan);
void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f);
-void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan);
int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
u16 uplink, u32 param1);
@@ -1313,7 +1313,6 @@ int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset);
int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi);
int i40e_get_partition_bw_setting(struct i40e_pf *pf);
int i40e_set_partition_bw_setting(struct i40e_pf *pf);
-int i40e_commit_partition_bw_setting(struct i40e_pf *pf);
void i40e_print_link_message(struct i40e_vsi *vsi, bool isup);
void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index f73f5930fc58..175c1320c143 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -1016,16 +1016,6 @@ i40e_asq_send_command_atomic_v2(struct i40e_hw *hw,
return status;
}
-int
-i40e_asq_send_command_v2(struct i40e_hw *hw, struct i40e_aq_desc *desc,
- void *buff, /* can be NULL */ u16 buff_size,
- struct i40e_asq_cmd_details *cmd_details,
- enum i40e_admin_queue_err *aq_status)
-{
- return i40e_asq_send_command_atomic_v2(hw, desc, buff, buff_size,
- cmd_details, true, aq_status);
-}
-
/**
* i40e_fill_default_direct_cmd_desc - AQ descriptor helper function
* @desc: pointer to the temp descriptor (non DMA mem)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index e8031f1a9b4f..370b4bddee44 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1805,37 +1805,6 @@ int i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
}
/**
- * i40e_aq_set_vsi_vlan_promisc - control the VLAN promiscuous setting
- * @hw: pointer to the hw struct
- * @seid: vsi number
- * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN
- * @cmd_details: pointer to command details structure or NULL
- **/
-int i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
- u16 seid, bool enable,
- struct i40e_asq_cmd_details *cmd_details)
-{
- struct i40e_aq_desc desc;
- struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
- (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
- u16 flags = 0;
- int status;
-
- i40e_fill_default_direct_cmd_desc(&desc,
- i40e_aqc_opc_set_vsi_promiscuous_modes);
- if (enable)
- flags |= I40E_AQC_SET_VSI_PROMISC_VLAN;
-
- cmd->promiscuous_flags = cpu_to_le16(flags);
- cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_VLAN);
- cmd->seid = cpu_to_le16(seid);
-
- status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
-
- return status;
-}
-
-/**
* i40e_aq_get_vsi_params - get VSI configuration info
* @hw: pointer to the hw struct
* @vsi_ctx: pointer to a vsi context struct
@@ -2436,136 +2405,6 @@ i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid,
}
/**
- * i40e_mirrorrule_op - Internal helper function to add/delete mirror rule
- * @hw: pointer to the hw struct
- * @opcode: AQ opcode for add or delete mirror rule
- * @sw_seid: Switch SEID (to which rule refers)
- * @rule_type: Rule Type (ingress/egress/VLAN)
- * @id: Destination VSI SEID or Rule ID
- * @count: length of the list
- * @mr_list: list of mirrored VSI SEIDs or VLAN IDs
- * @cmd_details: pointer to command details structure or NULL
- * @rule_id: Rule ID returned from FW
- * @rules_used: Number of rules used in internal switch
- * @rules_free: Number of rules free in internal switch
- *
- * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for
- * VEBs/VEPA elements only
- **/
-static int i40e_mirrorrule_op(struct i40e_hw *hw,
- u16 opcode, u16 sw_seid, u16 rule_type, u16 id,
- u16 count, __le16 *mr_list,
- struct i40e_asq_cmd_details *cmd_details,
- u16 *rule_id, u16 *rules_used, u16 *rules_free)
-{
- struct i40e_aq_desc desc;
- struct i40e_aqc_add_delete_mirror_rule *cmd =
- (struct i40e_aqc_add_delete_mirror_rule *)&desc.params.raw;
- struct i40e_aqc_add_delete_mirror_rule_completion *resp =
- (struct i40e_aqc_add_delete_mirror_rule_completion *)&desc.params.raw;
- u16 buf_size;
- int status;
-
- buf_size = count * sizeof(*mr_list);
-
- /* prep the rest of the request */
- i40e_fill_default_direct_cmd_desc(&desc, opcode);
- cmd->seid = cpu_to_le16(sw_seid);
- cmd->rule_type = cpu_to_le16(rule_type &
- I40E_AQC_MIRROR_RULE_TYPE_MASK);
- cmd->num_entries = cpu_to_le16(count);
- /* Dest VSI for add, rule_id for delete */
- cmd->destination = cpu_to_le16(id);
- if (mr_list) {
- desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF |
- I40E_AQ_FLAG_RD));
- if (buf_size > I40E_AQ_LARGE_BUF)
- desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
- }
-
- status = i40e_asq_send_command(hw, &desc, mr_list, buf_size,
- cmd_details);
- if (!status ||
- hw->aq.asq_last_status == I40E_AQ_RC_ENOSPC) {
- if (rule_id)
- *rule_id = le16_to_cpu(resp->rule_id);
- if (rules_used)
- *rules_used = le16_to_cpu(resp->mirror_rules_used);
- if (rules_free)
- *rules_free = le16_to_cpu(resp->mirror_rules_free);
- }
- return status;
-}
-
-/**
- * i40e_aq_add_mirrorrule - add a mirror rule
- * @hw: pointer to the hw struct
- * @sw_seid: Switch SEID (to which rule refers)
- * @rule_type: Rule Type (ingress/egress/VLAN)
- * @dest_vsi: SEID of VSI to which packets will be mirrored
- * @count: length of the list
- * @mr_list: list of mirrored VSI SEIDs or VLAN IDs
- * @cmd_details: pointer to command details structure or NULL
- * @rule_id: Rule ID returned from FW
- * @rules_used: Number of rules used in internal switch
- * @rules_free: Number of rules free in internal switch
- *
- * Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only
- **/
-int i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
- u16 rule_type, u16 dest_vsi, u16 count,
- __le16 *mr_list,
- struct i40e_asq_cmd_details *cmd_details,
- u16 *rule_id, u16 *rules_used, u16 *rules_free)
-{
- if (!(rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS ||
- rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS)) {
- if (count == 0 || !mr_list)
- return -EINVAL;
- }
-
- return i40e_mirrorrule_op(hw, i40e_aqc_opc_add_mirror_rule, sw_seid,
- rule_type, dest_vsi, count, mr_list,
- cmd_details, rule_id, rules_used, rules_free);
-}
-
-/**
- * i40e_aq_delete_mirrorrule - delete a mirror rule
- * @hw: pointer to the hw struct
- * @sw_seid: Switch SEID (to which rule refers)
- * @rule_type: Rule Type (ingress/egress/VLAN)
- * @count: length of the list
- * @rule_id: Rule ID that is returned in the receive desc as part of
- * add_mirrorrule.
- * @mr_list: list of mirrored VLAN IDs to be removed
- * @cmd_details: pointer to command details structure or NULL
- * @rules_used: Number of rules used in internal switch
- * @rules_free: Number of rules free in internal switch
- *
- * Delete a mirror rule. Mirror rules are supported for VEBs/VEPA elements only
- **/
-int i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
- u16 rule_type, u16 rule_id, u16 count,
- __le16 *mr_list,
- struct i40e_asq_cmd_details *cmd_details,
- u16 *rules_used, u16 *rules_free)
-{
- /* Rule ID has to be valid except rule_type: INGRESS VLAN mirroring */
- if (rule_type == I40E_AQC_MIRROR_RULE_TYPE_VLAN) {
- /* count and mr_list shall be valid for rule_type INGRESS VLAN
- * mirroring. For other rule_type, count and rule_type should
- * not matter.
- */
- if (count == 0 || !mr_list)
- return -EINVAL;
- }
-
- return i40e_mirrorrule_op(hw, i40e_aqc_opc_delete_mirror_rule, sw_seid,
- rule_type, rule_id, count, mr_list,
- cmd_details, NULL, rules_used, rules_free);
-}
-
-/**
* i40e_aq_send_msg_to_vf
* @hw: pointer to the hardware structure
* @vfid: VF id to send msg
@@ -3180,41 +3019,6 @@ i40e_aq_update_nvm_exit:
}
/**
- * i40e_aq_rearrange_nvm
- * @hw: pointer to the hw struct
- * @rearrange_nvm: defines direction of rearrangement
- * @cmd_details: pointer to command details structure or NULL
- *
- * Rearrange NVM structure, available only for transition FW
- **/
-int i40e_aq_rearrange_nvm(struct i40e_hw *hw,
- u8 rearrange_nvm,
- struct i40e_asq_cmd_details *cmd_details)
-{
- struct i40e_aqc_nvm_update *cmd;
- struct i40e_aq_desc desc;
- int status;
-
- cmd = (struct i40e_aqc_nvm_update *)&desc.params.raw;
-
- i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_update);
-
- rearrange_nvm &= (I40E_AQ_NVM_REARRANGE_TO_FLAT |
- I40E_AQ_NVM_REARRANGE_TO_STRUCT);
-
- if (!rearrange_nvm) {
- status = -EINVAL;
- goto i40e_aq_rearrange_nvm_exit;
- }
-
- cmd->command_flags |= rearrange_nvm;
- status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
-
-i40e_aq_rearrange_nvm_exit:
- return status;
-}
-
-/**
* i40e_aq_get_lldp_mib
* @hw: pointer to the hw struct
* @bridge_type: type of bridge requested
@@ -3335,44 +3139,6 @@ int i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
}
/**
- * i40e_aq_restore_lldp
- * @hw: pointer to the hw struct
- * @setting: pointer to factory setting variable or NULL
- * @restore: True if factory settings should be restored
- * @cmd_details: pointer to command details structure or NULL
- *
- * Restore LLDP Agent factory settings if @restore set to True. In other case
- * only returns factory setting in AQ response.
- **/
-int
-i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
- struct i40e_asq_cmd_details *cmd_details)
-{
- struct i40e_aq_desc desc;
- struct i40e_aqc_lldp_restore *cmd =
- (struct i40e_aqc_lldp_restore *)&desc.params.raw;
- int status;
-
- if (!test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) {
- i40e_debug(hw, I40E_DEBUG_ALL,
- "Restore LLDP not supported by current FW version.\n");
- return -ENODEV;
- }
-
- i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_restore);
-
- if (restore)
- cmd->command |= I40E_AQ_LLDP_AGENT_RESTORE;
-
- status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
-
- if (setting)
- *setting = cmd->command & 1;
-
- return status;
-}
-
-/**
* i40e_aq_stop_lldp
* @hw: pointer to the hw struct
* @shutdown_agent: True if LLDP Agent needs to be Shutdown
@@ -4570,84 +4336,6 @@ phy_write_end:
}
/**
- * i40e_write_phy_register
- * @hw: pointer to the HW structure
- * @page: registers page number
- * @reg: register address in the page
- * @phy_addr: PHY address on MDIO interface
- * @value: PHY register value
- *
- * Writes value to specified PHY register
- **/
-int i40e_write_phy_register(struct i40e_hw *hw,
- u8 page, u16 reg, u8 phy_addr, u16 value)
-{
- int status;
-
- switch (hw->device_id) {
- case I40E_DEV_ID_1G_BASE_T_X722:
- status = i40e_write_phy_register_clause22(hw, reg, phy_addr,
- value);
- break;
- case I40E_DEV_ID_1G_BASE_T_BC:
- case I40E_DEV_ID_5G_BASE_T_BC:
- case I40E_DEV_ID_10G_BASE_T:
- case I40E_DEV_ID_10G_BASE_T4:
- case I40E_DEV_ID_10G_BASE_T_BC:
- case I40E_DEV_ID_10G_BASE_T_X722:
- case I40E_DEV_ID_25G_B:
- case I40E_DEV_ID_25G_SFP28:
- status = i40e_write_phy_register_clause45(hw, page, reg,
- phy_addr, value);
- break;
- default:
- status = -EIO;
- break;
- }
-
- return status;
-}
-
-/**
- * i40e_read_phy_register
- * @hw: pointer to the HW structure
- * @page: registers page number
- * @reg: register address in the page
- * @phy_addr: PHY address on MDIO interface
- * @value: PHY register value
- *
- * Reads specified PHY register value
- **/
-int i40e_read_phy_register(struct i40e_hw *hw,
- u8 page, u16 reg, u8 phy_addr, u16 *value)
-{
- int status;
-
- switch (hw->device_id) {
- case I40E_DEV_ID_1G_BASE_T_X722:
- status = i40e_read_phy_register_clause22(hw, reg, phy_addr,
- value);
- break;
- case I40E_DEV_ID_1G_BASE_T_BC:
- case I40E_DEV_ID_5G_BASE_T_BC:
- case I40E_DEV_ID_10G_BASE_T:
- case I40E_DEV_ID_10G_BASE_T4:
- case I40E_DEV_ID_10G_BASE_T_BC:
- case I40E_DEV_ID_10G_BASE_T_X722:
- case I40E_DEV_ID_25G_B:
- case I40E_DEV_ID_25G_SFP28:
- status = i40e_read_phy_register_clause45(hw, page, reg,
- phy_addr, value);
- break;
- default:
- status = -EIO;
- break;
- }
-
- return status;
-}
-
-/**
* i40e_get_phy_address
* @hw: pointer to the HW structure
* @dev_num: PHY port num that address we want
@@ -4663,80 +4351,6 @@ u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num)
}
/**
- * i40e_blink_phy_link_led
- * @hw: pointer to the HW structure
- * @time: time how long led will blinks in secs
- * @interval: gap between LED on and off in msecs
- *
- * Blinks PHY link LED
- **/
-int i40e_blink_phy_link_led(struct i40e_hw *hw,
- u32 time, u32 interval)
-{
- u16 led_addr = I40E_PHY_LED_PROV_REG_1;
- u16 gpio_led_port;
- u8 phy_addr = 0;
- int status = 0;
- u16 led_ctl;
- u8 port_num;
- u16 led_reg;
- u32 i;
-
- i = rd32(hw, I40E_PFGEN_PORTNUM);
- port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
- phy_addr = i40e_get_phy_address(hw, port_num);
-
- for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++,
- led_addr++) {
- status = i40e_read_phy_register_clause45(hw,
- I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr,
- &led_reg);
- if (status)
- goto phy_blinking_end;
- led_ctl = led_reg;
- if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
- led_reg = 0;
- status = i40e_write_phy_register_clause45(hw,
- I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr,
- led_reg);
- if (status)
- goto phy_blinking_end;
- break;
- }
- }
-
- if (time > 0 && interval > 0) {
- for (i = 0; i < time * 1000; i += interval) {
- status = i40e_read_phy_register_clause45(hw,
- I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr, &led_reg);
- if (status)
- goto restore_config;
- if (led_reg & I40E_PHY_LED_MANUAL_ON)
- led_reg = 0;
- else
- led_reg = I40E_PHY_LED_MANUAL_ON;
- status = i40e_write_phy_register_clause45(hw,
- I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr, led_reg);
- if (status)
- goto restore_config;
- msleep(interval);
- }
- }
-
-restore_config:
- status = i40e_write_phy_register_clause45(hw,
- I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr, led_ctl);
-
-phy_blinking_end:
- return status;
-}
-
-/**
* i40e_led_get_reg - read LED register
* @hw: pointer to the HW structure
* @led_addr: LED register address
@@ -5269,39 +4883,6 @@ i40e_find_segment_in_package(u32 segment_type,
(struct i40e_profile_section_header *)((u8 *)(profile) + (offset))
/**
- * i40e_find_section_in_profile
- * @section_type: the section type to search for (i.e., SECTION_TYPE_NOTE)
- * @profile: pointer to the i40e segment header to be searched
- *
- * This function searches i40e segment for a particular section type. On
- * success it returns a pointer to the section header, otherwise it will
- * return NULL.
- **/
-struct i40e_profile_section_header *
-i40e_find_section_in_profile(u32 section_type,
- struct i40e_profile_segment *profile)
-{
- struct i40e_profile_section_header *sec;
- struct i40e_section_table *sec_tbl;
- u32 sec_off;
- u32 i;
-
- if (profile->header.type != SEGMENT_TYPE_I40E)
- return NULL;
-
- I40E_SECTION_TABLE(profile, sec_tbl);
-
- for (i = 0; i < sec_tbl->section_count; i++) {
- sec_off = sec_tbl->section_offset[i];
- sec = I40E_SECTION_HEADER(profile, sec_off);
- if (sec->section.type == section_type)
- return sec;
- }
-
- return NULL;
-}
-
-/**
* i40e_ddp_exec_aq_section - Execute generic AQ for DDP
* @hw: pointer to the hw struct
* @aq: command buffer containing all data to execute AQ
@@ -5524,45 +5105,6 @@ i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile,
}
/**
- * i40e_add_pinfo_to_list
- * @hw: pointer to the hardware structure
- * @profile: pointer to the profile segment of the package
- * @profile_info_sec: buffer for information section
- * @track_id: package tracking id
- *
- * Register a profile to the list of loaded profiles.
- */
-int
-i40e_add_pinfo_to_list(struct i40e_hw *hw,
- struct i40e_profile_segment *profile,
- u8 *profile_info_sec, u32 track_id)
-{
- struct i40e_profile_section_header *sec = NULL;
- struct i40e_profile_info *pinfo;
- u32 offset = 0, info = 0;
- int status = 0;
-
- sec = (struct i40e_profile_section_header *)profile_info_sec;
- sec->tbl_size = 1;
- sec->data_end = sizeof(struct i40e_profile_section_header) +
- sizeof(struct i40e_profile_info);
- sec->section.type = SECTION_TYPE_INFO;
- sec->section.offset = sizeof(struct i40e_profile_section_header);
- sec->section.size = sizeof(struct i40e_profile_info);
- pinfo = (struct i40e_profile_info *)(profile_info_sec +
- sec->section.offset);
- pinfo->track_id = track_id;
- pinfo->version = profile->version;
- pinfo->op = I40E_DDP_ADD_TRACKID;
- memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE);
-
- status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end,
- track_id, &offset, &info, NULL);
-
- return status;
-}
-
-/**
* i40e_aq_add_cloud_filters
* @hw: pointer to the hardware structure
* @seid: VSI seid to add cloud filters from
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 8db1eb0c1768..352e957443fd 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -1491,19 +1491,6 @@ void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc)
}
/**
- * i40e_dcb_hw_get_num_tc
- * @hw: pointer to the hw struct
- *
- * Returns number of traffic classes configured in HW
- **/
-u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw)
-{
- u32 reg = rd32(hw, I40E_PRTDCB_GENC);
-
- return FIELD_GET(I40E_PRTDCB_GENC_NUMTC_MASK, reg);
-}
-
-/**
* i40e_dcb_hw_rx_ets_bw_config
* @hw: pointer to the hw struct
* @bw_share: Bandwidth share indexed per traffic class
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
index d76497566e40..d5662c639c41 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h
@@ -253,7 +253,6 @@ void i40e_dcb_hw_rx_cmd_monitor_config(struct i40e_hw *hw,
void i40e_dcb_hw_pfc_config(struct i40e_hw *hw,
u8 pfc_en, u8 *prio_tc);
void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc);
-u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw);
void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share,
u8 *mode, u8 *prio_type);
void i40e_dcb_hw_rx_up2tc_config(struct i40e_hw *hw, u8 *prio_tc);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 208c2f0857b6..6cd9da662ae1 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -722,7 +722,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
dev_info(&pf->pdev->dev, " num MDD=%lld\n",
- vf->num_mdd_events);
+ vf->mdd_tx_events.count + vf->mdd_rx_events.count);
} else {
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index bce5b76f1e7a..8a7a83f83ee5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
I40E_PRIV_FLAG("vf-vlan-pruning",
I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
+ I40E_PRIV_FLAG("mdd-auto-reset-vf",
+ I40E_FLAG_MDD_AUTO_RESET_VF, 0),
};
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0e1d9e2fbf38..65a702668e21 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1666,9 +1666,8 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
* @vsi: VSI to remove from
* @f: the filter to remove from the list
*
- * This function should be called instead of i40e_del_filter only if you know
- * the exact filter you will remove already, such as via i40e_find_filter or
- * i40e_find_mac.
+ * This function requires you've found * the exact filter you will remove
+ * already, such as via i40e_find_filter or i40e_find_mac.
*
* NOTE: This function is expected to be called with mac_filter_hash_lock
* being held.
@@ -1698,29 +1697,6 @@ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
}
/**
- * i40e_del_filter - Remove a MAC/VLAN filter from the VSI
- * @vsi: the VSI to be searched
- * @macaddr: the MAC address
- * @vlan: the VLAN
- *
- * NOTE: This function is expected to be called with mac_filter_hash_lock
- * being held.
- * ANOTHER NOTE: This function MUST be called from within the context of
- * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
- * instead of list_for_each_entry().
- **/
-void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
-{
- struct i40e_mac_filter *f;
-
- if (!vsi || !macaddr)
- return;
-
- f = i40e_find_filter(vsi, macaddr, vlan);
- __i40e_del_filter(vsi, f);
-}
-
-/**
* i40e_add_mac_filter - Add a MAC filter for all active VLANs
* @vsi: the VSI to be searched
* @macaddr: the mac address to be filtered
@@ -9629,19 +9605,6 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf,
}
/**
- * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters
- * @pf: board private structure
- **/
-u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf)
-{
- u32 val, fcnt_prog;
-
- val = rd32(&pf->hw, I40E_PFQF_FDSTAT);
- fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK);
- return fcnt_prog;
-}
-
-/**
* i40e_get_current_fd_count - Get total FD filters programmed for this PF
* @pf: board private structure
**/
@@ -11217,6 +11180,67 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
}
/**
+ * i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
+ * @pf: board private structure
+ * @vf: pointer to the VF structure
+ * @is_tx: true - for Tx event, false - for Rx
+ */
+static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
+ bool is_tx)
+{
+ dev_err(&pf->pdev->dev, is_tx ?
+ "%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
+ "%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
+ is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
+ pf->hw.pf_id,
+ vf->vf_id,
+ vf->default_lan_addr.addr,
+ str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
+}
+
+/**
+ * i40e_print_vfs_mdd_events - print VFs malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
+ */
+static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
+{
+ unsigned int i;
+
+ /* check that there are pending MDD events to print */
+ if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
+ return;
+
+ if (!__ratelimit(&pf->mdd_message_rate_limit))
+ return;
+
+ for (i = 0; i < pf->num_alloc_vfs; i++) {
+ struct i40e_vf *vf = &pf->vf[i];
+ bool is_printed = false;
+
+ /* only print Rx MDD event message if there are new events */
+ if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
+ vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
+ i40e_print_vf_mdd_event(pf, vf, false);
+ is_printed = true;
+ }
+
+ /* only print Tx MDD event message if there are new events */
+ if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
+ vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
+ i40e_print_vf_mdd_event(pf, vf, true);
+ is_printed = true;
+ }
+
+ if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
+ dev_info(&pf->pdev->dev,
+ "Use PF Control I/F to re-enable the VF #%d\n",
+ i);
+ }
+}
+
+/**
* i40e_handle_mdd_event
* @pf: pointer to the PF structure
*
@@ -11230,8 +11254,13 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
u32 reg;
int i;
- if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
+ if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
+ /* Since the VF MDD event logging is rate limited, check if
+ * there are pending MDD events.
+ */
+ i40e_print_vfs_mdd_events(pf);
return;
+ }
/* find what triggered the MDD event */
reg = rd32(hw, I40E_GL_MDET_TX);
@@ -11275,36 +11304,48 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
/* see if one of the VFs needs its hand slapped */
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
+ bool is_mdd_on_tx = false;
+ bool is_mdd_on_rx = false;
+
vf = &(pf->vf[i]);
reg = rd32(hw, I40E_VP_MDET_TX(i));
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
+ set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
- vf->num_mdd_events++;
- dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
- i);
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
+ vf->mdd_tx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+ is_mdd_on_tx = true;
}
reg = rd32(hw, I40E_VP_MDET_RX(i));
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
+ set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
- vf->num_mdd_events++;
- dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
- i);
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
+ vf->mdd_rx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+ is_mdd_on_rx = true;
+ }
+
+ if ((is_mdd_on_tx || is_mdd_on_rx) &&
+ test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
+ /* VF MDD event counters will be cleared by
+ * reset, so print the event prior to reset.
+ */
+ if (is_mdd_on_rx)
+ i40e_print_vf_mdd_event(pf, vf, false);
+ if (is_mdd_on_tx)
+ i40e_print_vf_mdd_event(pf, vf, true);
+
+ i40e_vc_reset_vf(vf, true);
}
}
- /* re-enable mdd interrupt cause */
- clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
i40e_flush(hw);
+
+ i40e_print_vfs_mdd_events(pf);
}
/**
@@ -12614,89 +12655,6 @@ int i40e_set_partition_bw_setting(struct i40e_pf *pf)
}
/**
- * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition
- * @pf: board private structure
- **/
-int i40e_commit_partition_bw_setting(struct i40e_pf *pf)
-{
- /* Commit temporary BW setting to permanent NVM image */
- enum i40e_admin_queue_err last_aq_status;
- u16 nvm_word;
- int ret;
-
- if (pf->hw.partition_id != 1) {
- dev_info(&pf->pdev->dev,
- "Commit BW only works on partition 1! This is partition %d",
- pf->hw.partition_id);
- ret = -EOPNOTSUPP;
- goto bw_commit_out;
- }
-
- /* Acquire NVM for read access */
- ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ);
- last_aq_status = pf->hw.aq.asq_last_status;
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Cannot acquire NVM for read access, err %pe aq_err %s\n",
- ERR_PTR(ret),
- i40e_aq_str(&pf->hw, last_aq_status));
- goto bw_commit_out;
- }
-
- /* Read word 0x10 of NVM - SW compatibility word 1 */
- ret = i40e_aq_read_nvm(&pf->hw,
- I40E_SR_NVM_CONTROL_WORD,
- 0x10, sizeof(nvm_word), &nvm_word,
- false, NULL);
- /* Save off last admin queue command status before releasing
- * the NVM
- */
- last_aq_status = pf->hw.aq.asq_last_status;
- i40e_release_nvm(&pf->hw);
- if (ret) {
- dev_info(&pf->pdev->dev, "NVM read error, err %pe aq_err %s\n",
- ERR_PTR(ret),
- i40e_aq_str(&pf->hw, last_aq_status));
- goto bw_commit_out;
- }
-
- /* Wait a bit for NVM release to complete */
- msleep(50);
-
- /* Acquire NVM for write access */
- ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_WRITE);
- last_aq_status = pf->hw.aq.asq_last_status;
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Cannot acquire NVM for write access, err %pe aq_err %s\n",
- ERR_PTR(ret),
- i40e_aq_str(&pf->hw, last_aq_status));
- goto bw_commit_out;
- }
- /* Write it back out unchanged to initiate update NVM,
- * which will force a write of the shadow (alt) RAM to
- * the NVM - thus storing the bandwidth values permanently.
- */
- ret = i40e_aq_update_nvm(&pf->hw,
- I40E_SR_NVM_CONTROL_WORD,
- 0x10, sizeof(nvm_word),
- &nvm_word, true, 0, NULL);
- /* Save off last admin queue command status before releasing
- * the NVM
- */
- last_aq_status = pf->hw.aq.asq_last_status;
- i40e_release_nvm(&pf->hw);
- if (ret)
- dev_info(&pf->pdev->dev,
- "BW settings NOT SAVED, err %pe aq_err %s\n",
- ERR_PTR(ret),
- i40e_aq_str(&pf->hw, last_aq_status));
-bw_commit_out:
-
- return ret;
-}
-
-/**
* i40e_is_total_port_shutdown_enabled - read NVM and return value
* if total port shutdown feature is enabled for this PF
* @pf: board private structure
@@ -15998,6 +15956,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ERR_PTR(err),
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ /* VF MDD event logs are rate limited to one second intervals */
+ ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
+
/* Reconfigure hardware for allowing smaller MSS in the case
* of TSO, so that we avoid the MDD being fired and causing
* a reset in the case of small MSS+TSO.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 5a0699ca7ce5..099bb8ab7d70 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -27,13 +27,6 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc,
void *buff, /* can be NULL */ u16 buff_size,
struct i40e_asq_cmd_details *cmd_details);
int
-i40e_asq_send_command_v2(struct i40e_hw *hw,
- struct i40e_aq_desc *desc,
- void *buff, /* can be NULL */
- u16 buff_size,
- struct i40e_asq_cmd_details *cmd_details,
- enum i40e_admin_queue_err *aq_status);
-int
i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc,
void *buff, /* can be NULL */ u16 buff_size,
struct i40e_asq_cmd_details *cmd_details,
@@ -72,8 +65,6 @@ int i40e_led_set_phy(struct i40e_hw *hw, bool on,
u16 led_addr, u32 mode);
int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
u16 *val);
-int i40e_blink_phy_link_led(struct i40e_hw *hw,
- u32 time, u32 interval);
/* admin send queue commands */
@@ -141,9 +132,6 @@ int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
int i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
u16 seid, bool enable, u16 vid,
struct i40e_asq_cmd_details *cmd_details);
-int i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
- u16 seid, bool enable,
- struct i40e_asq_cmd_details *cmd_details);
int i40e_aq_get_vsi_params(struct i40e_hw *hw,
struct i40e_vsi_context *vsi_ctx,
struct i40e_asq_cmd_details *cmd_details);
@@ -176,14 +164,6 @@ i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid,
struct i40e_aqc_remove_macvlan_element_data *mv_list,
u16 count, struct i40e_asq_cmd_details *cmd_details,
enum i40e_admin_queue_err *aq_status);
-int i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
- u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list,
- struct i40e_asq_cmd_details *cmd_details,
- u16 *rule_id, u16 *rules_used, u16 *rules_free);
-int i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid,
- u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list,
- struct i40e_asq_cmd_details *cmd_details,
- u16 *rules_used, u16 *rules_free);
int i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
@@ -220,9 +200,6 @@ int i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
u32 offset, u16 length, void *data,
bool last_command, u8 preservation_flags,
struct i40e_asq_cmd_details *cmd_details);
-int i40e_aq_rearrange_nvm(struct i40e_hw *hw,
- u8 rearrange_nvm,
- struct i40e_asq_cmd_details *cmd_details);
int i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
u8 mib_type, void *buff, u16 buff_size,
u16 *local_len, u16 *remote_len,
@@ -234,9 +211,6 @@ i40e_aq_set_lldp_mib(struct i40e_hw *hw,
int i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
bool enable_update,
struct i40e_asq_cmd_details *cmd_details);
-int
-i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore,
- struct i40e_asq_cmd_details *cmd_details);
int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
bool persist,
struct i40e_asq_cmd_details *cmd_details);
@@ -458,13 +432,7 @@ int i40e_read_phy_register_clause45(struct i40e_hw *hw,
u8 page, u16 reg, u8 phy_addr, u16 *value);
int i40e_write_phy_register_clause45(struct i40e_hw *hw,
u8 page, u16 reg, u8 phy_addr, u16 value);
-int i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
- u8 phy_addr, u16 *value);
-int i40e_write_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
- u8 phy_addr, u16 value);
u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num);
-int i40e_blink_phy_link_led(struct i40e_hw *hw,
- u32 time, u32 interval);
int i40e_aq_write_ddp(struct i40e_hw *hw, void *buff,
u16 buff_size, u32 track_id,
u32 *error_offset, u32 *error_info,
@@ -477,20 +445,12 @@ int i40e_aq_get_ddp_list(struct i40e_hw *hw, void *buff,
struct i40e_generic_seg_header *
i40e_find_segment_in_package(u32 segment_type,
struct i40e_package_header *pkg_header);
-struct i40e_profile_section_header *
-i40e_find_section_in_profile(u32 section_type,
- struct i40e_profile_segment *profile);
int
i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
u32 track_id);
int
i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg,
u32 track_id);
-int
-i40e_add_pinfo_to_list(struct i40e_hw *hw,
- struct i40e_profile_segment *profile,
- u8 *profile_info_sec, u32 track_id);
-
/* i40e_ddp */
int i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index dfa785e39458..1120f8e4bb67 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
* @notify_vf: notify vf about reset or not
* Reset VF handler.
**/
-static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
+void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
{
struct i40e_pf *pf = vf->pf;
int i;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 66f95e2f3146..5cf74f16f433 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -64,6 +64,12 @@ struct i40evf_channel {
u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
};
+struct i40e_mdd_vf_events {
+ u64 count; /* total count of Rx|Tx events */
+ /* count number of the last printed event */
+ u64 last_printed;
+};
+
/* VF information structure */
struct i40e_vf {
struct i40e_pf *pf;
@@ -92,7 +98,9 @@ struct i40e_vf {
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
u8 num_req_queues; /* num of requested qps */
- u64 num_mdd_events; /* num of mdd events detected */
+ /* num of mdd tx and rx events detected */
+ struct i40e_mdd_vf_events mdd_rx_events;
+ struct i40e_mdd_vf_events mdd_tx_events;
unsigned long vf_caps; /* vf's adv. capabilities */
unsigned long vf_states; /* vf's runtime states */
@@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
u32 v_retval, u8 *msg, u16 msglen);
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
+void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index a9e54866ae6b..7740f446c73f 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1968,6 +1968,7 @@ err:
static void iavf_finish_config(struct work_struct *work)
{
struct iavf_adapter *adapter;
+ bool netdev_released = false;
int pairs, err;
adapter = container_of(work, struct iavf_adapter, finish_config);
@@ -1988,7 +1989,16 @@ static void iavf_finish_config(struct work_struct *work)
switch (adapter->state) {
case __IAVF_DOWN:
+ /* Set the real number of queues when reset occurs while
+ * state == __IAVF_DOWN
+ */
+ pairs = adapter->num_active_queues;
+ netif_set_real_num_rx_queues(adapter->netdev, pairs);
+ netif_set_real_num_tx_queues(adapter->netdev, pairs);
+
if (adapter->netdev->reg_state != NETREG_REGISTERED) {
+ mutex_unlock(&adapter->netdev->lock);
+ netdev_released = true;
err = register_netdevice(adapter->netdev);
if (err) {
dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
@@ -2003,11 +2013,7 @@ static void iavf_finish_config(struct work_struct *work)
goto out;
}
}
-
- /* Set the real number of queues when reset occurs while
- * state == __IAVF_DOWN
- */
- fallthrough;
+ break;
case __IAVF_RUNNING:
pairs = adapter->num_active_queues;
netif_set_real_num_rx_queues(adapter->netdev, pairs);
@@ -2020,7 +2026,8 @@ static void iavf_finish_config(struct work_struct *work)
out:
mutex_unlock(&adapter->crit_lock);
- mutex_unlock(&adapter->netdev->lock);
+ if (!netdev_released)
+ mutex_unlock(&adapter->netdev->lock);
rtnl_unlock();
}
@@ -2713,12 +2720,16 @@ static void iavf_watchdog_task(struct work_struct *work)
struct iavf_adapter *adapter = container_of(work,
struct iavf_adapter,
watchdog_task.work);
+ struct net_device *netdev = adapter->netdev;
struct iavf_hw *hw = &adapter->hw;
u32 reg_val;
+ mutex_lock(&netdev->lock);
if (!mutex_trylock(&adapter->crit_lock)) {
- if (adapter->state == __IAVF_REMOVE)
+ if (adapter->state == __IAVF_REMOVE) {
+ mutex_unlock(&netdev->lock);
return;
+ }
goto restart_watchdog;
}
@@ -2730,30 +2741,35 @@ static void iavf_watchdog_task(struct work_struct *work)
case __IAVF_STARTUP:
iavf_startup(adapter);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_VERSION_CHECK:
iavf_init_version_check(adapter);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(30));
return;
case __IAVF_INIT_GET_RESOURCES:
iavf_init_get_resources(adapter);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_EXTENDED_CAPS:
iavf_init_process_extended_caps(adapter);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
case __IAVF_INIT_CONFIG_ADAPTER:
iavf_init_config_adapter(adapter);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
msecs_to_jiffies(1));
return;
@@ -2765,6 +2781,7 @@ static void iavf_watchdog_task(struct work_struct *work)
* as it can loop forever
*/
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
return;
}
if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
@@ -2773,6 +2790,7 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
iavf_shutdown_adminq(hw);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task, (5 * HZ));
return;
@@ -2780,6 +2798,7 @@ static void iavf_watchdog_task(struct work_struct *work)
/* Try again from failed step*/
iavf_change_state(adapter, adapter->last_state);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ);
return;
case __IAVF_COMM_FAILED:
@@ -2792,6 +2811,7 @@ static void iavf_watchdog_task(struct work_struct *work)
iavf_change_state(adapter, __IAVF_INIT_FAILED);
adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
return;
}
reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
@@ -2811,12 +2831,14 @@ static void iavf_watchdog_task(struct work_struct *work)
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task,
msecs_to_jiffies(10));
return;
case __IAVF_RESETTING:
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
HZ * 2);
return;
@@ -2847,6 +2869,7 @@ static void iavf_watchdog_task(struct work_struct *work)
case __IAVF_REMOVE:
default:
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
return;
}
@@ -2858,12 +2881,14 @@ static void iavf_watchdog_task(struct work_struct *work)
dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task, HZ * 2);
return;
}
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
restart_watchdog:
if (adapter->state >= __IAVF_DOWN)
queue_work(adapter->wq, &adapter->adminq_task);
@@ -4340,14 +4365,17 @@ static int iavf_open(struct net_device *netdev)
return -EIO;
}
+ mutex_lock(&netdev->lock);
while (!mutex_trylock(&adapter->crit_lock)) {
/* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock
* is already taken and iavf_open is called from an upper
* device's notifier reacting on NETDEV_REGISTER event.
* We have to leave here to avoid dead lock.
*/
- if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER)
+ if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) {
+ mutex_unlock(&netdev->lock);
return -EBUSY;
+ }
usleep_range(500, 1000);
}
@@ -4396,6 +4424,7 @@ static int iavf_open(struct net_device *netdev)
iavf_irq_enable(adapter, true);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
return 0;
@@ -4408,6 +4437,7 @@ err_setup_tx:
iavf_free_all_tx_resources(adapter);
err_unlock:
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
return err;
}
@@ -4429,10 +4459,12 @@ static int iavf_close(struct net_device *netdev)
u64 aq_to_restore;
int status;
+ mutex_lock(&netdev->lock);
mutex_lock(&adapter->crit_lock);
if (adapter->state <= __IAVF_DOWN_PENDING) {
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
return 0;
}
@@ -4466,6 +4498,7 @@ static int iavf_close(struct net_device *netdev)
iavf_free_traffic_irqs(adapter);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
/* We explicitly don't free resources here because the hardware is
* still active and can DMA into memory. Resources are cleared in
@@ -5342,6 +5375,7 @@ static int iavf_suspend(struct device *dev_d)
netif_device_detach(netdev);
+ mutex_lock(&netdev->lock);
mutex_lock(&adapter->crit_lock);
if (netif_running(netdev)) {
@@ -5353,6 +5387,7 @@ static int iavf_suspend(struct device *dev_d)
iavf_reset_interrupt_capability(adapter);
mutex_unlock(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
return 0;
}
@@ -5451,6 +5486,7 @@ static void iavf_remove(struct pci_dev *pdev)
if (netdev->reg_state == NETREG_REGISTERED)
unregister_netdev(netdev);
+ mutex_lock(&netdev->lock);
mutex_lock(&adapter->crit_lock);
dev_info(&adapter->pdev->dev, "Removing device\n");
iavf_change_state(adapter, __IAVF_REMOVE);
@@ -5487,6 +5523,7 @@ static void iavf_remove(struct pci_dev *pdev)
mutex_destroy(&hw->aq.asq_mutex);
mutex_unlock(&adapter->crit_lock);
mutex_destroy(&adapter->crit_lock);
+ mutex_unlock(&netdev->lock);
iounmap(hw->hw_addr);
pci_release_regions(pdev);
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 3bf05b135b35..73f5fddf3ee9 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -2271,6 +2271,8 @@ struct ice_aqc_get_pkg_info_resp {
struct ice_aqc_get_pkg_info pkg_info[];
};
+#define ICE_AQC_GET_CGU_MAX_PHASE_ADJ GENMASK(30, 0)
+
/* Get CGU abilities command response data structure (indirect 0x0C61) */
struct ice_aqc_get_cgu_abilities {
u8 num_inputs;
diff --git a/drivers/net/ethernet/intel/ice/ice_dpll.c b/drivers/net/ethernet/intel/ice/ice_dpll.c
index d5ad6d84007c..38e151c7ea23 100644
--- a/drivers/net/ethernet/intel/ice/ice_dpll.c
+++ b/drivers/net/ethernet/intel/ice/ice_dpll.c
@@ -2065,6 +2065,18 @@ static int ice_dpll_init_worker(struct ice_pf *pf)
}
/**
+ * ice_dpll_phase_range_set - initialize phase adjust range helper
+ * @range: pointer to phase adjust range struct to be initialized
+ * @phase_adj: a value to be used as min(-)/max(+) boundary
+ */
+static void ice_dpll_phase_range_set(struct dpll_pin_phase_adjust_range *range,
+ u32 phase_adj)
+{
+ range->min = -phase_adj;
+ range->max = phase_adj;
+}
+
+/**
* ice_dpll_init_info_pins_generic - initializes generic pins info
* @pf: board private structure
* @input: if input pins initialized
@@ -2105,8 +2117,8 @@ static int ice_dpll_init_info_pins_generic(struct ice_pf *pf, bool input)
for (i = 0; i < pin_num; i++) {
pins[i].idx = i;
pins[i].prop.board_label = labels[i];
- pins[i].prop.phase_range.min = phase_adj_max;
- pins[i].prop.phase_range.max = -phase_adj_max;
+ ice_dpll_phase_range_set(&pins[i].prop.phase_range,
+ phase_adj_max);
pins[i].prop.capabilities = cap;
pins[i].pf = pf;
ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL);
@@ -2152,6 +2164,7 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
struct ice_hw *hw = &pf->hw;
struct ice_dpll_pin *pins;
unsigned long caps;
+ u32 phase_adj_max;
u8 freq_supp_num;
bool input;
@@ -2159,11 +2172,13 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
case ICE_DPLL_PIN_TYPE_INPUT:
pins = pf->dplls.inputs;
num_pins = pf->dplls.num_inputs;
+ phase_adj_max = pf->dplls.input_phase_adj_max;
input = true;
break;
case ICE_DPLL_PIN_TYPE_OUTPUT:
pins = pf->dplls.outputs;
num_pins = pf->dplls.num_outputs;
+ phase_adj_max = pf->dplls.output_phase_adj_max;
input = false;
break;
default:
@@ -2188,19 +2203,13 @@ ice_dpll_init_info_direct_pins(struct ice_pf *pf,
return ret;
caps |= (DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE |
DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE);
- pins[i].prop.phase_range.min =
- pf->dplls.input_phase_adj_max;
- pins[i].prop.phase_range.max =
- -pf->dplls.input_phase_adj_max;
} else {
- pins[i].prop.phase_range.min =
- pf->dplls.output_phase_adj_max;
- pins[i].prop.phase_range.max =
- -pf->dplls.output_phase_adj_max;
ret = ice_cgu_get_output_pin_state_caps(hw, i, &caps);
if (ret)
return ret;
}
+ ice_dpll_phase_range_set(&pins[i].prop.phase_range,
+ phase_adj_max);
pins[i].prop.capabilities = caps;
ret = ice_dpll_pin_state_update(pf, &pins[i], pin_type, NULL);
if (ret)
@@ -2308,8 +2317,10 @@ static int ice_dpll_init_info(struct ice_pf *pf, bool cgu)
dp->dpll_idx = abilities.pps_dpll_idx;
d->num_inputs = abilities.num_inputs;
d->num_outputs = abilities.num_outputs;
- d->input_phase_adj_max = le32_to_cpu(abilities.max_in_phase_adj);
- d->output_phase_adj_max = le32_to_cpu(abilities.max_out_phase_adj);
+ d->input_phase_adj_max = le32_to_cpu(abilities.max_in_phase_adj) &
+ ICE_AQC_GET_CGU_MAX_PHASE_ADJ;
+ d->output_phase_adj_max = le32_to_cpu(abilities.max_out_phase_adj) &
+ ICE_AQC_GET_CGU_MAX_PHASE_ADJ;
alloc_size = sizeof(*d->inputs) * d->num_inputs;
d->inputs = kzalloc(alloc_size, GFP_KERNEL);
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
index 585ce200c60f..d75f0eddd631 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp_consts.h
@@ -761,9 +761,9 @@ const struct ice_vernier_info_e82x e822_vernier[NUM_ICE_PTP_LNK_SPD] = {
/* rx_desk_rsgb_par */
644531250, /* 644.53125 MHz Reed Solomon gearbox */
/* tx_desk_rsgb_pcs */
- 644531250, /* 644.53125 MHz Reed Solomon gearbox */
+ 390625000, /* 390.625 MHz Reed Solomon gearbox */
/* rx_desk_rsgb_pcs */
- 644531250, /* 644.53125 MHz Reed Solomon gearbox */
+ 390625000, /* 390.625 MHz Reed Solomon gearbox */
/* tx_fixed_delay */
1620,
/* pmd_adj_divisor */
diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
index 463c0d26b9d4..6c1b702fd992 100644
--- a/drivers/net/ethernet/intel/igb/Makefile
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_IGB) += igb.o
igb-y := igb_main.o igb_ethtool.o e1000_82575.o \
e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
- e1000_i210.o igb_ptp.o igb_hwmon.o
+ e1000_i210.o igb_ptp.o igb_hwmon.o igb_xsk.o
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 3c2dc7bdebb5..02f340280d20 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -18,8 +18,10 @@
#include <linux/i2c-algo-bit.h>
#include <linux/pci.h>
#include <linux/mdio.h>
+#include <linux/lockdep.h>
#include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
struct igb_adapter;
@@ -86,6 +88,7 @@ struct igb_adapter;
#define IGB_XDP_CONSUMED BIT(0)
#define IGB_XDP_TX BIT(1)
#define IGB_XDP_REDIR BIT(2)
+#define IGB_XDP_EXIT BIT(3)
struct vf_data_storage {
unsigned char vf_mac_addresses[ETH_ALEN];
@@ -255,6 +258,7 @@ enum igb_tx_flags {
enum igb_tx_buf_type {
IGB_TYPE_SKB = 0,
IGB_TYPE_XDP,
+ IGB_TYPE_XSK
};
/* wrapper around a pointer to a socket buffer,
@@ -320,6 +324,7 @@ struct igb_ring {
union { /* array of buffer info structs */
struct igb_tx_buffer *tx_buffer_info;
struct igb_rx_buffer *rx_buffer_info;
+ struct xdp_buff **rx_buffer_info_zc;
};
void *desc; /* descriptor ring memory */
unsigned long flags; /* ring specific flags */
@@ -357,6 +362,7 @@ struct igb_ring {
};
};
struct xdp_rxq_info xdp_rxq;
+ struct xsk_buff_pool *xsk_pool;
} ____cacheline_internodealigned_in_smp;
struct igb_q_vector {
@@ -384,7 +390,8 @@ enum e1000_ring_flags_t {
IGB_RING_FLAG_RX_SCTP_CSUM,
IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
IGB_RING_FLAG_TX_CTX_IDX,
- IGB_RING_FLAG_TX_DETECT_HANG
+ IGB_RING_FLAG_TX_DETECT_HANG,
+ IGB_RING_FLAG_TX_DISABLED
};
#define ring_uses_large_buffer(ring) \
@@ -731,12 +738,21 @@ int igb_setup_tx_resources(struct igb_ring *);
int igb_setup_rx_resources(struct igb_ring *);
void igb_free_tx_resources(struct igb_ring *);
void igb_free_rx_resources(struct igb_ring *);
+void igb_clean_tx_ring(struct igb_ring *tx_ring);
+void igb_clean_rx_ring(struct igb_ring *rx_ring);
void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
+void igb_finalize_xdp(struct igb_adapter *adapter, unsigned int status);
+void igb_update_rx_stats(struct igb_q_vector *q_vector, unsigned int packets,
+ unsigned int bytes);
void igb_setup_tctl(struct igb_adapter *);
void igb_setup_rctl(struct igb_adapter *);
void igb_setup_srrctl(struct igb_adapter *, struct igb_ring *);
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
+int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp);
+void igb_process_skb_fields(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb);
void igb_alloc_rx_buffers(struct igb_ring *, u16);
void igb_update_stats(struct igb_adapter *);
bool igb_has_link(struct igb_adapter *adapter);
@@ -797,6 +813,33 @@ static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
}
+/* This function assumes __netif_tx_lock is held by the caller. */
+static inline void igb_xdp_ring_update_tail(struct igb_ring *ring)
+{
+ lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
+
+ /* Force memory writes to complete before letting h/w know there
+ * are new descriptors to fetch.
+ */
+ wmb();
+ writel(ring->next_to_use, ring->tail);
+}
+
+static inline struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
+{
+ unsigned int r_idx = smp_processor_id();
+
+ if (r_idx >= adapter->num_tx_queues)
+ r_idx = r_idx % adapter->num_tx_queues;
+
+ return adapter->tx_ring[r_idx];
+}
+
+static inline bool igb_xdp_is_enabled(struct igb_adapter *adapter)
+{
+ return !!READ_ONCE(adapter->xdp_prog);
+}
+
int igb_add_filter(struct igb_adapter *adapter,
struct igb_nfc_filter *input);
int igb_erase_filter(struct igb_adapter *adapter,
@@ -807,4 +850,17 @@ int igb_add_mac_steering_filter(struct igb_adapter *adapter,
int igb_del_mac_steering_filter(struct igb_adapter *adapter,
const u8 *addr, u8 queue, u8 flags);
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+ struct igb_ring *ring);
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid);
+bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
+ struct xsk_buff_pool *xsk_pool, u16 count);
+void igb_clean_rx_ring_zc(struct igb_ring *rx_ring);
+int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
+ struct xsk_buff_pool *xsk_pool, const int budget);
+bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool);
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
+
#endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 288a4bb2683a..d368b753a467 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -33,7 +33,6 @@
#include <linux/bpf_trace.h>
#include <linux/pm_runtime.h>
#include <linux/etherdevice.h>
-#include <linux/lockdep.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
@@ -116,8 +115,6 @@ static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
-static void igb_clean_tx_ring(struct igb_ring *);
-static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(struct timer_list *);
static void igb_watchdog(struct timer_list *);
@@ -475,12 +472,17 @@ rx_ring_summary:
for (i = 0; i < rx_ring->count; i++) {
const char *next_desc;
- struct igb_rx_buffer *buffer_info;
- buffer_info = &rx_ring->rx_buffer_info[i];
+ dma_addr_t dma = (dma_addr_t)0;
+ struct igb_rx_buffer *buffer_info = NULL;
rx_desc = IGB_RX_DESC(rx_ring, i);
u0 = (struct my_u0 *)rx_desc;
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+ if (!rx_ring->xsk_pool) {
+ buffer_info = &rx_ring->rx_buffer_info[i];
+ dma = buffer_info->dma;
+ }
+
if (i == rx_ring->next_to_use)
next_desc = " NTU";
else if (i == rx_ring->next_to_clean)
@@ -500,11 +502,11 @@ rx_ring_summary:
"R ", i,
le64_to_cpu(u0->a),
le64_to_cpu(u0->b),
- (u64)buffer_info->dma,
+ (u64)dma,
next_desc);
if (netif_msg_pktdata(adapter) &&
- buffer_info->dma && buffer_info->page) {
+ buffer_info && dma && buffer_info->page) {
print_hex_dump(KERN_INFO, "",
DUMP_PREFIX_ADDRESS,
16, 1,
@@ -1990,7 +1992,11 @@ static void igb_configure(struct igb_adapter *adapter)
*/
for (i = 0; i < adapter->num_rx_queues; i++) {
struct igb_ring *ring = adapter->rx_ring[i];
- igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
+ if (ring->xsk_pool)
+ igb_alloc_rx_buffers_zc(ring, ring->xsk_pool,
+ igb_desc_unused(ring));
+ else
+ igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
}
}
@@ -2911,37 +2917,20 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
+ struct igb_adapter *adapter = netdev_priv(dev);
+
switch (xdp->command) {
case XDP_SETUP_PROG:
return igb_xdp_setup(dev, xdp);
+ case XDP_SETUP_XSK_POOL:
+ return igb_xsk_pool_setup(adapter, xdp->xsk.pool,
+ xdp->xsk.queue_id);
default:
return -EINVAL;
}
}
-/* This function assumes __netif_tx_lock is held by the caller. */
-static void igb_xdp_ring_update_tail(struct igb_ring *ring)
-{
- lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
-
- /* Force memory writes to complete before letting h/w know there
- * are new descriptors to fetch.
- */
- wmb();
- writel(ring->next_to_use, ring->tail);
-}
-
-static struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
-{
- unsigned int r_idx = smp_processor_id();
-
- if (r_idx >= adapter->num_tx_queues)
- r_idx = r_idx % adapter->num_tx_queues;
-
- return adapter->tx_ring[r_idx];
-}
-
-static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
+int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
{
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
int cpu = smp_processor_id();
@@ -2955,7 +2944,8 @@ static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
- tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
+ tx_ring = igb_xdp_is_enabled(adapter) ?
+ igb_xdp_tx_queue_mapping(adapter) : NULL;
if (unlikely(!tx_ring))
return IGB_XDP_CONSUMED;
@@ -2988,10 +2978,14 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
- tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
+ tx_ring = igb_xdp_is_enabled(adapter) ?
+ igb_xdp_tx_queue_mapping(adapter) : NULL;
if (unlikely(!tx_ring))
return -ENXIO;
+ if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags)))
+ return -ENXIO;
+
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
@@ -3042,6 +3036,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_setup_tc = igb_setup_tc,
.ndo_bpf = igb_xdp,
.ndo_xdp_xmit = igb_xdp_xmit,
+ .ndo_xsk_wakeup = igb_xsk_wakeup,
};
/**
@@ -3338,7 +3333,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->priv_flags |= IFF_SUPP_NOFCS;
netdev->priv_flags |= IFF_UNICAST_FLT;
- netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
+ netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_XSK_ZEROCOPY;
/* MTU range: 68 - 9216 */
netdev->min_mtu = ETH_MIN_MTU;
@@ -4364,6 +4360,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
u64 tdba = ring->dma;
int reg_idx = ring->reg_idx;
+ WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
+
wr32(E1000_TDLEN(reg_idx),
ring->count * sizeof(union e1000_adv_tx_desc));
wr32(E1000_TDBAL(reg_idx),
@@ -4424,7 +4422,8 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
- rx_ring->queue_index, 0);
+ rx_ring->queue_index,
+ rx_ring->q_vector->napi.napi_id);
if (res < 0) {
dev_err(dev, "Failed to register xdp_rxq index %u\n",
rx_ring->queue_index);
@@ -4720,12 +4719,17 @@ void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring)
struct e1000_hw *hw = &adapter->hw;
int reg_idx = ring->reg_idx;
u32 srrctl = 0;
+ u32 buf_size;
- srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
- if (ring_uses_large_buffer(ring))
- srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ if (ring->xsk_pool)
+ buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
+ else if (ring_uses_large_buffer(ring))
+ buf_size = IGB_RXBUFFER_3072;
else
- srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ buf_size = IGB_RXBUFFER_2048;
+
+ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+ srrctl |= buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
if (hw->mac.type >= e1000_82580)
srrctl |= E1000_SRRCTL_TIMESTAMP;
@@ -4757,8 +4761,17 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
u32 rxdctl = 0;
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
- WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL));
+ WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
+ if (ring->xsk_pool) {
+ WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_XSK_BUFF_POOL,
+ NULL));
+ xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+ } else {
+ WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
+ MEM_TYPE_PAGE_SHARED,
+ NULL));
+ }
/* disable the queue */
wr32(E1000_RXDCTL(reg_idx), 0);
@@ -4785,9 +4798,12 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
rxdctl |= IGB_RX_HTHRESH << 8;
rxdctl |= IGB_RX_WTHRESH << 16;
- /* initialize rx_buffer_info */
- memset(ring->rx_buffer_info, 0,
- sizeof(struct igb_rx_buffer) * ring->count);
+ if (ring->xsk_pool)
+ memset(ring->rx_buffer_info_zc, 0,
+ sizeof(*ring->rx_buffer_info_zc) * ring->count);
+ else
+ memset(ring->rx_buffer_info, 0,
+ sizeof(*ring->rx_buffer_info) * ring->count);
/* initialize Rx descriptor 0 */
rx_desc = IGB_RX_DESC(ring, 0);
@@ -4888,19 +4904,24 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
* igb_clean_tx_ring - Free Tx Buffers
* @tx_ring: ring to be cleaned
**/
-static void igb_clean_tx_ring(struct igb_ring *tx_ring)
+void igb_clean_tx_ring(struct igb_ring *tx_ring)
{
u16 i = tx_ring->next_to_clean;
struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
+ u32 xsk_frames = 0;
while (i != tx_ring->next_to_use) {
union e1000_adv_tx_desc *eop_desc, *tx_desc;
/* Free all the Tx ring sk_buffs or xdp frames */
- if (tx_buffer->type == IGB_TYPE_SKB)
+ if (tx_buffer->type == IGB_TYPE_SKB) {
dev_kfree_skb_any(tx_buffer->skb);
- else
+ } else if (tx_buffer->type == IGB_TYPE_XDP) {
xdp_return_frame(tx_buffer->xdpf);
+ } else if (tx_buffer->type == IGB_TYPE_XSK) {
+ xsk_frames++;
+ goto skip_for_xsk;
+ }
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -4931,6 +4952,7 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
DMA_TO_DEVICE);
}
+skip_for_xsk:
tx_buffer->next_to_watch = NULL;
/* move us one more past the eop_desc for start of next pkt */
@@ -4945,6 +4967,9 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
/* reset BQL for queue */
netdev_tx_reset_queue(txring_txq(tx_ring));
+ if (tx_ring->xsk_pool && xsk_frames)
+ xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
+
/* reset next_to_use and next_to_clean */
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
@@ -4975,8 +5000,13 @@ void igb_free_rx_resources(struct igb_ring *rx_ring)
rx_ring->xdp_prog = NULL;
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
- vfree(rx_ring->rx_buffer_info);
- rx_ring->rx_buffer_info = NULL;
+ if (rx_ring->xsk_pool) {
+ vfree(rx_ring->rx_buffer_info_zc);
+ rx_ring->rx_buffer_info_zc = NULL;
+ } else {
+ vfree(rx_ring->rx_buffer_info);
+ rx_ring->rx_buffer_info = NULL;
+ }
/* if not set, then don't free */
if (!rx_ring->desc)
@@ -5007,13 +5037,18 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter)
* igb_clean_rx_ring - Free Rx Buffers per Queue
* @rx_ring: ring to free buffers from
**/
-static void igb_clean_rx_ring(struct igb_ring *rx_ring)
+void igb_clean_rx_ring(struct igb_ring *rx_ring)
{
u16 i = rx_ring->next_to_clean;
dev_kfree_skb(rx_ring->skb);
rx_ring->skb = NULL;
+ if (rx_ring->xsk_pool) {
+ igb_clean_rx_ring_zc(rx_ring);
+ goto skip_for_xsk;
+ }
+
/* Free all the Rx ring sk_buffs */
while (i != rx_ring->next_to_alloc) {
struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
@@ -5041,6 +5076,7 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
i = 0;
}
+skip_for_xsk:
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -6467,6 +6503,9 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
+ if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags)))
+ return NETDEV_TX_BUSY;
+
/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
first->type = IGB_TYPE_SKB;
@@ -6622,7 +6661,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
struct igb_adapter *adapter = netdev_priv(netdev);
int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD;
- if (adapter->xdp_prog) {
+ if (igb_xdp_is_enabled(adapter)) {
int i;
for (i = 0; i < adapter->num_rx_queues; i++) {
@@ -8195,6 +8234,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
struct igb_q_vector *q_vector = container_of(napi,
struct igb_q_vector,
napi);
+ struct xsk_buff_pool *xsk_pool;
bool clean_complete = true;
int work_done = 0;
@@ -8206,7 +8246,12 @@ static int igb_poll(struct napi_struct *napi, int budget)
clean_complete = igb_clean_tx_irq(q_vector, budget);
if (q_vector->rx.ring) {
- int cleaned = igb_clean_rx_irq(q_vector, budget);
+ int cleaned;
+
+ xsk_pool = READ_ONCE(q_vector->rx.ring->xsk_pool);
+ cleaned = xsk_pool ?
+ igb_clean_rx_irq_zc(q_vector, xsk_pool, budget) :
+ igb_clean_rx_irq(q_vector, budget);
work_done += cleaned;
if (cleaned >= budget)
@@ -8235,13 +8280,18 @@ static int igb_poll(struct napi_struct *napi, int budget)
**/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
{
- struct igb_adapter *adapter = q_vector->adapter;
- struct igb_ring *tx_ring = q_vector->tx.ring;
- struct igb_tx_buffer *tx_buffer;
- union e1000_adv_tx_desc *tx_desc;
unsigned int total_bytes = 0, total_packets = 0;
+ struct igb_adapter *adapter = q_vector->adapter;
unsigned int budget = q_vector->tx.work_limit;
+ struct igb_ring *tx_ring = q_vector->tx.ring;
unsigned int i = tx_ring->next_to_clean;
+ union e1000_adv_tx_desc *tx_desc;
+ struct igb_tx_buffer *tx_buffer;
+ struct xsk_buff_pool *xsk_pool;
+ int cpu = smp_processor_id();
+ bool xsk_xmit_done = true;
+ struct netdev_queue *nq;
+ u32 xsk_frames = 0;
if (test_bit(__IGB_DOWN, &adapter->state))
return true;
@@ -8272,10 +8322,14 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
total_packets += tx_buffer->gso_segs;
/* free the skb */
- if (tx_buffer->type == IGB_TYPE_SKB)
+ if (tx_buffer->type == IGB_TYPE_SKB) {
napi_consume_skb(tx_buffer->skb, napi_budget);
- else
+ } else if (tx_buffer->type == IGB_TYPE_XDP) {
xdp_return_frame(tx_buffer->xdpf);
+ } else if (tx_buffer->type == IGB_TYPE_XSK) {
+ xsk_frames++;
+ goto skip_for_xsk;
+ }
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -8307,6 +8361,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
}
}
+skip_for_xsk:
/* move us one more past the eop_desc for start of next pkt */
tx_buffer++;
tx_desc++;
@@ -8335,6 +8390,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
q_vector->tx.total_bytes += total_bytes;
q_vector->tx.total_packets += total_packets;
+ xsk_pool = READ_ONCE(tx_ring->xsk_pool);
+ if (xsk_pool) {
+ if (xsk_frames)
+ xsk_tx_completed(xsk_pool, xsk_frames);
+ if (xsk_uses_need_wakeup(xsk_pool))
+ xsk_set_tx_need_wakeup(xsk_pool);
+
+ nq = txring_txq(tx_ring);
+ __netif_tx_lock(nq, cpu);
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
+ xsk_xmit_done = igb_xmit_zc(tx_ring, xsk_pool);
+ __netif_tx_unlock(nq);
+ }
+
if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
struct e1000_hw *hw = &adapter->hw;
@@ -8397,7 +8467,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
}
}
- return !!budget;
+ return !!budget && xsk_xmit_done;
}
/**
@@ -8588,9 +8658,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
return skb;
}
-static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
- struct igb_ring *rx_ring,
- struct xdp_buff *xdp)
+static int igb_run_xdp(struct igb_adapter *adapter, struct igb_ring *rx_ring,
+ struct xdp_buff *xdp)
{
int err, result = IGB_XDP_PASS;
struct bpf_prog *xdp_prog;
@@ -8630,7 +8699,7 @@ out_failure:
break;
}
xdp_out:
- return ERR_PTR(-result);
+ return result;
}
static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring,
@@ -8756,10 +8825,6 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
if (unlikely((igb_test_staterr(rx_desc,
E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
struct net_device *netdev = rx_ring->netdev;
@@ -8786,9 +8851,9 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring,
* order to populate the hash, checksum, VLAN, timestamp, protocol, and
* other fields within the skb.
**/
-static void igb_process_skb_fields(struct igb_ring *rx_ring,
- union e1000_adv_rx_desc *rx_desc,
- struct sk_buff *skb)
+void igb_process_skb_fields(struct igb_ring *rx_ring,
+ union e1000_adv_rx_desc *rx_desc,
+ struct sk_buff *skb)
{
struct net_device *dev = rx_ring->netdev;
@@ -8870,6 +8935,38 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring,
rx_buffer->page = NULL;
}
+void igb_finalize_xdp(struct igb_adapter *adapter, unsigned int status)
+{
+ int cpu = smp_processor_id();
+ struct netdev_queue *nq;
+
+ if (status & IGB_XDP_REDIR)
+ xdp_do_flush();
+
+ if (status & IGB_XDP_TX) {
+ struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
+
+ nq = txring_txq(tx_ring);
+ __netif_tx_lock(nq, cpu);
+ igb_xdp_ring_update_tail(tx_ring);
+ __netif_tx_unlock(nq);
+ }
+}
+
+void igb_update_rx_stats(struct igb_q_vector *q_vector, unsigned int packets,
+ unsigned int bytes)
+{
+ struct igb_ring *ring = q_vector->rx.ring;
+
+ u64_stats_update_begin(&ring->rx_syncp);
+ ring->rx_stats.packets += packets;
+ ring->rx_stats.bytes += bytes;
+ u64_stats_update_end(&ring->rx_syncp);
+
+ q_vector->rx.total_packets += packets;
+ q_vector->rx.total_bytes += bytes;
+}
+
static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
{
unsigned int total_bytes = 0, total_packets = 0;
@@ -8877,12 +8974,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
struct igb_ring *rx_ring = q_vector->rx.ring;
u16 cleaned_count = igb_desc_unused(rx_ring);
struct sk_buff *skb = rx_ring->skb;
- int cpu = smp_processor_id();
unsigned int xdp_xmit = 0;
- struct netdev_queue *nq;
struct xdp_buff xdp;
u32 frame_sz = 0;
int rx_buf_pgcnt;
+ int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@@ -8940,12 +9036,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size);
#endif
- skb = igb_run_xdp(adapter, rx_ring, &xdp);
+ xdp_res = igb_run_xdp(adapter, rx_ring, &xdp);
}
- if (IS_ERR(skb)) {
- unsigned int xdp_res = -PTR_ERR(skb);
-
+ if (xdp_res) {
if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) {
xdp_xmit |= xdp_res;
igb_rx_buffer_flip(rx_ring, rx_buffer, size);
@@ -8964,7 +9058,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
&xdp, timestamp);
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_failed++;
rx_buffer->pagecnt_bias++;
break;
@@ -8978,7 +9072,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
continue;
/* verify the packet layout is correct */
- if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
+ if (xdp_res || igb_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}
@@ -9001,24 +9095,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
/* place incomplete frames back on ring for completion */
rx_ring->skb = skb;
- if (xdp_xmit & IGB_XDP_REDIR)
- xdp_do_flush();
-
- if (xdp_xmit & IGB_XDP_TX) {
- struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
-
- nq = txring_txq(tx_ring);
- __netif_tx_lock(nq, cpu);
- igb_xdp_ring_update_tail(tx_ring);
- __netif_tx_unlock(nq);
- }
+ if (xdp_xmit)
+ igb_finalize_xdp(adapter, xdp_xmit);
- u64_stats_update_begin(&rx_ring->rx_syncp);
- rx_ring->rx_stats.packets += total_packets;
- rx_ring->rx_stats.bytes += total_bytes;
- u64_stats_update_end(&rx_ring->rx_syncp);
- q_vector->rx.total_packets += total_packets;
- q_vector->rx.total_bytes += total_bytes;
+ igb_update_rx_stats(q_vector, total_packets, total_bytes);
if (cleaned_count)
igb_alloc_rx_buffers(rx_ring, cleaned_count);
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
new file mode 100644
index 000000000000..157d43787fa0
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "e1000_hw.h"
+#include "igb.h"
+
+static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
+{
+ int size = pool_present ?
+ sizeof(*ring->rx_buffer_info_zc) * ring->count :
+ sizeof(*ring->rx_buffer_info) * ring->count;
+ void *buff_info = vmalloc(size);
+
+ if (!buff_info)
+ return -ENOMEM;
+
+ if (pool_present) {
+ vfree(ring->rx_buffer_info);
+ ring->rx_buffer_info = NULL;
+ ring->rx_buffer_info_zc = buff_info;
+ } else {
+ vfree(ring->rx_buffer_info_zc);
+ ring->rx_buffer_info_zc = NULL;
+ ring->rx_buffer_info = buff_info;
+ }
+
+ return 0;
+}
+
+static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
+{
+ struct igb_ring *tx_ring = adapter->tx_ring[qid];
+ struct igb_ring *rx_ring = adapter->rx_ring[qid];
+ struct e1000_hw *hw = &adapter->hw;
+
+ set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+ wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
+ wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);
+
+ synchronize_net();
+
+ /* Rx/Tx share the same napi context. */
+ napi_disable(&rx_ring->q_vector->napi);
+
+ igb_clean_tx_ring(tx_ring);
+ igb_clean_rx_ring(rx_ring);
+
+ memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
+ memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
+}
+
+static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
+{
+ struct igb_ring *tx_ring = adapter->tx_ring[qid];
+ struct igb_ring *rx_ring = adapter->rx_ring[qid];
+
+ igb_configure_tx_ring(adapter, tx_ring);
+ igb_configure_rx_ring(adapter, rx_ring);
+
+ synchronize_net();
+
+ clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+ /* call igb_desc_unused which always leaves
+ * at least 1 descriptor unused to make sure
+ * next_to_use != next_to_clean
+ */
+ if (rx_ring->xsk_pool)
+ igb_alloc_rx_buffers_zc(rx_ring, rx_ring->xsk_pool,
+ igb_desc_unused(rx_ring));
+ else
+ igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+
+ /* Rx/Tx share the same napi context. */
+ napi_enable(&rx_ring->q_vector->napi);
+}
+
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+ struct igb_ring *ring)
+{
+ int qid = ring->queue_index;
+ struct xsk_buff_pool *pool;
+
+ pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+
+ if (!igb_xdp_is_enabled(adapter))
+ return NULL;
+
+ return (pool && pool->dev) ? pool : NULL;
+}
+
+static int igb_xsk_pool_enable(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct igb_ring *rx_ring;
+ bool if_running;
+ int err;
+
+ if (qid >= adapter->num_rx_queues)
+ return -EINVAL;
+
+ if (qid >= netdev->real_num_rx_queues ||
+ qid >= netdev->real_num_tx_queues)
+ return -EINVAL;
+
+ err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
+ if (err)
+ return err;
+
+ rx_ring = adapter->rx_ring[qid];
+ if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+ if (if_running)
+ igb_txrx_ring_disable(adapter, qid);
+
+ if (if_running) {
+ err = igb_realloc_rx_buffer_info(rx_ring, true);
+ if (!err) {
+ igb_txrx_ring_enable(adapter, qid);
+ /* Kick start the NAPI context so that receiving will start */
+ err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
+ }
+
+ if (err) {
+ xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
+{
+ struct xsk_buff_pool *pool;
+ struct igb_ring *rx_ring;
+ bool if_running;
+ int err;
+
+ pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+ if (!pool)
+ return -EINVAL;
+
+ rx_ring = adapter->rx_ring[qid];
+ if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+ if (if_running)
+ igb_txrx_ring_disable(adapter, qid);
+
+ xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+
+ if (if_running) {
+ err = igb_realloc_rx_buffer_info(rx_ring, false);
+ if (err)
+ return err;
+
+ igb_txrx_ring_enable(adapter, qid);
+ }
+
+ return 0;
+}
+
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+ struct xsk_buff_pool *pool,
+ u16 qid)
+{
+ return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
+ igb_xsk_pool_disable(adapter, qid);
+}
+
+static u16 igb_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
+ union e1000_adv_rx_desc *rx_desc, u16 count)
+{
+ dma_addr_t dma;
+ u16 buffs;
+ int i;
+
+ /* nothing to do */
+ if (!count)
+ return 0;
+
+ buffs = xsk_buff_alloc_batch(pool, xdp, count);
+ for (i = 0; i < buffs; i++) {
+ dma = xsk_buff_xdp_get_dma(*xdp);
+ rx_desc->read.pkt_addr = cpu_to_le64(dma);
+ rx_desc->wb.upper.length = 0;
+
+ rx_desc++;
+ xdp++;
+ }
+
+ return buffs;
+}
+
+bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
+ struct xsk_buff_pool *xsk_pool, u16 count)
+{
+ u32 nb_buffs_extra = 0, nb_buffs = 0;
+ union e1000_adv_rx_desc *rx_desc;
+ u16 ntu = rx_ring->next_to_use;
+ u16 total_count = count;
+ struct xdp_buff **xdp;
+
+ rx_desc = IGB_RX_DESC(rx_ring, ntu);
+ xdp = &rx_ring->rx_buffer_info_zc[ntu];
+
+ if (ntu + count >= rx_ring->count) {
+ nb_buffs_extra = igb_fill_rx_descs(xsk_pool, xdp, rx_desc,
+ rx_ring->count - ntu);
+ if (nb_buffs_extra != rx_ring->count - ntu) {
+ ntu += nb_buffs_extra;
+ goto exit;
+ }
+ rx_desc = IGB_RX_DESC(rx_ring, 0);
+ xdp = rx_ring->rx_buffer_info_zc;
+ ntu = 0;
+ count -= nb_buffs_extra;
+ }
+
+ nb_buffs = igb_fill_rx_descs(xsk_pool, xdp, rx_desc, count);
+ ntu += nb_buffs;
+ if (ntu == rx_ring->count)
+ ntu = 0;
+
+ /* clear the length for the next_to_use descriptor */
+ rx_desc = IGB_RX_DESC(rx_ring, ntu);
+ rx_desc->wb.upper.length = 0;
+
+exit:
+ if (rx_ring->next_to_use != ntu) {
+ rx_ring->next_to_use = ntu;
+
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(ntu, rx_ring->tail);
+ }
+
+ return total_count == (nb_buffs + nb_buffs_extra);
+}
+
+void igb_clean_rx_ring_zc(struct igb_ring *rx_ring)
+{
+ u16 ntc = rx_ring->next_to_clean;
+ u16 ntu = rx_ring->next_to_use;
+
+ while (ntc != ntu) {
+ struct xdp_buff *xdp = rx_ring->rx_buffer_info_zc[ntc];
+
+ xsk_buff_free(xdp);
+ ntc++;
+ if (ntc >= rx_ring->count)
+ ntc = 0;
+ }
+}
+
+static struct sk_buff *igb_construct_skb_zc(struct igb_ring *rx_ring,
+ struct xdp_buff *xdp,
+ ktime_t timestamp)
+{
+ unsigned int totalsize = xdp->data_end - xdp->data_meta;
+ unsigned int metasize = xdp->data - xdp->data_meta;
+ struct sk_buff *skb;
+
+ net_prefetch(xdp->data_meta);
+
+ /* allocate a skb to store the frags */
+ skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
+ if (unlikely(!skb))
+ return NULL;
+
+ if (timestamp)
+ skb_hwtstamps(skb)->hwtstamp = timestamp;
+
+ memcpy(__skb_put(skb, totalsize), xdp->data_meta,
+ ALIGN(totalsize, sizeof(long)));
+
+ if (metasize) {
+ skb_metadata_set(skb, metasize);
+ __skb_pull(skb, metasize);
+ }
+
+ return skb;
+}
+
+static int igb_run_xdp_zc(struct igb_adapter *adapter, struct igb_ring *rx_ring,
+ struct xdp_buff *xdp, struct xsk_buff_pool *xsk_pool,
+ struct bpf_prog *xdp_prog)
+{
+ int err, result = IGB_XDP_PASS;
+ u32 act;
+
+ prefetchw(xdp->data_hard_start); /* xdp_frame write */
+
+ act = bpf_prog_run_xdp(xdp_prog, xdp);
+
+ if (likely(act == XDP_REDIRECT)) {
+ err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
+ if (!err)
+ return IGB_XDP_REDIR;
+
+ if (xsk_uses_need_wakeup(xsk_pool) &&
+ err == -ENOBUFS)
+ result = IGB_XDP_EXIT;
+ else
+ result = IGB_XDP_CONSUMED;
+ goto out_failure;
+ }
+
+ switch (act) {
+ case XDP_PASS:
+ break;
+ case XDP_TX:
+ result = igb_xdp_xmit_back(adapter, xdp);
+ if (result == IGB_XDP_CONSUMED)
+ goto out_failure;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_ABORTED:
+out_failure:
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+ fallthrough;
+ case XDP_DROP:
+ result = IGB_XDP_CONSUMED;
+ break;
+ }
+
+ return result;
+}
+
+int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
+ struct xsk_buff_pool *xsk_pool, const int budget)
+{
+ struct igb_adapter *adapter = q_vector->adapter;
+ unsigned int total_bytes = 0, total_packets = 0;
+ struct igb_ring *rx_ring = q_vector->rx.ring;
+ u32 ntc = rx_ring->next_to_clean;
+ struct bpf_prog *xdp_prog;
+ unsigned int xdp_xmit = 0;
+ bool failure = false;
+ u16 entries_to_alloc;
+ struct sk_buff *skb;
+
+ /* xdp_prog cannot be NULL in the ZC path */
+ xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+ while (likely(total_packets < budget)) {
+ union e1000_adv_rx_desc *rx_desc;
+ ktime_t timestamp = 0;
+ struct xdp_buff *xdp;
+ unsigned int size;
+ int xdp_res = 0;
+
+ rx_desc = IGB_RX_DESC(rx_ring, ntc);
+ size = le16_to_cpu(rx_desc->wb.upper.length);
+ if (!size)
+ break;
+
+ /* This memory barrier is needed to keep us from reading
+ * any other fields out of the rx_desc until we know the
+ * descriptor has been written back
+ */
+ dma_rmb();
+
+ xdp = rx_ring->rx_buffer_info_zc[ntc];
+ xsk_buff_set_size(xdp, size);
+ xsk_buff_dma_sync_for_cpu(xdp);
+
+ /* pull rx packet timestamp if available and valid */
+ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
+ int ts_hdr_len;
+
+ ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
+ xdp->data,
+ &timestamp);
+
+ xdp->data += ts_hdr_len;
+ xdp->data_meta += ts_hdr_len;
+ size -= ts_hdr_len;
+ }
+
+ xdp_res = igb_run_xdp_zc(adapter, rx_ring, xdp, xsk_pool,
+ xdp_prog);
+
+ if (xdp_res) {
+ if (likely(xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR))) {
+ xdp_xmit |= xdp_res;
+ } else if (xdp_res == IGB_XDP_EXIT) {
+ failure = true;
+ break;
+ } else if (xdp_res == IGB_XDP_CONSUMED) {
+ xsk_buff_free(xdp);
+ }
+
+ total_packets++;
+ total_bytes += size;
+ ntc++;
+ if (ntc == rx_ring->count)
+ ntc = 0;
+ continue;
+ }
+
+ skb = igb_construct_skb_zc(rx_ring, xdp, timestamp);
+
+ /* exit if we failed to retrieve a buffer */
+ if (!skb) {
+ rx_ring->rx_stats.alloc_failed++;
+ break;
+ }
+
+ xsk_buff_free(xdp);
+ ntc++;
+ if (ntc == rx_ring->count)
+ ntc = 0;
+
+ if (eth_skb_pad(skb))
+ continue;
+
+ /* probably a little skewed due to removing CRC */
+ total_bytes += skb->len;
+
+ /* populate checksum, timestamp, VLAN, and protocol */
+ igb_process_skb_fields(rx_ring, rx_desc, skb);
+
+ napi_gro_receive(&q_vector->napi, skb);
+
+ /* update budget accounting */
+ total_packets++;
+ }
+
+ rx_ring->next_to_clean = ntc;
+
+ if (xdp_xmit)
+ igb_finalize_xdp(adapter, xdp_xmit);
+
+ igb_update_rx_stats(q_vector, total_packets, total_bytes);
+
+ entries_to_alloc = igb_desc_unused(rx_ring);
+ if (entries_to_alloc >= IGB_RX_BUFFER_WRITE)
+ failure |= !igb_alloc_rx_buffers_zc(rx_ring, xsk_pool,
+ entries_to_alloc);
+
+ if (xsk_uses_need_wakeup(xsk_pool)) {
+ if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
+ xsk_set_rx_need_wakeup(xsk_pool);
+ else
+ xsk_clear_rx_need_wakeup(xsk_pool);
+
+ return (int)total_packets;
+ }
+ return failure ? budget : (int)total_packets;
+}
+
+bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
+{
+ unsigned int budget = igb_desc_unused(tx_ring);
+ u32 cmd_type, olinfo_status, nb_pkts, i = 0;
+ struct xdp_desc *descs = xsk_pool->tx_descs;
+ union e1000_adv_tx_desc *tx_desc = NULL;
+ struct igb_tx_buffer *tx_buffer_info;
+ unsigned int total_bytes = 0;
+ dma_addr_t dma;
+
+ if (!netif_carrier_ok(tx_ring->netdev))
+ return true;
+
+ if (test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))
+ return true;
+
+ nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget);
+ if (!nb_pkts)
+ return true;
+
+ while (nb_pkts-- > 0) {
+ dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
+ xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len);
+
+ tx_buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+ tx_buffer_info->bytecount = descs[i].len;
+ tx_buffer_info->type = IGB_TYPE_XSK;
+ tx_buffer_info->xdpf = NULL;
+ tx_buffer_info->gso_segs = 1;
+ tx_buffer_info->time_stamp = jiffies;
+
+ tx_desc = IGB_TX_DESC(tx_ring, tx_ring->next_to_use);
+ tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+ /* put descriptor type bits */
+ cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
+ E1000_ADVTXD_DCMD_IFCS;
+ olinfo_status = descs[i].len << E1000_ADVTXD_PAYLEN_SHIFT;
+
+ /* FIXME: This sets the Report Status (RS) bit for every
+ * descriptor. One nice to have optimization would be to set it
+ * only for the last descriptor in the whole batch. See Intel
+ * ice driver for an example on how to do it.
+ */
+ cmd_type |= descs[i].len | IGB_TXD_DCMD;
+ tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+
+ total_bytes += descs[i].len;
+
+ i++;
+ tx_ring->next_to_use++;
+ tx_buffer_info->next_to_watch = tx_desc;
+ if (tx_ring->next_to_use == tx_ring->count)
+ tx_ring->next_to_use = 0;
+ }
+
+ netdev_tx_sent_queue(txring_txq(tx_ring), total_bytes);
+ igb_xdp_ring_update_tail(tx_ring);
+
+ return nb_pkts < budget;
+}
+
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+ struct igb_adapter *adapter = netdev_priv(dev);
+ struct e1000_hw *hw = &adapter->hw;
+ struct igb_ring *ring;
+ u32 eics = 0;
+
+ if (test_bit(__IGB_DOWN, &adapter->state))
+ return -ENETDOWN;
+
+ if (!igb_xdp_is_enabled(adapter))
+ return -EINVAL;
+
+ if (qid >= adapter->num_tx_queues)
+ return -EINVAL;
+
+ ring = adapter->tx_ring[qid];
+
+ if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
+ return -ENETDOWN;
+
+ if (!READ_ONCE(ring->xsk_pool))
+ return -EINVAL;
+
+ if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+ /* Cause software interrupt */
+ if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+ eics |= ring->q_vector->eims_value;
+ wr32(E1000_EICS, eics);
+ } else {
+ wr32(E1000_ICS, E1000_ICS_RXDMT0);
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index eac0f966e0e4..b8111ad9a9a8 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -337,6 +337,8 @@ struct igc_adapter {
struct igc_led_classdev *leds;
};
+void igc_set_queue_napi(struct igc_adapter *adapter, int q_idx,
+ struct napi_struct *napi);
void igc_up(struct igc_adapter *adapter);
void igc_down(struct igc_adapter *adapter);
int igc_open(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index 9fae8bdec2a7..1613b562d17c 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -68,6 +68,10 @@ static s32 igc_init_nvm_params_base(struct igc_hw *hw)
u32 eecd = rd32(IGC_EECD);
u16 size;
+ /* failed to read reg and got all F's */
+ if (!(~eecd))
+ return -ENXIO;
+
size = FIELD_GET(IGC_EECD_SIZE_EX_MASK, eecd);
/* Added to a constant, "size" becomes the left-shift value
@@ -221,6 +225,8 @@ static s32 igc_get_invariants_base(struct igc_hw *hw)
/* NVM initialization */
ret_val = igc_init_nvm_params_base(hw);
+ if (ret_val)
+ goto out;
switch (hw->mac.type) {
case igc_i225:
ret_val = igc_init_nvm_params_i225(hw);
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index d9d1a1a11daf..be8a49a86d09 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -279,9 +279,4 @@ struct net_device *igc_get_hw_dev(struct igc_hw *hw);
#define hw_dbg(format, arg...) \
netdev_dbg(igc_get_hw_dev(hw), format, ##arg)
-s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
-s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value);
-void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
-void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value);
-
#endif /* _IGC_HW_H_ */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 27872bdea9bd..56a35d58e7a6 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2123,10 +2123,6 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring,
union igc_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
struct net_device *netdev = rx_ring->netdev;
@@ -2515,8 +2511,7 @@ out_failure:
}
}
-static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
- struct xdp_buff *xdp)
+static int igc_xdp_run_prog(struct igc_adapter *adapter, struct xdp_buff *xdp)
{
struct bpf_prog *prog;
int res;
@@ -2530,7 +2525,7 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
res = __igc_xdp_run_prog(adapter, prog, xdp);
out:
- return ERR_PTR(-res);
+ return res;
}
/* This function assumes __netif_tx_lock is held by the caller. */
@@ -2585,6 +2580,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = igc_desc_unused(rx_ring);
int xdp_status = 0, rx_buffer_pgcnt;
+ int xdp_res = 0;
while (likely(total_packets < budget)) {
struct igc_xdp_buff ctx = { .rx_ts = NULL };
@@ -2630,12 +2626,10 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
xdp_buff_clear_frags_flag(&ctx.xdp);
ctx.rx_desc = rx_desc;
- skb = igc_xdp_run_prog(adapter, &ctx.xdp);
+ xdp_res = igc_xdp_run_prog(adapter, &ctx.xdp);
}
- if (IS_ERR(skb)) {
- unsigned int xdp_res = -PTR_ERR(skb);
-
+ if (xdp_res) {
switch (xdp_res) {
case IGC_XDP_CONSUMED:
rx_buffer->pagecnt_bias++;
@@ -2657,7 +2651,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
skb = igc_construct_skb(rx_ring, rx_buffer, &ctx);
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_failed++;
rx_buffer->pagecnt_bias++;
set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags);
@@ -2672,7 +2666,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
continue;
/* verify the packet layout is correct */
- if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
+ if (xdp_res || igc_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}
@@ -4948,6 +4942,22 @@ static int igc_sw_init(struct igc_adapter *adapter)
return 0;
}
+void igc_set_queue_napi(struct igc_adapter *adapter, int vector,
+ struct napi_struct *napi)
+{
+ struct igc_q_vector *q_vector = adapter->q_vector[vector];
+
+ if (q_vector->rx.ring)
+ netif_queue_set_napi(adapter->netdev,
+ q_vector->rx.ring->queue_index,
+ NETDEV_QUEUE_TYPE_RX, napi);
+
+ if (q_vector->tx.ring)
+ netif_queue_set_napi(adapter->netdev,
+ q_vector->tx.ring->queue_index,
+ NETDEV_QUEUE_TYPE_TX, napi);
+}
+
/**
* igc_up - Open the interface and prepare it to handle traffic
* @adapter: board private structure
@@ -4955,6 +4965,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
void igc_up(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
+ struct napi_struct *napi;
int i = 0;
/* hardware has been reset, we need to reload some things */
@@ -4962,8 +4973,11 @@ void igc_up(struct igc_adapter *adapter)
clear_bit(__IGC_DOWN, &adapter->state);
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&adapter->q_vector[i]->napi);
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ napi = &adapter->q_vector[i]->napi;
+ napi_enable(napi);
+ igc_set_queue_napi(adapter, i, napi);
+ }
if (adapter->msix_entries)
igc_configure_msix(adapter);
@@ -5192,6 +5206,7 @@ void igc_down(struct igc_adapter *adapter)
for (i = 0; i < adapter->num_q_vectors; i++) {
if (adapter->q_vector[i]) {
napi_synchronize(&adapter->q_vector[i]->napi);
+ igc_set_queue_napi(adapter, i, NULL);
napi_disable(&adapter->q_vector[i]->napi);
}
}
@@ -5576,6 +5591,9 @@ static int igc_request_msix(struct igc_adapter *adapter)
q_vector);
if (err)
goto err_free;
+
+ netif_napi_set_irq(&q_vector->napi,
+ adapter->msix_entries[vector].vector);
}
igc_configure_msix(adapter);
@@ -6018,6 +6036,7 @@ static int __igc_open(struct net_device *netdev, bool resuming)
struct igc_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = adapter->pdev;
struct igc_hw *hw = &adapter->hw;
+ struct napi_struct *napi;
int err = 0;
int i = 0;
@@ -6053,8 +6072,11 @@ static int __igc_open(struct net_device *netdev, bool resuming)
clear_bit(__IGC_DOWN, &adapter->state);
- for (i = 0; i < adapter->num_q_vectors; i++)
- napi_enable(&adapter->q_vector[i]->napi);
+ for (i = 0; i < adapter->num_q_vectors; i++) {
+ napi = &adapter->q_vector[i]->napi;
+ napi_enable(napi);
+ igc_set_queue_napi(adapter, i, napi);
+ }
/* Clear any pending interrupts. */
rd32(IGC_ICR);
@@ -6779,45 +6801,6 @@ static const struct net_device_ops igc_netdev_ops = {
.ndo_get_tstamp = igc_get_tstamp,
};
-/* PCIe configuration access */
-void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
-{
- struct igc_adapter *adapter = hw->back;
-
- pci_read_config_word(adapter->pdev, reg, value);
-}
-
-void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
-{
- struct igc_adapter *adapter = hw->back;
-
- pci_write_config_word(adapter->pdev, reg, *value);
-}
-
-s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
-{
- struct igc_adapter *adapter = hw->back;
-
- if (!pci_is_pcie(adapter->pdev))
- return -IGC_ERR_CONFIG;
-
- pcie_capability_read_word(adapter->pdev, reg, value);
-
- return IGC_SUCCESS;
-}
-
-s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
-{
- struct igc_adapter *adapter = hw->back;
-
- if (!pci_is_pcie(adapter->pdev))
- return -IGC_ERR_CONFIG;
-
- pcie_capability_write_word(adapter->pdev, reg, *value);
-
- return IGC_SUCCESS;
-}
-
u32 igc_rd32(struct igc_hw *hw, u32 reg)
{
struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
@@ -7338,7 +7321,7 @@ static void igc_deliver_wake_packet(struct net_device *netdev)
netif_rx(skb);
}
-static int igc_resume(struct device *dev)
+static int __igc_resume(struct device *dev, bool rpm)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
@@ -7381,7 +7364,11 @@ static int igc_resume(struct device *dev)
wr32(IGC_WUS, ~0);
if (netif_running(netdev)) {
+ if (!rpm)
+ rtnl_lock();
err = __igc_open(netdev, true);
+ if (!rpm)
+ rtnl_unlock();
if (!err)
netif_device_attach(netdev);
}
@@ -7389,9 +7376,14 @@ static int igc_resume(struct device *dev)
return err;
}
+static int igc_resume(struct device *dev)
+{
+ return __igc_resume(dev, false);
+}
+
static int igc_runtime_resume(struct device *dev)
{
- return igc_resume(dev);
+ return __igc_resume(dev, true);
}
static int igc_suspend(struct device *dev)
@@ -7436,14 +7428,18 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
struct net_device *netdev = pci_get_drvdata(pdev);
struct igc_adapter *adapter = netdev_priv(netdev);
+ rtnl_lock();
netif_device_detach(netdev);
- if (state == pci_channel_io_perm_failure)
+ if (state == pci_channel_io_perm_failure) {
+ rtnl_unlock();
return PCI_ERS_RESULT_DISCONNECT;
+ }
if (netif_running(netdev))
igc_down(adapter);
pci_disable_device(pdev);
+ rtnl_unlock();
/* Request a slot reset. */
return PCI_ERS_RESULT_NEED_RESET;
@@ -7454,7 +7450,7 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
* @pdev: Pointer to PCI device
*
* Restart the card from scratch, as if from a cold-boot. Implementation
- * resembles the first-half of the igc_resume routine.
+ * resembles the first-half of the __igc_resume routine.
**/
static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
{
@@ -7493,7 +7489,7 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
*
* This callback is called when the error recovery driver tells us that
* its OK to resume normal operation. Implementation resembles the
- * second-half of the igc_resume routine.
+ * second-half of the __igc_resume routine.
*/
static void igc_io_resume(struct pci_dev *pdev)
{
diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c
index 58f81aba0144..efd121c03967 100644
--- a/drivers/net/ethernet/intel/igc/igc_nvm.c
+++ b/drivers/net/ethernet/intel/igc/igc_nvm.c
@@ -36,56 +36,6 @@ static s32 igc_poll_eerd_eewr_done(struct igc_hw *hw, int ee_reg)
}
/**
- * igc_acquire_nvm - Generic request for access to EEPROM
- * @hw: pointer to the HW structure
- *
- * Set the EEPROM access request bit and wait for EEPROM access grant bit.
- * Return successful if access grant bit set, else clear the request for
- * EEPROM access and return -IGC_ERR_NVM (-1).
- */
-s32 igc_acquire_nvm(struct igc_hw *hw)
-{
- s32 timeout = IGC_NVM_GRANT_ATTEMPTS;
- u32 eecd = rd32(IGC_EECD);
- s32 ret_val = 0;
-
- wr32(IGC_EECD, eecd | IGC_EECD_REQ);
- eecd = rd32(IGC_EECD);
-
- while (timeout) {
- if (eecd & IGC_EECD_GNT)
- break;
- udelay(5);
- eecd = rd32(IGC_EECD);
- timeout--;
- }
-
- if (!timeout) {
- eecd &= ~IGC_EECD_REQ;
- wr32(IGC_EECD, eecd);
- hw_dbg("Could not acquire NVM grant\n");
- ret_val = -IGC_ERR_NVM;
- }
-
- return ret_val;
-}
-
-/**
- * igc_release_nvm - Release exclusive access to EEPROM
- * @hw: pointer to the HW structure
- *
- * Stop any current commands to the EEPROM and clear the EEPROM request bit.
- */
-void igc_release_nvm(struct igc_hw *hw)
-{
- u32 eecd;
-
- eecd = rd32(IGC_EECD);
- eecd &= ~IGC_EECD_REQ;
- wr32(IGC_EECD, eecd);
-}
-
-/**
* igc_read_nvm_eerd - Reads EEPROM using EERD register
* @hw: pointer to the HW structure
* @offset: offset of word in the EEPROM to read
diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.h b/drivers/net/ethernet/intel/igc/igc_nvm.h
index f9fc2e9cfb03..ab78d0c64547 100644
--- a/drivers/net/ethernet/intel/igc/igc_nvm.h
+++ b/drivers/net/ethernet/intel/igc/igc_nvm.h
@@ -4,8 +4,6 @@
#ifndef _IGC_NVM_H_
#define _IGC_NVM_H_
-s32 igc_acquire_nvm(struct igc_hw *hw);
-void igc_release_nvm(struct igc_hw *hw);
s32 igc_read_mac_addr(struct igc_hw *hw);
s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data);
s32 igc_validate_nvm_checksum(struct igc_hw *hw);
diff --git a/drivers/net/ethernet/intel/igc/igc_xdp.c b/drivers/net/ethernet/intel/igc/igc_xdp.c
index e27af72aada8..13bbd3346e01 100644
--- a/drivers/net/ethernet/intel/igc/igc_xdp.c
+++ b/drivers/net/ethernet/intel/igc/igc_xdp.c
@@ -13,6 +13,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
struct net_device *dev = adapter->netdev;
bool if_running = netif_running(dev);
struct bpf_prog *old_prog;
+ bool need_update;
if (dev->mtu > ETH_DATA_LEN) {
/* For now, the driver doesn't support XDP functionality with
@@ -22,7 +23,8 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
return -EOPNOTSUPP;
}
- if (if_running)
+ need_update = !!adapter->xdp_prog != !!prog;
+ if (if_running && need_update)
igc_close(dev);
old_prog = xchg(&adapter->xdp_prog, prog);
@@ -34,7 +36,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
else
xdp_features_clear_redirect_target(dev);
- if (if_running)
+ if (if_running && need_update)
igc_open(dev);
return 0;
@@ -84,6 +86,7 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
napi_disable(napi);
}
+ igc_set_queue_napi(adapter, queue_id, NULL);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
@@ -133,6 +136,7 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
+ igc_set_queue_napi(adapter, queue_id, napi);
if (needs_reset) {
napi_enable(napi);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 336e08d35f97..7236f20c9a30 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1923,10 +1923,6 @@ bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
{
struct net_device *netdev = rx_ring->netdev;
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
/* Verify netdev is present, and that packet does not have any
* errors that would be unacceptable to the netdev.
*/
@@ -2234,9 +2230,9 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
return skb;
}
-static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
- struct ixgbe_ring *rx_ring,
- struct xdp_buff *xdp)
+static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
+ struct ixgbe_ring *rx_ring,
+ struct xdp_buff *xdp)
{
int err, result = IXGBE_XDP_PASS;
struct bpf_prog *xdp_prog;
@@ -2286,7 +2282,7 @@ out_failure:
break;
}
xdp_out:
- return ERR_PTR(-result);
+ return result;
}
static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring,
@@ -2344,6 +2340,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
unsigned int offset = rx_ring->rx_offset;
unsigned int xdp_xmit = 0;
struct xdp_buff xdp;
+ int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@@ -2389,12 +2386,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size);
#endif
- skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
+ xdp_res = ixgbe_run_xdp(adapter, rx_ring, &xdp);
}
- if (IS_ERR(skb)) {
- unsigned int xdp_res = -PTR_ERR(skb);
-
+ if (xdp_res) {
if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
xdp_xmit |= xdp_res;
ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
@@ -2414,7 +2409,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
}
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
@@ -2428,7 +2423,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
continue;
/* verify the packet layout is correct */
- if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
+ if (xdp_res || ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
continue;
/* probably a little skewed due to removing CRC */
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 9b37f354d78c..4384e892f967 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -443,7 +443,6 @@ extern const struct ixgbevf_info ixgbevf_X540_vf_hv_info;
extern const struct ixgbevf_info ixgbevf_X550_vf_hv_info;
extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info;
extern const struct ixgbevf_info ixgbevf_e610_vf_hv_info;
-extern const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops;
/* needed by ethtool.c */
extern const char ixgbevf_driver_name[];
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 2829bac9af94..6442f115a262 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -737,10 +737,6 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
union ixgbe_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
/* verify that the packet does not have any known errors */
if (unlikely(ixgbevf_test_staterr(rx_desc,
IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
@@ -1049,9 +1045,9 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
return IXGBEVF_XDP_TX;
}
-static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
- struct ixgbevf_ring *rx_ring,
- struct xdp_buff *xdp)
+static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
+ struct ixgbevf_ring *rx_ring,
+ struct xdp_buff *xdp)
{
int result = IXGBEVF_XDP_PASS;
struct ixgbevf_ring *xdp_ring;
@@ -1085,7 +1081,7 @@ out_failure:
break;
}
xdp_out:
- return ERR_PTR(-result);
+ return result;
}
static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring,
@@ -1127,6 +1123,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
struct sk_buff *skb = rx_ring->skb;
bool xdp_xmit = false;
struct xdp_buff xdp;
+ int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@@ -1170,11 +1167,11 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size);
#endif
- skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
+ xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
}
- if (IS_ERR(skb)) {
- if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) {
+ if (xdp_res) {
+ if (xdp_res == IXGBEVF_XDP_TX) {
xdp_xmit = true;
ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
size);
@@ -1194,7 +1191,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
}
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
@@ -1208,7 +1205,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
continue;
/* verify the packet layout is correct */
- if (ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
+ if (xdp_res || ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.c b/drivers/net/ethernet/intel/ixgbevf/mbx.c
index a55dd978f7ca..24d0237e7a99 100644
--- a/drivers/net/ethernet/intel/ixgbevf/mbx.c
+++ b/drivers/net/ethernet/intel/ixgbevf/mbx.c
@@ -505,15 +505,3 @@ const struct ixgbe_mbx_operations ixgbevf_mbx_ops_legacy = {
.check_for_ack = ixgbevf_check_for_ack_vf,
.check_for_rst = ixgbevf_check_for_rst_vf,
};
-
-/* Mailbox operations when running on Hyper-V.
- * On Hyper-V, PF/VF communication is not through the
- * hardware mailbox; this communication is through
- * a software mediated path.
- * Most mail box operations are noop while running on
- * Hyper-V.
- */
-const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops = {
- .init_params = ixgbevf_init_mbx_params_vf,
- .check_for_rst = ixgbevf_check_for_rst_vf,
-};
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
index 6cc7a78968fc..f3b9daffaec3 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
@@ -133,9 +133,7 @@ static const char *rsrc_name(enum mcs_rsrc_type rsrc_type)
return "SA";
default:
return "Unknown";
- };
-
- return "Unknown";
+ }
}
static int cn10k_mcs_alloc_rsrc(struct otx2_nic *pfvf, enum mcs_direction dir,
diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c
index d8bfc21a5b19..a30c417d66f2 100644
--- a/drivers/net/ethernet/mediatek/airoha_eth.c
+++ b/drivers/net/ethernet/mediatek/airoha_eth.c
@@ -15,6 +15,7 @@
#include <linux/u64_stats_sync.h>
#include <net/dsa.h>
#include <net/page_pool/helpers.h>
+#include <net/pkt_cls.h>
#include <uapi/linux/ppp_defs.h>
#define AIROHA_MAX_NUM_GDM_PORTS 1
@@ -23,8 +24,12 @@
#define AIROHA_MAX_NUM_XSI_RSTS 5
#define AIROHA_MAX_MTU 2000
#define AIROHA_MAX_PACKET_SIZE 2048
+#define AIROHA_NUM_QOS_CHANNELS 4
+#define AIROHA_NUM_QOS_QUEUES 8
#define AIROHA_NUM_TX_RING 32
#define AIROHA_NUM_RX_RING 32
+#define AIROHA_NUM_NETDEV_TX_RINGS (AIROHA_NUM_TX_RING + \
+ AIROHA_NUM_QOS_CHANNELS)
#define AIROHA_FE_MC_MAX_VLAN_TABLE 64
#define AIROHA_FE_MC_MAX_VLAN_PORT 16
#define AIROHA_NUM_TX_IRQ 2
@@ -40,6 +45,9 @@
#define PSE_RSV_PAGES 128
#define PSE_QUEUE_RSV_PAGES 64
+#define QDMA_METER_IDX(_n) ((_n) & 0xff)
+#define QDMA_METER_GROUP(_n) (((_n) >> 8) & 0x3)
+
/* FE */
#define PSE_BASE 0x0100
#define CSR_IFC_BASE 0x0200
@@ -541,9 +549,24 @@
#define INGRESS_SLOW_TICK_RATIO_MASK GENMASK(29, 16)
#define INGRESS_FAST_TICK_MASK GENMASK(15, 0)
+#define REG_QUEUE_CLOSE_CFG(_n) (0x00a0 + ((_n) & 0xfc))
+#define TXQ_DISABLE_CHAN_QUEUE_MASK(_n, _m) BIT((_m) + (((_n) & 0x3) << 3))
+
#define REG_TXQ_DIS_CFG_BASE(_n) ((_n) ? 0x20a0 : 0x00a0)
#define REG_TXQ_DIS_CFG(_n, _m) (REG_TXQ_DIS_CFG_BASE((_n)) + (_m) << 2)
+#define REG_CNTR_CFG(_n) (0x0400 + ((_n) << 3))
+#define CNTR_EN_MASK BIT(31)
+#define CNTR_ALL_CHAN_EN_MASK BIT(30)
+#define CNTR_ALL_QUEUE_EN_MASK BIT(29)
+#define CNTR_ALL_DSCP_RING_EN_MASK BIT(28)
+#define CNTR_SRC_MASK GENMASK(27, 24)
+#define CNTR_DSCP_RING_MASK GENMASK(20, 16)
+#define CNTR_CHAN_MASK GENMASK(7, 3)
+#define CNTR_QUEUE_MASK GENMASK(2, 0)
+
+#define REG_CNTR_VAL(_n) (0x0404 + ((_n) << 3))
+
#define REG_LMGR_INIT_CFG 0x1000
#define LMGR_INIT_START BIT(31)
#define LMGR_SRAM_MODE_MASK BIT(30)
@@ -565,13 +588,34 @@
#define EGRESS_SLOW_TICK_RATIO_MASK GENMASK(29, 16)
#define EGRESS_FAST_TICK_MASK GENMASK(15, 0)
+#define TRTCM_PARAM_RW_MASK BIT(31)
+#define TRTCM_PARAM_RW_DONE_MASK BIT(30)
+#define TRTCM_PARAM_TYPE_MASK GENMASK(29, 28)
+#define TRTCM_METER_GROUP_MASK GENMASK(27, 26)
+#define TRTCM_PARAM_INDEX_MASK GENMASK(23, 17)
+#define TRTCM_PARAM_RATE_TYPE_MASK BIT(16)
+
+#define REG_TRTCM_CFG_PARAM(_n) ((_n) + 0x4)
+#define REG_TRTCM_DATA_LOW(_n) ((_n) + 0x8)
+#define REG_TRTCM_DATA_HIGH(_n) ((_n) + 0xc)
+
#define REG_TXWRR_MODE_CFG 0x1020
#define TWRR_WEIGHT_SCALE_MASK BIT(31)
#define TWRR_WEIGHT_BASE_MASK BIT(3)
+#define REG_TXWRR_WEIGHT_CFG 0x1024
+#define TWRR_RW_CMD_MASK BIT(31)
+#define TWRR_RW_CMD_DONE BIT(30)
+#define TWRR_CHAN_IDX_MASK GENMASK(23, 19)
+#define TWRR_QUEUE_IDX_MASK GENMASK(18, 16)
+#define TWRR_VALUE_MASK GENMASK(15, 0)
+
#define REG_PSE_BUF_USAGE_CFG 0x1028
#define PSE_BUF_ESTIMATE_EN_MASK BIT(29)
+#define REG_CHAN_QOS_MODE(_n) (0x1040 + ((_n) << 2))
+#define CHAN_QOS_MODE_MASK(_n) GENMASK(2 + ((_n) << 2), (_n) << 2)
+
#define REG_GLB_TRTCM_CFG 0x1080
#define GLB_TRTCM_EN_MASK BIT(31)
#define GLB_TRTCM_MODE_MASK BIT(30)
@@ -720,6 +764,40 @@ enum {
FE_PSE_PORT_DROP = 0xf,
};
+enum tx_sched_mode {
+ TC_SCH_WRR8,
+ TC_SCH_SP,
+ TC_SCH_WRR7,
+ TC_SCH_WRR6,
+ TC_SCH_WRR5,
+ TC_SCH_WRR4,
+ TC_SCH_WRR3,
+ TC_SCH_WRR2,
+};
+
+enum trtcm_param_type {
+ TRTCM_MISC_MODE, /* meter_en, pps_mode, tick_sel */
+ TRTCM_TOKEN_RATE_MODE,
+ TRTCM_BUCKETSIZE_SHIFT_MODE,
+ TRTCM_BUCKET_COUNTER_MODE,
+};
+
+enum trtcm_mode_type {
+ TRTCM_COMMIT_MODE,
+ TRTCM_PEAK_MODE,
+};
+
+enum trtcm_param {
+ TRTCM_TICK_SEL = BIT(0),
+ TRTCM_PKT_MODE = BIT(1),
+ TRTCM_METER_MODE = BIT(2),
+};
+
+#define MIN_TOKEN_SIZE 4096
+#define MAX_TOKEN_SIZE_OFFSET 17
+#define TRTCM_TOKEN_RATE_MASK GENMASK(23, 6)
+#define TRTCM_TOKEN_RATE_FRACTION_MASK GENMASK(5, 0)
+
struct airoha_queue_entry {
union {
void *buf;
@@ -810,6 +888,12 @@ struct airoha_gdm_port {
int id;
struct airoha_hw_stats stats;
+
+ DECLARE_BITMAP(qos_sq_bmap, AIROHA_NUM_QOS_CHANNELS);
+
+ /* qos stats counters */
+ u64 cpu_tx_packets;
+ u64 fwd_tx_packets;
};
struct airoha_eth {
@@ -1789,6 +1873,10 @@ static int airoha_qdma_init_tx_queue(struct airoha_queue *q,
WRITE_ONCE(q->desc[i].ctrl, cpu_to_le32(val));
}
+ /* xmit ring drop default setting */
+ airoha_qdma_set(qdma, REG_TX_RING_BLOCKING(qid),
+ TX_RING_IRQ_BLOCKING_TX_DROP_EN_MASK);
+
airoha_qdma_wr(qdma, REG_TX_RING_BASE(qid), dma_addr);
airoha_qdma_rmw(qdma, REG_TX_CPU_IDX(qid), TX_RING_CPU_IDX_MASK,
FIELD_PREP(TX_RING_CPU_IDX_MASK, q->head));
@@ -1955,6 +2043,27 @@ static void airoha_qdma_init_qos(struct airoha_qdma *qdma)
FIELD_PREP(SLA_SLOW_TICK_RATIO_MASK, 40));
}
+static void airoha_qdma_init_qos_stats(struct airoha_qdma *qdma)
+{
+ int i;
+
+ for (i = 0; i < AIROHA_NUM_QOS_CHANNELS; i++) {
+ /* Tx-cpu transferred count */
+ airoha_qdma_wr(qdma, REG_CNTR_VAL(i << 1), 0);
+ airoha_qdma_wr(qdma, REG_CNTR_CFG(i << 1),
+ CNTR_EN_MASK | CNTR_ALL_QUEUE_EN_MASK |
+ CNTR_ALL_DSCP_RING_EN_MASK |
+ FIELD_PREP(CNTR_CHAN_MASK, i));
+ /* Tx-fwd transferred count */
+ airoha_qdma_wr(qdma, REG_CNTR_VAL((i << 1) + 1), 0);
+ airoha_qdma_wr(qdma, REG_CNTR_CFG(i << 1),
+ CNTR_EN_MASK | CNTR_ALL_QUEUE_EN_MASK |
+ CNTR_ALL_DSCP_RING_EN_MASK |
+ FIELD_PREP(CNTR_SRC_MASK, 1) |
+ FIELD_PREP(CNTR_CHAN_MASK, i));
+ }
+}
+
static int airoha_qdma_hw_init(struct airoha_qdma *qdma)
{
int i;
@@ -2005,6 +2114,7 @@ static int airoha_qdma_hw_init(struct airoha_qdma *qdma)
airoha_qdma_set(qdma, REG_TXQ_CNGST_CFG,
TXQ_CNGST_DROP_EN | TXQ_CNGST_DEI_DROP_EN);
+ airoha_qdma_init_qos_stats(qdma);
return 0;
}
@@ -2425,21 +2535,44 @@ static void airoha_dev_get_stats64(struct net_device *dev,
} while (u64_stats_fetch_retry(&port->stats.syncp, start));
}
+static u16 airoha_dev_select_queue(struct net_device *dev, struct sk_buff *skb,
+ struct net_device *sb_dev)
+{
+ struct airoha_gdm_port *port = netdev_priv(dev);
+ int queue, channel;
+
+ /* For dsa device select QoS channel according to the dsa user port
+ * index, rely on port id otherwise. Select QoS queue based on the
+ * skb priority.
+ */
+ channel = netdev_uses_dsa(dev) ? skb_get_queue_mapping(skb) : port->id;
+ channel = channel % AIROHA_NUM_QOS_CHANNELS;
+ queue = (skb->priority - 1) % AIROHA_NUM_QOS_QUEUES; /* QoS queue */
+ queue = channel * AIROHA_NUM_QOS_QUEUES + queue;
+
+ return queue < dev->num_tx_queues ? queue : 0;
+}
+
static netdev_tx_t airoha_dev_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct skb_shared_info *sinfo = skb_shinfo(skb);
struct airoha_gdm_port *port = netdev_priv(dev);
- u32 msg0 = 0, msg1, len = skb_headlen(skb);
- int i, qid = skb_get_queue_mapping(skb);
+ u32 msg0, msg1, len = skb_headlen(skb);
struct airoha_qdma *qdma = port->qdma;
u32 nr_frags = 1 + sinfo->nr_frags;
struct netdev_queue *txq;
struct airoha_queue *q;
void *data = skb->data;
+ int i, qid;
u16 index;
u8 fport;
+ qid = skb_get_queue_mapping(skb) % ARRAY_SIZE(qdma->q_tx);
+ msg0 = FIELD_PREP(QDMA_ETH_TXMSG_CHAN_MASK,
+ qid / AIROHA_NUM_QOS_QUEUES) |
+ FIELD_PREP(QDMA_ETH_TXMSG_QUEUE_MASK,
+ qid % AIROHA_NUM_QOS_QUEUES);
if (skb->ip_summed == CHECKSUM_PARTIAL)
msg0 |= FIELD_PREP(QDMA_ETH_TXMSG_TCO_MASK, 1) |
FIELD_PREP(QDMA_ETH_TXMSG_UCO_MASK, 1) |
@@ -2609,13 +2742,386 @@ airoha_ethtool_get_rmon_stats(struct net_device *dev,
} while (u64_stats_fetch_retry(&port->stats.syncp, start));
}
+static int airoha_qdma_set_chan_tx_sched(struct airoha_gdm_port *port,
+ int channel, enum tx_sched_mode mode,
+ const u16 *weights, u8 n_weights)
+{
+ int i;
+
+ for (i = 0; i < AIROHA_NUM_TX_RING; i++)
+ airoha_qdma_clear(port->qdma, REG_QUEUE_CLOSE_CFG(channel),
+ TXQ_DISABLE_CHAN_QUEUE_MASK(channel, i));
+
+ for (i = 0; i < n_weights; i++) {
+ u32 status;
+ int err;
+
+ airoha_qdma_wr(port->qdma, REG_TXWRR_WEIGHT_CFG,
+ TWRR_RW_CMD_MASK |
+ FIELD_PREP(TWRR_CHAN_IDX_MASK, channel) |
+ FIELD_PREP(TWRR_QUEUE_IDX_MASK, i) |
+ FIELD_PREP(TWRR_VALUE_MASK, weights[i]));
+ err = read_poll_timeout(airoha_qdma_rr, status,
+ status & TWRR_RW_CMD_DONE,
+ USEC_PER_MSEC, 10 * USEC_PER_MSEC,
+ true, port->qdma,
+ REG_TXWRR_WEIGHT_CFG);
+ if (err)
+ return err;
+ }
+
+ airoha_qdma_rmw(port->qdma, REG_CHAN_QOS_MODE(channel >> 3),
+ CHAN_QOS_MODE_MASK(channel),
+ mode << __ffs(CHAN_QOS_MODE_MASK(channel)));
+
+ return 0;
+}
+
+static int airoha_qdma_set_tx_prio_sched(struct airoha_gdm_port *port,
+ int channel)
+{
+ static const u16 w[AIROHA_NUM_QOS_QUEUES] = {};
+
+ return airoha_qdma_set_chan_tx_sched(port, channel, TC_SCH_SP, w,
+ ARRAY_SIZE(w));
+}
+
+static int airoha_qdma_set_tx_ets_sched(struct airoha_gdm_port *port,
+ int channel,
+ struct tc_ets_qopt_offload *opt)
+{
+ struct tc_ets_qopt_offload_replace_params *p = &opt->replace_params;
+ enum tx_sched_mode mode = TC_SCH_SP;
+ u16 w[AIROHA_NUM_QOS_QUEUES] = {};
+ int i, nstrict = 0;
+
+ if (p->bands > AIROHA_NUM_QOS_QUEUES)
+ return -EINVAL;
+
+ for (i = 0; i < p->bands; i++) {
+ if (!p->quanta[i])
+ nstrict++;
+ }
+
+ /* this configuration is not supported by the hw */
+ if (nstrict == AIROHA_NUM_QOS_QUEUES - 1)
+ return -EINVAL;
+
+ for (i = 0; i < p->bands - nstrict; i++)
+ w[i] = p->weights[nstrict + i];
+
+ if (!nstrict)
+ mode = TC_SCH_WRR8;
+ else if (nstrict < AIROHA_NUM_QOS_QUEUES - 1)
+ mode = nstrict + 1;
+
+ return airoha_qdma_set_chan_tx_sched(port, channel, mode, w,
+ ARRAY_SIZE(w));
+}
+
+static int airoha_qdma_get_tx_ets_stats(struct airoha_gdm_port *port,
+ int channel,
+ struct tc_ets_qopt_offload *opt)
+{
+ u64 cpu_tx_packets = airoha_qdma_rr(port->qdma,
+ REG_CNTR_VAL(channel << 1));
+ u64 fwd_tx_packets = airoha_qdma_rr(port->qdma,
+ REG_CNTR_VAL((channel << 1) + 1));
+ u64 tx_packets = (cpu_tx_packets - port->cpu_tx_packets) +
+ (fwd_tx_packets - port->fwd_tx_packets);
+ _bstats_update(opt->stats.bstats, 0, tx_packets);
+
+ port->cpu_tx_packets = cpu_tx_packets;
+ port->fwd_tx_packets = fwd_tx_packets;
+
+ return 0;
+}
+
+static int airoha_tc_setup_qdisc_ets(struct airoha_gdm_port *port,
+ struct tc_ets_qopt_offload *opt)
+{
+ int channel;
+
+ if (opt->parent == TC_H_ROOT)
+ return -EINVAL;
+
+ channel = TC_H_MAJ(opt->handle) >> 16;
+ channel = channel % AIROHA_NUM_QOS_CHANNELS;
+
+ switch (opt->command) {
+ case TC_ETS_REPLACE:
+ return airoha_qdma_set_tx_ets_sched(port, channel, opt);
+ case TC_ETS_DESTROY:
+ /* PRIO is default qdisc scheduler */
+ return airoha_qdma_set_tx_prio_sched(port, channel);
+ case TC_ETS_STATS:
+ return airoha_qdma_get_tx_ets_stats(port, channel, opt);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int airoha_qdma_get_trtcm_param(struct airoha_qdma *qdma, int channel,
+ u32 addr, enum trtcm_param_type param,
+ enum trtcm_mode_type mode,
+ u32 *val_low, u32 *val_high)
+{
+ u32 idx = QDMA_METER_IDX(channel), group = QDMA_METER_GROUP(channel);
+ u32 val, config = FIELD_PREP(TRTCM_PARAM_TYPE_MASK, param) |
+ FIELD_PREP(TRTCM_METER_GROUP_MASK, group) |
+ FIELD_PREP(TRTCM_PARAM_INDEX_MASK, idx) |
+ FIELD_PREP(TRTCM_PARAM_RATE_TYPE_MASK, mode);
+
+ airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
+ if (read_poll_timeout(airoha_qdma_rr, val,
+ val & TRTCM_PARAM_RW_DONE_MASK,
+ USEC_PER_MSEC, 10 * USEC_PER_MSEC, true,
+ qdma, REG_TRTCM_CFG_PARAM(addr)))
+ return -ETIMEDOUT;
+
+ *val_low = airoha_qdma_rr(qdma, REG_TRTCM_DATA_LOW(addr));
+ if (val_high)
+ *val_high = airoha_qdma_rr(qdma, REG_TRTCM_DATA_HIGH(addr));
+
+ return 0;
+}
+
+static int airoha_qdma_set_trtcm_param(struct airoha_qdma *qdma, int channel,
+ u32 addr, enum trtcm_param_type param,
+ enum trtcm_mode_type mode, u32 val)
+{
+ u32 idx = QDMA_METER_IDX(channel), group = QDMA_METER_GROUP(channel);
+ u32 config = TRTCM_PARAM_RW_MASK |
+ FIELD_PREP(TRTCM_PARAM_TYPE_MASK, param) |
+ FIELD_PREP(TRTCM_METER_GROUP_MASK, group) |
+ FIELD_PREP(TRTCM_PARAM_INDEX_MASK, idx) |
+ FIELD_PREP(TRTCM_PARAM_RATE_TYPE_MASK, mode);
+
+ airoha_qdma_wr(qdma, REG_TRTCM_DATA_LOW(addr), val);
+ airoha_qdma_wr(qdma, REG_TRTCM_CFG_PARAM(addr), config);
+
+ return read_poll_timeout(airoha_qdma_rr, val,
+ val & TRTCM_PARAM_RW_DONE_MASK,
+ USEC_PER_MSEC, 10 * USEC_PER_MSEC, true,
+ qdma, REG_TRTCM_CFG_PARAM(addr));
+}
+
+static int airoha_qdma_set_trtcm_config(struct airoha_qdma *qdma, int channel,
+ u32 addr, enum trtcm_mode_type mode,
+ bool enable, u32 enable_mask)
+{
+ u32 val;
+
+ if (airoha_qdma_get_trtcm_param(qdma, channel, addr, TRTCM_MISC_MODE,
+ mode, &val, NULL))
+ return -EINVAL;
+
+ val = enable ? val | enable_mask : val & ~enable_mask;
+
+ return airoha_qdma_set_trtcm_param(qdma, channel, addr, TRTCM_MISC_MODE,
+ mode, val);
+}
+
+static int airoha_qdma_set_trtcm_token_bucket(struct airoha_qdma *qdma,
+ int channel, u32 addr,
+ enum trtcm_mode_type mode,
+ u32 rate_val, u32 bucket_size)
+{
+ u32 val, config, tick, unit, rate, rate_frac;
+ int err;
+
+ if (airoha_qdma_get_trtcm_param(qdma, channel, addr, TRTCM_MISC_MODE,
+ mode, &config, NULL))
+ return -EINVAL;
+
+ val = airoha_qdma_rr(qdma, addr);
+ tick = FIELD_GET(INGRESS_FAST_TICK_MASK, val);
+ if (config & TRTCM_TICK_SEL)
+ tick *= FIELD_GET(INGRESS_SLOW_TICK_RATIO_MASK, val);
+ if (!tick)
+ return -EINVAL;
+
+ unit = (config & TRTCM_PKT_MODE) ? 1000000 / tick : 8000 / tick;
+ if (!unit)
+ return -EINVAL;
+
+ rate = rate_val / unit;
+ rate_frac = rate_val % unit;
+ rate_frac = FIELD_PREP(TRTCM_TOKEN_RATE_MASK, rate_frac) / unit;
+ rate = FIELD_PREP(TRTCM_TOKEN_RATE_MASK, rate) |
+ FIELD_PREP(TRTCM_TOKEN_RATE_FRACTION_MASK, rate_frac);
+
+ err = airoha_qdma_set_trtcm_param(qdma, channel, addr,
+ TRTCM_TOKEN_RATE_MODE, mode, rate);
+ if (err)
+ return err;
+
+ val = max_t(u32, bucket_size, MIN_TOKEN_SIZE);
+ val = min_t(u32, __fls(val), MAX_TOKEN_SIZE_OFFSET);
+
+ return airoha_qdma_set_trtcm_param(qdma, channel, addr,
+ TRTCM_BUCKETSIZE_SHIFT_MODE,
+ mode, val);
+}
+
+static int airoha_qdma_set_tx_rate_limit(struct airoha_gdm_port *port,
+ int channel, u32 rate,
+ u32 bucket_size)
+{
+ int i, err;
+
+ for (i = 0; i <= TRTCM_PEAK_MODE; i++) {
+ err = airoha_qdma_set_trtcm_config(port->qdma, channel,
+ REG_EGRESS_TRTCM_CFG, i,
+ !!rate, TRTCM_METER_MODE);
+ if (err)
+ return err;
+
+ err = airoha_qdma_set_trtcm_token_bucket(port->qdma, channel,
+ REG_EGRESS_TRTCM_CFG,
+ i, rate, bucket_size);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int airoha_tc_htb_alloc_leaf_queue(struct airoha_gdm_port *port,
+ struct tc_htb_qopt_offload *opt)
+{
+ u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS;
+ u32 rate = div_u64(opt->rate, 1000) << 3; /* kbps */
+ struct net_device *dev = port->dev;
+ int num_tx_queues = dev->real_num_tx_queues;
+ int err;
+
+ if (opt->parent_classid != TC_HTB_CLASSID_ROOT) {
+ NL_SET_ERR_MSG_MOD(opt->extack, "invalid parent classid");
+ return -EINVAL;
+ }
+
+ err = airoha_qdma_set_tx_rate_limit(port, channel, rate, opt->quantum);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(opt->extack,
+ "failed configuring htb offload");
+ return err;
+ }
+
+ if (opt->command == TC_HTB_NODE_MODIFY)
+ return 0;
+
+ err = netif_set_real_num_tx_queues(dev, num_tx_queues + 1);
+ if (err) {
+ airoha_qdma_set_tx_rate_limit(port, channel, 0, opt->quantum);
+ NL_SET_ERR_MSG_MOD(opt->extack,
+ "failed setting real_num_tx_queues");
+ return err;
+ }
+
+ set_bit(channel, port->qos_sq_bmap);
+ opt->qid = AIROHA_NUM_TX_RING + channel;
+
+ return 0;
+}
+
+static void airoha_tc_remove_htb_queue(struct airoha_gdm_port *port, int queue)
+{
+ struct net_device *dev = port->dev;
+
+ netif_set_real_num_tx_queues(dev, dev->real_num_tx_queues - 1);
+ airoha_qdma_set_tx_rate_limit(port, queue + 1, 0, 0);
+ clear_bit(queue, port->qos_sq_bmap);
+}
+
+static int airoha_tc_htb_delete_leaf_queue(struct airoha_gdm_port *port,
+ struct tc_htb_qopt_offload *opt)
+{
+ u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS;
+
+ if (!test_bit(channel, port->qos_sq_bmap)) {
+ NL_SET_ERR_MSG_MOD(opt->extack, "invalid queue id");
+ return -EINVAL;
+ }
+
+ airoha_tc_remove_htb_queue(port, channel);
+
+ return 0;
+}
+
+static int airoha_tc_htb_destroy(struct airoha_gdm_port *port)
+{
+ int q;
+
+ for_each_set_bit(q, port->qos_sq_bmap, AIROHA_NUM_QOS_CHANNELS)
+ airoha_tc_remove_htb_queue(port, q);
+
+ return 0;
+}
+
+static int airoha_tc_get_htb_get_leaf_queue(struct airoha_gdm_port *port,
+ struct tc_htb_qopt_offload *opt)
+{
+ u32 channel = TC_H_MIN(opt->classid) % AIROHA_NUM_QOS_CHANNELS;
+
+ if (!test_bit(channel, port->qos_sq_bmap)) {
+ NL_SET_ERR_MSG_MOD(opt->extack, "invalid queue id");
+ return -EINVAL;
+ }
+
+ opt->qid = channel;
+
+ return 0;
+}
+
+static int airoha_tc_setup_qdisc_htb(struct airoha_gdm_port *port,
+ struct tc_htb_qopt_offload *opt)
+{
+ switch (opt->command) {
+ case TC_HTB_CREATE:
+ break;
+ case TC_HTB_DESTROY:
+ return airoha_tc_htb_destroy(port);
+ case TC_HTB_NODE_MODIFY:
+ case TC_HTB_LEAF_ALLOC_QUEUE:
+ return airoha_tc_htb_alloc_leaf_queue(port, opt);
+ case TC_HTB_LEAF_DEL:
+ case TC_HTB_LEAF_DEL_LAST:
+ case TC_HTB_LEAF_DEL_LAST_FORCE:
+ return airoha_tc_htb_delete_leaf_queue(port, opt);
+ case TC_HTB_LEAF_QUERY_QUEUE:
+ return airoha_tc_get_htb_get_leaf_queue(port, opt);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static int airoha_dev_tc_setup(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct airoha_gdm_port *port = netdev_priv(dev);
+
+ switch (type) {
+ case TC_SETUP_QDISC_ETS:
+ return airoha_tc_setup_qdisc_ets(port, type_data);
+ case TC_SETUP_QDISC_HTB:
+ return airoha_tc_setup_qdisc_htb(port, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops airoha_netdev_ops = {
.ndo_init = airoha_dev_init,
.ndo_open = airoha_dev_open,
.ndo_stop = airoha_dev_stop,
+ .ndo_select_queue = airoha_dev_select_queue,
.ndo_start_xmit = airoha_dev_xmit,
.ndo_get_stats64 = airoha_dev_get_stats64,
.ndo_set_mac_address = airoha_dev_set_macaddr,
+ .ndo_setup_tc = airoha_dev_tc_setup,
};
static const struct ethtool_ops airoha_ethtool_ops = {
@@ -2652,7 +3158,8 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, struct device_node *np)
}
dev = devm_alloc_etherdev_mqs(eth->dev, sizeof(*port),
- AIROHA_NUM_TX_RING, AIROHA_NUM_RX_RING);
+ AIROHA_NUM_NETDEV_TX_RINGS,
+ AIROHA_NUM_RX_RING);
if (!dev) {
dev_err(eth->dev, "alloc_etherdev failed\n");
return -ENOMEM;
@@ -2665,12 +3172,18 @@ static int airoha_alloc_gdm_port(struct airoha_eth *eth, struct device_node *np)
dev->watchdog_timeo = 5 * HZ;
dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
NETIF_F_TSO6 | NETIF_F_IPV6_CSUM |
- NETIF_F_SG | NETIF_F_TSO;
+ NETIF_F_SG | NETIF_F_TSO |
+ NETIF_F_HW_TC;
dev->features |= dev->hw_features;
dev->dev.of_node = np;
dev->irq = qdma->irq;
SET_NETDEV_DEV(dev, eth->dev);
+ /* reserve hw queues for HTB offloading */
+ err = netif_set_real_num_tx_queues(dev, AIROHA_NUM_TX_RING);
+ if (err)
+ return err;
+
err = of_get_ethdev_address(np, dev);
if (err) {
if (err == -EPROBE_DEFER)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 10a763e668ed..d9a8817bb33c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -151,8 +151,9 @@ mlx5_core-$(CONFIG_MLX5_HW_STEERING) += steering/hws/cmd.o \
steering/hws/bwc.o \
steering/hws/debug.o \
steering/hws/vport.o \
- steering/hws/bwc_complex.o
-
+ steering/hws/bwc_complex.o \
+ steering/hws/fs_hws_pools.o \
+ steering/hws/fs_hws.o
#
# SF device
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 6bd8a18e3af3..e733b81e18a2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1013,6 +1013,7 @@ static void cmd_work_handler(struct work_struct *work)
complete(&ent->done);
}
up(&cmd->vars.sem);
+ complete(&ent->slotted);
return;
}
} else {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index ca92e518be76..8489b0a0e8bd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -94,25 +94,14 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
u32 esn, esn_msb;
u8 overlap;
- switch (x->xso.type) {
- case XFRM_DEV_OFFLOAD_PACKET:
- switch (x->xso.dir) {
- case XFRM_DEV_OFFLOAD_IN:
- esn = x->replay_esn->seq;
- esn_msb = x->replay_esn->seq_hi;
- break;
- case XFRM_DEV_OFFLOAD_OUT:
- esn = x->replay_esn->oseq;
- esn_msb = x->replay_esn->oseq_hi;
- break;
- default:
- WARN_ON(true);
- return false;
- }
- break;
- case XFRM_DEV_OFFLOAD_CRYPTO:
- /* Already parsed by XFRM core */
+ switch (x->xso.dir) {
+ case XFRM_DEV_OFFLOAD_IN:
esn = x->replay_esn->seq;
+ esn_msb = x->replay_esn->seq_hi;
+ break;
+ case XFRM_DEV_OFFLOAD_OUT:
+ esn = x->replay_esn->oseq;
+ esn_msb = x->replay_esn->oseq_hi;
break;
default:
WARN_ON(true);
@@ -121,11 +110,15 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
overlap = sa_entry->esn_state.overlap;
- if (esn >= x->replay_esn->replay_window)
- seq_bottom = esn - x->replay_esn->replay_window + 1;
+ if (!x->replay_esn->replay_window) {
+ seq_bottom = esn;
+ } else {
+ if (esn >= x->replay_esn->replay_window)
+ seq_bottom = esn - x->replay_esn->replay_window + 1;
- if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
- esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
+ if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
+ esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
+ }
if (sa_entry->esn_state.esn_msb)
sa_entry->esn_state.esn = esn;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 41b5e98a0495..f43fd96a680d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -3535,35 +3535,42 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id,
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
char *value = val.vstr;
- int err = 0;
+ u8 eswitch_mode;
- if (!strcmp(value, "dmfs")) {
+ if (!strcmp(value, "dmfs"))
return 0;
- } else if (!strcmp(value, "smfs")) {
- u8 eswitch_mode;
- bool smfs_cap;
- eswitch_mode = mlx5_eswitch_mode(dev);
- smfs_cap = mlx5_fs_dr_is_supported(dev);
+ if (!strcmp(value, "smfs")) {
+ bool smfs_cap = mlx5_fs_dr_is_supported(dev);
if (!smfs_cap) {
- err = -EOPNOTSUPP;
NL_SET_ERR_MSG_MOD(extack,
"Software managed steering is not supported by current device");
+ return -EOPNOTSUPP;
}
+ } else if (!strcmp(value, "hmfs")) {
+ bool hmfs_cap = mlx5_fs_hws_is_supported(dev);
- else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) {
+ if (!hmfs_cap) {
NL_SET_ERR_MSG_MOD(extack,
- "Software managed steering is not supported when eswitch offloads enabled.");
- err = -EOPNOTSUPP;
+ "Hardware steering is not supported by current device");
+ return -EOPNOTSUPP;
}
} else {
NL_SET_ERR_MSG_MOD(extack,
- "Bad parameter: supported values are [\"dmfs\", \"smfs\"]");
- err = -EINVAL;
+ "Bad parameter: supported values are [\"dmfs\", \"smfs\", \"hmfs\"]");
+ return -EINVAL;
}
- return err;
+ eswitch_mode = mlx5_eswitch_mode(dev);
+ if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Moving to %s is not supported when eswitch offloads enabled.",
+ value);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
static int mlx5_fs_mode_set(struct devlink *devlink, u32 id,
@@ -3575,6 +3582,8 @@ static int mlx5_fs_mode_set(struct devlink *devlink, u32 id,
if (!strcmp(ctx->val.vstr, "smfs"))
mode = MLX5_FLOW_STEERING_MODE_SMFS;
+ else if (!strcmp(ctx->val.vstr, "hmfs"))
+ mode = MLX5_FLOW_STEERING_MODE_HMFS;
else
mode = MLX5_FLOW_STEERING_MODE_DMFS;
dev->priv.steering->mode = mode;
@@ -3587,10 +3596,17 @@ static int mlx5_fs_mode_get(struct devlink *devlink, u32 id,
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
- if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS)
+ switch (dev->priv.steering->mode) {
+ case MLX5_FLOW_STEERING_MODE_SMFS:
strscpy(ctx->val.vstr, "smfs", sizeof(ctx->val.vstr));
- else
+ break;
+ case MLX5_FLOW_STEERING_MODE_HMFS:
+ strscpy(ctx->val.vstr, "hmfs", sizeof(ctx->val.vstr));
+ break;
+ default:
strscpy(ctx->val.vstr, "dmfs", sizeof(ctx->val.vstr));
+ }
+
return 0;
}
@@ -4009,6 +4025,8 @@ int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
if (mode == MLX5_FLOW_STEERING_MODE_SMFS)
cmds = mlx5_fs_cmd_get_dr_cmds();
+ else if (mode == MLX5_FLOW_STEERING_MODE_HMFS)
+ cmds = mlx5_fs_cmd_get_hws_cmds();
else
cmds = mlx5_fs_cmd_get_fw_cmds();
if (!cmds)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index bad2df0715ec..20837e526679 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -38,6 +38,7 @@
#include <linux/rhashtable.h>
#include <linux/llist.h>
#include <steering/sws/fs_dr.h>
+#include <steering/hws/fs_hws.h>
#define FDB_TC_MAX_CHAIN 3
#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1)
@@ -64,6 +65,7 @@ struct mlx5_modify_hdr {
enum mlx5_flow_resource_owner owner;
union {
struct mlx5_fs_dr_action fs_dr_action;
+ struct mlx5_fs_hws_action fs_hws_action;
u32 id;
};
};
@@ -74,6 +76,7 @@ struct mlx5_pkt_reformat {
enum mlx5_flow_resource_owner owner;
union {
struct mlx5_fs_dr_action fs_dr_action;
+ struct mlx5_fs_hws_action fs_hws_action;
u32 id;
};
};
@@ -126,7 +129,8 @@ enum fs_fte_status {
enum mlx5_flow_steering_mode {
MLX5_FLOW_STEERING_MODE_DMFS,
- MLX5_FLOW_STEERING_MODE_SMFS
+ MLX5_FLOW_STEERING_MODE_SMFS,
+ MLX5_FLOW_STEERING_MODE_HMFS,
};
enum mlx5_flow_steering_capabilty {
@@ -190,7 +194,10 @@ struct mlx5_flow_handle {
/* Type of children is mlx5_flow_group */
struct mlx5_flow_table {
struct fs_node node;
- struct mlx5_fs_dr_table fs_dr_table;
+ union {
+ struct mlx5_fs_dr_table fs_dr_table;
+ struct mlx5_fs_hws_table fs_hws_table;
+ };
u32 id;
u16 vport;
unsigned int max_fte;
@@ -247,7 +254,10 @@ struct fs_fte_dup {
/* Type of children is mlx5_flow_rule */
struct fs_fte {
struct fs_node node;
- struct mlx5_fs_dr_rule fs_dr_rule;
+ union {
+ struct mlx5_fs_dr_rule fs_dr_rule;
+ struct mlx5_fs_hws_rule fs_hws_rule;
+ };
u32 val[MLX5_ST_SZ_DW_MATCH_PARAM];
struct fs_fte_action act_dests;
struct fs_fte_dup *dup;
@@ -280,7 +290,10 @@ struct mlx5_flow_group_mask {
/* Type of children is fs_fte */
struct mlx5_flow_group {
struct fs_node node;
- struct mlx5_fs_dr_matcher fs_dr_matcher;
+ union {
+ struct mlx5_fs_dr_matcher fs_dr_matcher;
+ struct mlx5_fs_hws_matcher fs_hws_matcher;
+ };
struct mlx5_flow_group_mask mask;
u32 start_index;
u32 max_ftes;
@@ -293,7 +306,10 @@ struct mlx5_flow_group {
struct mlx5_flow_root_namespace {
struct mlx5_flow_namespace ns;
enum mlx5_flow_steering_mode mode;
- struct mlx5_fs_dr_domain fs_dr_domain;
+ union {
+ struct mlx5_fs_dr_domain fs_dr_domain;
+ struct mlx5_fs_hws_context fs_hws_context;
+ };
enum fs_flow_table_type table_type;
struct mlx5_core_dev *dev;
struct mlx5_flow_table *root_ft;
@@ -303,6 +319,42 @@ struct mlx5_flow_root_namespace {
const struct mlx5_flow_cmds *cmds;
};
+enum mlx5_fc_type {
+ MLX5_FC_TYPE_ACQUIRED = 0,
+ MLX5_FC_TYPE_LOCAL,
+};
+
+struct mlx5_fc_cache {
+ u64 packets;
+ u64 bytes;
+ u64 lastuse;
+};
+
+struct mlx5_fc {
+ u32 id;
+ bool aging;
+ enum mlx5_fc_type type;
+ struct mlx5_fc_bulk *bulk;
+ struct mlx5_fc_cache cache;
+ /* last{packets,bytes} are used for calculating deltas since last reading. */
+ u64 lastpackets;
+ u64 lastbytes;
+};
+
+struct mlx5_fc_bulk_hws_data {
+ struct mlx5hws_action *hws_action;
+ struct mutex lock; /* protects hws_action */
+ refcount_t hws_action_refcount;
+};
+
+struct mlx5_fc_bulk {
+ struct mlx5_fs_bulk fs_bulk;
+ u32 base_id;
+ struct mlx5_fc_bulk_hws_data hws_data;
+ struct mlx5_fc fcs[];
+};
+
+u32 mlx5_fc_get_base_id(struct mlx5_fc *counter);
int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev);
void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index d8e1c4ebd364..492775d3d193 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -44,28 +44,6 @@
#define MLX5_FC_POOL_MAX_THRESHOLD BIT(18)
#define MLX5_FC_POOL_USED_BUFF_RATIO 10
-enum mlx5_fc_type {
- MLX5_FC_TYPE_ACQUIRED = 0,
- MLX5_FC_TYPE_LOCAL,
-};
-
-struct mlx5_fc_cache {
- u64 packets;
- u64 bytes;
- u64 lastuse;
-};
-
-struct mlx5_fc {
- u32 id;
- bool aging;
- enum mlx5_fc_type type;
- struct mlx5_fc_bulk *bulk;
- struct mlx5_fc_cache cache;
- /* last{packets,bytes} are used for calculating deltas since last reading. */
- u64 lastpackets;
- u64 lastbytes;
-};
-
struct mlx5_fc_stats {
struct xarray counters;
@@ -434,13 +412,7 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev,
fc_stats->sampling_interval);
}
-/* Flow counter bluks */
-
-struct mlx5_fc_bulk {
- struct mlx5_fs_bulk fs_bulk;
- u32 base_id;
- struct mlx5_fc fcs[];
-};
+/* Flow counter bulks */
static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
u32 id)
@@ -449,7 +421,13 @@ static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk,
counter->id = id;
}
-static struct mlx5_fs_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
+u32 mlx5_fc_get_base_id(struct mlx5_fc *counter)
+{
+ return counter->bulk->base_id;
+}
+
+static struct mlx5_fs_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev,
+ void *pool_ctx)
{
enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask;
struct mlx5_fc_bulk *fc_bulk;
@@ -473,6 +451,8 @@ static struct mlx5_fs_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev)
for (i = 0; i < bulk_len; i++)
mlx5_fc_init(&fc_bulk->fcs[i], fc_bulk, base_id + i);
+ refcount_set(&fc_bulk->hws_data.hws_action_refcount, 0);
+ mutex_init(&fc_bulk->hws_data.lock);
return &fc_bulk->fs_bulk;
fs_bulk_cleanup:
@@ -518,7 +498,7 @@ static const struct mlx5_fs_pool_ops mlx5_fc_pool_ops = {
static void
mlx5_fc_pool_init(struct mlx5_fs_pool *fc_pool, struct mlx5_core_dev *dev)
{
- mlx5_fs_pool_init(fc_pool, dev, &mlx5_fc_pool_ops);
+ mlx5_fs_pool_init(fc_pool, dev, &mlx5_fc_pool_ops, NULL);
}
static void mlx5_fc_pool_cleanup(struct mlx5_fs_pool *fc_pool)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c
index b891d7b9e3e0..f6c226664602 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c
@@ -56,11 +56,12 @@ static int mlx5_fs_bulk_release_index(struct mlx5_fs_bulk *fs_bulk, int index)
}
void mlx5_fs_pool_init(struct mlx5_fs_pool *pool, struct mlx5_core_dev *dev,
- const struct mlx5_fs_pool_ops *ops)
+ const struct mlx5_fs_pool_ops *ops, void *pool_ctx)
{
WARN_ON_ONCE(!ops || !ops->bulk_destroy || !ops->bulk_create ||
!ops->update_threshold);
pool->dev = dev;
+ pool->pool_ctx = pool_ctx;
mutex_init(&pool->pool_lock);
INIT_LIST_HEAD(&pool->fully_used);
INIT_LIST_HEAD(&pool->partially_used);
@@ -91,7 +92,7 @@ mlx5_fs_pool_alloc_new_bulk(struct mlx5_fs_pool *fs_pool)
struct mlx5_core_dev *dev = fs_pool->dev;
struct mlx5_fs_bulk *new_bulk;
- new_bulk = fs_pool->ops->bulk_create(dev);
+ new_bulk = fs_pool->ops->bulk_create(dev, fs_pool->pool_ctx);
if (new_bulk)
fs_pool->available_units += new_bulk->bulk_len;
fs_pool->ops->update_threshold(fs_pool);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h
index 3b149863260c..f04ec3107498 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h
@@ -21,7 +21,8 @@ struct mlx5_fs_pool;
struct mlx5_fs_pool_ops {
int (*bulk_destroy)(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *bulk);
- struct mlx5_fs_bulk * (*bulk_create)(struct mlx5_core_dev *dev);
+ struct mlx5_fs_bulk * (*bulk_create)(struct mlx5_core_dev *dev,
+ void *pool_ctx);
void (*update_threshold)(struct mlx5_fs_pool *pool);
};
@@ -44,7 +45,7 @@ void mlx5_fs_bulk_cleanup(struct mlx5_fs_bulk *fs_bulk);
int mlx5_fs_bulk_get_free_amount(struct mlx5_fs_bulk *bulk);
void mlx5_fs_pool_init(struct mlx5_fs_pool *pool, struct mlx5_core_dev *dev,
- const struct mlx5_fs_pool_ops *ops);
+ const struct mlx5_fs_pool_ops *ops, void *pool_ctx);
void mlx5_fs_pool_cleanup(struct mlx5_fs_pool *pool);
int mlx5_fs_pool_acquire_index(struct mlx5_fs_pool *fs_pool,
struct mlx5_fs_pool_index *pool_index);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 4822d01123b4..d61a1a9297c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -322,17 +322,16 @@ static void mlx5_pps_out(struct work_struct *work)
}
}
-static void mlx5_timestamp_overflow(struct work_struct *work)
+static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info)
{
- struct delayed_work *dwork = to_delayed_work(work);
struct mlx5_core_dev *mdev;
struct mlx5_timer *timer;
struct mlx5_clock *clock;
unsigned long flags;
- timer = container_of(dwork, struct mlx5_timer, overflow_work);
- clock = container_of(timer, struct mlx5_clock, timer);
+ clock = container_of(ptp_info, struct mlx5_clock, ptp_info);
mdev = container_of(clock, struct mlx5_core_dev, clock);
+ timer = &clock->timer;
if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
goto out;
@@ -343,7 +342,7 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
write_sequnlock_irqrestore(&clock->lock, flags);
out:
- schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
+ return timer->overflow_period;
}
static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev,
@@ -517,6 +516,7 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
timer->cycles.mult = mult;
mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
+ ptp_schedule_worker(clock->ptp, timer->overflow_period);
return 0;
}
@@ -852,6 +852,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = {
.settime64 = mlx5_ptp_settime,
.enable = NULL,
.verify = NULL,
+ .do_aux_work = mlx5_timestamp_overflow,
};
static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin,
@@ -1052,12 +1053,11 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock)
do_div(ns, NSEC_PER_SEC / HZ);
timer->overflow_period = ns;
- INIT_DELAYED_WORK(&timer->overflow_work, mlx5_timestamp_overflow);
- if (timer->overflow_period)
- schedule_delayed_work(&timer->overflow_work, 0);
- else
+ if (!timer->overflow_period) {
+ timer->overflow_period = HZ;
mlx5_core_warn(mdev,
- "invalid overflow period, overflow_work is not scheduled\n");
+ "invalid overflow period, overflow_work is scheduled once per second\n");
+ }
if (clock_info)
clock_info->overflow_period = timer->overflow_period;
@@ -1172,6 +1172,9 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT);
mlx5_eq_notifier_register(mdev, &clock->pps_nb);
+
+ if (clock->ptp)
+ ptp_schedule_worker(clock->ptp, 0);
}
void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
@@ -1188,7 +1191,6 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
}
cancel_work_sync(&clock->pps_info.out_work);
- cancel_delayed_work_sync(&clock->timer.overflow_work);
if (mdev->clock_info) {
free_page((unsigned long)mdev->clock_info);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
index a897cdc60fdb..b5332c54d4fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c
@@ -11,31 +11,29 @@
/* This is the longest supported action sequence for FDB table:
* DECAP, POP_VLAN, MODIFY, CTR, ASO, PUSH_VLAN, MODIFY, ENCAP, Term.
*/
-static const u32 action_order_arr[MLX5HWS_TABLE_TYPE_MAX][MLX5HWS_ACTION_TYP_MAX] = {
- [MLX5HWS_TABLE_TYPE_FDB] = {
- BIT(MLX5HWS_ACTION_TYP_REMOVE_HEADER) |
- BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2) |
- BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2),
- BIT(MLX5HWS_ACTION_TYP_POP_VLAN),
- BIT(MLX5HWS_ACTION_TYP_POP_VLAN),
- BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR),
- BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN),
- BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN),
- BIT(MLX5HWS_ACTION_TYP_INSERT_HEADER) |
- BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2) |
- BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3),
- BIT(MLX5HWS_ACTION_TYP_CTR),
- BIT(MLX5HWS_ACTION_TYP_TAG),
- BIT(MLX5HWS_ACTION_TYP_ASO_METER),
- BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR),
- BIT(MLX5HWS_ACTION_TYP_TBL) |
- BIT(MLX5HWS_ACTION_TYP_VPORT) |
- BIT(MLX5HWS_ACTION_TYP_DROP) |
- BIT(MLX5HWS_ACTION_TYP_SAMPLER) |
- BIT(MLX5HWS_ACTION_TYP_RANGE) |
- BIT(MLX5HWS_ACTION_TYP_DEST_ARRAY),
- BIT(MLX5HWS_ACTION_TYP_LAST),
- },
+static const u32 action_order_arr[MLX5HWS_ACTION_TYP_MAX] = {
+ BIT(MLX5HWS_ACTION_TYP_REMOVE_HEADER) |
+ BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2) |
+ BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2),
+ BIT(MLX5HWS_ACTION_TYP_POP_VLAN),
+ BIT(MLX5HWS_ACTION_TYP_POP_VLAN),
+ BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR),
+ BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN),
+ BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN),
+ BIT(MLX5HWS_ACTION_TYP_INSERT_HEADER) |
+ BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2) |
+ BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3),
+ BIT(MLX5HWS_ACTION_TYP_CTR),
+ BIT(MLX5HWS_ACTION_TYP_TAG),
+ BIT(MLX5HWS_ACTION_TYP_ASO_METER),
+ BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR),
+ BIT(MLX5HWS_ACTION_TYP_TBL) |
+ BIT(MLX5HWS_ACTION_TYP_VPORT) |
+ BIT(MLX5HWS_ACTION_TYP_DROP) |
+ BIT(MLX5HWS_ACTION_TYP_SAMPLER) |
+ BIT(MLX5HWS_ACTION_TYP_RANGE) |
+ BIT(MLX5HWS_ACTION_TYP_DEST_ARRAY),
+ BIT(MLX5HWS_ACTION_TYP_LAST),
};
static const char * const mlx5hws_action_type_str[] = {
@@ -83,8 +81,8 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx,
int ret;
mutex_lock(&ctx->ctrl_lock);
- if (ctx->common_res[tbl_type].shared_stc[stc_type]) {
- ctx->common_res[tbl_type].shared_stc[stc_type]->refcount++;
+ if (ctx->common_res.shared_stc[stc_type]) {
+ ctx->common_res.shared_stc[stc_type]->refcount++;
mutex_unlock(&ctx->ctrl_lock);
return 0;
}
@@ -124,8 +122,8 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx,
goto free_shared_stc;
}
- ctx->common_res[tbl_type].shared_stc[stc_type] = shared_stc;
- ctx->common_res[tbl_type].shared_stc[stc_type]->refcount = 1;
+ ctx->common_res.shared_stc[stc_type] = shared_stc;
+ ctx->common_res.shared_stc[stc_type]->refcount = 1;
mutex_unlock(&ctx->ctrl_lock);
@@ -178,16 +176,16 @@ static void hws_action_put_shared_stc(struct mlx5hws_action *action,
}
mutex_lock(&ctx->ctrl_lock);
- if (--ctx->common_res[tbl_type].shared_stc[stc_type]->refcount) {
+ if (--ctx->common_res.shared_stc[stc_type]->refcount) {
mutex_unlock(&ctx->ctrl_lock);
return;
}
- shared_stc = ctx->common_res[tbl_type].shared_stc[stc_type];
+ shared_stc = ctx->common_res.shared_stc[stc_type];
mlx5hws_action_free_single_stc(ctx, tbl_type, &shared_stc->stc_chunk);
kfree(shared_stc);
- ctx->common_res[tbl_type].shared_stc[stc_type] = NULL;
+ ctx->common_res.shared_stc[stc_type] = NULL;
mutex_unlock(&ctx->ctrl_lock);
}
@@ -206,10 +204,10 @@ bool mlx5hws_action_check_combo(struct mlx5hws_context *ctx,
enum mlx5hws_action_type *user_actions,
enum mlx5hws_table_type table_type)
{
- const u32 *order_arr = action_order_arr[table_type];
+ const u32 *order_arr = action_order_arr;
+ bool valid_combo;
u8 order_idx = 0;
u8 user_idx = 0;
- bool valid_combo;
if (table_type >= MLX5HWS_TABLE_TYPE_MAX) {
mlx5hws_err(ctx, "Invalid table_type %d", table_type);
@@ -321,8 +319,8 @@ int mlx5hws_action_alloc_single_stc(struct mlx5hws_context *ctx,
__must_hold(&ctx->ctrl_lock)
{
struct mlx5hws_cmd_stc_modify_attr cleanup_stc_attr = {0};
- struct mlx5hws_pool *stc_pool = ctx->stc_pool[table_type];
struct mlx5hws_cmd_stc_modify_attr fixup_stc_attr = {0};
+ struct mlx5hws_pool *stc_pool = ctx->stc_pool;
bool use_fixup;
u32 obj_0_id;
int ret;
@@ -387,8 +385,8 @@ void mlx5hws_action_free_single_stc(struct mlx5hws_context *ctx,
struct mlx5hws_pool_chunk *stc)
__must_hold(&ctx->ctrl_lock)
{
- struct mlx5hws_pool *stc_pool = ctx->stc_pool[table_type];
struct mlx5hws_cmd_stc_modify_attr stc_attr = {0};
+ struct mlx5hws_pool *stc_pool = ctx->stc_pool;
u32 obj_id;
/* Modify the STC not to point to an object */
@@ -473,6 +471,7 @@ static void hws_action_fill_stc_attr(struct mlx5hws_action *action,
break;
case MLX5HWS_ACTION_TYP_TBL:
case MLX5HWS_ACTION_TYP_DEST_ARRAY:
+ case MLX5HWS_ACTION_TYP_SAMPLER:
attr->action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_FT;
attr->action_offset = MLX5HWS_ACTION_OFFSET_HIT;
attr->dest_table_id = obj_id;
@@ -561,7 +560,7 @@ hws_action_create_stcs(struct mlx5hws_action *action, u32 obj_id)
if (action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB) {
ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr,
MLX5HWS_TABLE_TYPE_FDB,
- &action->stc[MLX5HWS_TABLE_TYPE_FDB]);
+ &action->stc);
if (ret)
goto out_err;
}
@@ -585,7 +584,7 @@ hws_action_destroy_stcs(struct mlx5hws_action *action)
if (action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB)
mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB,
- &action->stc[MLX5HWS_TABLE_TYPE_FDB]);
+ &action->stc);
mutex_unlock(&ctx->ctrl_lock);
}
@@ -1639,8 +1638,8 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx,
rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, false);
/* STC is a single resource (obj_id), use any STC for the ID */
- stc_pool = ctx->stc_pool[MLX5HWS_TABLE_TYPE_FDB];
- default_stc = ctx->common_res[MLX5HWS_TABLE_TYPE_FDB].default_stc;
+ stc_pool = ctx->stc_pool;
+ default_stc = ctx->common_res.default_stc;
obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit);
rtc_attr.stc_base = obj_id;
@@ -1731,7 +1730,7 @@ hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx,
ste_attr.used_id_rtc_0 = &used_rtc_0_id;
ste_attr.used_id_rtc_1 = &used_rtc_1_id;
- common_res = &ctx->common_res[MLX5HWS_TABLE_TYPE_FDB];
+ common_res = &ctx->common_res;
/* init an empty match STE which will always hit */
ste_attr.wqe_ctrl = &wqe_ctrl;
@@ -1750,7 +1749,7 @@ hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx,
wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_CTRL] |=
htonl(MLX5HWS_ACTION_STC_IDX_LAST_COMBO2 << 29);
wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_HIT] =
- htonl(hit_ft_action->stc[MLX5HWS_TABLE_TYPE_FDB].offset);
+ htonl(hit_ft_action->stc.offset);
wqe_data_arr = (__force __be32 *)&range_wqe_data;
@@ -1843,7 +1842,7 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx,
stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer;
ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, MLX5HWS_TABLE_TYPE_FDB,
- &action->stc[MLX5HWS_TABLE_TYPE_FDB]);
+ &action->stc);
if (ret)
goto error_unlock;
@@ -1875,7 +1874,50 @@ struct mlx5hws_action *
mlx5hws_action_create_flow_sampler(struct mlx5hws_context *ctx,
u32 sampler_id, u32 flags)
{
- mlx5hws_err(ctx, "Flow sampler action - unsupported\n");
+ struct mlx5hws_cmd_ft_create_attr ft_attr = {0};
+ struct mlx5hws_cmd_set_fte_attr fte_attr = {0};
+ struct mlx5hws_cmd_forward_tbl *fw_island;
+ struct mlx5hws_cmd_set_fte_dest dest;
+ struct mlx5hws_action *action;
+ int ret;
+
+ if (flags != (MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED)) {
+ mlx5hws_err(ctx, "Unsupported flags for flow sampler\n");
+ return NULL;
+ }
+
+ ft_attr.type = FS_FT_FDB;
+ ft_attr.level = ctx->caps->fdb_ft.max_level - 1;
+
+ dest.destination_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
+ dest.destination_id = sampler_id;
+
+ fte_attr.dests_num = 1;
+ fte_attr.dests = &dest;
+ fte_attr.action_flags = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ fte_attr.ignore_flow_level = 1;
+
+ fw_island = mlx5hws_cmd_forward_tbl_create(ctx->mdev, &ft_attr, &fte_attr);
+ if (!fw_island)
+ return NULL;
+
+ action = hws_action_create_generic(ctx, flags,
+ MLX5HWS_ACTION_TYP_SAMPLER);
+ if (!action)
+ goto destroy_fw_island;
+
+ ret = hws_action_create_stcs(action, fw_island->ft_id);
+ if (ret)
+ goto free_action;
+
+ action->flow_sampler.fw_island = fw_island;
+
+ return action;
+
+free_action:
+ kfree(action);
+destroy_fw_island:
+ mlx5hws_cmd_forward_tbl_destroy(ctx->mdev, fw_island);
return NULL;
}
@@ -1914,6 +1956,11 @@ static void hws_action_destroy_hws(struct mlx5hws_action *action)
}
kfree(action->dest_array.dest_list);
break;
+ case MLX5HWS_ACTION_TYP_SAMPLER:
+ hws_action_destroy_stcs(action);
+ mlx5hws_cmd_forward_tbl_destroy(action->ctx->mdev,
+ action->flow_sampler.fw_island);
+ break;
case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2:
case MLX5HWS_ACTION_TYP_MODIFY_HDR:
shared_arg = false;
@@ -1970,8 +2017,8 @@ __must_hold(&ctx->ctrl_lock)
struct mlx5hws_action_default_stc *default_stc;
int ret;
- if (ctx->common_res[tbl_type].default_stc) {
- ctx->common_res[tbl_type].default_stc->refcount++;
+ if (ctx->common_res.default_stc) {
+ ctx->common_res.default_stc->refcount++;
return 0;
}
@@ -2023,8 +2070,8 @@ __must_hold(&ctx->ctrl_lock)
goto free_nop_dw7;
}
- ctx->common_res[tbl_type].default_stc = default_stc;
- ctx->common_res[tbl_type].default_stc->refcount++;
+ ctx->common_res.default_stc = default_stc;
+ ctx->common_res.default_stc->refcount++;
return 0;
@@ -2046,9 +2093,7 @@ __must_hold(&ctx->ctrl_lock)
{
struct mlx5hws_action_default_stc *default_stc;
- default_stc = ctx->common_res[tbl_type].default_stc;
-
- default_stc = ctx->common_res[tbl_type].default_stc;
+ default_stc = ctx->common_res.default_stc;
if (--default_stc->refcount)
return;
@@ -2058,7 +2103,7 @@ __must_hold(&ctx->ctrl_lock)
mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw5);
mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_ctr);
kfree(default_stc);
- ctx->common_res[tbl_type].default_stc = NULL;
+ ctx->common_res.default_stc = NULL;
}
static void hws_action_modify_write(struct mlx5hws_send_engine *queue,
@@ -2150,8 +2195,7 @@ hws_action_apply_stc(struct mlx5hws_actions_apply_data *apply,
{
struct mlx5hws_action *action = apply->rule_action[action_idx].action;
- apply->wqe_ctrl->stc_ix[stc_idx] =
- htonl(action->stc[apply->tbl_type].offset);
+ apply->wqe_ctrl->stc_ix[stc_idx] = htonl(action->stc.offset);
}
static void
@@ -2181,7 +2225,7 @@ hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply,
rule_action = &apply->rule_action[setter->idx_double];
action = rule_action->action;
- stc_idx = htonl(action->stc[apply->tbl_type].offset);
+ stc_idx = htonl(action->stc.offset);
apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx;
apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0;
@@ -2240,7 +2284,7 @@ hws_action_setter_insert_ptr(struct mlx5hws_actions_apply_data *apply,
apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0;
apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx);
- stc_idx = htonl(action->stc[apply->tbl_type].offset);
+ stc_idx = htonl(action->stc.offset);
apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx;
apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0;
@@ -2272,7 +2316,7 @@ hws_action_setter_tnl_l3_to_l2(struct mlx5hws_actions_apply_data *apply,
apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0;
apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx);
- stc_idx = htonl(action->stc[apply->tbl_type].offset);
+ stc_idx = htonl(action->stc.offset);
apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx;
apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0;
@@ -2434,6 +2478,7 @@ int mlx5hws_action_template_process(struct mlx5hws_action_template *at)
case MLX5HWS_ACTION_TYP_DROP:
case MLX5HWS_ACTION_TYP_TBL:
case MLX5HWS_ACTION_TYP_DEST_ARRAY:
+ case MLX5HWS_ACTION_TYP_SAMPLER:
case MLX5HWS_ACTION_TYP_VPORT:
case MLX5HWS_ACTION_TYP_MISS:
/* Hit action */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
index e8f562c31826..64b76075f7f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h
@@ -70,12 +70,12 @@ struct mlx5hws_action_default_stc {
struct mlx5hws_pool_chunk nop_dw6;
struct mlx5hws_pool_chunk nop_dw7;
struct mlx5hws_pool_chunk default_hit;
- u32 refcount;
+ u32 refcount; /* protected by context ctrl lock */
};
struct mlx5hws_action_shared_stc {
struct mlx5hws_pool_chunk stc_chunk;
- u32 refcount;
+ u32 refcount; /* protected by context ctrl lock */
};
struct mlx5hws_actions_apply_data {
@@ -124,7 +124,7 @@ struct mlx5hws_action {
struct mlx5hws_context *ctx;
union {
struct {
- struct mlx5hws_pool_chunk stc[MLX5HWS_TABLE_TYPE_MAX];
+ struct mlx5hws_pool_chunk stc;
union {
struct {
u32 pat_id;
@@ -166,6 +166,9 @@ struct mlx5hws_action {
struct mlx5hws_cmd_set_fte_dest *dest_list;
} dest_array;
struct {
+ struct mlx5hws_cmd_forward_tbl *fw_island;
+ } flow_sampler;
+ struct {
u8 type;
u8 start_anchor;
u8 end_anchor;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
index baacf662c0ab..a8d886e92144 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c
@@ -152,6 +152,8 @@ mlx5hws_bwc_matcher_create(struct mlx5hws_table *table,
if (!bwc_matcher)
return NULL;
+ atomic_set(&bwc_matcher->num_of_rules, 0);
+
/* Check if the required match params can be all matched
* in single STE, otherwise complex matcher is needed.
*/
@@ -199,10 +201,12 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher)
int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher)
{
- if (bwc_matcher->num_of_rules)
+ u32 num_of_rules = atomic_read(&bwc_matcher->num_of_rules);
+
+ if (num_of_rules)
mlx5hws_err(bwc_matcher->matcher->tbl->ctx,
"BWC matcher destroy: matcher still has %d rules\n",
- bwc_matcher->num_of_rules);
+ num_of_rules);
mlx5hws_bwc_matcher_destroy_simple(bwc_matcher);
@@ -215,6 +219,8 @@ static int hws_bwc_queue_poll(struct mlx5hws_context *ctx,
u32 *pending_rules,
bool drain)
{
+ unsigned long timeout = jiffies +
+ msecs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT * MSEC_PER_SEC);
struct mlx5hws_flow_op_result comp[MLX5HWS_BWC_MATCHER_REHASH_BURST_TH];
u16 burst_th = hws_bwc_get_burst_th(ctx, queue_id);
bool got_comp = *pending_rules >= burst_th;
@@ -250,6 +256,11 @@ static int hws_bwc_queue_poll(struct mlx5hws_context *ctx,
}
got_comp = !!ret;
+
+ if (unlikely(!got_comp && time_after(jiffies, timeout))) {
+ mlx5hws_err(ctx, "BWC poll error: polling queue %d - TIMEOUT\n", queue_id);
+ return -ETIMEDOUT;
+ }
}
return err;
@@ -309,7 +320,7 @@ static void hws_bwc_rule_list_add(struct mlx5hws_bwc_rule *bwc_rule, u16 idx)
{
struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
- bwc_matcher->num_of_rules++;
+ atomic_inc(&bwc_matcher->num_of_rules);
bwc_rule->bwc_queue_idx = idx;
list_add(&bwc_rule->list_node, &bwc_matcher->rules[idx]);
}
@@ -318,7 +329,7 @@ static void hws_bwc_rule_list_remove(struct mlx5hws_bwc_rule *bwc_rule)
{
struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher;
- bwc_matcher->num_of_rules--;
+ atomic_dec(&bwc_matcher->num_of_rules);
list_del_init(&bwc_rule->list_node);
}
@@ -334,22 +345,21 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule,
struct mlx5hws_rule_attr *rule_attr)
{
struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx;
- struct mlx5hws_flow_op_result completion;
+ u32 expected_completions = 1;
int ret;
ret = hws_bwc_rule_destroy_hws_async(bwc_rule, rule_attr);
if (unlikely(ret))
return ret;
- do {
- ret = mlx5hws_send_queue_poll(ctx, rule_attr->queue_id, &completion, 1);
- } while (ret != 1);
-
- if (unlikely(completion.status != MLX5HWS_FLOW_OP_SUCCESS ||
- (bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED &&
- bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING))) {
- mlx5hws_err(ctx, "Failed destroying BWC rule: completion %d, rule status %d\n",
- completion.status, bwc_rule->rule->status);
+ ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true);
+ if (unlikely(ret))
+ return ret;
+
+ if (unlikely(bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED &&
+ bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING)) {
+ mlx5hws_err(ctx, "Failed destroying BWC rule: rule status %d\n",
+ bwc_rule->rule->status);
return -EINVAL;
}
@@ -615,8 +625,12 @@ static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_match
ret = hws_bwc_queue_poll(ctx, rule_attr.queue_id,
&pending_rules[i], false);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ mlx5hws_err(ctx,
+ "Moving BWC rule failed during rehash (%d)\n",
+ ret);
goto free_bwc_rules;
+ }
}
}
} while (!all_done);
@@ -629,8 +643,11 @@ static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_match
mlx5hws_send_engine_flush_queue(&ctx->send_queue[queue_id]);
ret = hws_bwc_queue_poll(ctx, queue_id,
&pending_rules[i], true);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ mlx5hws_err(ctx,
+ "Moving BWC rule failed during rehash (%d)\n", ret);
goto free_bwc_rules;
+ }
}
}
@@ -704,7 +721,8 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher)
* Need to check again if we really need rehash.
* If the reason for rehash was size, but not any more - skip rehash.
*/
- if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher, bwc_matcher->num_of_rules))
+ if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher,
+ atomic_read(&bwc_matcher->num_of_rules)))
return 0;
/* Now we're done all the checking - do the rehash:
@@ -797,7 +815,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule,
}
/* check if number of rules require rehash */
- num_of_rules = bwc_matcher->num_of_rules;
+ num_of_rules = atomic_read(&bwc_matcher->num_of_rules);
if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))) {
mutex_unlock(queue_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
index 3d4965213b01..f9f569131dde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h
@@ -8,10 +8,18 @@
#define MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP 1
#define MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH 70
#define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32
-#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 255
+
+/* Max number of AT attach operations for the same matcher.
+ * When the limit is reached, next attempt to attach new AT
+ * will result in creation of a new matcher and moving all
+ * the rules to this matcher.
+ */
+#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8
#define MLX5HWS_BWC_MAX_ACTS 16
+#define MLX5HWS_BWC_POLLING_TIMEOUT 60
+
struct mlx5hws_bwc_matcher {
struct mlx5hws_matcher *matcher;
struct mlx5hws_match_template *mt;
@@ -19,7 +27,7 @@ struct mlx5hws_bwc_matcher {
u8 num_of_at;
u16 priority;
u8 size_log;
- u32 num_of_rules; /* atomically accessed */
+ atomic_t num_of_rules;
struct list_head *rules;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
index c00c138c3366..487e75476b0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c
@@ -257,6 +257,12 @@ int mlx5hws_cmd_set_fte(struct mlx5_core_dev *mdev,
dest->ext_reformat_id);
}
break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER:
+ MLX5_SET(dest_format, in_dests,
+ destination_type, ifc_dest_type);
+ MLX5_SET(dest_format, in_dests, destination_id,
+ dest->destination_id);
+ break;
default:
ret = -EOPNOTSUPP;
goto out;
@@ -359,7 +365,7 @@ void mlx5hws_cmd_set_attr_connect_miss_tbl(struct mlx5hws_context *ctx,
ft_attr->type = fw_ft_type;
ft_attr->table_miss_action = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL;
- default_miss_tbl = ctx->common_res[type].default_miss->ft_id;
+ default_miss_tbl = ctx->common_res.default_miss->ft_id;
if (!default_miss_tbl) {
pr_warn("HWS: no flow table ID for default miss\n");
return;
@@ -622,12 +628,12 @@ int mlx5hws_cmd_arg_create(struct mlx5_core_dev *mdev,
u32 pd,
u32 *arg_id)
{
+ u32 in[MLX5_ST_SZ_DW(create_modify_header_arg_in)] = {0};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
- u32 in[MLX5_ST_SZ_DW(create_arg_in)] = {0};
void *attr;
int ret;
- attr = MLX5_ADDR_OF(create_arg_in, in, hdr);
+ attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, hdr);
MLX5_SET(general_obj_in_cmd_hdr,
attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr,
@@ -635,8 +641,8 @@ int mlx5hws_cmd_arg_create(struct mlx5_core_dev *mdev,
MLX5_SET(general_obj_in_cmd_hdr,
attr, op_param.create.log_obj_range, log_obj_range);
- attr = MLX5_ADDR_OF(create_arg_in, in, arg);
- MLX5_SET(arg, attr, access_pd, pd);
+ attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, arg);
+ MLX5_SET(modify_header_arg, attr, access_pd, pd);
ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
if (ret) {
@@ -812,7 +818,7 @@ int mlx5hws_cmd_packet_reformat_create(struct mlx5_core_dev *mdev,
struct mlx5hws_cmd_packet_reformat_create_attr *attr,
u32 *reformat_id)
{
- u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_out)] = {0};
+ u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {0};
size_t insz, cmd_data_sz, cmd_total_sz;
void *prctx;
void *pdata;
@@ -845,7 +851,7 @@ int mlx5hws_cmd_packet_reformat_create(struct mlx5_core_dev *mdev,
goto out;
}
- *reformat_id = MLX5_GET(alloc_packet_reformat_out, out, packet_reformat_id);
+ *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
out:
kfree(in);
return ret;
@@ -854,13 +860,13 @@ out:
int mlx5hws_cmd_packet_reformat_destroy(struct mlx5_core_dev *mdev,
u32 reformat_id)
{
- u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {0};
int ret;
- MLX5_SET(dealloc_packet_reformat_in, in, opcode,
+ MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
- MLX5_SET(dealloc_packet_reformat_in, in,
+ MLX5_SET(dealloc_packet_reformat_context_in, in,
packet_reformat_id, reformat_id);
ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
@@ -889,73 +895,6 @@ int mlx5hws_cmd_sq_modify_rdy(struct mlx5_core_dev *mdev, u32 sqn)
return ret;
}
-int mlx5hws_cmd_allow_other_vhca_access(struct mlx5_core_dev *mdev,
- struct mlx5hws_cmd_allow_other_vhca_access_attr *attr)
-{
- u32 out[MLX5_ST_SZ_DW(allow_other_vhca_access_out)] = {0};
- u32 in[MLX5_ST_SZ_DW(allow_other_vhca_access_in)] = {0};
- void *key;
- int ret;
-
- MLX5_SET(allow_other_vhca_access_in,
- in, opcode, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS);
- MLX5_SET(allow_other_vhca_access_in,
- in, object_type_to_be_accessed, attr->obj_type);
- MLX5_SET(allow_other_vhca_access_in,
- in, object_id_to_be_accessed, attr->obj_id);
-
- key = MLX5_ADDR_OF(allow_other_vhca_access_in, in, access_key);
- memcpy(key, attr->access_key, sizeof(attr->access_key));
-
- ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
- if (ret)
- mlx5_core_err(mdev, "Failed to execute ALLOW_OTHER_VHCA_ACCESS command\n");
-
- return ret;
-}
-
-int mlx5hws_cmd_alias_obj_create(struct mlx5_core_dev *mdev,
- struct mlx5hws_cmd_alias_obj_create_attr *alias_attr,
- u32 *obj_id)
-{
- u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
- u32 in[MLX5_ST_SZ_DW(create_alias_obj_in)] = {0};
- void *attr;
- void *key;
- int ret;
-
- attr = MLX5_ADDR_OF(create_alias_obj_in, in, hdr);
- MLX5_SET(general_obj_in_cmd_hdr,
- attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
- MLX5_SET(general_obj_in_cmd_hdr,
- attr, obj_type, alias_attr->obj_type);
- MLX5_SET(general_obj_in_cmd_hdr, attr, op_param.create.alias_object, 1);
-
- attr = MLX5_ADDR_OF(create_alias_obj_in, in, alias_ctx);
- MLX5_SET(alias_context, attr, vhca_id_to_be_accessed, alias_attr->vhca_id);
- MLX5_SET(alias_context, attr, object_id_to_be_accessed, alias_attr->obj_id);
-
- key = MLX5_ADDR_OF(alias_context, attr, access_key);
- memcpy(key, alias_attr->access_key, sizeof(alias_attr->access_key));
-
- ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
- if (ret) {
- mlx5_core_err(mdev, "Failed to create ALIAS OBJ\n");
- goto out;
- }
-
- *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
-out:
- return ret;
-}
-
-int mlx5hws_cmd_alias_obj_destroy(struct mlx5_core_dev *mdev,
- u16 obj_type,
- u32 obj_id)
-{
- return hws_cmd_general_obj_destroy(mdev, obj_type, obj_id);
-}
-
int mlx5hws_cmd_generate_wqe(struct mlx5_core_dev *mdev,
struct mlx5hws_cmd_generate_wqe_attr *attr,
struct mlx5_cqe64 *ret_cqe)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
index 434f62b0904e..610c63d81ad9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h
@@ -63,7 +63,7 @@ struct mlx5hws_cmd_forward_tbl {
u8 type;
u32 ft_id;
u32 fg_id;
- u32 refcount;
+ u32 refcount; /* protected by context ctrl lock */
};
struct mlx5hws_cmd_rtc_create_attr {
@@ -334,14 +334,6 @@ mlx5hws_cmd_forward_tbl_create(struct mlx5_core_dev *mdev,
void mlx5hws_cmd_forward_tbl_destroy(struct mlx5_core_dev *mdev,
struct mlx5hws_cmd_forward_tbl *tbl);
-int mlx5hws_cmd_alias_obj_create(struct mlx5_core_dev *mdev,
- struct mlx5hws_cmd_alias_obj_create_attr *alias_attr,
- u32 *obj_id);
-
-int mlx5hws_cmd_alias_obj_destroy(struct mlx5_core_dev *mdev,
- u16 obj_type,
- u32 obj_id);
-
int mlx5hws_cmd_sq_modify_rdy(struct mlx5_core_dev *mdev, u32 sqn);
int mlx5hws_cmd_query_caps(struct mlx5_core_dev *mdev,
@@ -352,9 +344,6 @@ void mlx5hws_cmd_set_attr_connect_miss_tbl(struct mlx5hws_context *ctx,
enum mlx5hws_table_type type,
struct mlx5hws_cmd_ft_modify_attr *ft_attr);
-int mlx5hws_cmd_allow_other_vhca_access(struct mlx5_core_dev *mdev,
- struct mlx5hws_cmd_allow_other_vhca_access_attr *attr);
-
int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function,
u16 vport_number, u16 *gvmi);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
index 4a8928f33bb9..9cda2774fd64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c
@@ -23,7 +23,6 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx)
struct mlx5hws_pool_attr pool_attr = {0};
u8 max_log_sz;
int ret;
- int i;
ret = mlx5hws_pat_init_pattern_cache(&ctx->pattern_cache);
if (ret)
@@ -39,23 +38,17 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx)
max_log_sz = min(MLX5HWS_POOL_STC_LOG_SZ, ctx->caps->stc_alloc_log_max);
pool_attr.alloc_log_sz = max(max_log_sz, ctx->caps->stc_alloc_log_gran);
- for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) {
- pool_attr.table_type = i;
- ctx->stc_pool[i] = mlx5hws_pool_create(ctx, &pool_attr);
- if (!ctx->stc_pool[i]) {
- mlx5hws_err(ctx, "Failed to allocate STC pool [%d]", i);
- ret = -ENOMEM;
- goto free_stc_pools;
- }
+ pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB;
+ ctx->stc_pool = mlx5hws_pool_create(ctx, &pool_attr);
+ if (!ctx->stc_pool) {
+ mlx5hws_err(ctx, "Failed to allocate STC pool\n");
+ ret = -ENOMEM;
+ goto uninit_cache;
}
return 0;
-free_stc_pools:
- for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++)
- if (ctx->stc_pool[i])
- mlx5hws_pool_destroy(ctx->stc_pool[i]);
-
+uninit_cache:
mlx5hws_definer_uninit_cache(ctx->definer_cache);
uninit_pat_cache:
mlx5hws_pat_uninit_pattern_cache(ctx->pattern_cache);
@@ -64,12 +57,8 @@ uninit_pat_cache:
static void hws_context_pools_uninit(struct mlx5hws_context *ctx)
{
- int i;
-
- for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) {
- if (ctx->stc_pool[i])
- mlx5hws_pool_destroy(ctx->stc_pool[i]);
- }
+ if (ctx->stc_pool)
+ mlx5hws_pool_destroy(ctx->stc_pool);
mlx5hws_definer_uninit_cache(ctx->definer_cache);
mlx5hws_pat_uninit_pattern_cache(ctx->pattern_cache);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
index 1c9cc4fba083..38c3647444ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h
@@ -38,8 +38,8 @@ struct mlx5hws_context {
struct mlx5_core_dev *mdev;
struct mlx5hws_cmd_query_caps *caps;
u32 pd_num;
- struct mlx5hws_pool *stc_pool[MLX5HWS_TABLE_TYPE_MAX];
- struct mlx5hws_context_common_res common_res[MLX5HWS_TABLE_TYPE_MAX];
+ struct mlx5hws_pool *stc_pool;
+ struct mlx5hws_context_common_res common_res;
struct mlx5hws_pattern_cache *pattern_cache;
struct mlx5hws_definer_cache *definer_cache;
struct mutex ctrl_lock; /* control lock to protect the whole context */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
index 5b200b4bc1a8..696275fd0ce2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c
@@ -148,8 +148,8 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
matcher->match_ste.rtc_1_id,
(int)ste_1_id);
- ste = &matcher->action_ste[0].ste;
- ste_pool = matcher->action_ste[0].pool;
+ ste = &matcher->action_ste.ste;
+ ste_pool = matcher->action_ste.pool;
if (ste_pool) {
ste_0_id = mlx5hws_pool_chunk_get_base_id(ste_pool, ste);
if (tbl_type == MLX5HWS_TABLE_TYPE_FDB)
@@ -171,10 +171,8 @@ static int hws_debug_dump_matcher(struct seq_file *f, struct mlx5hws_matcher *ma
return ret;
seq_printf(f, ",%d,%d,%d,%d,%d,0x%llx,0x%llx\n",
- matcher->action_ste[0].rtc_0_id,
- (int)ste_0_id,
- matcher->action_ste[0].rtc_1_id,
- (int)ste_1_id,
+ matcher->action_ste.rtc_0_id, (int)ste_0_id,
+ matcher->action_ste.rtc_1_id, (int)ste_1_id,
0,
mlx5hws_debug_icm_to_idx(icm_addr_0),
mlx5hws_debug_icm_to_idx(icm_addr_1));
@@ -368,9 +366,10 @@ static int hws_debug_dump_context_info(struct seq_file *f, struct mlx5hws_contex
static int hws_debug_dump_context_stc_resource(struct seq_file *f,
struct mlx5hws_context *ctx,
- u32 tbl_type,
struct mlx5hws_pool_resource *resource)
{
+ u32 tbl_type = MLX5HWS_TABLE_TYPE_BASE + MLX5HWS_TABLE_TYPE_FDB;
+
seq_printf(f, "%d,0x%llx,%u,%u\n",
MLX5HWS_DEBUG_RES_TYPE_CONTEXT_STC,
HWS_PTR_TO_ID(ctx),
@@ -382,31 +381,22 @@ static int hws_debug_dump_context_stc_resource(struct seq_file *f,
static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context *ctx)
{
- struct mlx5hws_pool *stc_pool;
- u32 table_type;
+ struct mlx5hws_pool *stc_pool = ctx->stc_pool;
int ret;
- int i;
- for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) {
- stc_pool = ctx->stc_pool[i];
- table_type = MLX5HWS_TABLE_TYPE_BASE + i;
-
- if (!stc_pool)
- continue;
+ if (!stc_pool)
+ return 0;
- if (stc_pool->resource[0]) {
- ret = hws_debug_dump_context_stc_resource(f, ctx, table_type,
- stc_pool->resource[0]);
- if (ret)
- return ret;
- }
+ if (stc_pool->resource[0]) {
+ ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->resource[0]);
+ if (ret)
+ return ret;
+ }
- if (i == MLX5HWS_TABLE_TYPE_FDB && stc_pool->mirror_resource[0]) {
- ret = hws_debug_dump_context_stc_resource(f, ctx, table_type,
- stc_pool->mirror_resource[0]);
- if (ret)
- return ret;
- }
+ if (stc_pool->mirror_resource[0]) {
+ ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->mirror_resource[0]);
+ if (ret)
+ return ret;
}
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
index 8fe96eb76baf..10ece7df1cfa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c
@@ -70,7 +70,7 @@
u32 second_dw_mask = (mask) & ((1 << _bit_off) - 1); \
_HWS_SET32(p, (v) >> _bit_off, byte_off, 0, (mask) >> _bit_off); \
_HWS_SET32(p, (v) & second_dw_mask, (byte_off) + DW_SIZE, \
- (bit_off) % BITS_IN_DW, second_dw_mask); \
+ (bit_off + BITS_IN_DW) % BITS_IN_DW, second_dw_mask); \
} else { \
_HWS_SET32(p, v, byte_off, (bit_off), (mask)); \
} \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h
index 9432d5084def..5c1a2086efba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h
@@ -785,7 +785,7 @@ struct mlx5hws_definer_cache {
struct mlx5hws_definer_cache_item {
struct mlx5hws_definer definer;
- u32 refcount;
+ u32 refcount; /* protected by context ctrl lock */
struct list_head list_node;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
new file mode 100644
index 000000000000..05329afeb9ea
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c
@@ -0,0 +1,1377 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */
+
+#include <linux/mlx5/vport.h>
+#include <mlx5_core.h>
+#include <fs_core.h>
+#include <fs_cmd.h>
+#include "fs_hws_pools.h"
+#include "mlx5hws.h"
+
+#define MLX5HWS_CTX_MAX_NUM_OF_QUEUES 16
+#define MLX5HWS_CTX_QUEUE_SIZE 256
+
+static struct mlx5hws_action *
+mlx5_fs_create_action_remove_header_vlan(struct mlx5hws_context *ctx);
+static void
+mlx5_fs_destroy_pr_pool(struct mlx5_fs_pool *pool, struct xarray *pr_pools,
+ unsigned long index);
+static void
+mlx5_fs_destroy_mh_pool(struct mlx5_fs_pool *pool, struct xarray *mh_pools,
+ unsigned long index);
+
+static int mlx5_fs_init_hws_actions_pool(struct mlx5_core_dev *dev,
+ struct mlx5_fs_hws_context *fs_ctx)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+ struct mlx5_fs_hws_actions_pool *hws_pool = &fs_ctx->hws_pool;
+ struct mlx5hws_action_reformat_header reformat_hdr = {};
+ struct mlx5hws_context *ctx = fs_ctx->hws_ctx;
+ enum mlx5hws_action_type action_type;
+ int err = -ENOSPC;
+
+ hws_pool->tag_action = mlx5hws_action_create_tag(ctx, flags);
+ if (!hws_pool->tag_action)
+ return err;
+ hws_pool->pop_vlan_action = mlx5hws_action_create_pop_vlan(ctx, flags);
+ if (!hws_pool->pop_vlan_action)
+ goto destroy_tag;
+ hws_pool->push_vlan_action = mlx5hws_action_create_push_vlan(ctx, flags);
+ if (!hws_pool->push_vlan_action)
+ goto destroy_pop_vlan;
+ hws_pool->drop_action = mlx5hws_action_create_dest_drop(ctx, flags);
+ if (!hws_pool->drop_action)
+ goto destroy_push_vlan;
+ action_type = MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2;
+ hws_pool->decapl2_action =
+ mlx5hws_action_create_reformat(ctx, action_type, 1,
+ &reformat_hdr, 0, flags);
+ if (!hws_pool->decapl2_action)
+ goto destroy_drop;
+ hws_pool->remove_hdr_vlan_action =
+ mlx5_fs_create_action_remove_header_vlan(ctx);
+ if (!hws_pool->remove_hdr_vlan_action)
+ goto destroy_decapl2;
+ err = mlx5_fs_hws_pr_pool_init(&hws_pool->insert_hdr_pool, dev, 0,
+ MLX5HWS_ACTION_TYP_INSERT_HEADER);
+ if (err)
+ goto destroy_remove_hdr;
+ err = mlx5_fs_hws_pr_pool_init(&hws_pool->dl3tnltol2_pool, dev, 0,
+ MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2);
+ if (err)
+ goto cleanup_insert_hdr;
+ xa_init(&hws_pool->el2tol3tnl_pools);
+ xa_init(&hws_pool->el2tol2tnl_pools);
+ xa_init(&hws_pool->mh_pools);
+ xa_init(&hws_pool->table_dests);
+ xa_init(&hws_pool->vport_dests);
+ xa_init(&hws_pool->vport_vhca_dests);
+ return 0;
+
+cleanup_insert_hdr:
+ mlx5_fs_hws_pr_pool_cleanup(&hws_pool->insert_hdr_pool);
+destroy_remove_hdr:
+ mlx5hws_action_destroy(hws_pool->remove_hdr_vlan_action);
+destroy_decapl2:
+ mlx5hws_action_destroy(hws_pool->decapl2_action);
+destroy_drop:
+ mlx5hws_action_destroy(hws_pool->drop_action);
+destroy_push_vlan:
+ mlx5hws_action_destroy(hws_pool->push_vlan_action);
+destroy_pop_vlan:
+ mlx5hws_action_destroy(hws_pool->pop_vlan_action);
+destroy_tag:
+ mlx5hws_action_destroy(hws_pool->tag_action);
+ return err;
+}
+
+static void mlx5_fs_cleanup_hws_actions_pool(struct mlx5_fs_hws_context *fs_ctx)
+{
+ struct mlx5_fs_hws_actions_pool *hws_pool = &fs_ctx->hws_pool;
+ struct mlx5hws_action *action;
+ struct mlx5_fs_pool *pool;
+ unsigned long i;
+
+ xa_for_each(&hws_pool->vport_vhca_dests, i, action)
+ mlx5hws_action_destroy(action);
+ xa_destroy(&hws_pool->vport_vhca_dests);
+ xa_for_each(&hws_pool->vport_dests, i, action)
+ mlx5hws_action_destroy(action);
+ xa_destroy(&hws_pool->vport_dests);
+ xa_destroy(&hws_pool->table_dests);
+ xa_for_each(&hws_pool->mh_pools, i, pool)
+ mlx5_fs_destroy_mh_pool(pool, &hws_pool->mh_pools, i);
+ xa_destroy(&hws_pool->mh_pools);
+ xa_for_each(&hws_pool->el2tol2tnl_pools, i, pool)
+ mlx5_fs_destroy_pr_pool(pool, &hws_pool->el2tol2tnl_pools, i);
+ xa_destroy(&hws_pool->el2tol2tnl_pools);
+ xa_for_each(&hws_pool->el2tol3tnl_pools, i, pool)
+ mlx5_fs_destroy_pr_pool(pool, &hws_pool->el2tol3tnl_pools, i);
+ xa_destroy(&hws_pool->el2tol3tnl_pools);
+ mlx5_fs_hws_pr_pool_cleanup(&hws_pool->dl3tnltol2_pool);
+ mlx5_fs_hws_pr_pool_cleanup(&hws_pool->insert_hdr_pool);
+ mlx5hws_action_destroy(hws_pool->remove_hdr_vlan_action);
+ mlx5hws_action_destroy(hws_pool->decapl2_action);
+ mlx5hws_action_destroy(hws_pool->drop_action);
+ mlx5hws_action_destroy(hws_pool->push_vlan_action);
+ mlx5hws_action_destroy(hws_pool->pop_vlan_action);
+ mlx5hws_action_destroy(hws_pool->tag_action);
+}
+
+static int mlx5_cmd_hws_create_ns(struct mlx5_flow_root_namespace *ns)
+{
+ struct mlx5hws_context_attr hws_ctx_attr = {};
+ int err;
+
+ hws_ctx_attr.queues = min_t(int, num_online_cpus(),
+ MLX5HWS_CTX_MAX_NUM_OF_QUEUES);
+ hws_ctx_attr.queue_size = MLX5HWS_CTX_QUEUE_SIZE;
+
+ ns->fs_hws_context.hws_ctx =
+ mlx5hws_context_open(ns->dev, &hws_ctx_attr);
+ if (!ns->fs_hws_context.hws_ctx) {
+ mlx5_core_err(ns->dev, "Failed to create hws flow namespace\n");
+ return -EINVAL;
+ }
+ err = mlx5_fs_init_hws_actions_pool(ns->dev, &ns->fs_hws_context);
+ if (err) {
+ mlx5_core_err(ns->dev, "Failed to init hws actions pool\n");
+ mlx5hws_context_close(ns->fs_hws_context.hws_ctx);
+ return err;
+ }
+ return 0;
+}
+
+static int mlx5_cmd_hws_destroy_ns(struct mlx5_flow_root_namespace *ns)
+{
+ mlx5_fs_cleanup_hws_actions_pool(&ns->fs_hws_context);
+ return mlx5hws_context_close(ns->fs_hws_context.hws_ctx);
+}
+
+static int mlx5_cmd_hws_set_peer(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_root_namespace *peer_ns,
+ u16 peer_vhca_id)
+{
+ struct mlx5hws_context *peer_ctx = NULL;
+
+ if (peer_ns)
+ peer_ctx = peer_ns->fs_hws_context.hws_ctx;
+ mlx5hws_context_set_peer(ns->fs_hws_context.hws_ctx, peer_ctx,
+ peer_vhca_id);
+ return 0;
+}
+
+static int mlx5_fs_set_ft_default_miss(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5hws_table *next_tbl;
+ int err;
+
+ if (!ns->fs_hws_context.hws_ctx)
+ return -EINVAL;
+
+ /* if no change required, return */
+ if (!next_ft && !ft->fs_hws_table.miss_ft_set)
+ return 0;
+
+ next_tbl = next_ft ? next_ft->fs_hws_table.hws_table : NULL;
+ err = mlx5hws_table_set_default_miss(ft->fs_hws_table.hws_table, next_tbl);
+ if (err) {
+ mlx5_core_err(ns->dev, "Failed setting FT default miss (%d)\n", err);
+ return err;
+ }
+ ft->fs_hws_table.miss_ft_set = !!next_tbl;
+ return 0;
+}
+
+static int mlx5_fs_add_flow_table_dest_action(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+ struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context;
+ struct mlx5hws_action *dest_ft_action;
+ struct xarray *dests_xa;
+ int err;
+
+ dest_ft_action = mlx5hws_action_create_dest_table_num(fs_ctx->hws_ctx,
+ ft->id, flags);
+ if (!dest_ft_action) {
+ mlx5_core_err(ns->dev, "Failed creating dest table action\n");
+ return -ENOMEM;
+ }
+
+ dests_xa = &fs_ctx->hws_pool.table_dests;
+ err = xa_insert(dests_xa, ft->id, dest_ft_action, GFP_KERNEL);
+ if (err)
+ mlx5hws_action_destroy(dest_ft_action);
+ return err;
+}
+
+static int mlx5_fs_del_flow_table_dest_action(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft)
+{
+ struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context;
+ struct mlx5hws_action *dest_ft_action;
+ struct xarray *dests_xa;
+ int err;
+
+ dests_xa = &fs_ctx->hws_pool.table_dests;
+ dest_ft_action = xa_erase(dests_xa, ft->id);
+ if (!dest_ft_action) {
+ mlx5_core_err(ns->dev, "Failed to erase dest ft action\n");
+ return -ENOENT;
+ }
+
+ err = mlx5hws_action_destroy(dest_ft_action);
+ if (err)
+ mlx5_core_err(ns->dev, "Failed to destroy dest ft action\n");
+ return err;
+}
+
+static int mlx5_cmd_hws_create_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table_attr *ft_attr,
+ struct mlx5_flow_table *next_ft)
+{
+ struct mlx5hws_context *ctx = ns->fs_hws_context.hws_ctx;
+ struct mlx5hws_table_attr tbl_attr = {};
+ struct mlx5hws_table *tbl;
+ int err;
+
+ if (mlx5_fs_cmd_is_fw_term_table(ft)) {
+ err = mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, ft_attr,
+ next_ft);
+ if (err)
+ return err;
+ err = mlx5_fs_add_flow_table_dest_action(ns, ft);
+ if (err)
+ mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft);
+ return err;
+ }
+
+ if (ns->table_type != FS_FT_FDB) {
+ mlx5_core_err(ns->dev, "Table type %d not supported for HWS\n",
+ ns->table_type);
+ return -EOPNOTSUPP;
+ }
+
+ tbl_attr.type = MLX5HWS_TABLE_TYPE_FDB;
+ tbl_attr.level = ft_attr->level;
+ tbl = mlx5hws_table_create(ctx, &tbl_attr);
+ if (!tbl) {
+ mlx5_core_err(ns->dev, "Failed creating hws flow_table\n");
+ return -EINVAL;
+ }
+
+ ft->fs_hws_table.hws_table = tbl;
+ ft->id = mlx5hws_table_get_id(tbl);
+
+ if (next_ft) {
+ err = mlx5_fs_set_ft_default_miss(ns, ft, next_ft);
+ if (err)
+ goto destroy_table;
+ }
+
+ ft->max_fte = INT_MAX;
+
+ err = mlx5_fs_add_flow_table_dest_action(ns, ft);
+ if (err)
+ goto clear_ft_miss;
+ return 0;
+
+clear_ft_miss:
+ mlx5_fs_set_ft_default_miss(ns, ft, NULL);
+destroy_table:
+ mlx5hws_table_destroy(tbl);
+ ft->fs_hws_table.hws_table = NULL;
+ return err;
+}
+
+static int mlx5_cmd_hws_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft)
+{
+ int err;
+
+ err = mlx5_fs_del_flow_table_dest_action(ns, ft);
+ if (err)
+ mlx5_core_err(ns->dev, "Failed to remove dest action (%d)\n", err);
+
+ if (mlx5_fs_cmd_is_fw_term_table(ft))
+ return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft);
+
+ err = mlx5_fs_set_ft_default_miss(ns, ft, NULL);
+ if (err)
+ mlx5_core_err(ns->dev, "Failed to disconnect next table (%d)\n", err);
+
+ err = mlx5hws_table_destroy(ft->fs_hws_table.hws_table);
+ if (err)
+ mlx5_core_err(ns->dev, "Failed to destroy flow_table (%d)\n", err);
+
+ return err;
+}
+
+static int mlx5_cmd_hws_modify_flow_table(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_table *next_ft)
+{
+ if (mlx5_fs_cmd_is_fw_term_table(ft))
+ return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft);
+
+ return mlx5_fs_set_ft_default_miss(ns, ft, next_ft);
+}
+
+static int mlx5_cmd_hws_update_root_ft(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ u32 underlay_qpn,
+ bool disconnect)
+{
+ return mlx5_fs_cmd_get_fw_cmds()->update_root_ft(ns, ft, underlay_qpn,
+ disconnect);
+}
+
+static int mlx5_cmd_hws_create_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft, u32 *in,
+ struct mlx5_flow_group *fg)
+{
+ struct mlx5hws_match_parameters mask;
+ struct mlx5hws_bwc_matcher *matcher;
+ u8 match_criteria_enable;
+ u32 priority;
+
+ if (mlx5_fs_cmd_is_fw_term_table(ft))
+ return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, fg);
+
+ mask.match_buf = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+ mask.match_sz = sizeof(fg->mask.match_criteria);
+
+ match_criteria_enable = MLX5_GET(create_flow_group_in, in,
+ match_criteria_enable);
+ priority = MLX5_GET(create_flow_group_in, in, start_flow_index);
+ matcher = mlx5hws_bwc_matcher_create(ft->fs_hws_table.hws_table,
+ priority, match_criteria_enable,
+ &mask);
+ if (!matcher) {
+ mlx5_core_err(ns->dev, "Failed creating matcher\n");
+ return -EINVAL;
+ }
+
+ fg->fs_hws_matcher.matcher = matcher;
+ return 0;
+}
+
+static int mlx5_cmd_hws_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ if (mlx5_fs_cmd_is_fw_term_table(ft))
+ return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg);
+
+ return mlx5hws_bwc_matcher_destroy(fg->fs_hws_matcher.matcher);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_dest_action_ft(struct mlx5_fs_hws_context *fs_ctx,
+ struct mlx5_flow_rule *dst)
+{
+ return xa_load(&fs_ctx->hws_pool.table_dests, dst->dest_attr.ft->id);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_dest_action_table_num(struct mlx5_fs_hws_context *fs_ctx,
+ struct mlx5_flow_rule *dst)
+{
+ u32 table_num = dst->dest_attr.ft_num;
+
+ return xa_load(&fs_ctx->hws_pool.table_dests, table_num);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_create_dest_action_table_num(struct mlx5_fs_hws_context *fs_ctx,
+ struct mlx5_flow_rule *dst)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+ struct mlx5hws_context *ctx = fs_ctx->hws_ctx;
+ u32 table_num = dst->dest_attr.ft_num;
+
+ return mlx5hws_action_create_dest_table_num(ctx, table_num, flags);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_dest_action_vport(struct mlx5_fs_hws_context *fs_ctx,
+ struct mlx5_flow_rule *dst,
+ bool is_dest_type_uplink)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+ struct mlx5_flow_destination *dest_attr = &dst->dest_attr;
+ struct mlx5hws_context *ctx = fs_ctx->hws_ctx;
+ struct mlx5hws_action *dest;
+ struct xarray *dests_xa;
+ bool vhca_id_valid;
+ unsigned long idx;
+ u16 vport_num;
+ int err;
+
+ vhca_id_valid = is_dest_type_uplink ||
+ (dest_attr->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID);
+ vport_num = is_dest_type_uplink ? MLX5_VPORT_UPLINK : dest_attr->vport.num;
+ if (vhca_id_valid) {
+ dests_xa = &fs_ctx->hws_pool.vport_vhca_dests;
+ idx = dest_attr->vport.vhca_id << 16 | vport_num;
+ } else {
+ dests_xa = &fs_ctx->hws_pool.vport_dests;
+ idx = vport_num;
+ }
+dest_load:
+ dest = xa_load(dests_xa, idx);
+ if (dest)
+ return dest;
+
+ dest = mlx5hws_action_create_dest_vport(ctx, vport_num, vhca_id_valid,
+ dest_attr->vport.vhca_id, flags);
+
+ err = xa_insert(dests_xa, idx, dest, GFP_KERNEL);
+ if (err) {
+ mlx5hws_action_destroy(dest);
+ dest = NULL;
+
+ if (err == -EBUSY)
+ /* xarray entry was already stored by another thread */
+ goto dest_load;
+ }
+
+ return dest;
+}
+
+static struct mlx5hws_action *
+mlx5_fs_create_dest_action_range(struct mlx5hws_context *ctx,
+ struct mlx5_flow_rule *dst)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+ struct mlx5_flow_destination *dest_attr = &dst->dest_attr;
+
+ return mlx5hws_action_create_dest_match_range(ctx,
+ dest_attr->range.field,
+ dest_attr->range.hit_ft,
+ dest_attr->range.miss_ft,
+ dest_attr->range.min,
+ dest_attr->range.max,
+ flags);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_create_action_dest_array(struct mlx5hws_context *ctx,
+ struct mlx5hws_action_dest_attr *dests,
+ u32 num_of_dests, bool ignore_flow_level,
+ u32 flow_source)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+
+ return mlx5hws_action_create_dest_array(ctx, num_of_dests, dests,
+ ignore_flow_level,
+ flow_source, flags);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_action_push_vlan(struct mlx5_fs_hws_context *fs_ctx)
+{
+ return fs_ctx->hws_pool.push_vlan_action;
+}
+
+static u32 mlx5_fs_calc_vlan_hdr(struct mlx5_fs_vlan *vlan)
+{
+ u16 n_ethtype = vlan->ethtype;
+ u8 prio = vlan->prio;
+ u16 vid = vlan->vid;
+
+ return (u32)n_ethtype << 16 | (u32)(prio) << 12 | (u32)vid;
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_action_pop_vlan(struct mlx5_fs_hws_context *fs_ctx)
+{
+ return fs_ctx->hws_pool.pop_vlan_action;
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_action_decap_tnl_l2_to_l2(struct mlx5_fs_hws_context *fs_ctx)
+{
+ return fs_ctx->hws_pool.decapl2_action;
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_dest_action_drop(struct mlx5_fs_hws_context *fs_ctx)
+{
+ return fs_ctx->hws_pool.drop_action;
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_action_tag(struct mlx5_fs_hws_context *fs_ctx)
+{
+ return fs_ctx->hws_pool.tag_action;
+}
+
+static struct mlx5hws_action *
+mlx5_fs_create_action_last(struct mlx5hws_context *ctx)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+
+ return mlx5hws_action_create_last(ctx, flags);
+}
+
+static void mlx5_fs_destroy_fs_action(struct mlx5_fs_hws_rule_action *fs_action)
+{
+ switch (mlx5hws_action_get_type(fs_action->action)) {
+ case MLX5HWS_ACTION_TYP_CTR:
+ mlx5_fc_put_hws_action(fs_action->counter);
+ break;
+ default:
+ mlx5hws_action_destroy(fs_action->action);
+ }
+}
+
+static void
+mlx5_fs_destroy_fs_actions(struct mlx5_fs_hws_rule_action **fs_actions,
+ int *num_fs_actions)
+{
+ int i;
+
+ /* Free in reverse order to handle action dependencies */
+ for (i = *num_fs_actions - 1; i >= 0; i--)
+ mlx5_fs_destroy_fs_action(*fs_actions + i);
+ *num_fs_actions = 0;
+ kfree(*fs_actions);
+ *fs_actions = NULL;
+}
+
+/* Splits FTE's actions into cached, rule and destination actions.
+ * The cached and destination actions are saved on the fte hws rule.
+ * The rule actions are returned as a parameter, together with their count.
+ * We want to support a rule with 32 destinations, which means we need to
+ * account for 32 destinations plus usually a counter plus one more action
+ * for a multi-destination flow table.
+ * 32 is SW limitation for array size, keep. HWS limitation is 16M STEs per matcher
+ */
+#define MLX5_FLOW_CONTEXT_ACTION_MAX 34
+static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte,
+ struct mlx5hws_rule_action **ractions)
+{
+ struct mlx5_flow_act *fte_action = &fte->act_dests.action;
+ struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context;
+ struct mlx5hws_action_dest_attr *dest_actions;
+ struct mlx5hws_context *ctx = fs_ctx->hws_ctx;
+ struct mlx5_fs_hws_rule_action *fs_actions;
+ struct mlx5_core_dev *dev = ns->dev;
+ struct mlx5hws_action *dest_action;
+ struct mlx5hws_action *tmp_action;
+ struct mlx5_fs_hws_pr *pr_data;
+ struct mlx5_fs_hws_mh *mh_data;
+ bool delay_encap_set = false;
+ struct mlx5_flow_rule *dst;
+ int num_dest_actions = 0;
+ int num_fs_actions = 0;
+ int num_actions = 0;
+ int err;
+
+ *ractions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(**ractions),
+ GFP_KERNEL);
+ if (!*ractions) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ fs_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX,
+ sizeof(*fs_actions), GFP_KERNEL);
+ if (!fs_actions) {
+ err = -ENOMEM;
+ goto free_actions_alloc;
+ }
+
+ dest_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX,
+ sizeof(*dest_actions), GFP_KERNEL);
+ if (!dest_actions) {
+ err = -ENOMEM;
+ goto free_fs_actions_alloc;
+ }
+
+ /* The order of the actions are must to be kept, only the following
+ * order is supported by HW steering:
+ * HWS: decap -> remove_hdr -> pop_vlan -> modify header -> push_vlan
+ * -> reformat (insert_hdr/encap) -> ctr -> tag -> aso
+ * -> drop -> FWD:tbl/vport/sampler/tbl_num/range -> dest_array -> last
+ */
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+ tmp_action = mlx5_fs_get_action_decap_tnl_l2_to_l2(fs_ctx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_dest_actions_alloc;
+ }
+ (*ractions)[num_actions++].action = tmp_action;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
+ int reformat_type = fte_action->pkt_reformat->reformat_type;
+
+ if (fte_action->pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) {
+ mlx5_core_err(dev, "FW-owned reformat can't be used in HWS rule\n");
+ err = -EINVAL;
+ goto free_actions;
+ }
+
+ if (reformat_type == MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2) {
+ pr_data = fte_action->pkt_reformat->fs_hws_action.pr_data;
+ (*ractions)[num_actions].reformat.offset = pr_data->offset;
+ (*ractions)[num_actions].reformat.hdr_idx = pr_data->hdr_idx;
+ (*ractions)[num_actions].reformat.data = pr_data->data;
+ (*ractions)[num_actions++].action =
+ fte_action->pkt_reformat->fs_hws_action.hws_action;
+ } else if (reformat_type == MLX5_REFORMAT_TYPE_REMOVE_HDR) {
+ (*ractions)[num_actions++].action =
+ fte_action->pkt_reformat->fs_hws_action.hws_action;
+ } else {
+ delay_encap_set = true;
+ }
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) {
+ tmp_action = mlx5_fs_get_action_pop_vlan(fs_ctx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ (*ractions)[num_actions++].action = tmp_action;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2) {
+ tmp_action = mlx5_fs_get_action_pop_vlan(fs_ctx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ (*ractions)[num_actions++].action = tmp_action;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+ mh_data = fte_action->modify_hdr->fs_hws_action.mh_data;
+ (*ractions)[num_actions].modify_header.offset = mh_data->offset;
+ (*ractions)[num_actions].modify_header.data = mh_data->data;
+ (*ractions)[num_actions++].action =
+ fte_action->modify_hdr->fs_hws_action.hws_action;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) {
+ tmp_action = mlx5_fs_get_action_push_vlan(fs_ctx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ (*ractions)[num_actions].push_vlan.vlan_hdr =
+ htonl(mlx5_fs_calc_vlan_hdr(&fte_action->vlan[0]));
+ (*ractions)[num_actions++].action = tmp_action;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2) {
+ tmp_action = mlx5_fs_get_action_push_vlan(fs_ctx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ (*ractions)[num_actions].push_vlan.vlan_hdr =
+ htonl(mlx5_fs_calc_vlan_hdr(&fte_action->vlan[1]));
+ (*ractions)[num_actions++].action = tmp_action;
+ }
+
+ if (delay_encap_set) {
+ pr_data = fte_action->pkt_reformat->fs_hws_action.pr_data;
+ (*ractions)[num_actions].reformat.offset = pr_data->offset;
+ (*ractions)[num_actions].reformat.data = pr_data->data;
+ (*ractions)[num_actions++].action =
+ fte_action->pkt_reformat->fs_hws_action.hws_action;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ struct mlx5_fc *counter;
+
+ if (dst->dest_attr.type !=
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+
+ counter = dst->dest_attr.counter;
+ tmp_action = mlx5_fc_get_hws_action(ctx, counter);
+ if (!tmp_action) {
+ err = -EINVAL;
+ goto free_actions;
+ }
+
+ (*ractions)[num_actions].counter.offset =
+ mlx5_fc_id(counter) - mlx5_fc_get_base_id(counter);
+ (*ractions)[num_actions++].action = tmp_action;
+ fs_actions[num_fs_actions].action = tmp_action;
+ fs_actions[num_fs_actions++].counter = counter;
+ }
+ }
+
+ if (fte->act_dests.flow_context.flow_tag) {
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ tmp_action = mlx5_fs_get_action_tag(fs_ctx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ (*ractions)[num_actions].tag.value = fte->act_dests.flow_context.flow_tag;
+ (*ractions)[num_actions++].action = tmp_action;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
+ dest_action = mlx5_fs_get_dest_action_drop(fs_ctx);
+ if (!dest_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ dest_actions[num_dest_actions++].dest = dest_action;
+ }
+
+ if (fte_action->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ struct mlx5_flow_destination *attr = &dst->dest_attr;
+ bool type_uplink =
+ attr->type == MLX5_FLOW_DESTINATION_TYPE_UPLINK;
+
+ if (num_fs_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ num_dest_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ if (attr->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ switch (attr->type) {
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ dest_action = mlx5_fs_get_dest_action_ft(fs_ctx, dst);
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ dest_action = mlx5_fs_get_dest_action_table_num(fs_ctx,
+ dst);
+ if (dest_action)
+ break;
+ dest_action = mlx5_fs_create_dest_action_table_num(fs_ctx,
+ dst);
+ fs_actions[num_fs_actions++].action = dest_action;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_RANGE:
+ dest_action = mlx5_fs_create_dest_action_range(ctx, dst);
+ fs_actions[num_fs_actions++].action = dest_action;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_UPLINK:
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ dest_action = mlx5_fs_get_dest_action_vport(fs_ctx, dst,
+ type_uplink);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ if (!dest_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ dest_actions[num_dest_actions++].dest = dest_action;
+ }
+ }
+
+ if (num_dest_actions == 1) {
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ (*ractions)[num_actions++].action = dest_actions->dest;
+ } else if (num_dest_actions > 1) {
+ u32 flow_source = fte->act_dests.flow_context.flow_source;
+ bool ignore_flow_level;
+
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ num_fs_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ ignore_flow_level =
+ !!(fte_action->flags & FLOW_ACT_IGNORE_FLOW_LEVEL);
+ tmp_action = mlx5_fs_create_action_dest_array(ctx, dest_actions,
+ num_dest_actions,
+ ignore_flow_level,
+ flow_source);
+ if (!tmp_action) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ fs_actions[num_fs_actions++].action = tmp_action;
+ (*ractions)[num_actions++].action = tmp_action;
+ }
+
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ num_fs_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+
+ tmp_action = mlx5_fs_create_action_last(ctx);
+ if (!tmp_action) {
+ err = -ENOMEM;
+ goto free_actions;
+ }
+ fs_actions[num_fs_actions++].action = tmp_action;
+ (*ractions)[num_actions++].action = tmp_action;
+
+ kfree(dest_actions);
+
+ /* Actions created specifically for this rule will be destroyed
+ * once rule is deleted.
+ */
+ fte->fs_hws_rule.num_fs_actions = num_fs_actions;
+ fte->fs_hws_rule.hws_fs_actions = fs_actions;
+
+ return 0;
+
+free_actions:
+ mlx5_fs_destroy_fs_actions(&fs_actions, &num_fs_actions);
+free_dest_actions_alloc:
+ kfree(dest_actions);
+free_fs_actions_alloc:
+ kfree(fs_actions);
+free_actions_alloc:
+ kfree(*ractions);
+ *ractions = NULL;
+out_err:
+ return err;
+}
+
+static int mlx5_cmd_hws_create_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ struct fs_fte *fte)
+{
+ struct mlx5hws_match_parameters params;
+ struct mlx5hws_rule_action *ractions;
+ struct mlx5hws_bwc_rule *rule;
+ int err = 0;
+
+ if (mlx5_fs_cmd_is_fw_term_table(ft)) {
+ /* Packet reformat on terminamtion table not supported yet */
+ if (fte->act_dests.action.action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ return -EOPNOTSUPP;
+ return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte);
+ }
+
+ err = mlx5_fs_fte_get_hws_actions(ns, ft, group, fte, &ractions);
+ if (err)
+ goto out_err;
+
+ params.match_sz = sizeof(fte->val);
+ params.match_buf = fte->val;
+
+ rule = mlx5hws_bwc_rule_create(group->fs_hws_matcher.matcher, &params,
+ fte->act_dests.flow_context.flow_source,
+ ractions);
+ kfree(ractions);
+ if (!rule) {
+ err = -EINVAL;
+ goto free_actions;
+ }
+
+ fte->fs_hws_rule.bwc_rule = rule;
+ return 0;
+
+free_actions:
+ mlx5_fs_destroy_fs_actions(&fte->fs_hws_rule.hws_fs_actions,
+ &fte->fs_hws_rule.num_fs_actions);
+out_err:
+ mlx5_core_err(ns->dev, "Failed to create hws rule err(%d)\n", err);
+ return err;
+}
+
+static int mlx5_cmd_hws_delete_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct fs_fte *fte)
+{
+ struct mlx5_fs_hws_rule *rule = &fte->fs_hws_rule;
+ int err;
+
+ if (mlx5_fs_cmd_is_fw_term_table(ft))
+ return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte);
+
+ err = mlx5hws_bwc_rule_destroy(rule->bwc_rule);
+ rule->bwc_rule = NULL;
+
+ mlx5_fs_destroy_fs_actions(&rule->hws_fs_actions, &rule->num_fs_actions);
+
+ return err;
+}
+
+static int mlx5_cmd_hws_update_fte(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *group,
+ int modify_mask,
+ struct fs_fte *fte)
+{
+ int allowed_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) |
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST) |
+ BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+ struct mlx5_fs_hws_rule_action *saved_hws_fs_actions;
+ struct mlx5hws_rule_action *ractions;
+ int saved_num_fs_actions;
+ int ret;
+
+ if (mlx5_fs_cmd_is_fw_term_table(ft))
+ return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group,
+ modify_mask, fte);
+
+ if ((modify_mask & ~allowed_mask) != 0)
+ return -EINVAL;
+
+ saved_hws_fs_actions = fte->fs_hws_rule.hws_fs_actions;
+ saved_num_fs_actions = fte->fs_hws_rule.num_fs_actions;
+
+ ret = mlx5_fs_fte_get_hws_actions(ns, ft, group, fte, &ractions);
+ if (ret)
+ return ret;
+
+ ret = mlx5hws_bwc_rule_action_update(fte->fs_hws_rule.bwc_rule, ractions);
+ kfree(ractions);
+ if (ret)
+ goto restore_actions;
+
+ mlx5_fs_destroy_fs_actions(&saved_hws_fs_actions, &saved_num_fs_actions);
+ return ret;
+
+restore_actions:
+ mlx5_fs_destroy_fs_actions(&fte->fs_hws_rule.hws_fs_actions,
+ &fte->fs_hws_rule.num_fs_actions);
+ fte->fs_hws_rule.hws_fs_actions = saved_hws_fs_actions;
+ fte->fs_hws_rule.num_fs_actions = saved_num_fs_actions;
+ return ret;
+}
+
+static struct mlx5hws_action *
+mlx5_fs_create_action_remove_header_vlan(struct mlx5hws_context *ctx)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+ struct mlx5hws_action_remove_header_attr remove_hdr_vlan = {};
+
+ /* MAC anchor not supported in HWS reformat, use VLAN anchor */
+ remove_hdr_vlan.anchor = MLX5_REFORMAT_CONTEXT_ANCHOR_VLAN_START;
+ remove_hdr_vlan.offset = 0;
+ remove_hdr_vlan.size = sizeof(struct vlan_hdr);
+ return mlx5hws_action_create_remove_header(ctx, &remove_hdr_vlan, flags);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_get_action_remove_header_vlan(struct mlx5_fs_hws_context *fs_ctx,
+ struct mlx5_pkt_reformat_params *params)
+{
+ if (!params ||
+ params->param_0 != MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START ||
+ params->param_1 != offsetof(struct vlan_ethhdr, h_vlan_proto) ||
+ params->size != sizeof(struct vlan_hdr))
+ return NULL;
+
+ return fs_ctx->hws_pool.remove_hdr_vlan_action;
+}
+
+static int
+mlx5_fs_verify_insert_header_params(struct mlx5_core_dev *mdev,
+ struct mlx5_pkt_reformat_params *params)
+{
+ if ((!params->data && params->size) || (params->data && !params->size) ||
+ MLX5_CAP_GEN_2(mdev, max_reformat_insert_size) < params->size ||
+ MLX5_CAP_GEN_2(mdev, max_reformat_insert_offset) < params->param_1) {
+ mlx5_core_err(mdev, "Invalid reformat params for INSERT_HDR\n");
+ return -EINVAL;
+ }
+ if (params->param_0 != MLX5_FS_INSERT_HDR_VLAN_ANCHOR ||
+ params->param_1 != MLX5_FS_INSERT_HDR_VLAN_OFFSET ||
+ params->size != MLX5_FS_INSERT_HDR_VLAN_SIZE) {
+ mlx5_core_err(mdev, "Only vlan insert header supported\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int
+mlx5_fs_verify_encap_decap_params(struct mlx5_core_dev *dev,
+ struct mlx5_pkt_reformat_params *params)
+{
+ if (params->param_0 || params->param_1) {
+ mlx5_core_err(dev, "Invalid reformat params\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct mlx5_fs_pool *
+mlx5_fs_get_pr_encap_pool(struct mlx5_core_dev *dev, struct xarray *pr_pools,
+ enum mlx5hws_action_type reformat_type, size_t size)
+{
+ struct mlx5_fs_pool *pr_pool;
+ unsigned long index = size;
+ int err;
+
+ pr_pool = xa_load(pr_pools, index);
+ if (pr_pool)
+ return pr_pool;
+
+ pr_pool = kzalloc(sizeof(*pr_pool), GFP_KERNEL);
+ if (!pr_pool)
+ return ERR_PTR(-ENOMEM);
+ err = mlx5_fs_hws_pr_pool_init(pr_pool, dev, size, reformat_type);
+ if (err)
+ goto free_pr_pool;
+ err = xa_insert(pr_pools, index, pr_pool, GFP_KERNEL);
+ if (err)
+ goto cleanup_pr_pool;
+ return pr_pool;
+
+cleanup_pr_pool:
+ mlx5_fs_hws_pr_pool_cleanup(pr_pool);
+free_pr_pool:
+ kfree(pr_pool);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_fs_destroy_pr_pool(struct mlx5_fs_pool *pool, struct xarray *pr_pools,
+ unsigned long index)
+{
+ xa_erase(pr_pools, index);
+ mlx5_fs_hws_pr_pool_cleanup(pool);
+ kfree(pool);
+}
+
+static int
+mlx5_cmd_hws_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat_params *params,
+ enum mlx5_flow_namespace_type namespace,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ struct mlx5_fs_hws_context *fs_ctx = &ns->fs_hws_context;
+ struct mlx5_fs_hws_actions_pool *hws_pool;
+ struct mlx5hws_action *hws_action = NULL;
+ struct mlx5_fs_hws_pr *pr_data = NULL;
+ struct mlx5_fs_pool *pr_pool = NULL;
+ struct mlx5_core_dev *dev = ns->dev;
+ u8 hdr_idx = 0;
+ int err;
+
+ if (!params)
+ return -EINVAL;
+
+ hws_pool = &fs_ctx->hws_pool;
+
+ switch (params->type) {
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
+ case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ if (mlx5_fs_verify_encap_decap_params(dev, params))
+ return -EINVAL;
+ pr_pool = mlx5_fs_get_pr_encap_pool(dev, &hws_pool->el2tol2tnl_pools,
+ MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2,
+ params->size);
+ if (IS_ERR(pr_pool))
+ return PTR_ERR(pr_pool);
+ break;
+ case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ if (mlx5_fs_verify_encap_decap_params(dev, params))
+ return -EINVAL;
+ pr_pool = mlx5_fs_get_pr_encap_pool(dev, &hws_pool->el2tol3tnl_pools,
+ MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3,
+ params->size);
+ if (IS_ERR(pr_pool))
+ return PTR_ERR(pr_pool);
+ break;
+ case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ if (mlx5_fs_verify_encap_decap_params(dev, params))
+ return -EINVAL;
+ pr_pool = &hws_pool->dl3tnltol2_pool;
+ hdr_idx = params->size == ETH_HLEN ?
+ MLX5_FS_DL3TNLTOL2_MAC_HDR_IDX :
+ MLX5_FS_DL3TNLTOL2_MAC_VLAN_HDR_IDX;
+ break;
+ case MLX5_REFORMAT_TYPE_INSERT_HDR:
+ err = mlx5_fs_verify_insert_header_params(dev, params);
+ if (err)
+ return err;
+ pr_pool = &hws_pool->insert_hdr_pool;
+ break;
+ case MLX5_REFORMAT_TYPE_REMOVE_HDR:
+ hws_action = mlx5_fs_get_action_remove_header_vlan(fs_ctx, params);
+ if (!hws_action)
+ mlx5_core_err(dev, "Only vlan remove header supported\n");
+ break;
+ default:
+ mlx5_core_err(ns->dev, "Packet-reformat not supported(%d)\n",
+ params->type);
+ return -EOPNOTSUPP;
+ }
+
+ if (pr_pool) {
+ pr_data = mlx5_fs_hws_pr_pool_acquire_pr(pr_pool);
+ if (IS_ERR_OR_NULL(pr_data))
+ return !pr_data ? -EINVAL : PTR_ERR(pr_data);
+ hws_action = pr_data->bulk->hws_action;
+ if (!hws_action) {
+ mlx5_core_err(dev,
+ "Failed allocating packet-reformat action\n");
+ err = -EINVAL;
+ goto release_pr;
+ }
+ pr_data->data = kmemdup(params->data, params->size, GFP_KERNEL);
+ if (!pr_data->data) {
+ err = -ENOMEM;
+ goto release_pr;
+ }
+ pr_data->hdr_idx = hdr_idx;
+ pr_data->data_size = params->size;
+ pkt_reformat->fs_hws_action.pr_data = pr_data;
+ }
+
+ pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_SW;
+ pkt_reformat->fs_hws_action.hws_action = hws_action;
+ return 0;
+
+release_pr:
+ if (pr_pool && pr_data)
+ mlx5_fs_hws_pr_pool_release_pr(pr_pool, pr_data);
+ return err;
+}
+
+static void mlx5_cmd_hws_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_pkt_reformat *pkt_reformat)
+{
+ struct mlx5_fs_hws_actions_pool *hws_pool = &ns->fs_hws_context.hws_pool;
+ struct mlx5_core_dev *dev = ns->dev;
+ struct mlx5_fs_hws_pr *pr_data;
+ struct mlx5_fs_pool *pr_pool;
+
+ if (pkt_reformat->reformat_type == MLX5_REFORMAT_TYPE_REMOVE_HDR)
+ return;
+
+ if (!pkt_reformat->fs_hws_action.pr_data) {
+ mlx5_core_err(ns->dev, "Failed release packet-reformat\n");
+ return;
+ }
+ pr_data = pkt_reformat->fs_hws_action.pr_data;
+
+ switch (pkt_reformat->reformat_type) {
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_NVGRE:
+ case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL:
+ pr_pool = mlx5_fs_get_pr_encap_pool(dev, &hws_pool->el2tol2tnl_pools,
+ MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2,
+ pr_data->data_size);
+ break;
+ case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL:
+ pr_pool = mlx5_fs_get_pr_encap_pool(dev, &hws_pool->el2tol2tnl_pools,
+ MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2,
+ pr_data->data_size);
+ break;
+ case MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2:
+ pr_pool = &hws_pool->dl3tnltol2_pool;
+ break;
+ case MLX5_REFORMAT_TYPE_INSERT_HDR:
+ pr_pool = &hws_pool->insert_hdr_pool;
+ break;
+ default:
+ mlx5_core_err(ns->dev, "Unknown packet-reformat type\n");
+ return;
+ }
+ if (!pkt_reformat->fs_hws_action.pr_data || IS_ERR(pr_pool)) {
+ mlx5_core_err(ns->dev, "Failed release packet-reformat\n");
+ return;
+ }
+ kfree(pr_data->data);
+ mlx5_fs_hws_pr_pool_release_pr(pr_pool, pr_data);
+ pkt_reformat->fs_hws_action.pr_data = NULL;
+}
+
+static struct mlx5_fs_pool *
+mlx5_fs_create_mh_pool(struct mlx5_core_dev *dev,
+ struct mlx5hws_action_mh_pattern *pattern,
+ struct xarray *mh_pools, unsigned long index)
+{
+ struct mlx5_fs_pool *pool;
+ int err;
+
+ pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+ if (!pool)
+ return ERR_PTR(-ENOMEM);
+ err = mlx5_fs_hws_mh_pool_init(pool, dev, pattern);
+ if (err)
+ goto free_pool;
+ err = xa_insert(mh_pools, index, pool, GFP_KERNEL);
+ if (err)
+ goto cleanup_pool;
+ return pool;
+
+cleanup_pool:
+ mlx5_fs_hws_mh_pool_cleanup(pool);
+free_pool:
+ kfree(pool);
+ return ERR_PTR(err);
+}
+
+static void
+mlx5_fs_destroy_mh_pool(struct mlx5_fs_pool *pool, struct xarray *mh_pools,
+ unsigned long index)
+{
+ xa_erase(mh_pools, index);
+ mlx5_fs_hws_mh_pool_cleanup(pool);
+ kfree(pool);
+}
+
+static int mlx5_cmd_hws_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
+ u8 namespace, u8 num_actions,
+ void *modify_actions,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ struct mlx5_fs_hws_actions_pool *hws_pool = &ns->fs_hws_context.hws_pool;
+ struct mlx5hws_action_mh_pattern pattern = {};
+ struct mlx5_fs_hws_mh *mh_data = NULL;
+ struct mlx5hws_action *hws_action;
+ struct mlx5_fs_pool *pool;
+ unsigned long i, cnt = 0;
+ bool known_pattern;
+ int err;
+
+ pattern.sz = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * num_actions;
+ pattern.data = modify_actions;
+
+ known_pattern = false;
+ xa_for_each(&hws_pool->mh_pools, i, pool) {
+ if (mlx5_fs_hws_mh_pool_match(pool, &pattern)) {
+ known_pattern = true;
+ break;
+ }
+ cnt++;
+ }
+
+ if (!known_pattern) {
+ pool = mlx5_fs_create_mh_pool(ns->dev, &pattern,
+ &hws_pool->mh_pools, cnt);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+ }
+ mh_data = mlx5_fs_hws_mh_pool_acquire_mh(pool);
+ if (IS_ERR(mh_data)) {
+ err = PTR_ERR(mh_data);
+ goto destroy_pool;
+ }
+ hws_action = mh_data->bulk->hws_action;
+ mh_data->data = kmemdup(pattern.data, pattern.sz, GFP_KERNEL);
+ if (!mh_data->data) {
+ err = -ENOMEM;
+ goto release_mh;
+ }
+ modify_hdr->fs_hws_action.mh_data = mh_data;
+ modify_hdr->fs_hws_action.fs_pool = pool;
+ modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW;
+ modify_hdr->fs_hws_action.hws_action = hws_action;
+
+ return 0;
+
+release_mh:
+ mlx5_fs_hws_mh_pool_release_mh(pool, mh_data);
+destroy_pool:
+ if (!known_pattern)
+ mlx5_fs_destroy_mh_pool(pool, &hws_pool->mh_pools, cnt);
+ return err;
+}
+
+static void mlx5_cmd_hws_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
+ struct mlx5_modify_hdr *modify_hdr)
+{
+ struct mlx5_fs_hws_mh *mh_data;
+ struct mlx5_fs_pool *pool;
+
+ if (!modify_hdr->fs_hws_action.fs_pool || !modify_hdr->fs_hws_action.mh_data) {
+ mlx5_core_err(ns->dev, "Failed release modify-header\n");
+ return;
+ }
+
+ mh_data = modify_hdr->fs_hws_action.mh_data;
+ kfree(mh_data->data);
+ pool = modify_hdr->fs_hws_action.fs_pool;
+ mlx5_fs_hws_mh_pool_release_mh(pool, mh_data);
+ modify_hdr->fs_hws_action.mh_data = NULL;
+}
+
+static int mlx5_cmd_hws_create_match_definer(struct mlx5_flow_root_namespace *ns,
+ u16 format_id, u32 *match_mask)
+{
+ return -EOPNOTSUPP;
+}
+
+static int mlx5_cmd_hws_destroy_match_definer(struct mlx5_flow_root_namespace *ns,
+ int definer_id)
+{
+ return -EOPNOTSUPP;
+}
+
+static u32 mlx5_cmd_hws_get_capabilities(struct mlx5_flow_root_namespace *ns,
+ enum fs_flow_table_type ft_type)
+{
+ if (ft_type != FS_FT_FDB)
+ return 0;
+
+ return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX |
+ MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX |
+ MLX5_FLOW_STEERING_CAP_MATCH_RANGES;
+}
+
+bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev)
+{
+ return mlx5hws_is_supported(dev);
+}
+
+static const struct mlx5_flow_cmds mlx5_flow_cmds_hws = {
+ .create_flow_table = mlx5_cmd_hws_create_flow_table,
+ .destroy_flow_table = mlx5_cmd_hws_destroy_flow_table,
+ .modify_flow_table = mlx5_cmd_hws_modify_flow_table,
+ .update_root_ft = mlx5_cmd_hws_update_root_ft,
+ .create_flow_group = mlx5_cmd_hws_create_flow_group,
+ .destroy_flow_group = mlx5_cmd_hws_destroy_flow_group,
+ .create_fte = mlx5_cmd_hws_create_fte,
+ .delete_fte = mlx5_cmd_hws_delete_fte,
+ .update_fte = mlx5_cmd_hws_update_fte,
+ .packet_reformat_alloc = mlx5_cmd_hws_packet_reformat_alloc,
+ .packet_reformat_dealloc = mlx5_cmd_hws_packet_reformat_dealloc,
+ .modify_header_alloc = mlx5_cmd_hws_modify_header_alloc,
+ .modify_header_dealloc = mlx5_cmd_hws_modify_header_dealloc,
+ .create_match_definer = mlx5_cmd_hws_create_match_definer,
+ .destroy_match_definer = mlx5_cmd_hws_destroy_match_definer,
+ .create_ns = mlx5_cmd_hws_create_ns,
+ .destroy_ns = mlx5_cmd_hws_destroy_ns,
+ .set_peer = mlx5_cmd_hws_set_peer,
+ .get_capabilities = mlx5_cmd_hws_get_capabilities,
+};
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_hws_cmds(void)
+{
+ return &mlx5_flow_cmds_hws;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h
new file mode 100644
index 000000000000..cbddb72d4362
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */
+
+#ifndef _MLX5_FS_HWS_
+#define _MLX5_FS_HWS_
+
+#include "mlx5hws.h"
+#include "fs_hws_pools.h"
+
+struct mlx5_fs_hws_actions_pool {
+ struct mlx5hws_action *tag_action;
+ struct mlx5hws_action *pop_vlan_action;
+ struct mlx5hws_action *push_vlan_action;
+ struct mlx5hws_action *drop_action;
+ struct mlx5hws_action *decapl2_action;
+ struct mlx5hws_action *remove_hdr_vlan_action;
+ struct mlx5_fs_pool insert_hdr_pool;
+ struct mlx5_fs_pool dl3tnltol2_pool;
+ struct xarray el2tol3tnl_pools;
+ struct xarray el2tol2tnl_pools;
+ struct xarray mh_pools;
+ struct xarray table_dests;
+ struct xarray vport_vhca_dests;
+ struct xarray vport_dests;
+};
+
+struct mlx5_fs_hws_context {
+ struct mlx5hws_context *hws_ctx;
+ struct mlx5_fs_hws_actions_pool hws_pool;
+};
+
+struct mlx5_fs_hws_table {
+ struct mlx5hws_table *hws_table;
+ bool miss_ft_set;
+};
+
+struct mlx5_fs_hws_action {
+ struct mlx5hws_action *hws_action;
+ struct mlx5_fs_pool *fs_pool;
+ struct mlx5_fs_hws_pr *pr_data;
+ struct mlx5_fs_hws_mh *mh_data;
+};
+
+struct mlx5_fs_hws_matcher {
+ struct mlx5hws_bwc_matcher *matcher;
+};
+
+struct mlx5_fs_hws_rule_action {
+ struct mlx5hws_action *action;
+ union {
+ struct mlx5_fc *counter;
+ };
+};
+
+struct mlx5_fs_hws_rule {
+ struct mlx5hws_bwc_rule *bwc_rule;
+ struct mlx5_fs_hws_rule_action *hws_fs_actions;
+ int num_fs_actions;
+};
+
+#ifdef CONFIG_MLX5_HW_STEERING
+
+bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev);
+
+const struct mlx5_flow_cmds *mlx5_fs_cmd_get_hws_cmds(void);
+
+#else
+
+static inline bool mlx5_fs_hws_is_supported(struct mlx5_core_dev *dev)
+{
+ return false;
+}
+
+static inline const struct mlx5_flow_cmds *mlx5_fs_cmd_get_hws_cmds(void)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_MLX5_HWS_STEERING */
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c
new file mode 100644
index 000000000000..2ae4ac62b0e2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */
+
+#include <mlx5_core.h>
+#include "fs_hws_pools.h"
+
+#define MLX5_FS_HWS_DEFAULT_BULK_LEN 65536
+#define MLX5_FS_HWS_POOL_MAX_THRESHOLD BIT(18)
+#define MLX5_FS_HWS_POOL_USED_BUFF_RATIO 10
+
+static struct mlx5hws_action *
+mlx5_fs_dl3tnltol2_bulk_action_create(struct mlx5hws_context *ctx)
+{
+ struct mlx5hws_action_reformat_header reformat_hdr[2] = {};
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB;
+ enum mlx5hws_action_type reformat_type;
+ u32 log_bulk_size;
+
+ reformat_type = MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2;
+ reformat_hdr[MLX5_FS_DL3TNLTOL2_MAC_HDR_IDX].sz = ETH_HLEN;
+ reformat_hdr[MLX5_FS_DL3TNLTOL2_MAC_VLAN_HDR_IDX].sz = ETH_HLEN + VLAN_HLEN;
+
+ log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN);
+ return mlx5hws_action_create_reformat(ctx, reformat_type, 2,
+ reformat_hdr, log_bulk_size, flags);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_el2tol3tnl_bulk_action_create(struct mlx5hws_context *ctx, size_t data_size)
+{
+ struct mlx5hws_action_reformat_header reformat_hdr = {};
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB;
+ enum mlx5hws_action_type reformat_type;
+ u32 log_bulk_size;
+
+ reformat_type = MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3;
+ reformat_hdr.sz = data_size;
+
+ log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN);
+ return mlx5hws_action_create_reformat(ctx, reformat_type, 1,
+ &reformat_hdr, log_bulk_size, flags);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_el2tol2tnl_bulk_action_create(struct mlx5hws_context *ctx, size_t data_size)
+{
+ struct mlx5hws_action_reformat_header reformat_hdr = {};
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB;
+ enum mlx5hws_action_type reformat_type;
+ u32 log_bulk_size;
+
+ reformat_type = MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2;
+ reformat_hdr.sz = data_size;
+
+ log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN);
+ return mlx5hws_action_create_reformat(ctx, reformat_type, 1,
+ &reformat_hdr, log_bulk_size, flags);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_insert_hdr_bulk_action_create(struct mlx5hws_context *ctx)
+{
+ struct mlx5hws_action_insert_header insert_hdr = {};
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB;
+ u32 log_bulk_size;
+
+ log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN);
+ insert_hdr.hdr.sz = MLX5_FS_INSERT_HDR_VLAN_SIZE;
+ insert_hdr.anchor = MLX5_FS_INSERT_HDR_VLAN_ANCHOR;
+ insert_hdr.offset = MLX5_FS_INSERT_HDR_VLAN_OFFSET;
+
+ return mlx5hws_action_create_insert_header(ctx, 1, &insert_hdr,
+ log_bulk_size, flags);
+}
+
+static struct mlx5hws_action *
+mlx5_fs_pr_bulk_action_create(struct mlx5_core_dev *dev,
+ struct mlx5_fs_hws_pr_pool_ctx *pr_pool_ctx)
+{
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5hws_context *ctx;
+ size_t encap_data_size;
+
+ root_ns = mlx5_get_root_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns || root_ns->mode != MLX5_FLOW_STEERING_MODE_HMFS)
+ return NULL;
+
+ ctx = root_ns->fs_hws_context.hws_ctx;
+ if (!ctx)
+ return NULL;
+
+ encap_data_size = pr_pool_ctx->encap_data_size;
+ switch (pr_pool_ctx->reformat_type) {
+ case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2:
+ return mlx5_fs_dl3tnltol2_bulk_action_create(ctx);
+ case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3:
+ return mlx5_fs_el2tol3tnl_bulk_action_create(ctx, encap_data_size);
+ case MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2:
+ return mlx5_fs_el2tol2tnl_bulk_action_create(ctx, encap_data_size);
+ case MLX5HWS_ACTION_TYP_INSERT_HEADER:
+ return mlx5_fs_insert_hdr_bulk_action_create(ctx);
+ default:
+ return NULL;
+ }
+ return NULL;
+}
+
+static struct mlx5_fs_bulk *
+mlx5_fs_hws_pr_bulk_create(struct mlx5_core_dev *dev, void *pool_ctx)
+{
+ struct mlx5_fs_hws_pr_pool_ctx *pr_pool_ctx;
+ struct mlx5_fs_hws_pr_bulk *pr_bulk;
+ int bulk_len;
+ int i;
+
+ if (!pool_ctx)
+ return NULL;
+ pr_pool_ctx = pool_ctx;
+ bulk_len = MLX5_FS_HWS_DEFAULT_BULK_LEN;
+ pr_bulk = kvzalloc(struct_size(pr_bulk, prs_data, bulk_len), GFP_KERNEL);
+ if (!pr_bulk)
+ return NULL;
+
+ if (mlx5_fs_bulk_init(dev, &pr_bulk->fs_bulk, bulk_len))
+ goto free_pr_bulk;
+
+ for (i = 0; i < bulk_len; i++) {
+ pr_bulk->prs_data[i].bulk = pr_bulk;
+ pr_bulk->prs_data[i].offset = i;
+ }
+
+ pr_bulk->hws_action = mlx5_fs_pr_bulk_action_create(dev, pr_pool_ctx);
+ if (!pr_bulk->hws_action)
+ goto cleanup_fs_bulk;
+
+ return &pr_bulk->fs_bulk;
+
+cleanup_fs_bulk:
+ mlx5_fs_bulk_cleanup(&pr_bulk->fs_bulk);
+free_pr_bulk:
+ kvfree(pr_bulk);
+ return NULL;
+}
+
+static int
+mlx5_fs_hws_pr_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *fs_bulk)
+{
+ struct mlx5_fs_hws_pr_bulk *pr_bulk;
+
+ pr_bulk = container_of(fs_bulk, struct mlx5_fs_hws_pr_bulk, fs_bulk);
+ if (mlx5_fs_bulk_get_free_amount(fs_bulk) < fs_bulk->bulk_len) {
+ mlx5_core_err(dev, "Freeing bulk before all reformats were released\n");
+ return -EBUSY;
+ }
+
+ mlx5hws_action_destroy(pr_bulk->hws_action);
+ mlx5_fs_bulk_cleanup(fs_bulk);
+ kvfree(pr_bulk);
+
+ return 0;
+}
+
+static void mlx5_hws_pool_update_threshold(struct mlx5_fs_pool *hws_pool)
+{
+ hws_pool->threshold = min_t(int, MLX5_FS_HWS_POOL_MAX_THRESHOLD,
+ hws_pool->used_units / MLX5_FS_HWS_POOL_USED_BUFF_RATIO);
+}
+
+static const struct mlx5_fs_pool_ops mlx5_fs_hws_pr_pool_ops = {
+ .bulk_create = mlx5_fs_hws_pr_bulk_create,
+ .bulk_destroy = mlx5_fs_hws_pr_bulk_destroy,
+ .update_threshold = mlx5_hws_pool_update_threshold,
+};
+
+int mlx5_fs_hws_pr_pool_init(struct mlx5_fs_pool *pr_pool,
+ struct mlx5_core_dev *dev, size_t encap_data_size,
+ enum mlx5hws_action_type reformat_type)
+{
+ struct mlx5_fs_hws_pr_pool_ctx *pr_pool_ctx;
+
+ if (reformat_type != MLX5HWS_ACTION_TYP_INSERT_HEADER &&
+ reformat_type != MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2 &&
+ reformat_type != MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3 &&
+ reformat_type != MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2)
+ return -EOPNOTSUPP;
+
+ pr_pool_ctx = kzalloc(sizeof(*pr_pool_ctx), GFP_KERNEL);
+ if (!pr_pool_ctx)
+ return -ENOMEM;
+ pr_pool_ctx->reformat_type = reformat_type;
+ pr_pool_ctx->encap_data_size = encap_data_size;
+ mlx5_fs_pool_init(pr_pool, dev, &mlx5_fs_hws_pr_pool_ops, pr_pool_ctx);
+ return 0;
+}
+
+void mlx5_fs_hws_pr_pool_cleanup(struct mlx5_fs_pool *pr_pool)
+{
+ struct mlx5_fs_hws_pr_pool_ctx *pr_pool_ctx;
+
+ mlx5_fs_pool_cleanup(pr_pool);
+ pr_pool_ctx = pr_pool->pool_ctx;
+ if (!pr_pool_ctx)
+ return;
+ kfree(pr_pool_ctx);
+}
+
+struct mlx5_fs_hws_pr *
+mlx5_fs_hws_pr_pool_acquire_pr(struct mlx5_fs_pool *pr_pool)
+{
+ struct mlx5_fs_pool_index pool_index = {};
+ struct mlx5_fs_hws_pr_bulk *pr_bulk;
+ int err;
+
+ err = mlx5_fs_pool_acquire_index(pr_pool, &pool_index);
+ if (err)
+ return ERR_PTR(err);
+ pr_bulk = container_of(pool_index.fs_bulk, struct mlx5_fs_hws_pr_bulk,
+ fs_bulk);
+ return &pr_bulk->prs_data[pool_index.index];
+}
+
+void mlx5_fs_hws_pr_pool_release_pr(struct mlx5_fs_pool *pr_pool,
+ struct mlx5_fs_hws_pr *pr_data)
+{
+ struct mlx5_fs_bulk *fs_bulk = &pr_data->bulk->fs_bulk;
+ struct mlx5_fs_pool_index pool_index = {};
+ struct mlx5_core_dev *dev = pr_pool->dev;
+
+ pool_index.fs_bulk = fs_bulk;
+ pool_index.index = pr_data->offset;
+ if (mlx5_fs_pool_release_index(pr_pool, &pool_index))
+ mlx5_core_warn(dev, "Attempted to release packet reformat which is not acquired\n");
+}
+
+struct mlx5hws_action *mlx5_fs_hws_pr_get_action(struct mlx5_fs_hws_pr *pr_data)
+{
+ return pr_data->bulk->hws_action;
+}
+
+static struct mlx5hws_action *
+mlx5_fs_mh_bulk_action_create(struct mlx5hws_context *ctx,
+ struct mlx5hws_action_mh_pattern *pattern)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB;
+ u32 log_bulk_size;
+
+ log_bulk_size = ilog2(MLX5_FS_HWS_DEFAULT_BULK_LEN);
+ return mlx5hws_action_create_modify_header(ctx, 1, pattern,
+ log_bulk_size, flags);
+}
+
+static struct mlx5_fs_bulk *
+mlx5_fs_hws_mh_bulk_create(struct mlx5_core_dev *dev, void *pool_ctx)
+{
+ struct mlx5hws_action_mh_pattern *pattern;
+ struct mlx5_flow_root_namespace *root_ns;
+ struct mlx5_fs_hws_mh_bulk *mh_bulk;
+ struct mlx5hws_context *ctx;
+ int bulk_len;
+
+ if (!pool_ctx)
+ return NULL;
+
+ root_ns = mlx5_get_root_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ if (!root_ns || root_ns->mode != MLX5_FLOW_STEERING_MODE_HMFS)
+ return NULL;
+
+ ctx = root_ns->fs_hws_context.hws_ctx;
+ if (!ctx)
+ return NULL;
+
+ pattern = pool_ctx;
+ bulk_len = MLX5_FS_HWS_DEFAULT_BULK_LEN;
+ mh_bulk = kvzalloc(struct_size(mh_bulk, mhs_data, bulk_len), GFP_KERNEL);
+ if (!mh_bulk)
+ return NULL;
+
+ if (mlx5_fs_bulk_init(dev, &mh_bulk->fs_bulk, bulk_len))
+ goto free_mh_bulk;
+
+ for (int i = 0; i < bulk_len; i++) {
+ mh_bulk->mhs_data[i].bulk = mh_bulk;
+ mh_bulk->mhs_data[i].offset = i;
+ }
+
+ mh_bulk->hws_action = mlx5_fs_mh_bulk_action_create(ctx, pattern);
+ if (!mh_bulk->hws_action)
+ goto cleanup_fs_bulk;
+
+ return &mh_bulk->fs_bulk;
+
+cleanup_fs_bulk:
+ mlx5_fs_bulk_cleanup(&mh_bulk->fs_bulk);
+free_mh_bulk:
+ kvfree(mh_bulk);
+ return NULL;
+}
+
+static int
+mlx5_fs_hws_mh_bulk_destroy(struct mlx5_core_dev *dev,
+ struct mlx5_fs_bulk *fs_bulk)
+{
+ struct mlx5_fs_hws_mh_bulk *mh_bulk;
+
+ mh_bulk = container_of(fs_bulk, struct mlx5_fs_hws_mh_bulk, fs_bulk);
+ if (mlx5_fs_bulk_get_free_amount(fs_bulk) < fs_bulk->bulk_len) {
+ mlx5_core_err(dev, "Freeing bulk before all modify header were released\n");
+ return -EBUSY;
+ }
+
+ mlx5hws_action_destroy(mh_bulk->hws_action);
+ mlx5_fs_bulk_cleanup(fs_bulk);
+ kvfree(mh_bulk);
+
+ return 0;
+}
+
+static const struct mlx5_fs_pool_ops mlx5_fs_hws_mh_pool_ops = {
+ .bulk_create = mlx5_fs_hws_mh_bulk_create,
+ .bulk_destroy = mlx5_fs_hws_mh_bulk_destroy,
+ .update_threshold = mlx5_hws_pool_update_threshold,
+};
+
+int mlx5_fs_hws_mh_pool_init(struct mlx5_fs_pool *fs_hws_mh_pool,
+ struct mlx5_core_dev *dev,
+ struct mlx5hws_action_mh_pattern *pattern)
+{
+ struct mlx5hws_action_mh_pattern *pool_pattern;
+
+ pool_pattern = kzalloc(sizeof(*pool_pattern), GFP_KERNEL);
+ if (!pool_pattern)
+ return -ENOMEM;
+ pool_pattern->data = kmemdup(pattern->data, pattern->sz, GFP_KERNEL);
+ if (!pool_pattern->data) {
+ kfree(pool_pattern);
+ return -ENOMEM;
+ }
+ pool_pattern->sz = pattern->sz;
+ mlx5_fs_pool_init(fs_hws_mh_pool, dev, &mlx5_fs_hws_mh_pool_ops,
+ pool_pattern);
+ return 0;
+}
+
+void mlx5_fs_hws_mh_pool_cleanup(struct mlx5_fs_pool *fs_hws_mh_pool)
+{
+ struct mlx5hws_action_mh_pattern *pool_pattern;
+
+ mlx5_fs_pool_cleanup(fs_hws_mh_pool);
+ pool_pattern = fs_hws_mh_pool->pool_ctx;
+ if (!pool_pattern)
+ return;
+ kfree(pool_pattern->data);
+ kfree(pool_pattern);
+}
+
+struct mlx5_fs_hws_mh *
+mlx5_fs_hws_mh_pool_acquire_mh(struct mlx5_fs_pool *mh_pool)
+{
+ struct mlx5_fs_pool_index pool_index = {};
+ struct mlx5_fs_hws_mh_bulk *mh_bulk;
+ int err;
+
+ err = mlx5_fs_pool_acquire_index(mh_pool, &pool_index);
+ if (err)
+ return ERR_PTR(err);
+ mh_bulk = container_of(pool_index.fs_bulk, struct mlx5_fs_hws_mh_bulk,
+ fs_bulk);
+ return &mh_bulk->mhs_data[pool_index.index];
+}
+
+void mlx5_fs_hws_mh_pool_release_mh(struct mlx5_fs_pool *mh_pool,
+ struct mlx5_fs_hws_mh *mh_data)
+{
+ struct mlx5_fs_bulk *fs_bulk = &mh_data->bulk->fs_bulk;
+ struct mlx5_fs_pool_index pool_index = {};
+ struct mlx5_core_dev *dev = mh_pool->dev;
+
+ pool_index.fs_bulk = fs_bulk;
+ pool_index.index = mh_data->offset;
+ if (mlx5_fs_pool_release_index(mh_pool, &pool_index))
+ mlx5_core_warn(dev, "Attempted to release modify header which is not acquired\n");
+}
+
+bool mlx5_fs_hws_mh_pool_match(struct mlx5_fs_pool *mh_pool,
+ struct mlx5hws_action_mh_pattern *pattern)
+{
+ struct mlx5hws_action_mh_pattern *pool_pattern;
+ int num_actions, i;
+
+ pool_pattern = mh_pool->pool_ctx;
+ if (WARN_ON_ONCE(!pool_pattern))
+ return false;
+
+ if (pattern->sz != pool_pattern->sz)
+ return false;
+ num_actions = pattern->sz / MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
+ for (i = 0; i < num_actions; i++) {
+ if ((__force __be32)pattern->data[i] !=
+ (__force __be32)pool_pattern->data[i])
+ return false;
+ }
+ return true;
+}
+
+struct mlx5hws_action *mlx5_fc_get_hws_action(struct mlx5hws_context *ctx,
+ struct mlx5_fc *counter)
+{
+ u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
+ struct mlx5_fc_bulk *fc_bulk = counter->bulk;
+ struct mlx5_fc_bulk_hws_data *fc_bulk_hws;
+
+ fc_bulk_hws = &fc_bulk->hws_data;
+ /* try avoid locking if not necessary */
+ if (refcount_inc_not_zero(&fc_bulk_hws->hws_action_refcount))
+ return fc_bulk_hws->hws_action;
+
+ mutex_lock(&fc_bulk_hws->lock);
+ if (refcount_inc_not_zero(&fc_bulk_hws->hws_action_refcount)) {
+ mutex_unlock(&fc_bulk_hws->lock);
+ return fc_bulk_hws->hws_action;
+ }
+ fc_bulk_hws->hws_action =
+ mlx5hws_action_create_counter(ctx, fc_bulk->base_id, flags);
+ if (!fc_bulk_hws->hws_action) {
+ mutex_unlock(&fc_bulk_hws->lock);
+ return NULL;
+ }
+ refcount_set(&fc_bulk_hws->hws_action_refcount, 1);
+ mutex_unlock(&fc_bulk_hws->lock);
+
+ return fc_bulk_hws->hws_action;
+}
+
+void mlx5_fc_put_hws_action(struct mlx5_fc *counter)
+{
+ struct mlx5_fc_bulk_hws_data *fc_bulk_hws = &counter->bulk->hws_data;
+
+ /* try avoid locking if not necessary */
+ if (refcount_dec_not_one(&fc_bulk_hws->hws_action_refcount))
+ return;
+
+ mutex_lock(&fc_bulk_hws->lock);
+ if (!refcount_dec_and_test(&fc_bulk_hws->hws_action_refcount)) {
+ mutex_unlock(&fc_bulk_hws->lock);
+ return;
+ }
+ mlx5hws_action_destroy(fc_bulk_hws->hws_action);
+ fc_bulk_hws->hws_action = NULL;
+ mutex_unlock(&fc_bulk_hws->lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.h
new file mode 100644
index 000000000000..34072551dd21
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2025 NVIDIA Corporation & Affiliates */
+
+#ifndef __MLX5_FS_HWS_POOLS_H__
+#define __MLX5_FS_HWS_POOLS_H__
+
+#include <linux/if_vlan.h>
+#include "fs_pool.h"
+#include "fs_core.h"
+
+#define MLX5_FS_INSERT_HDR_VLAN_ANCHOR MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START
+#define MLX5_FS_INSERT_HDR_VLAN_OFFSET offsetof(struct vlan_ethhdr, h_vlan_proto)
+#define MLX5_FS_INSERT_HDR_VLAN_SIZE sizeof(struct vlan_hdr)
+
+enum {
+ MLX5_FS_DL3TNLTOL2_MAC_HDR_IDX = 0,
+ MLX5_FS_DL3TNLTOL2_MAC_VLAN_HDR_IDX,
+};
+
+struct mlx5_fs_hws_pr {
+ struct mlx5_fs_hws_pr_bulk *bulk;
+ u32 offset;
+ u8 hdr_idx;
+ u8 *data;
+ size_t data_size;
+};
+
+struct mlx5_fs_hws_pr_bulk {
+ struct mlx5_fs_bulk fs_bulk;
+ struct mlx5hws_action *hws_action;
+ struct mlx5_fs_hws_pr prs_data[];
+};
+
+struct mlx5_fs_hws_pr_pool_ctx {
+ enum mlx5hws_action_type reformat_type;
+ size_t encap_data_size;
+};
+
+struct mlx5_fs_hws_mh {
+ struct mlx5_fs_hws_mh_bulk *bulk;
+ u32 offset;
+ u8 *data;
+};
+
+struct mlx5_fs_hws_mh_bulk {
+ struct mlx5_fs_bulk fs_bulk;
+ struct mlx5_fs_pool *mh_pool;
+ struct mlx5hws_action *hws_action;
+ struct mlx5_fs_hws_mh mhs_data[];
+};
+
+int mlx5_fs_hws_pr_pool_init(struct mlx5_fs_pool *pr_pool,
+ struct mlx5_core_dev *dev, size_t encap_data_size,
+ enum mlx5hws_action_type reformat_type);
+void mlx5_fs_hws_pr_pool_cleanup(struct mlx5_fs_pool *pr_pool);
+
+struct mlx5_fs_hws_pr *mlx5_fs_hws_pr_pool_acquire_pr(struct mlx5_fs_pool *pr_pool);
+void mlx5_fs_hws_pr_pool_release_pr(struct mlx5_fs_pool *pr_pool,
+ struct mlx5_fs_hws_pr *pr_data);
+struct mlx5hws_action *mlx5_fs_hws_pr_get_action(struct mlx5_fs_hws_pr *pr_data);
+int mlx5_fs_hws_mh_pool_init(struct mlx5_fs_pool *fs_hws_mh_pool,
+ struct mlx5_core_dev *dev,
+ struct mlx5hws_action_mh_pattern *pattern);
+void mlx5_fs_hws_mh_pool_cleanup(struct mlx5_fs_pool *fs_hws_mh_pool);
+struct mlx5_fs_hws_mh *mlx5_fs_hws_mh_pool_acquire_mh(struct mlx5_fs_pool *mh_pool);
+void mlx5_fs_hws_mh_pool_release_mh(struct mlx5_fs_pool *mh_pool,
+ struct mlx5_fs_hws_mh *mh_data);
+bool mlx5_fs_hws_mh_pool_match(struct mlx5_fs_pool *mh_pool,
+ struct mlx5hws_action_mh_pattern *pattern);
+struct mlx5hws_action *mlx5_fc_get_hws_action(struct mlx5hws_context *ctx,
+ struct mlx5_fc *counter);
+void mlx5_fc_put_hws_action(struct mlx5_fc *counter);
+#endif /* __MLX5_FS_HWS_POOLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
index 3c8635f286ce..30ccd635b505 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/internal.h
@@ -39,7 +39,6 @@
#define mlx5hws_dbg(ctx, arg...) mlx5_core_dbg((ctx)->mdev, ##arg)
#define MLX5HWS_TABLE_TYPE_BASE 2
-#define MLX5HWS_ACTION_STE_IDX_ANY 0
static inline bool is_mem_zero(const u8 *mem, size_t size)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
index 1bb3a6f8c3cd..80157a29a076 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c
@@ -165,14 +165,14 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher)
next->match_ste.rtc_0_id,
next->match_ste.rtc_1_id);
if (ret) {
- mlx5hws_err(tbl->ctx, "Failed to disconnect matcher\n");
- goto matcher_reconnect;
+ mlx5hws_err(tbl->ctx, "Fatal error, failed to disconnect matcher\n");
+ return ret;
}
} else {
ret = mlx5hws_table_connect_to_miss_table(tbl, tbl->default_miss.miss_tbl);
if (ret) {
- mlx5hws_err(tbl->ctx, "Failed to disconnect last matcher\n");
- goto matcher_reconnect;
+ mlx5hws_err(tbl->ctx, "Fatal error, failed to disconnect last matcher\n");
+ return ret;
}
}
@@ -180,27 +180,19 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher)
if (prev_ft_id == tbl->ft_id) {
ret = mlx5hws_table_update_connected_miss_tables(tbl);
if (ret) {
- mlx5hws_err(tbl->ctx, "Fatal error, failed to update connected miss table\n");
- goto matcher_reconnect;
+ mlx5hws_err(tbl->ctx,
+ "Fatal error, failed to update connected miss table\n");
+ return ret;
}
}
ret = mlx5hws_table_ft_set_default_next_ft(tbl, prev_ft_id);
if (ret) {
mlx5hws_err(tbl->ctx, "Fatal error, failed to restore matcher ft default miss\n");
- goto matcher_reconnect;
+ return ret;
}
return 0;
-
-matcher_reconnect:
- if (list_empty(&tbl->matchers_list) || !prev)
- list_add(&matcher->list_node, &tbl->matchers_list);
- else
- /* insert after prev matcher */
- list_add(&matcher->list_node, &prev->list_node);
-
- return ret;
}
static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher,
@@ -208,7 +200,7 @@ static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher,
enum mlx5hws_matcher_rtc_type rtc_type,
bool is_mirror)
{
- struct mlx5hws_pool_chunk *ste = &matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].ste;
+ struct mlx5hws_pool_chunk *ste = &matcher->action_ste.ste;
enum mlx5hws_matcher_flow_src flow_src = matcher->attr.optimize_flow_src;
bool is_match_rtc = rtc_type == HWS_MATCHER_RTC_TYPE_MATCH;
@@ -225,8 +217,7 @@ static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher,
}
static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
- enum mlx5hws_matcher_rtc_type rtc_type,
- u8 action_ste_selector)
+ enum mlx5hws_matcher_rtc_type rtc_type)
{
struct mlx5hws_matcher_attr *attr = &matcher->attr;
struct mlx5hws_cmd_rtc_create_attr rtc_attr = {0};
@@ -286,14 +277,19 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
break;
case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
- action_ste = &matcher->action_ste[action_ste_selector];
+ action_ste = &matcher->action_ste;
rtc_0_id = &action_ste->rtc_0_id;
rtc_1_id = &action_ste->rtc_1_id;
ste_pool = action_ste->pool;
ste = &action_ste->ste;
+ /* Action RTC size calculation:
+ * log((max number of rules in matcher) *
+ * (max number of action STEs per rule) *
+ * (2 to support writing new STEs for update rule))
+ */
ste->order = ilog2(roundup_pow_of_two(action_ste->max_stes)) +
- attr->table.sz_row_log;
+ attr->table.sz_row_log + 1;
rtc_attr.log_size = ste->order;
rtc_attr.log_depth = 0;
rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET;
@@ -318,8 +314,8 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher,
hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, false);
/* STC is a single resource (obj_id), use any STC for the ID */
- stc_pool = ctx->stc_pool[tbl->type];
- default_stc = ctx->common_res[tbl->type].default_stc;
+ stc_pool = ctx->stc_pool;
+ default_stc = ctx->common_res.default_stc;
obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit);
rtc_attr.stc_base = obj_id;
@@ -358,8 +354,7 @@ free_ste:
}
static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher,
- enum mlx5hws_matcher_rtc_type rtc_type,
- u8 action_ste_selector)
+ enum mlx5hws_matcher_rtc_type rtc_type)
{
struct mlx5hws_matcher_action_ste *action_ste;
struct mlx5hws_table *tbl = matcher->tbl;
@@ -375,7 +370,7 @@ static void hws_matcher_destroy_rtc(struct mlx5hws_matcher *matcher,
ste = &matcher->match_ste.ste;
break;
case HWS_MATCHER_RTC_TYPE_STE_ARRAY:
- action_ste = &matcher->action_ste[action_ste_selector];
+ action_ste = &matcher->action_ste;
rtc_0_id = action_ste->rtc_0_id;
rtc_1_id = action_ste->rtc_1_id;
ste_pool = action_ste->pool;
@@ -466,20 +461,13 @@ static int hws_matcher_resize_init(struct mlx5hws_matcher *src_matcher)
if (!resize_data)
return -ENOMEM;
- resize_data->max_stes = src_matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes;
-
- resize_data->action_ste[0].stc = src_matcher->action_ste[0].stc;
- resize_data->action_ste[0].rtc_0_id = src_matcher->action_ste[0].rtc_0_id;
- resize_data->action_ste[0].rtc_1_id = src_matcher->action_ste[0].rtc_1_id;
- resize_data->action_ste[0].pool = src_matcher->action_ste[0].max_stes ?
- src_matcher->action_ste[0].pool :
- NULL;
- resize_data->action_ste[1].stc = src_matcher->action_ste[1].stc;
- resize_data->action_ste[1].rtc_0_id = src_matcher->action_ste[1].rtc_0_id;
- resize_data->action_ste[1].rtc_1_id = src_matcher->action_ste[1].rtc_1_id;
- resize_data->action_ste[1].pool = src_matcher->action_ste[1].max_stes ?
- src_matcher->action_ste[1].pool :
- NULL;
+ resize_data->max_stes = src_matcher->action_ste.max_stes;
+
+ resize_data->stc = src_matcher->action_ste.stc;
+ resize_data->rtc_0_id = src_matcher->action_ste.rtc_0_id;
+ resize_data->rtc_1_id = src_matcher->action_ste.rtc_1_id;
+ resize_data->pool = src_matcher->action_ste.max_stes ?
+ src_matcher->action_ste.pool : NULL;
/* Place the new resized matcher on the dst matcher's list */
list_add(&resize_data->list_node, &src_matcher->resize_dst->resize_data);
@@ -512,49 +500,68 @@ static void hws_matcher_resize_uninit(struct mlx5hws_matcher *matcher)
if (resize_data->max_stes) {
mlx5hws_action_free_single_stc(matcher->tbl->ctx,
matcher->tbl->type,
- &resize_data->action_ste[1].stc);
- mlx5hws_action_free_single_stc(matcher->tbl->ctx,
- matcher->tbl->type,
- &resize_data->action_ste[0].stc);
+ &resize_data->stc);
- if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB) {
+ if (matcher->tbl->type == MLX5HWS_TABLE_TYPE_FDB)
mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev,
- resize_data->action_ste[1].rtc_1_id);
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev,
- resize_data->action_ste[0].rtc_1_id);
- }
- mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev,
- resize_data->action_ste[1].rtc_0_id);
+ resize_data->rtc_1_id);
+
mlx5hws_cmd_rtc_destroy(matcher->tbl->ctx->mdev,
- resize_data->action_ste[0].rtc_0_id);
- if (resize_data->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].pool) {
- mlx5hws_pool_destroy(resize_data->action_ste[1].pool);
- mlx5hws_pool_destroy(resize_data->action_ste[0].pool);
- }
+ resize_data->rtc_0_id);
+
+ if (resize_data->pool)
+ mlx5hws_pool_destroy(resize_data->pool);
}
kfree(resize_data);
}
}
-static int
-hws_matcher_bind_at_idx(struct mlx5hws_matcher *matcher, u8 action_ste_selector)
+static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
{
+ bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt);
struct mlx5hws_cmd_stc_modify_attr stc_attr = {0};
struct mlx5hws_matcher_action_ste *action_ste;
struct mlx5hws_table *tbl = matcher->tbl;
struct mlx5hws_pool_attr pool_attr = {0};
struct mlx5hws_context *ctx = tbl->ctx;
- int ret;
+ u32 required_stes;
+ u8 max_stes = 0;
+ int i, ret;
+
+ if (matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)
+ return 0;
+
+ for (i = 0; i < matcher->num_of_at; i++) {
+ struct mlx5hws_action_template *at = &matcher->at[i];
+
+ ret = hws_matcher_check_and_process_at(matcher, at);
+ if (ret) {
+ mlx5hws_err(ctx, "Invalid at %d", i);
+ return ret;
+ }
+
+ required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term);
+ max_stes = max(max_stes, required_stes);
+
+ /* Future: Optimize reparse */
+ }
+
+ /* There are no additional STEs required for matcher */
+ if (!max_stes)
+ return 0;
+
+ matcher->action_ste.max_stes = max_stes;
- action_ste = &matcher->action_ste[action_ste_selector];
+ action_ste = &matcher->action_ste;
/* Allocate action STE mempool */
pool_attr.table_type = tbl->type;
pool_attr.pool_type = MLX5HWS_POOL_TYPE_STE;
pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL;
+ /* Pool size is similar to action RTC size */
pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) +
- matcher->attr.table.sz_row_log;
+ matcher->attr.table.sz_row_log + 1;
hws_matcher_set_pool_attr(&pool_attr, matcher);
action_ste->pool = mlx5hws_pool_create(ctx, &pool_attr);
if (!action_ste->pool) {
@@ -563,7 +570,7 @@ hws_matcher_bind_at_idx(struct mlx5hws_matcher *matcher, u8 action_ste_selector)
}
/* Allocate action RTC */
- ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector);
+ ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
if (ret) {
mlx5hws_err(ctx, "Failed to create action RTC\n");
goto free_ste_pool;
@@ -587,18 +594,18 @@ hws_matcher_bind_at_idx(struct mlx5hws_matcher *matcher, u8 action_ste_selector)
return 0;
free_rtc:
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector);
+ hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
free_ste_pool:
mlx5hws_pool_destroy(action_ste->pool);
return ret;
}
-static void hws_matcher_unbind_at_idx(struct mlx5hws_matcher *matcher, u8 action_ste_selector)
+static void hws_matcher_unbind_at(struct mlx5hws_matcher *matcher)
{
struct mlx5hws_matcher_action_ste *action_ste;
struct mlx5hws_table *tbl = matcher->tbl;
- action_ste = &matcher->action_ste[action_ste_selector];
+ action_ste = &matcher->action_ste;
if (!action_ste->max_stes ||
matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION ||
@@ -606,65 +613,10 @@ static void hws_matcher_unbind_at_idx(struct mlx5hws_matcher *matcher, u8 action
return;
mlx5hws_action_free_single_stc(tbl->ctx, tbl->type, &action_ste->stc);
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY, action_ste_selector);
+ hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_STE_ARRAY);
mlx5hws_pool_destroy(action_ste->pool);
}
-static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher)
-{
- bool is_jumbo = mlx5hws_matcher_mt_is_jumbo(matcher->mt);
- struct mlx5hws_table *tbl = matcher->tbl;
- struct mlx5hws_context *ctx = tbl->ctx;
- u32 required_stes;
- u8 max_stes = 0;
- int i, ret;
-
- if (matcher->flags & MLX5HWS_MATCHER_FLAGS_COLLISION)
- return 0;
-
- for (i = 0; i < matcher->num_of_at; i++) {
- struct mlx5hws_action_template *at = &matcher->at[i];
-
- ret = hws_matcher_check_and_process_at(matcher, at);
- if (ret) {
- mlx5hws_err(ctx, "Invalid at %d", i);
- return ret;
- }
-
- required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term);
- max_stes = max(max_stes, required_stes);
-
- /* Future: Optimize reparse */
- }
-
- /* There are no additional STEs required for matcher */
- if (!max_stes)
- return 0;
-
- matcher->action_ste[0].max_stes = max_stes;
- matcher->action_ste[1].max_stes = max_stes;
-
- ret = hws_matcher_bind_at_idx(matcher, 0);
- if (ret)
- return ret;
-
- ret = hws_matcher_bind_at_idx(matcher, 1);
- if (ret)
- goto free_at_0;
-
- return 0;
-
-free_at_0:
- hws_matcher_unbind_at_idx(matcher, 0);
- return ret;
-}
-
-static void hws_matcher_unbind_at(struct mlx5hws_matcher *matcher)
-{
- hws_matcher_unbind_at_idx(matcher, 1);
- hws_matcher_unbind_at_idx(matcher, 0);
-}
-
static int hws_matcher_bind_mt(struct mlx5hws_matcher *matcher)
{
struct mlx5hws_context *ctx = matcher->tbl->ctx;
@@ -810,7 +762,7 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher)
goto unbind_at;
/* Allocate the RTC for the new matcher */
- ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0);
+ ret = hws_matcher_create_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
if (ret)
goto destroy_end_ft;
@@ -822,7 +774,7 @@ static int hws_matcher_create_and_connect(struct mlx5hws_matcher *matcher)
return 0;
destroy_rtc:
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0);
+ hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
destroy_end_ft:
hws_matcher_destroy_end_ft(matcher);
unbind_at:
@@ -836,7 +788,7 @@ static void hws_matcher_destroy_and_disconnect(struct mlx5hws_matcher *matcher)
{
hws_matcher_resize_uninit(matcher);
hws_matcher_disconnect(matcher);
- hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH, 0);
+ hws_matcher_destroy_rtc(matcher, HWS_MATCHER_RTC_TYPE_MATCH);
hws_matcher_destroy_end_ft(matcher);
hws_matcher_unbind_at(matcher);
hws_matcher_unbind_mt(matcher);
@@ -970,10 +922,9 @@ int mlx5hws_matcher_attach_at(struct mlx5hws_matcher *matcher,
return ret;
required_stes = at->num_of_action_stes - (!is_jumbo || at->only_term);
- if (matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes < required_stes) {
+ if (matcher->action_ste.max_stes < required_stes) {
mlx5hws_dbg(ctx, "Required STEs [%d] exceeds initial action template STE [%d]\n",
- required_stes,
- matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes);
+ required_stes, matcher->action_ste.max_stes);
return -ENOMEM;
}
@@ -1007,9 +958,9 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher,
if (!matcher->mt)
return -ENOMEM;
- matcher->at = kcalloc(num_of_at + matcher->attr.max_num_of_at_attach,
- sizeof(*matcher->at),
- GFP_KERNEL);
+ matcher->at = kvcalloc(num_of_at + matcher->attr.max_num_of_at_attach,
+ sizeof(*matcher->at),
+ GFP_KERNEL);
if (!matcher->at) {
mlx5hws_err(ctx, "Failed to allocate action template array\n");
ret = -ENOMEM;
@@ -1035,7 +986,7 @@ free_mt:
static void
hws_matcher_unset_templates(struct mlx5hws_matcher *matcher)
{
- kfree(matcher->at);
+ kvfree(matcher->at);
kfree(matcher->mt);
}
@@ -1157,8 +1108,7 @@ static int hws_matcher_resize_precheck(struct mlx5hws_matcher *src_matcher,
return -EINVAL;
}
- if (src_matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes >
- dst_matcher->action_ste[0].max_stes) {
+ if (src_matcher->action_ste.max_stes > dst_matcher->action_ste.max_stes) {
mlx5hws_err(ctx, "Src/dst matcher max STEs mismatch\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
index 81ff487f57be..cff4ae854a79 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h
@@ -52,15 +52,11 @@ struct mlx5hws_matcher_action_ste {
u8 max_stes;
};
-struct mlx5hws_matcher_resize_data_node {
+struct mlx5hws_matcher_resize_data {
struct mlx5hws_pool_chunk stc;
u32 rtc_0_id;
u32 rtc_1_id;
struct mlx5hws_pool *pool;
-};
-
-struct mlx5hws_matcher_resize_data {
- struct mlx5hws_matcher_resize_data_node action_ste[2];
u8 max_stes;
struct list_head list_node;
};
@@ -78,7 +74,7 @@ struct mlx5hws_matcher {
struct mlx5hws_matcher *col_matcher;
struct mlx5hws_matcher *resize_dst;
struct mlx5hws_matcher_match_ste match_ste;
- struct mlx5hws_matcher_action_ste action_ste[2];
+ struct mlx5hws_matcher_action_ste action_ste;
struct list_head list_node;
struct list_head resize_data;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
index 06db5e4726ae..d9dc4f2d0dc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c
@@ -344,7 +344,7 @@ void mlx5hws_arg_write(struct mlx5hws_send_engine *queue,
mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len);
memset(wqe_ctrl, 0, wqe_len);
mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_arg, &wqe_len);
- memcpy(wqe_arg, arg_data, wqe_len);
+ memcpy(wqe_arg, arg_data, MLX5HWS_ARG_DATA_SIZE);
send_attr.id = arg_idx++;
mlx5hws_send_engine_post_end(&ctrl, &send_attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
index 27ca93385b08..8ddb51980044 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h
@@ -31,7 +31,7 @@ struct mlx5hws_pattern_cache_item {
u8 *data;
u16 num_of_actions;
} mh_data;
- u32 refcount;
+ u32 refcount; /* protected by pattern_cache lock */
struct list_head ptrn_list_node;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
index fed2d913f3b8..50a81d360bb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c
@@ -183,7 +183,7 @@ static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool,
*seg = -1;
/* Find the next free place from the buddy array */
- while (*seg == -1) {
+ while (*seg < 0) {
for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) {
buddy = hws_pool_buddy_get_next_buddy(pool, i,
order,
@@ -194,7 +194,7 @@ static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool,
}
*seg = mlx5hws_buddy_alloc_mem(buddy, order);
- if (*seg != -1)
+ if (*seg >= 0)
goto found;
if (pool->flags & MLX5HWS_POOL_FLAGS_ONE_RESOURCE) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h
index de92cecbeb92..271490a51b96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h
@@ -390,11 +390,6 @@ struct mlx5_ifc_definer_bits {
u8 match_mask[0x160];
};
-struct mlx5_ifc_arg_bits {
- u8 rsvd0[0x88];
- u8 access_pd[0x18];
-};
-
struct mlx5_ifc_header_modify_pattern_in_bits {
u8 modify_field_select[0x40];
@@ -428,11 +423,6 @@ struct mlx5_ifc_create_definer_in_bits {
struct mlx5_ifc_definer_bits definer;
};
-struct mlx5_ifc_create_arg_in_bits {
- struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
- struct mlx5_ifc_arg_bits arg;
-};
-
struct mlx5_ifc_create_header_modify_pattern_in_bits {
struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr;
struct mlx5_ifc_header_modify_pattern_in_bits pattern;
@@ -479,36 +469,4 @@ enum {
MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL = 1,
};
-struct mlx5_ifc_alloc_packet_reformat_out_bits {
- u8 status[0x8];
- u8 reserved_at_8[0x18];
-
- u8 syndrome[0x20];
-
- u8 packet_reformat_id[0x20];
-
- u8 reserved_at_60[0x20];
-};
-
-struct mlx5_ifc_dealloc_packet_reformat_in_bits {
- u8 opcode[0x10];
- u8 reserved_at_10[0x10];
-
- u8 reserved_at_20[0x10];
- u8 op_mod[0x10];
-
- u8 packet_reformat_id[0x20];
-
- u8 reserved_at_60[0x20];
-};
-
-struct mlx5_ifc_dealloc_packet_reformat_out_bits {
- u8 status[0x8];
- u8 reserved_at_8[0x18];
-
- u8 syndrome[0x20];
-
- u8 reserved_at_40[0x40];
-};
-
#endif /* MLX5_PRM_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
index e20c67a04203..a27a2d5ffc7b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c
@@ -129,27 +129,18 @@ static void hws_rule_gen_comp(struct mlx5hws_send_engine *queue,
static void
hws_rule_save_resize_info(struct mlx5hws_rule *rule,
- struct mlx5hws_send_ste_attr *ste_attr,
- bool is_update)
+ struct mlx5hws_send_ste_attr *ste_attr)
{
if (!mlx5hws_matcher_is_resizable(rule->matcher))
return;
- if (likely(!is_update)) {
+ /* resize_info might already exist (if we're in update flow) */
+ if (likely(!rule->resize_info)) {
rule->resize_info = kzalloc(sizeof(*rule->resize_info), GFP_KERNEL);
if (unlikely(!rule->resize_info)) {
pr_warn("HWS: resize info isn't allocated for rule\n");
return;
}
-
- rule->resize_info->max_stes =
- rule->matcher->action_ste[MLX5HWS_ACTION_STE_IDX_ANY].max_stes;
- rule->resize_info->action_ste_pool[0] = rule->matcher->action_ste[0].max_stes ?
- rule->matcher->action_ste[0].pool :
- NULL;
- rule->resize_info->action_ste_pool[1] = rule->matcher->action_ste[1].max_stes ?
- rule->matcher->action_ste[1].pool :
- NULL;
}
memcpy(rule->resize_info->ctrl_seg, ste_attr->wqe_ctrl,
@@ -204,15 +195,14 @@ hws_rule_load_delete_info(struct mlx5hws_rule *rule,
}
}
-static int hws_rule_alloc_action_ste_idx(struct mlx5hws_rule *rule,
- u8 action_ste_selector)
+static int hws_rule_alloc_action_ste(struct mlx5hws_rule *rule)
{
struct mlx5hws_matcher *matcher = rule->matcher;
struct mlx5hws_matcher_action_ste *action_ste;
struct mlx5hws_pool_chunk ste = {0};
int ret;
- action_ste = &matcher->action_ste[action_ste_selector];
+ action_ste = &matcher->action_ste;
ste.order = ilog2(roundup_pow_of_two(action_ste->max_stes));
ret = mlx5hws_pool_chunk_alloc(action_ste->pool, &ste);
if (unlikely(ret)) {
@@ -220,68 +210,29 @@ static int hws_rule_alloc_action_ste_idx(struct mlx5hws_rule *rule,
"Failed to allocate STE for rule actions");
return ret;
}
- rule->action_ste_idx = ste.offset;
+
+ rule->action_ste.pool = matcher->action_ste.pool;
+ rule->action_ste.num_stes = matcher->action_ste.max_stes;
+ rule->action_ste.index = ste.offset;
return 0;
}
-static void hws_rule_free_action_ste_idx(struct mlx5hws_rule *rule,
- u8 action_ste_selector)
+void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste)
{
- struct mlx5hws_matcher *matcher = rule->matcher;
struct mlx5hws_pool_chunk ste = {0};
- struct mlx5hws_pool *pool;
- u8 max_stes;
-
- if (mlx5hws_matcher_is_resizable(matcher)) {
- /* Free the original action pool if rule was resized */
- max_stes = rule->resize_info->max_stes;
- pool = rule->resize_info->action_ste_pool[action_ste_selector];
- } else {
- max_stes = matcher->action_ste[action_ste_selector].max_stes;
- pool = matcher->action_ste[action_ste_selector].pool;
- }
-
- /* This release is safe only when the rule match part was deleted */
- ste.order = ilog2(roundup_pow_of_two(max_stes));
- ste.offset = rule->action_ste_idx;
-
- mlx5hws_pool_chunk_free(pool, &ste);
-}
-static int hws_rule_alloc_action_ste(struct mlx5hws_rule *rule,
- struct mlx5hws_rule_attr *attr)
-{
- int action_ste_idx;
- int ret;
-
- ret = hws_rule_alloc_action_ste_idx(rule, 0);
- if (unlikely(ret))
- return ret;
-
- action_ste_idx = rule->action_ste_idx;
-
- ret = hws_rule_alloc_action_ste_idx(rule, 1);
- if (unlikely(ret)) {
- hws_rule_free_action_ste_idx(rule, 0);
- return ret;
- }
-
- /* Both pools have to return the same index */
- if (unlikely(rule->action_ste_idx != action_ste_idx)) {
- pr_warn("HWS: allocation of action STE failed - pool indexes mismatch\n");
- return -EINVAL;
- }
+ if (!action_ste->num_stes)
+ return;
- return 0;
-}
+ ste.order = ilog2(roundup_pow_of_two(action_ste->num_stes));
+ ste.offset = action_ste->index;
-void mlx5hws_rule_free_action_ste(struct mlx5hws_rule *rule)
-{
- if (rule->action_ste_idx > -1) {
- hws_rule_free_action_ste_idx(rule, 1);
- hws_rule_free_action_ste_idx(rule, 0);
- }
+ /* This release is safe only when the rule match STE was deleted
+ * (when the rule is being deleted) or replaced with the new STE that
+ * isn't pointing to old action STEs (when the rule is being updated).
+ */
+ mlx5hws_pool_chunk_free(action_ste->pool, &ste);
}
static void hws_rule_create_init(struct mlx5hws_rule *rule,
@@ -298,14 +249,24 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule,
/* In update we use these rtc's */
rule->rtc_0 = 0;
rule->rtc_1 = 0;
- rule->action_ste_selector = 0;
+
+ rule->action_ste.pool = NULL;
+ rule->action_ste.num_stes = 0;
+ rule->action_ste.index = -1;
+
+ rule->status = MLX5HWS_RULE_STATUS_CREATING;
} else {
- rule->action_ste_selector = !rule->action_ste_selector;
+ rule->status = MLX5HWS_RULE_STATUS_UPDATING;
}
+ /* Initialize the old action STE info - shallow-copy action_ste.
+ * In create flow this will set old_action_ste fields to initial values.
+ * In update flow this will save the existing action STE info,
+ * so that we will later use it to free old STEs.
+ */
+ rule->old_action_ste = rule->action_ste;
+
rule->pending_wqes = 0;
- rule->action_ste_idx = -1;
- rule->status = MLX5HWS_RULE_STATUS_CREATING;
/* Init default send STE attributes */
ste_attr->gta_opcode = MLX5HWS_WQE_GTA_OP_ACTIVATE;
@@ -315,8 +276,8 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule,
/* Init default action apply */
apply->tbl_type = tbl->type;
- apply->common_res = &ctx->common_res[tbl->type];
- apply->jump_to_action_stc = matcher->action_ste[0].stc.offset;
+ apply->common_res = &ctx->common_res;
+ apply->jump_to_action_stc = matcher->action_ste.stc.offset;
apply->require_dep = 0;
}
@@ -332,8 +293,6 @@ static void hws_rule_move_init(struct mlx5hws_rule *rule,
rule->rtc_1 = 0;
rule->pending_wqes = 0;
- rule->action_ste_idx = -1;
- rule->action_ste_selector = 0;
rule->status = MLX5HWS_RULE_STATUS_CREATING;
rule->resize_info->state = MLX5HWS_RULE_RESIZE_STATE_WRITING;
}
@@ -394,21 +353,17 @@ static int hws_rule_create_hws(struct mlx5hws_rule *rule,
if (action_stes) {
/* Allocate action STEs for rules that need more than match STE */
- if (!is_update) {
- ret = hws_rule_alloc_action_ste(rule, attr);
- if (ret) {
- mlx5hws_err(ctx, "Failed to allocate action memory %d", ret);
- mlx5hws_send_abort_new_dep_wqe(queue);
- return ret;
- }
+ ret = hws_rule_alloc_action_ste(rule);
+ if (ret) {
+ mlx5hws_err(ctx, "Failed to allocate action memory %d", ret);
+ mlx5hws_send_abort_new_dep_wqe(queue);
+ return ret;
}
/* Skip RX/TX based on the dep_wqe init */
- ste_attr.rtc_0 = dep_wqe->rtc_0 ?
- matcher->action_ste[rule->action_ste_selector].rtc_0_id : 0;
- ste_attr.rtc_1 = dep_wqe->rtc_1 ?
- matcher->action_ste[rule->action_ste_selector].rtc_1_id : 0;
+ ste_attr.rtc_0 = dep_wqe->rtc_0 ? matcher->action_ste.rtc_0_id : 0;
+ ste_attr.rtc_1 = dep_wqe->rtc_1 ? matcher->action_ste.rtc_1_id : 0;
/* Action STEs are written to a specific index last to first */
- ste_attr.direct_index = rule->action_ste_idx + action_stes;
+ ste_attr.direct_index = rule->action_ste.index + action_stes;
apply.next_direct_idx = ste_attr.direct_index;
} else {
apply.next_direct_idx = 0;
@@ -459,7 +414,7 @@ static int hws_rule_create_hws(struct mlx5hws_rule *rule,
if (!is_update)
hws_rule_save_delete_info(rule, &ste_attr);
- hws_rule_save_resize_info(rule, &ste_attr, is_update);
+ hws_rule_save_resize_info(rule, &ste_attr);
mlx5hws_send_engine_inc_rule(queue);
if (!attr->burst)
@@ -480,7 +435,10 @@ static void hws_rule_destroy_failed_hws(struct mlx5hws_rule *rule,
attr->user_data, MLX5HWS_RULE_STATUS_DELETED);
/* Rule failed now we can safely release action STEs */
- mlx5hws_rule_free_action_ste(rule);
+ mlx5hws_rule_free_action_ste(&rule->action_ste);
+
+ /* Perhaps the rule failed updating - release old action STEs as well */
+ mlx5hws_rule_free_action_ste(&rule->old_action_ste);
/* Clear complex tag */
hws_rule_clear_delete_info(rule);
@@ -517,7 +475,8 @@ static int hws_rule_destroy_hws(struct mlx5hws_rule *rule,
}
/* Rule is not completed yet */
- if (rule->status == MLX5HWS_RULE_STATUS_CREATING)
+ if (rule->status == MLX5HWS_RULE_STATUS_CREATING ||
+ rule->status == MLX5HWS_RULE_STATUS_UPDATING)
return -EBUSY;
/* Rule failed and doesn't require cleanup */
@@ -534,7 +493,7 @@ static int hws_rule_destroy_hws(struct mlx5hws_rule *rule,
hws_rule_gen_comp(queue, rule, false,
attr->user_data, MLX5HWS_RULE_STATUS_DELETED);
- mlx5hws_rule_free_action_ste(rule);
+ mlx5hws_rule_free_action_ste(&rule->action_ste);
mlx5hws_rule_clear_resize_info(rule);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
index 495cdd17e9f3..b5ee94ac449b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.h
@@ -15,6 +15,8 @@ enum mlx5hws_rule_status {
MLX5HWS_RULE_STATUS_UNKNOWN,
MLX5HWS_RULE_STATUS_CREATING,
MLX5HWS_RULE_STATUS_CREATED,
+ MLX5HWS_RULE_STATUS_UPDATING,
+ MLX5HWS_RULE_STATUS_UPDATED,
MLX5HWS_RULE_STATUS_DELETING,
MLX5HWS_RULE_STATUS_DELETED,
MLX5HWS_RULE_STATUS_FAILING,
@@ -41,13 +43,17 @@ struct mlx5hws_rule_match_tag {
};
};
+struct mlx5hws_rule_action_ste_info {
+ struct mlx5hws_pool *pool;
+ int index; /* STE array index */
+ u8 num_stes;
+};
+
struct mlx5hws_rule_resize_info {
- struct mlx5hws_pool *action_ste_pool[2];
u32 rtc_0;
u32 rtc_1;
u32 rule_idx;
u8 state;
- u8 max_stes;
u8 ctrl_seg[MLX5HWS_WQE_SZ_GTA_CTRL]; /* Ctrl segment of STE: 48 bytes */
u8 data_seg[MLX5HWS_WQE_SZ_GTA_DATA]; /* Data segment of STE: 64 bytes */
};
@@ -58,18 +64,18 @@ struct mlx5hws_rule {
struct mlx5hws_rule_match_tag tag;
struct mlx5hws_rule_resize_info *resize_info;
};
+ struct mlx5hws_rule_action_ste_info action_ste;
+ struct mlx5hws_rule_action_ste_info old_action_ste;
u32 rtc_0; /* The RTC into which the STE was inserted */
u32 rtc_1; /* The RTC into which the STE was inserted */
- int action_ste_idx; /* STE array index */
u8 status; /* enum mlx5hws_rule_status */
- u8 action_ste_selector; /* For rule update - which action STE is in use */
u8 pending_wqes;
bool skip_delete; /* For complex rules - another rule with same tag
* still exists, so don't actually delete this rule.
*/
};
-void mlx5hws_rule_free_action_ste(struct mlx5hws_rule *rule);
+void mlx5hws_rule_free_action_ste(struct mlx5hws_rule_action_ste_info *action_ste);
int mlx5hws_rule_move_hws_remove(struct mlx5hws_rule *rule,
void *queue, void *user_data);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c
index 20fe126ffd22..cb6abc4ab7df 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c
@@ -377,17 +377,25 @@ static void hws_send_engine_update_rule(struct mlx5hws_send_engine *queue,
*status = MLX5HWS_FLOW_OP_ERROR;
} else {
- /* Increase the status, this only works on good flow as the enum
- * is arrange it away creating -> created -> deleting -> deleted
+ /* Increase the status, this only works on good flow as
+ * the enum is arranged this way:
+ * - creating -> created
+ * - updating -> updated
+ * - deleting -> deleted
*/
priv->rule->status++;
*status = MLX5HWS_FLOW_OP_SUCCESS;
- /* Rule was deleted now we can safely release action STEs
- * and clear resize info
- */
if (priv->rule->status == MLX5HWS_RULE_STATUS_DELETED) {
- mlx5hws_rule_free_action_ste(priv->rule);
+ /* Rule was deleted, now we can safely release
+ * action STEs and clear resize info
+ */
+ mlx5hws_rule_free_action_ste(&priv->rule->action_ste);
mlx5hws_rule_clear_resize_info(priv->rule);
+ } else if (priv->rule->status == MLX5HWS_RULE_STATUS_UPDATED) {
+ /* Rule was updated, free the old action STEs */
+ mlx5hws_rule_free_action_ste(&priv->rule->old_action_ste);
+ /* Update completed - move the rule back to "created" */
+ priv->rule->status = MLX5HWS_RULE_STATUS_CREATED;
}
}
}
@@ -633,6 +641,7 @@ static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn,
MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST);
MLX5_SET(sqc, sqc, flush_in_error_en, 1);
+ MLX5_SET(sqc, sqc, non_wire, 1);
ts_format = mlx5_is_real_time_sq(mdev) ? MLX5_TIMESTAMP_FORMAT_REAL_TIME :
MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c
index 9576e02d00c3..ab1297531232 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c
@@ -37,6 +37,7 @@ static void hws_table_set_cap_attr(struct mlx5hws_table *tbl,
}
static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl)
+__must_hold(&tbl->ctx->ctrl_lock)
{
struct mlx5hws_cmd_ft_create_attr ft_attr = {0};
struct mlx5hws_cmd_set_fte_attr fte_attr = {0};
@@ -48,8 +49,8 @@ static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl)
if (tbl->type != MLX5HWS_TABLE_TYPE_FDB)
return 0;
- if (ctx->common_res[tbl_type].default_miss) {
- ctx->common_res[tbl_type].default_miss->refcount++;
+ if (ctx->common_res.default_miss) {
+ ctx->common_res.default_miss->refcount++;
return 0;
}
@@ -70,29 +71,28 @@ static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl)
return -EINVAL;
}
- /* ctx->ctrl_lock must be held here */
- ctx->common_res[tbl_type].default_miss = default_miss;
- ctx->common_res[tbl_type].default_miss->refcount++;
+ ctx->common_res.default_miss = default_miss;
+ ctx->common_res.default_miss->refcount++;
return 0;
}
/* Called under ctx->ctrl_lock */
static void hws_table_down_default_fdb_miss_tbl(struct mlx5hws_table *tbl)
+__must_hold(&tbl->ctx->ctrl_lock)
{
struct mlx5hws_cmd_forward_tbl *default_miss;
struct mlx5hws_context *ctx = tbl->ctx;
- u8 tbl_type = tbl->type;
if (tbl->type != MLX5HWS_TABLE_TYPE_FDB)
return;
- default_miss = ctx->common_res[tbl_type].default_miss;
+ default_miss = ctx->common_res.default_miss;
if (--default_miss->refcount)
return;
mlx5hws_cmd_forward_tbl_destroy(ctx->mdev, default_miss);
- ctx->common_res[tbl_type].default_miss = NULL;
+ ctx->common_res.default_miss = NULL;
}
static int hws_table_connect_to_default_miss_tbl(struct mlx5hws_table *tbl, u32 ft_id)
@@ -478,15 +478,9 @@ int mlx5hws_table_set_default_miss(struct mlx5hws_table *tbl,
if (old_miss_tbl)
list_del_init(&tbl->default_miss.next);
- old_miss_tbl = tbl->default_miss.miss_tbl;
- if (old_miss_tbl)
- list_del_init(&old_miss_tbl->default_miss.head);
-
if (miss_tbl)
list_add(&tbl->default_miss.next, &miss_tbl->default_miss.head);
- mutex_unlock(&ctx->ctrl_lock);
- return 0;
out:
mutex_unlock(&ctx->ctrl_lock);
return ret;
diff --git a/drivers/net/ethernet/meta/fbnic/Makefile b/drivers/net/ethernet/meta/fbnic/Makefile
index 239b2258ec65..ea6214ca48e7 100644
--- a/drivers/net/ethernet/meta/fbnic/Makefile
+++ b/drivers/net/ethernet/meta/fbnic/Makefile
@@ -13,7 +13,6 @@ fbnic-y := fbnic_csr.o \
fbnic_ethtool.o \
fbnic_fw.o \
fbnic_hw_stats.o \
- fbnic_hwmon.o \
fbnic_irq.o \
fbnic_mac.o \
fbnic_netdev.o \
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h
index 14751f16e125..50f97f5399ff 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic.h
@@ -24,7 +24,6 @@ struct fbnic_dev {
struct device *dev;
struct net_device *netdev;
struct dentry *dbg_fbd;
- struct device *hwmon;
u32 __iomem *uc_addr0;
u32 __iomem *uc_addr4;
@@ -42,7 +41,6 @@ struct fbnic_dev {
struct fbnic_fw_mbx mbx[FBNIC_IPC_MBX_INDICES];
struct fbnic_fw_cap fw_cap;
- struct fbnic_fw_completion *cmpl_data;
/* Lock protecting Tx Mailbox queue to prevent possible races */
spinlock_t fw_tx_lock;
@@ -151,9 +149,6 @@ void fbnic_devlink_unregister(struct fbnic_dev *fbd);
int fbnic_fw_enable_mbx(struct fbnic_dev *fbd);
void fbnic_fw_disable_mbx(struct fbnic_dev *fbd);
-void fbnic_hwmon_register(struct fbnic_dev *fbd);
-void fbnic_hwmon_unregister(struct fbnic_dev *fbd);
-
int fbnic_pcs_irq_enable(struct fbnic_dev *fbd);
void fbnic_pcs_irq_disable(struct fbnic_dev *fbd);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
index 7cd8841920e4..221faf8c6756 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h
@@ -44,13 +44,6 @@ struct fbnic_fw_cap {
u8 link_fec;
};
-struct fbnic_fw_completion {
- struct {
- s32 millivolts;
- s32 millidegrees;
- } tsene;
-};
-
void fbnic_mbx_init(struct fbnic_dev *fbd);
void fbnic_mbx_clean(struct fbnic_dev *fbd);
void fbnic_mbx_poll(struct fbnic_dev *fbd);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hwmon.c b/drivers/net/ethernet/meta/fbnic/fbnic_hwmon.c
deleted file mode 100644
index bcd1086e3768..000000000000
--- a/drivers/net/ethernet/meta/fbnic/fbnic_hwmon.c
+++ /dev/null
@@ -1,81 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) Meta Platforms, Inc. and affiliates. */
-
-#include <linux/hwmon.h>
-
-#include "fbnic.h"
-#include "fbnic_mac.h"
-
-static int fbnic_hwmon_sensor_id(enum hwmon_sensor_types type)
-{
- if (type == hwmon_temp)
- return FBNIC_SENSOR_TEMP;
- if (type == hwmon_in)
- return FBNIC_SENSOR_VOLTAGE;
-
- return -EOPNOTSUPP;
-}
-
-static umode_t fbnic_hwmon_is_visible(const void *drvdata,
- enum hwmon_sensor_types type,
- u32 attr, int channel)
-{
- if (type == hwmon_temp && attr == hwmon_temp_input)
- return 0444;
- if (type == hwmon_in && attr == hwmon_in_input)
- return 0444;
-
- return 0;
-}
-
-static int fbnic_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
- u32 attr, int channel, long *val)
-{
- struct fbnic_dev *fbd = dev_get_drvdata(dev);
- const struct fbnic_mac *mac = fbd->mac;
- int id;
-
- id = fbnic_hwmon_sensor_id(type);
- return id < 0 ? id : mac->get_sensor(fbd, id, val);
-}
-
-static const struct hwmon_ops fbnic_hwmon_ops = {
- .is_visible = fbnic_hwmon_is_visible,
- .read = fbnic_hwmon_read,
-};
-
-static const struct hwmon_channel_info *fbnic_hwmon_info[] = {
- HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
- HWMON_CHANNEL_INFO(in, HWMON_I_INPUT),
- NULL
-};
-
-static const struct hwmon_chip_info fbnic_chip_info = {
- .ops = &fbnic_hwmon_ops,
- .info = fbnic_hwmon_info,
-};
-
-void fbnic_hwmon_register(struct fbnic_dev *fbd)
-{
- if (!IS_REACHABLE(CONFIG_HWMON))
- return;
-
- fbd->hwmon = hwmon_device_register_with_info(fbd->dev, "fbnic",
- fbd, &fbnic_chip_info,
- NULL);
- if (IS_ERR(fbd->hwmon)) {
- dev_notice(fbd->dev,
- "Failed to register hwmon device %pe\n",
- fbd->hwmon);
- fbd->hwmon = NULL;
- }
-}
-
-void fbnic_hwmon_unregister(struct fbnic_dev *fbd)
-{
- if (!IS_REACHABLE(CONFIG_HWMON) || !fbd->hwmon)
- return;
-
- hwmon_device_unregister(fbd->hwmon);
- fbd->hwmon = NULL;
-}
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
index 80b82ff12c4d..7b654d0a6dac 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c
@@ -686,27 +686,6 @@ fbnic_mac_get_eth_mac_stats(struct fbnic_dev *fbd, bool reset,
MAC_STAT_TX_BROADCAST);
}
-static int fbnic_mac_get_sensor_asic(struct fbnic_dev *fbd, int id, long *val)
-{
- struct fbnic_fw_completion fw_cmpl;
- s32 *sensor;
-
- switch (id) {
- case FBNIC_SENSOR_TEMP:
- sensor = &fw_cmpl.tsene.millidegrees;
- break;
- case FBNIC_SENSOR_VOLTAGE:
- sensor = &fw_cmpl.tsene.millivolts;
- break;
- default:
- return -EINVAL;
- }
-
- *val = *sensor;
-
- return 0;
-}
-
static const struct fbnic_mac fbnic_mac_asic = {
.init_regs = fbnic_mac_init_regs,
.pcs_enable = fbnic_pcs_enable_asic,
@@ -716,7 +695,6 @@ static const struct fbnic_mac fbnic_mac_asic = {
.get_eth_mac_stats = fbnic_mac_get_eth_mac_stats,
.link_down = fbnic_mac_link_down_asic,
.link_up = fbnic_mac_link_up_asic,
- .get_sensor = fbnic_mac_get_sensor_asic,
};
/**
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
index 05a591653e09..476239a9d381 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h
@@ -47,11 +47,6 @@ enum {
#define FBNIC_LINK_MODE_PAM4 (FBNIC_LINK_50R1)
#define FBNIC_LINK_MODE_MASK (FBNIC_LINK_AUTO - 1)
-enum fbnic_sensor_id {
- FBNIC_SENSOR_TEMP, /* Temp in millidegrees Centigrade */
- FBNIC_SENSOR_VOLTAGE, /* Voltage in millivolts */
-};
-
/* This structure defines the interface hooks for the MAC. The MAC hooks
* will be configured as a const struct provided with a set of function
* pointers.
@@ -88,8 +83,6 @@ struct fbnic_mac {
void (*link_down)(struct fbnic_dev *fbd);
void (*link_up)(struct fbnic_dev *fbd, bool tx_pause, bool rx_pause);
-
- int (*get_sensor)(struct fbnic_dev *fbd, int id, long *val);
};
int fbnic_mac_init(struct fbnic_dev *fbd);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
index 6cbbc2ee3e1f..2c96980d150d 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c
@@ -296,8 +296,6 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Capture snapshot of hardware stats so netdev can calculate delta */
fbnic_reset_hw_stats(fbd);
- fbnic_hwmon_register(fbd);
-
if (!fbd->dsn) {
dev_warn(&pdev->dev, "Reading serial number failed\n");
goto init_failure_mode;
@@ -360,7 +358,6 @@ static void fbnic_remove(struct pci_dev *pdev)
fbnic_netdev_free(fbd);
}
- fbnic_hwmon_unregister(fbd);
fbnic_dbg_fbd_exit(fbd);
fbnic_devlink_unregister(fbd);
fbnic_fw_disable_mbx(fbd);
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
index bb54ce5f5787..d4d7027df9a0 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c
@@ -1033,7 +1033,7 @@ static int fbnic_poll(struct napi_struct *napi, int budget)
if (likely(napi_complete_done(napi, work_done)))
fbnic_nv_irq_rearm(nv);
- return 0;
+ return work_done;
}
irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data)
diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
index e0817f2a311a..7a194a8ab989 100644
--- a/drivers/net/ethernet/realtek/r8169.h
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -70,6 +70,7 @@ enum mac_version {
RTL_GIGA_MAC_VER_63,
RTL_GIGA_MAC_VER_64,
RTL_GIGA_MAC_VER_65,
+ RTL_GIGA_MAC_VER_66,
RTL_GIGA_MAC_VER_70,
RTL_GIGA_MAC_VER_71,
RTL_GIGA_MAC_NONE
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5724f650f9c6..4b77f2151204 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -58,6 +58,7 @@
#define FIRMWARE_8125B_2 "rtl_nic/rtl8125b-2.fw"
#define FIRMWARE_8125D_1 "rtl_nic/rtl8125d-1.fw"
#define FIRMWARE_8125D_2 "rtl_nic/rtl8125d-2.fw"
+#define FIRMWARE_8125BP_2 "rtl_nic/rtl8125bp-2.fw"
#define FIRMWARE_8126A_2 "rtl_nic/rtl8126a-2.fw"
#define FIRMWARE_8126A_3 "rtl_nic/rtl8126a-3.fw"
@@ -142,6 +143,7 @@ static const struct {
[RTL_GIGA_MAC_VER_63] = {"RTL8125B", FIRMWARE_8125B_2},
[RTL_GIGA_MAC_VER_64] = {"RTL8125D", FIRMWARE_8125D_1},
[RTL_GIGA_MAC_VER_65] = {"RTL8125D", FIRMWARE_8125D_2},
+ [RTL_GIGA_MAC_VER_66] = {"RTL8125BP", FIRMWARE_8125BP_2},
[RTL_GIGA_MAC_VER_70] = {"RTL8126A", FIRMWARE_8126A_2},
[RTL_GIGA_MAC_VER_71] = {"RTL8126A", FIRMWARE_8126A_3},
};
@@ -632,6 +634,7 @@ enum rtl_dash_type {
RTL_DASH_NONE,
RTL_DASH_DP,
RTL_DASH_EP,
+ RTL_DASH_25_BP,
};
struct rtl8169_private {
@@ -709,6 +712,7 @@ MODULE_FIRMWARE(FIRMWARE_8125A_3);
MODULE_FIRMWARE(FIRMWARE_8125B_2);
MODULE_FIRMWARE(FIRMWARE_8125D_1);
MODULE_FIRMWARE(FIRMWARE_8125D_2);
+MODULE_FIRMWARE(FIRMWARE_8125BP_2);
MODULE_FIRMWARE(FIRMWARE_8126A_2);
MODULE_FIRMWARE(FIRMWARE_8126A_3);
@@ -1361,10 +1365,19 @@ static void rtl8168ep_driver_start(struct rtl8169_private *tp)
rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
}
+static void rtl8125bp_driver_start(struct rtl8169_private *tp)
+{
+ r8168ep_ocp_write(tp, 0x01, 0x14, OOB_CMD_DRIVER_START);
+ r8168ep_ocp_write(tp, 0x01, 0x18, 0x00);
+ r8168ep_ocp_write(tp, 0x01, 0x10, 0x01);
+}
+
static void rtl8168_driver_start(struct rtl8169_private *tp)
{
if (tp->dash_type == RTL_DASH_DP)
rtl8168dp_driver_start(tp);
+ else if (tp->dash_type == RTL_DASH_25_BP)
+ rtl8125bp_driver_start(tp);
else
rtl8168ep_driver_start(tp);
}
@@ -1385,10 +1398,19 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
}
+static void rtl8125bp_driver_stop(struct rtl8169_private *tp)
+{
+ r8168ep_ocp_write(tp, 0x01, 0x14, OOB_CMD_DRIVER_STOP);
+ r8168ep_ocp_write(tp, 0x01, 0x18, 0x00);
+ r8168ep_ocp_write(tp, 0x01, 0x10, 0x01);
+}
+
static void rtl8168_driver_stop(struct rtl8169_private *tp)
{
if (tp->dash_type == RTL_DASH_DP)
rtl8168dp_driver_stop(tp);
+ else if (tp->dash_type == RTL_DASH_25_BP)
+ rtl8125bp_driver_stop(tp);
else
rtl8168ep_driver_stop(tp);
}
@@ -1411,6 +1433,7 @@ static bool rtl_dash_is_enabled(struct rtl8169_private *tp)
case RTL_DASH_DP:
return r8168dp_check_dash(tp);
case RTL_DASH_EP:
+ case RTL_DASH_25_BP:
return r8168ep_check_dash(tp);
default:
return false;
@@ -1425,6 +1448,8 @@ static enum rtl_dash_type rtl_get_dash_type(struct rtl8169_private *tp)
return RTL_DASH_DP;
case RTL_GIGA_MAC_VER_51 ... RTL_GIGA_MAC_VER_53:
return RTL_DASH_EP;
+ case RTL_GIGA_MAC_VER_66:
+ return RTL_DASH_25_BP;
default:
return RTL_DASH_NONE;
}
@@ -2261,6 +2286,9 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii)
{ 0x7cf, 0x64a, RTL_GIGA_MAC_VER_71 },
{ 0x7cf, 0x649, RTL_GIGA_MAC_VER_70 },
+ /* 8125BP family. */
+ { 0x7cf, 0x681, RTL_GIGA_MAC_VER_66 },
+
/* 8125D family. */
{ 0x7cf, 0x689, RTL_GIGA_MAC_VER_65 },
{ 0x7cf, 0x688, RTL_GIGA_MAC_VER_64 },
@@ -3842,6 +3870,7 @@ static void rtl_hw_config(struct rtl8169_private *tp)
[RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b,
[RTL_GIGA_MAC_VER_64] = rtl_hw_start_8125d,
[RTL_GIGA_MAC_VER_65] = rtl_hw_start_8125d,
+ [RTL_GIGA_MAC_VER_66] = rtl_hw_start_8125d,
[RTL_GIGA_MAC_VER_70] = rtl_hw_start_8126a,
[RTL_GIGA_MAC_VER_71] = rtl_hw_start_8126a,
};
@@ -3861,6 +3890,7 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp)
case RTL_GIGA_MAC_VER_61:
case RTL_GIGA_MAC_VER_64:
case RTL_GIGA_MAC_VER_65:
+ case RTL_GIGA_MAC_VER_66:
for (i = 0xa00; i < 0xb00; i += 4)
RTL_W32(tp, i, 0);
break;
diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
index 968c8a2185a4..cf95e579c65d 100644
--- a/drivers/net/ethernet/realtek/r8169_phy_config.c
+++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
@@ -1102,6 +1102,28 @@ static void rtl8125d_hw_phy_config(struct rtl8169_private *tp,
rtl8125_config_eee_phy(phydev);
}
+static void rtl8125bp_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ r8169_apply_firmware(tp);
+ rtl8168g_enable_gphy_10m(phydev);
+
+ r8168g_phy_param(phydev, 0x8010, 0x0800, 0x0000);
+
+ phy_write(phydev, 0x1f, 0x0b87);
+ phy_write(phydev, 0x16, 0x8088);
+ phy_modify(phydev, 0x17, 0xff00, 0x9000);
+ phy_write(phydev, 0x16, 0x808f);
+ phy_modify(phydev, 0x17, 0xff00, 0x9000);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ r8168g_phy_param(phydev, 0x8174, 0x2000, 0x1800);
+
+ rtl8125_legacy_force_mode(phydev);
+ rtl8168g_disable_aldps(phydev);
+ rtl8125_config_eee_phy(phydev);
+}
+
static void rtl8126a_hw_phy_config(struct rtl8169_private *tp,
struct phy_device *phydev)
{
@@ -1163,6 +1185,7 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev,
[RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config,
[RTL_GIGA_MAC_VER_64] = rtl8125d_hw_phy_config,
[RTL_GIGA_MAC_VER_65] = rtl8125d_hw_phy_config,
+ [RTL_GIGA_MAC_VER_66] = rtl8125bp_hw_phy_config,
[RTL_GIGA_MAC_VER_70] = rtl8126a_hw_phy_config,
[RTL_GIGA_MAC_VER_71] = rtl8126a_hw_phy_config,
};
diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c
index 585d0b21c9e0..3bd11cb56294 100644
--- a/drivers/net/ethernet/realtek/rtase/rtase_main.c
+++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c
@@ -1828,7 +1828,7 @@ static int rtase_alloc_msix(struct pci_dev *pdev, struct rtase_private *tp)
for (i = 0; i < tp->int_nums; i++) {
irq = pci_irq_vector(pdev, i);
- if (!irq) {
+ if (irq < 0) {
pci_disable_msix(pdev);
return irq;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index eabc4da9e1a9..de9b6dfef15b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -313,7 +313,6 @@ static void sti_dwmac_remove(struct platform_device *pdev)
clk_disable_unprepare(dwmac->clk);
}
-#ifdef CONFIG_PM_SLEEP
static int sti_dwmac_suspend(struct device *dev)
{
struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev);
@@ -333,10 +332,9 @@ static int sti_dwmac_resume(struct device *dev)
return stmmac_resume(dev);
}
-#endif /* CONFIG_PM_SLEEP */
-static SIMPLE_DEV_PM_OPS(sti_dwmac_pm_ops, sti_dwmac_suspend,
- sti_dwmac_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(sti_dwmac_pm_ops, sti_dwmac_suspend,
+ sti_dwmac_resume);
static const struct sti_dwmac_of_data stih4xx_dwmac_data = {
.fix_retime_src = stih4xx_fix_retime_src,
@@ -353,7 +351,7 @@ static struct platform_driver sti_dwmac_driver = {
.remove = sti_dwmac_remove,
.driver = {
.name = "sti-dwmac",
- .pm = &sti_dwmac_pm_ops,
+ .pm = pm_sleep_ptr(&sti_dwmac_pm_ops),
.of_match_table = sti_dwmac_match,
},
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
index 3827997d2132..dc903b846b1b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/iommu.h>
#include <linux/platform_device.h>
#include <linux/of.h>
#include <linux/module.h>
@@ -19,6 +20,8 @@ struct tegra_mgbe {
struct reset_control *rst_mac;
struct reset_control *rst_pcs;
+ u32 iommu_sid;
+
void __iomem *hv;
void __iomem *regs;
void __iomem *xpcs;
@@ -50,7 +53,6 @@ struct tegra_mgbe {
#define MGBE_WRAP_COMMON_INTR_ENABLE 0x8704
#define MAC_SBD_INTR BIT(2)
#define MGBE_WRAP_AXI_ASID0_CTRL 0x8400
-#define MGBE_SID 0x6
static int __maybe_unused tegra_mgbe_suspend(struct device *dev)
{
@@ -84,7 +86,7 @@ static int __maybe_unused tegra_mgbe_resume(struct device *dev)
writel(MAC_SBD_INTR, mgbe->regs + MGBE_WRAP_COMMON_INTR_ENABLE);
/* Program SID */
- writel(MGBE_SID, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL);
+ writel(mgbe->iommu_sid, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL);
value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_STATUS);
if ((value & XPCS_WRAP_UPHY_STATUS_TX_P_UP) == 0) {
@@ -241,6 +243,12 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
if (IS_ERR(mgbe->xpcs))
return PTR_ERR(mgbe->xpcs);
+ /* get controller's stream id from iommu property in device tree */
+ if (!tegra_dev_iommu_get_stream_id(mgbe->dev, &mgbe->iommu_sid)) {
+ dev_err(mgbe->dev, "failed to get iommu stream id\n");
+ return -EINVAL;
+ }
+
res.addr = mgbe->regs;
res.irq = irq;
@@ -346,7 +354,7 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
writel(MAC_SBD_INTR, mgbe->regs + MGBE_WRAP_COMMON_INTR_ENABLE);
/* Program SID */
- writel(MGBE_SID, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL);
+ writel(mgbe->iommu_sid, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL);
plat->flags |= STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index c36f90a782c5..9ed8620580a8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -420,10 +420,10 @@ static void dwmac4_set_eee_pls(struct mac_device_info *hw, int link)
writel(value, ioaddr + GMAC4_LPI_CTRL_STATUS);
}
-static void dwmac4_set_eee_lpi_entry_timer(struct mac_device_info *hw, int et)
+static void dwmac4_set_eee_lpi_entry_timer(struct mac_device_info *hw, u32 et)
{
void __iomem *ioaddr = hw->pcsr;
- int value = et & STMMAC_ET_MAX;
+ u32 value = et & STMMAC_ET_MAX;
int regval;
/* Program LPI entry timer value into register */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 2f7295b6c1c5..0f200b72c225 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -363,7 +363,7 @@ struct stmmac_ops {
void (*set_eee_mode)(struct mac_device_info *hw,
bool en_tx_lpi_clockgating);
void (*reset_eee_mode)(struct mac_device_info *hw);
- void (*set_eee_lpi_entry_timer)(struct mac_device_info *hw, int et);
+ void (*set_eee_lpi_entry_timer)(struct mac_device_info *hw, u32 et);
void (*set_eee_timer)(struct mac_device_info *hw, int ls, int tw);
void (*set_eee_pls)(struct mac_device_info *hw, int link);
void (*debug)(struct stmmac_priv *priv, void __iomem *ioaddr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index b8d631e559c0..e8dbce20129c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -305,11 +305,9 @@ struct stmmac_priv {
int clk_csr;
struct timer_list eee_ctrl_timer;
int lpi_irq;
- int eee_enabled;
- int eee_active;
- int tx_lpi_timer;
- int tx_lpi_enabled;
- int eee_tw_timer;
+ u32 tx_lpi_timer;
+ bool eee_enabled;
+ bool eee_active;
bool eee_sw_timer_en;
unsigned int mode;
unsigned int chain_mode;
@@ -405,8 +403,6 @@ void stmmac_dvr_remove(struct device *dev);
int stmmac_dvr_probe(struct device *device,
struct plat_stmmacenet_data *plat_dat,
struct stmmac_resources *res);
-void stmmac_disable_eee_mode(struct stmmac_priv *priv);
-bool stmmac_eee_init(struct stmmac_priv *priv);
int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt);
int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size);
int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled);
@@ -416,14 +412,6 @@ static inline bool stmmac_xdp_is_enabled(struct stmmac_priv *priv)
return !!priv->xdp_prog;
}
-static inline unsigned int stmmac_rx_offset(struct stmmac_priv *priv)
-{
- if (stmmac_xdp_is_enabled(priv))
- return XDP_PACKET_HEADROOM;
-
- return 0;
-}
-
void stmmac_disable_rx_queue(struct stmmac_priv *priv, u32 queue);
void stmmac_enable_rx_queue(struct stmmac_priv *priv, u32 queue);
void stmmac_disable_tx_queue(struct stmmac_priv *priv, u32 queue);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 16b4d8c21c90..918a32f8fda8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -654,7 +654,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
(*(u32 *)p);
}
}
- if (priv->eee_enabled) {
+ if (priv->dma_cap.eee) {
int val = phylink_get_eee_err(priv->phylink);
if (val)
priv->xstats.phy_eee_wakeup_error_n = val;
@@ -898,9 +898,6 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev,
if (!priv->dma_cap.eee)
return -EOPNOTSUPP;
- edata->tx_lpi_timer = priv->tx_lpi_timer;
- edata->tx_lpi_enabled = priv->tx_lpi_enabled;
-
return phylink_ethtool_get_eee(priv->phylink, edata);
}
@@ -908,29 +905,11 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
struct ethtool_keee *edata)
{
struct stmmac_priv *priv = netdev_priv(dev);
- int ret;
if (!priv->dma_cap.eee)
return -EOPNOTSUPP;
- if (priv->tx_lpi_enabled != edata->tx_lpi_enabled)
- netdev_warn(priv->dev,
- "Setting EEE tx-lpi is not supported\n");
-
- if (!edata->eee_enabled)
- stmmac_disable_eee_mode(priv);
-
- ret = phylink_ethtool_set_eee(priv->phylink, edata);
- if (ret)
- return ret;
-
- if (edata->eee_enabled &&
- priv->tx_lpi_timer != edata->tx_lpi_timer) {
- priv->tx_lpi_timer = edata->tx_lpi_timer;
- stmmac_eee_init(priv);
- }
-
- return 0;
+ return phylink_ethtool_set_eee(priv->phylink, edata);
}
static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 99eaec8bac4a..58b013528dea 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -111,8 +111,8 @@ static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_IFDOWN | NETIF_MSG_TIMER);
#define STMMAC_DEFAULT_LPI_TIMER 1000
-static int eee_timer = STMMAC_DEFAULT_LPI_TIMER;
-module_param(eee_timer, int, 0644);
+static unsigned int eee_timer = STMMAC_DEFAULT_LPI_TIMER;
+module_param(eee_timer, uint, 0644);
MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
#define STMMAC_LPI_T(x) (jiffies + usecs_to_jiffies(x))
@@ -194,8 +194,6 @@ static void stmmac_verify_args(void)
flow_ctrl = FLOW_OFF;
if (unlikely((pause < 0) || (pause > 0xffff)))
pause = PAUSE_TIME;
- if (eee_timer < 0)
- eee_timer = STMMAC_DEFAULT_LPI_TIMER;
}
static void __stmmac_disable_all_queues(struct stmmac_priv *priv)
@@ -392,14 +390,14 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv, u32 queue)
return dirty;
}
-static void stmmac_lpi_entry_timer_config(struct stmmac_priv *priv, bool en)
+static void stmmac_disable_hw_lpi_timer(struct stmmac_priv *priv)
{
- int tx_lpi_timer;
+ stmmac_set_eee_lpi_timer(priv, priv->hw, 0);
+}
- /* Clear/set the SW EEE timer flag based on LPI ET enablement */
- priv->eee_sw_timer_en = en ? 0 : 1;
- tx_lpi_timer = en ? priv->tx_lpi_timer : 0;
- stmmac_set_eee_lpi_timer(priv, priv->hw, tx_lpi_timer);
+static void stmmac_enable_hw_lpi_timer(struct stmmac_priv *priv)
+{
+ stmmac_set_eee_lpi_timer(priv, priv->hw, priv->tx_lpi_timer);
}
/**
@@ -429,18 +427,13 @@ static int stmmac_enable_eee_mode(struct stmmac_priv *priv)
}
/**
- * stmmac_disable_eee_mode - disable and exit from LPI mode
+ * stmmac_disable_sw_eee_mode - disable and exit from LPI mode
* @priv: driver private structure
* Description: this function is to exit and disable EEE in case of
* LPI state is true. This is called by the xmit.
*/
-void stmmac_disable_eee_mode(struct stmmac_priv *priv)
+static void stmmac_disable_sw_eee_mode(struct stmmac_priv *priv)
{
- if (!priv->eee_sw_timer_en) {
- stmmac_lpi_entry_timer_config(priv, 0);
- return;
- }
-
stmmac_reset_eee_mode(priv, priv->hw);
del_timer_sync(&priv->eee_ctrl_timer);
priv->tx_path_in_lpi_mode = false;
@@ -464,18 +457,21 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t)
/**
* stmmac_eee_init - init EEE
* @priv: driver private structure
+ * @active: indicates whether EEE should be enabled.
* Description:
* if the GMAC supports the EEE (from the HW cap reg) and the phy device
* can also manage EEE, this function enable the LPI state and start related
* timer.
*/
-bool stmmac_eee_init(struct stmmac_priv *priv)
+static void stmmac_eee_init(struct stmmac_priv *priv, bool active)
{
- int eee_tw_timer = priv->eee_tw_timer;
+ priv->eee_active = active;
/* Check if MAC core supports the EEE feature. */
- if (!priv->dma_cap.eee)
- return false;
+ if (!priv->dma_cap.eee) {
+ priv->eee_enabled = false;
+ return;
+ }
mutex_lock(&priv->lock);
@@ -483,22 +479,24 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
if (!priv->eee_active) {
if (priv->eee_enabled) {
netdev_dbg(priv->dev, "disable EEE\n");
- stmmac_lpi_entry_timer_config(priv, 0);
+ priv->eee_sw_timer_en = true;
+ stmmac_disable_hw_lpi_timer(priv);
del_timer_sync(&priv->eee_ctrl_timer);
- stmmac_set_eee_timer(priv, priv->hw, 0, eee_tw_timer);
+ stmmac_set_eee_timer(priv, priv->hw, 0,
+ STMMAC_DEFAULT_TWT_LS);
if (priv->hw->xpcs)
xpcs_config_eee(priv->hw->xpcs,
priv->plat->mult_fact_100ns,
false);
}
+ priv->eee_enabled = false;
mutex_unlock(&priv->lock);
- return false;
+ return;
}
if (priv->eee_active && !priv->eee_enabled) {
- timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0);
stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS,
- eee_tw_timer);
+ STMMAC_DEFAULT_TWT_LS);
if (priv->hw->xpcs)
xpcs_config_eee(priv->hw->xpcs,
priv->plat->mult_fact_100ns,
@@ -506,18 +504,23 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
}
if (priv->plat->has_gmac4 && priv->tx_lpi_timer <= STMMAC_ET_MAX) {
+ /* Use hardware LPI mode */
del_timer_sync(&priv->eee_ctrl_timer);
priv->tx_path_in_lpi_mode = false;
- stmmac_lpi_entry_timer_config(priv, 1);
+ priv->eee_sw_timer_en = false;
+ stmmac_enable_hw_lpi_timer(priv);
} else {
- stmmac_lpi_entry_timer_config(priv, 0);
+ /* Use software LPI mode */
+ priv->eee_sw_timer_en = true;
+ stmmac_disable_hw_lpi_timer(priv);
mod_timer(&priv->eee_ctrl_timer,
STMMAC_LPI_T(priv->tx_lpi_timer));
}
+ priv->eee_enabled = true;
+
mutex_unlock(&priv->lock);
netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n");
- return true;
}
/* stmmac_get_tx_hwtstamp - get HW TX timestamps
@@ -974,9 +977,7 @@ static void stmmac_mac_link_down(struct phylink_config *config,
struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev));
stmmac_mac_set(priv, priv->ioaddr, false);
- priv->eee_active = false;
- priv->tx_lpi_enabled = false;
- priv->eee_enabled = stmmac_eee_init(priv);
+ stmmac_eee_init(priv, false);
stmmac_set_eee_pls(priv, priv->hw, false);
if (stmmac_fpe_supported(priv))
@@ -1085,11 +1086,10 @@ static void stmmac_mac_link_up(struct phylink_config *config,
stmmac_mac_set(priv, priv->ioaddr, true);
if (phy && priv->dma_cap.eee) {
- priv->eee_active =
- phy_init_eee(phy, !(priv->plat->flags &
- STMMAC_FLAG_RX_CLK_RUNS_IN_LPI)) >= 0;
- priv->eee_enabled = stmmac_eee_init(priv);
- priv->tx_lpi_enabled = priv->eee_enabled;
+ phy_eee_rx_clock_stop(phy, !(priv->plat->flags &
+ STMMAC_FLAG_RX_CLK_RUNS_IN_LPI));
+ priv->tx_lpi_timer = phy->eee_cfg.tx_lpi_timer;
+ stmmac_eee_init(priv, phy->enable_tx_lpi);
stmmac_set_eee_pls(priv, priv->hw, true);
}
@@ -1187,6 +1187,16 @@ static int stmmac_init_phy(struct net_device *dev)
ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0);
}
+ if (ret == 0) {
+ struct ethtool_keee eee;
+
+ /* Configure phylib's copy of the LPI timer */
+ if (!phylink_ethtool_get_eee(priv->phylink, &eee)) {
+ eee.tx_lpi_timer = priv->tx_lpi_timer;
+ phylink_ethtool_set_eee(priv->phylink, &eee);
+ }
+ }
+
if (!priv->plat->pmt) {
struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
@@ -1203,6 +1213,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
struct stmmac_mdio_bus_data *mdio_bus_data;
int mode = priv->plat->phy_interface;
struct fwnode_handle *fwnode;
+ struct phylink_pcs *pcs;
struct phylink *phylink;
priv->phylink_config.dev = &priv->dev->dev;
@@ -1224,8 +1235,14 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
/* If we have an xpcs, it defines which PHY interfaces are supported. */
if (priv->hw->xpcs)
- xpcs_get_interfaces(priv->hw->xpcs,
- priv->phylink_config.supported_interfaces);
+ pcs = xpcs_to_phylink_pcs(priv->hw->xpcs);
+ else
+ pcs = priv->hw->phylink_pcs;
+
+ if (pcs)
+ phy_interface_or(priv->phylink_config.supported_interfaces,
+ priv->phylink_config.supported_interfaces,
+ pcs->supported_interfaces);
fwnode = priv->plat->port_node;
if (!fwnode)
@@ -1308,6 +1325,14 @@ static void stmmac_display_rings(struct stmmac_priv *priv,
stmmac_display_tx_rings(priv, dma_conf);
}
+static unsigned int stmmac_rx_offset(struct stmmac_priv *priv)
+{
+ if (stmmac_xdp_is_enabled(priv))
+ return XDP_PACKET_HEADROOM;
+
+ return 0;
+}
+
static int stmmac_set_bfsize(int mtu, int bufsize)
{
int ret = bufsize;
@@ -3437,12 +3462,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register)
else if (ptp_register)
stmmac_ptp_register(priv);
- priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS;
-
- /* Convert the timer from msec to usec */
- if (!priv->tx_lpi_timer)
- priv->tx_lpi_timer = eee_timer * 1000;
-
if (priv->use_riwt) {
u32 queue;
@@ -3909,6 +3928,10 @@ static int __stmmac_open(struct net_device *dev,
u32 chan;
int ret;
+ /* Initialise the tx lpi timer, converting from msec to usec */
+ if (!priv->tx_lpi_timer)
+ priv->tx_lpi_timer = eee_timer * 1000;
+
ret = pm_runtime_resume_and_get(priv->device);
if (ret < 0)
return ret;
@@ -4023,11 +4046,6 @@ static int stmmac_release(struct net_device *dev)
/* Free the IRQ lines */
stmmac_free_irq(dev, REQ_IRQ_ERR_ALL, 0);
- if (priv->eee_enabled) {
- priv->tx_path_in_lpi_mode = false;
- del_timer_sync(&priv->eee_ctrl_timer);
- }
-
/* Stop TX/RX DMA and clear the descriptors */
stmmac_stop_all_dma(priv);
@@ -4479,7 +4497,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
first_tx = tx_q->cur_tx;
if (priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en)
- stmmac_disable_eee_mode(priv);
+ stmmac_disable_sw_eee_mode(priv);
/* Manage oversized TCP frames for GMAC4 device */
if (skb_is_gso(skb) && priv->tso) {
@@ -5460,7 +5478,7 @@ read_again:
if (priv->extend_desc)
stmmac_rx_extended_status(priv, &priv->xstats, rx_q->dma_erx + entry);
if (unlikely(status == discard_frame)) {
- page_pool_recycle_direct(rx_q->page_pool, buf->page);
+ page_pool_put_page(rx_q->page_pool, buf->page, 0, true);
buf->page = NULL;
error = 1;
if (!priv->hwts_rx_en)
@@ -7394,6 +7412,8 @@ int stmmac_dvr_probe(struct device *device,
INIT_WORK(&priv->service_task, stmmac_service_task);
+ timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0);
+
/* Override with kernel parameters if supplied XXX CRS XXX
* this needs to have multiple instances
*/
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 5465bf872734..dcb6662b473d 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -762,7 +762,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common)
ALE_DEFAULT_THREAD_ID, 0);
cpsw_ale_control_set(common->ale, HOST_PORT_NUM,
ALE_DEFAULT_THREAD_ENABLE, 1);
- /* switch to vlan unaware mode */
+ /* switch to vlan aware mode */
cpsw_ale_control_set(common->ale, HOST_PORT_NUM, ALE_VLAN_AWARE, 1);
cpsw_ale_control_set(common->ale, HOST_PORT_NUM,
ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 1e290ee8edfd..0cb6fa6e5b7d 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -686,7 +686,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
soft_reset("cpsw", &cpsw->regs->soft_reset);
cpsw_ale_start(cpsw->ale);
- /* switch to vlan unaware mode */
+ /* switch to vlan aware mode */
cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
CPSW_ALE_VLAN_AWARE);
control_reg = readl(&cpsw->regs->control);
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index be4d90c1cbe7..cec0a90659d9 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -554,7 +554,7 @@ static void cpsw_init_host_port(struct cpsw_priv *priv)
soft_reset("cpsw", &cpsw->regs->soft_reset);
cpsw_ale_start(cpsw->ale);
- /* switch to vlan unaware mode */
+ /* switch to vlan aware mode */
cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE,
CPSW_ALE_VLAN_AWARE);
control_reg = readl(&cpsw->regs->control);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 1bf9c38e4125..deaf670c160e 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -334,27 +334,25 @@ int wx_host_interface_command(struct wx *wx, u32 *buffer,
status = read_poll_timeout(rd32, hicr, hicr & WX_MNG_MBOX_CTL_FWRDY, 1000,
timeout * 1000, false, wx, WX_MNG_MBOX_CTL);
+ buf[0] = rd32(wx, WX_MNG_MBOX);
+ if ((buf[0] & 0xff0000) >> 16 == 0x80) {
+ wx_err(wx, "Unknown FW command: 0x%x\n", buffer[0] & 0xff);
+ status = -EINVAL;
+ goto rel_out;
+ }
+
/* Check command completion */
if (status) {
- wx_dbg(wx, "Command has failed with no status valid.\n");
-
- buf[0] = rd32(wx, WX_MNG_MBOX);
- if ((buffer[0] & 0xff) != (~buf[0] >> 24)) {
- status = -EINVAL;
- goto rel_out;
- }
- if ((buf[0] & 0xff0000) >> 16 == 0x80) {
- wx_dbg(wx, "It's unknown cmd.\n");
- status = -EINVAL;
- goto rel_out;
- }
-
+ wx_err(wx, "Command has failed with no status valid.\n");
wx_dbg(wx, "write value:\n");
for (i = 0; i < dword_len; i++)
wx_dbg(wx, "%x ", buffer[i]);
wx_dbg(wx, "read value:\n");
for (i = 0; i < dword_len; i++)
wx_dbg(wx, "%x ", buf[i]);
+ wx_dbg(wx, "\ncheck: %x %x\n", buffer[0] & 0xff, ~buf[0] >> 24);
+
+ goto rel_out;
}
if (!return_data)
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index e685a7f946f0..753215ebc67c 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -3072,7 +3072,11 @@ static int ca8210_probe(struct spi_device *spi_device)
spi_set_drvdata(priv->spi, priv);
if (IS_ENABLED(CONFIG_IEEE802154_CA8210_DEBUGFS)) {
cascoda_api_upstream = ca8210_test_int_driver_write;
- ca8210_test_interface_init(priv);
+ ret = ca8210_test_interface_init(priv);
+ if (ret) {
+ dev_crit(&spi_device->dev, "ca8210_test_interface_init failed\n");
+ goto error;
+ }
} else {
cascoda_api_upstream = NULL;
}
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index ee2c3cf4df36..da3a97a65507 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -799,6 +799,12 @@ static int ipvlan_device_event(struct notifier_block *unused,
case NETDEV_PRE_TYPE_CHANGE:
/* Forbid underlying device to change its type. */
return NOTIFY_BAD;
+
+ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_BONDING_FAILOVER:
+ case NETDEV_RESEND_IGMP:
+ list_for_each_entry(ipvlan, &port->ipvlans, pnode)
+ call_netdevice_notifiers(event, ipvlan->dev);
}
return NOTIFY_DONE;
}
diff --git a/drivers/net/mctp/mctp-i3c.c b/drivers/net/mctp/mctp-i3c.c
index 9adad59b8676..d247fe483c58 100644
--- a/drivers/net/mctp/mctp-i3c.c
+++ b/drivers/net/mctp/mctp-i3c.c
@@ -125,6 +125,8 @@ static int mctp_i3c_read(struct mctp_i3c_device *mi)
xfer.data.in = skb_put(skb, mi->mrl);
+ /* Make sure netif_rx() is read in the same order as i3c. */
+ mutex_lock(&mi->lock);
rc = i3c_device_do_priv_xfers(mi->i3c, &xfer, 1);
if (rc < 0)
goto err;
@@ -166,8 +168,10 @@ static int mctp_i3c_read(struct mctp_i3c_device *mi)
stats->rx_dropped++;
}
+ mutex_unlock(&mi->lock);
return 0;
err:
+ mutex_unlock(&mi->lock);
kfree_skb(skb);
return rc;
}
diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index f422a2f666ef..86ab4a42769a 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -730,7 +730,7 @@ static void update_userdata(struct netconsole_target *nt)
struct userdatum *udm_item;
struct config_item *item;
- if (child_count >= MAX_USERDATA_ITEMS)
+ if (WARN_ON_ONCE(child_count >= MAX_USERDATA_ITEMS))
break;
child_count++;
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index e068a9761c09..d013b6498539 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -20,6 +20,7 @@
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/netlink.h>
#include <net/net_shaper.h>
@@ -29,6 +30,8 @@
#include "netdevsim.h"
+MODULE_IMPORT_NS("NETDEV_INTERNAL");
+
#define NSIM_RING_SIZE 256
static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
@@ -69,7 +72,7 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
rxq = skb_get_queue_mapping(skb);
if (rxq >= peer_dev->num_rx_queues)
rxq = rxq % peer_dev->num_rx_queues;
- rq = &peer_ns->rq[rxq];
+ rq = peer_ns->rq[rxq];
skb_tx_timestamp(skb);
if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP))
@@ -359,25 +362,24 @@ static int nsim_poll(struct napi_struct *napi, int budget)
return done;
}
-static int nsim_create_page_pool(struct nsim_rq *rq)
+static int nsim_create_page_pool(struct page_pool **p, struct napi_struct *napi)
{
- struct page_pool_params p = {
+ struct page_pool_params params = {
.order = 0,
.pool_size = NSIM_RING_SIZE,
.nid = NUMA_NO_NODE,
- .dev = &rq->napi.dev->dev,
- .napi = &rq->napi,
+ .dev = &napi->dev->dev,
+ .napi = napi,
.dma_dir = DMA_BIDIRECTIONAL,
- .netdev = rq->napi.dev,
+ .netdev = napi->dev,
};
+ struct page_pool *pool;
- rq->page_pool = page_pool_create(&p);
- if (IS_ERR(rq->page_pool)) {
- int err = PTR_ERR(rq->page_pool);
+ pool = page_pool_create(&params);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
- rq->page_pool = NULL;
- return err;
- }
+ *p = pool;
return 0;
}
@@ -388,15 +390,15 @@ static int nsim_init_napi(struct netdevsim *ns)
int err, i;
for (i = 0; i < dev->num_rx_queues; i++) {
- rq = &ns->rq[i];
+ rq = ns->rq[i];
- netif_napi_add(dev, &rq->napi, nsim_poll);
+ netif_napi_add_config(dev, &rq->napi, nsim_poll, i);
}
for (i = 0; i < dev->num_rx_queues; i++) {
- rq = &ns->rq[i];
+ rq = ns->rq[i];
- err = nsim_create_page_pool(rq);
+ err = nsim_create_page_pool(&rq->page_pool, &rq->napi);
if (err)
goto err_pp_destroy;
}
@@ -405,12 +407,12 @@ static int nsim_init_napi(struct netdevsim *ns)
err_pp_destroy:
while (i--) {
- page_pool_destroy(ns->rq[i].page_pool);
- ns->rq[i].page_pool = NULL;
+ page_pool_destroy(ns->rq[i]->page_pool);
+ ns->rq[i]->page_pool = NULL;
}
for (i = 0; i < dev->num_rx_queues; i++)
- __netif_napi_del(&ns->rq[i].napi);
+ __netif_napi_del(&ns->rq[i]->napi);
return err;
}
@@ -421,7 +423,7 @@ static void nsim_enable_napi(struct netdevsim *ns)
int i;
for (i = 0; i < dev->num_rx_queues; i++) {
- struct nsim_rq *rq = &ns->rq[i];
+ struct nsim_rq *rq = ns->rq[i];
netif_queue_set_napi(dev, i, NETDEV_QUEUE_TYPE_RX, &rq->napi);
napi_enable(&rq->napi);
@@ -448,7 +450,7 @@ static void nsim_del_napi(struct netdevsim *ns)
int i;
for (i = 0; i < dev->num_rx_queues; i++) {
- struct nsim_rq *rq = &ns->rq[i];
+ struct nsim_rq *rq = ns->rq[i];
napi_disable(&rq->napi);
__netif_napi_del(&rq->napi);
@@ -456,8 +458,8 @@ static void nsim_del_napi(struct netdevsim *ns)
synchronize_net();
for (i = 0; i < dev->num_rx_queues; i++) {
- page_pool_destroy(ns->rq[i].page_pool);
- ns->rq[i].page_pool = NULL;
+ page_pool_destroy(ns->rq[i]->page_pool);
+ ns->rq[i]->page_pool = NULL;
}
}
@@ -595,6 +597,182 @@ static const struct netdev_stat_ops nsim_stat_ops = {
.get_base_stats = nsim_get_base_stats,
};
+static struct nsim_rq *nsim_queue_alloc(void)
+{
+ struct nsim_rq *rq;
+
+ rq = kzalloc(sizeof(*rq), GFP_KERNEL_ACCOUNT);
+ if (!rq)
+ return NULL;
+
+ skb_queue_head_init(&rq->skb_queue);
+ return rq;
+}
+
+static void nsim_queue_free(struct nsim_rq *rq)
+{
+ skb_queue_purge_reason(&rq->skb_queue, SKB_DROP_REASON_QUEUE_PURGE);
+ kfree(rq);
+}
+
+/* Queue reset mode is controlled by ns->rq_reset_mode.
+ * - normal - new NAPI new pool (old NAPI enabled when new added)
+ * - mode 1 - allocate new pool (NAPI is only disabled / enabled)
+ * - mode 2 - new NAPI new pool (old NAPI removed before new added)
+ * - mode 3 - new NAPI new pool (old NAPI disabled when new added)
+ */
+struct nsim_queue_mem {
+ struct nsim_rq *rq;
+ struct page_pool *pp;
+};
+
+static int
+nsim_queue_mem_alloc(struct net_device *dev, void *per_queue_mem, int idx)
+{
+ struct nsim_queue_mem *qmem = per_queue_mem;
+ struct netdevsim *ns = netdev_priv(dev);
+ int err;
+
+ if (ns->rq_reset_mode > 3)
+ return -EINVAL;
+
+ if (ns->rq_reset_mode == 1)
+ return nsim_create_page_pool(&qmem->pp, &ns->rq[idx]->napi);
+
+ qmem->rq = nsim_queue_alloc();
+ if (!qmem->rq)
+ return -ENOMEM;
+
+ err = nsim_create_page_pool(&qmem->rq->page_pool, &qmem->rq->napi);
+ if (err)
+ goto err_free;
+
+ if (!ns->rq_reset_mode)
+ netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
+
+ return 0;
+
+err_free:
+ nsim_queue_free(qmem->rq);
+ return err;
+}
+
+static void nsim_queue_mem_free(struct net_device *dev, void *per_queue_mem)
+{
+ struct nsim_queue_mem *qmem = per_queue_mem;
+ struct netdevsim *ns = netdev_priv(dev);
+
+ page_pool_destroy(qmem->pp);
+ if (qmem->rq) {
+ if (!ns->rq_reset_mode)
+ netif_napi_del(&qmem->rq->napi);
+ page_pool_destroy(qmem->rq->page_pool);
+ nsim_queue_free(qmem->rq);
+ }
+}
+
+static int
+nsim_queue_start(struct net_device *dev, void *per_queue_mem, int idx)
+{
+ struct nsim_queue_mem *qmem = per_queue_mem;
+ struct netdevsim *ns = netdev_priv(dev);
+
+ if (ns->rq_reset_mode == 1) {
+ ns->rq[idx]->page_pool = qmem->pp;
+ napi_enable(&ns->rq[idx]->napi);
+ return 0;
+ }
+
+ /* netif_napi_add()/_del() should normally be called from alloc/free,
+ * here we want to test various call orders.
+ */
+ if (ns->rq_reset_mode == 2) {
+ netif_napi_del(&ns->rq[idx]->napi);
+ netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
+ } else if (ns->rq_reset_mode == 3) {
+ netif_napi_add_config(dev, &qmem->rq->napi, nsim_poll, idx);
+ netif_napi_del(&ns->rq[idx]->napi);
+ }
+
+ ns->rq[idx] = qmem->rq;
+ napi_enable(&ns->rq[idx]->napi);
+
+ return 0;
+}
+
+static int nsim_queue_stop(struct net_device *dev, void *per_queue_mem, int idx)
+{
+ struct nsim_queue_mem *qmem = per_queue_mem;
+ struct netdevsim *ns = netdev_priv(dev);
+
+ napi_disable(&ns->rq[idx]->napi);
+
+ if (ns->rq_reset_mode == 1) {
+ qmem->pp = ns->rq[idx]->page_pool;
+ page_pool_disable_direct_recycling(qmem->pp);
+ } else {
+ qmem->rq = ns->rq[idx];
+ }
+
+ return 0;
+}
+
+static const struct netdev_queue_mgmt_ops nsim_queue_mgmt_ops = {
+ .ndo_queue_mem_size = sizeof(struct nsim_queue_mem),
+ .ndo_queue_mem_alloc = nsim_queue_mem_alloc,
+ .ndo_queue_mem_free = nsim_queue_mem_free,
+ .ndo_queue_start = nsim_queue_start,
+ .ndo_queue_stop = nsim_queue_stop,
+};
+
+static ssize_t
+nsim_qreset_write(struct file *file, const char __user *data,
+ size_t count, loff_t *ppos)
+{
+ struct netdevsim *ns = file->private_data;
+ unsigned int queue, mode;
+ char buf[32];
+ ssize_t ret;
+
+ if (count >= sizeof(buf))
+ return -EINVAL;
+ if (copy_from_user(buf, data, count))
+ return -EFAULT;
+ buf[count] = '\0';
+
+ ret = sscanf(buf, "%u %u", &queue, &mode);
+ if (ret != 2)
+ return -EINVAL;
+
+ rtnl_lock();
+ if (!netif_running(ns->netdev)) {
+ ret = -ENETDOWN;
+ goto exit_unlock;
+ }
+
+ if (queue >= ns->netdev->real_num_rx_queues) {
+ ret = -EINVAL;
+ goto exit_unlock;
+ }
+
+ ns->rq_reset_mode = mode;
+ ret = netdev_rx_queue_restart(ns->netdev, queue);
+ ns->rq_reset_mode = 0;
+ if (ret)
+ goto exit_unlock;
+
+ ret = count;
+exit_unlock:
+ rtnl_unlock();
+ return ret;
+}
+
+static const struct file_operations nsim_qreset_fops = {
+ .open = simple_open,
+ .write = nsim_qreset_write,
+ .owner = THIS_MODULE,
+};
+
static ssize_t
nsim_pp_hold_read(struct file *file, char __user *data,
size_t count, loff_t *ppos)
@@ -628,7 +806,7 @@ nsim_pp_hold_write(struct file *file, const char __user *data,
if (!netif_running(ns->netdev) && val) {
ret = -ENETDOWN;
} else if (val) {
- ns->page = page_pool_dev_alloc_pages(ns->rq[0].page_pool);
+ ns->page = page_pool_dev_alloc_pages(ns->rq[0]->page_pool);
if (!ns->page)
ret = -ENOMEM;
} else {
@@ -677,27 +855,35 @@ static int nsim_queue_init(struct netdevsim *ns)
struct net_device *dev = ns->netdev;
int i;
- ns->rq = kvcalloc(dev->num_rx_queues, sizeof(*ns->rq),
- GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
+ ns->rq = kcalloc(dev->num_rx_queues, sizeof(*ns->rq),
+ GFP_KERNEL_ACCOUNT);
if (!ns->rq)
return -ENOMEM;
- for (i = 0; i < dev->num_rx_queues; i++)
- skb_queue_head_init(&ns->rq[i].skb_queue);
+ for (i = 0; i < dev->num_rx_queues; i++) {
+ ns->rq[i] = nsim_queue_alloc();
+ if (!ns->rq[i])
+ goto err_free_prev;
+ }
return 0;
+
+err_free_prev:
+ while (i--)
+ kfree(ns->rq[i]);
+ kfree(ns->rq);
+ return -ENOMEM;
}
-static void nsim_queue_free(struct netdevsim *ns)
+static void nsim_queue_uninit(struct netdevsim *ns)
{
struct net_device *dev = ns->netdev;
int i;
for (i = 0; i < dev->num_rx_queues; i++)
- skb_queue_purge_reason(&ns->rq[i].skb_queue,
- SKB_DROP_REASON_QUEUE_PURGE);
+ nsim_queue_free(ns->rq[i]);
- kvfree(ns->rq);
+ kfree(ns->rq);
ns->rq = NULL;
}
@@ -713,6 +899,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
ns->phc = phc;
ns->netdev->netdev_ops = &nsim_netdev_ops;
ns->netdev->stat_ops = &nsim_stat_ops;
+ ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
if (err)
@@ -741,7 +928,7 @@ err_ipsec_teardown:
nsim_macsec_teardown(ns);
nsim_bpf_uninit(ns);
err_rq_destroy:
- nsim_queue_free(ns);
+ nsim_queue_uninit(ns);
err_utn_destroy:
rtnl_unlock();
nsim_udp_tunnels_info_destroy(ns->netdev);
@@ -798,6 +985,9 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
ns->pp_dfs = debugfs_create_file("pp_hold", 0600, nsim_dev_port->ddir,
ns, &nsim_pp_hold_fops);
+ ns->qr_dfs = debugfs_create_file("queue_reset", 0200,
+ nsim_dev_port->ddir, ns,
+ &nsim_qreset_fops);
return ns;
@@ -811,6 +1001,7 @@ void nsim_destroy(struct netdevsim *ns)
struct net_device *dev = ns->netdev;
struct netdevsim *peer;
+ debugfs_remove(ns->qr_dfs);
debugfs_remove(ns->pp_dfs);
rtnl_lock();
@@ -823,7 +1014,7 @@ void nsim_destroy(struct netdevsim *ns)
nsim_macsec_teardown(ns);
nsim_ipsec_teardown(ns);
nsim_bpf_uninit(ns);
- nsim_queue_free(ns);
+ nsim_queue_uninit(ns);
}
rtnl_unlock();
if (nsim_dev_port_is_pf(ns->nsim_dev_port))
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index bf02efa10956..a70f62af4c88 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -101,7 +101,9 @@ struct netdevsim {
struct nsim_dev *nsim_dev;
struct nsim_dev_port *nsim_dev_port;
struct mock_phc *phc;
- struct nsim_rq *rq;
+ struct nsim_rq **rq;
+
+ int rq_reset_mode;
u64 tx_packets;
u64 tx_bytes;
@@ -134,6 +136,7 @@ struct netdevsim {
struct page *page;
struct dentry *pp_dfs;
+ struct dentry *qr_dfs;
struct nsim_ethtool ethtool;
struct netdevsim __rcu *peer;
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index c1d881dc6409..1e1b00756be7 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -338,6 +338,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
enum netkit_mode mode = NETKIT_L3;
unsigned char ifname_assign_type;
+ u16 headroom = 0, tailroom = 0;
struct ifinfomsg *ifmp = NULL;
struct net_device *peer;
char ifname[IFNAMSIZ];
@@ -371,6 +372,10 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
if (err < 0)
return err;
}
+ if (data[IFLA_NETKIT_HEADROOM])
+ headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]);
+ if (data[IFLA_NETKIT_TAILROOM])
+ tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]);
}
if (ifmp && tbp[IFLA_IFNAME]) {
@@ -390,6 +395,14 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
return PTR_ERR(peer);
netif_inherit_tso_max(peer, dev);
+ if (headroom) {
+ peer->needed_headroom = headroom;
+ dev->needed_headroom = headroom;
+ }
+ if (tailroom) {
+ peer->needed_tailroom = tailroom;
+ dev->needed_tailroom = tailroom;
+ }
if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS]))
eth_hw_addr_random(peer);
@@ -401,6 +414,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
nk->policy = policy_peer;
nk->scrub = scrub_peer;
nk->mode = mode;
+ nk->headroom = headroom;
bpf_mprog_bundle_init(&nk->bundle);
err = register_netdevice(peer);
@@ -426,6 +440,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev,
nk->policy = policy_prim;
nk->scrub = scrub_prim;
nk->mode = mode;
+ nk->headroom = headroom;
bpf_mprog_bundle_init(&nk->bundle);
err = register_netdevice(dev);
@@ -850,7 +865,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
struct net_device *peer = rtnl_dereference(nk->peer);
enum netkit_action policy;
struct nlattr *attr;
- int err;
+ int err, i;
+ static const struct {
+ u32 attr;
+ char *name;
+ } fixed_params[] = {
+ { IFLA_NETKIT_MODE, "operating mode" },
+ { IFLA_NETKIT_SCRUB, "scrubbing" },
+ { IFLA_NETKIT_PEER_SCRUB, "peer scrubbing" },
+ { IFLA_NETKIT_PEER_INFO, "peer info" },
+ { IFLA_NETKIT_HEADROOM, "headroom" },
+ { IFLA_NETKIT_TAILROOM, "tailroom" },
+ };
if (!nk->primary) {
NL_SET_ERR_MSG(extack,
@@ -858,28 +884,14 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
return -EACCES;
}
- if (data[IFLA_NETKIT_MODE]) {
- NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE],
- "netkit link operating mode cannot be changed after device creation");
- return -EACCES;
- }
-
- if (data[IFLA_NETKIT_SCRUB]) {
- NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB],
- "netkit scrubbing cannot be changed after device creation");
- return -EACCES;
- }
-
- if (data[IFLA_NETKIT_PEER_SCRUB]) {
- NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB],
- "netkit scrubbing cannot be changed after device creation");
- return -EACCES;
- }
-
- if (data[IFLA_NETKIT_PEER_INFO]) {
- NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
- "netkit peer info cannot be changed after device creation");
- return -EINVAL;
+ for (i = 0; i < ARRAY_SIZE(fixed_params); i++) {
+ attr = data[fixed_params[i].attr];
+ if (attr) {
+ NL_SET_ERR_MSG_ATTR_FMT(extack, attr,
+ "netkit link %s cannot be changed after device creation",
+ fixed_params[i].name);
+ return -EACCES;
+ }
}
if (data[IFLA_NETKIT_POLICY]) {
@@ -914,6 +926,8 @@ static size_t netkit_get_size(const struct net_device *dev)
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
+ nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */
+ nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */
0;
}
@@ -930,6 +944,10 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
return -EMSGSIZE;
if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
return -EMSGSIZE;
+ if (nla_put_u16(skb, IFLA_NETKIT_HEADROOM, dev->needed_headroom))
+ return -EMSGSIZE;
+ if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom))
+ return -EMSGSIZE;
if (peer) {
nk = netkit_priv(peer);
@@ -947,6 +965,8 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
[IFLA_NETKIT_MODE] = NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
[IFLA_NETKIT_POLICY] = { .type = NLA_U32 },
[IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 },
+ [IFLA_NETKIT_HEADROOM] = { .type = NLA_U16 },
+ [IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 },
[IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
[IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
[IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT,
diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c
index 767a8c0714ac..6457190ec6e7 100644
--- a/drivers/net/pcs/pcs-lynx.c
+++ b/drivers/net/pcs/pcs-lynx.c
@@ -334,9 +334,19 @@ static const struct phylink_pcs_ops lynx_pcs_phylink_ops = {
.pcs_link_up = lynx_pcs_link_up,
};
+static const phy_interface_t lynx_interfaces[] = {
+ PHY_INTERFACE_MODE_SGMII,
+ PHY_INTERFACE_MODE_QSGMII,
+ PHY_INTERFACE_MODE_1000BASEX,
+ PHY_INTERFACE_MODE_2500BASEX,
+ PHY_INTERFACE_MODE_10GBASER,
+ PHY_INTERFACE_MODE_USXGMII,
+};
+
static struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio)
{
struct lynx_pcs *lynx;
+ int i;
lynx = kzalloc(sizeof(*lynx), GFP_KERNEL);
if (!lynx)
@@ -348,6 +358,9 @@ static struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio)
lynx->pcs.neg_mode = true;
lynx->pcs.poll = true;
+ for (i = 0; i < ARRAY_SIZE(lynx_interfaces); i++)
+ __set_bit(lynx_interfaces[i], lynx->pcs.supported_interfaces);
+
return lynx_to_phylink_pcs(lynx);
}
diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c
index ed91cd7a406a..4fe0fb6d12a4 100644
--- a/drivers/net/pcs/pcs-mtk-lynxi.c
+++ b/drivers/net/pcs/pcs-mtk-lynxi.c
@@ -307,6 +307,10 @@ struct phylink_pcs *mtk_pcs_lynxi_create(struct device *dev,
mpcs->pcs.poll = true;
mpcs->interface = PHY_INTERFACE_MODE_NA;
+ __set_bit(PHY_INTERFACE_MODE_SGMII, mpcs->pcs.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_1000BASEX, mpcs->pcs.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_2500BASEX, mpcs->pcs.supported_interfaces);
+
return &mpcs->pcs;
}
EXPORT_SYMBOL(mtk_pcs_lynxi_create);
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index f70ca39f0905..c06b66f40022 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -594,14 +594,13 @@ static unsigned int xpcs_inband_caps(struct phylink_pcs *pcs,
}
}
-void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
+static void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces)
{
const struct dw_xpcs_compat *compat;
for (compat = xpcs->desc->compat; compat->supported; compat++)
__set_bit(compat->interface, interfaces);
}
-EXPORT_SYMBOL_GPL(xpcs_get_interfaces);
int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable)
{
@@ -1446,6 +1445,8 @@ static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev)
if (ret)
goto out_clear_clks;
+ xpcs_get_interfaces(xpcs, xpcs->pcs.supported_interfaces);
+
if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID)
xpcs->pcs.poll = false;
else
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index dc625f2b3ae4..9ad3dbfd2f99 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -287,8 +287,8 @@ config MICROCHIP_PHY
config MICROCHIP_T1_PHY
tristate "Microchip T1 PHYs"
- select MICROCHIP_PHY_RDS_PTP if NETWORK_PHY_TIMESTAMPING && \
- PTP_1588_CLOCK_OPTIONAL
+ select MICROCHIP_PHY_RDS_PTP if NETWORK_PHY_TIMESTAMPING
+ depends on PTP_1588_CLOCK_OPTIONAL
help
Supports the LAN8XXX PHYs.
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index 334c17a68edd..4262bc31503b 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -30,6 +30,9 @@
#define MII_DP83822_FCSCR 0x14
#define MII_DP83822_RCSR 0x17
#define MII_DP83822_RESET_CTRL 0x1f
+#define MII_DP83822_MLEDCR 0x25
+#define MII_DP83822_LEDCFG1 0x460
+#define MII_DP83822_IOCTRL1 0x462
#define MII_DP83822_IOCTRL2 0x463
#define MII_DP83822_GENCFG 0x465
#define MII_DP83822_SOR1 0x467
@@ -105,10 +108,26 @@
#define DP83822_RX_CLK_SHIFT BIT(12)
#define DP83822_TX_CLK_SHIFT BIT(11)
+/* MLEDCR bits */
+#define DP83822_MLEDCR_CFG GENMASK(6, 3)
+#define DP83822_MLEDCR_ROUTE GENMASK(1, 0)
+#define DP83822_MLEDCR_ROUTE_LED_0 DP83822_MLEDCR_ROUTE
+
+/* LEDCFG1 bits */
+#define DP83822_LEDCFG1_LED1_CTRL GENMASK(11, 8)
+#define DP83822_LEDCFG1_LED3_CTRL GENMASK(7, 4)
+
+/* IOCTRL1 bits */
+#define DP83822_IOCTRL1_GPIO3_CTRL GENMASK(10, 8)
+#define DP83822_IOCTRL1_GPIO3_CTRL_LED3 BIT(0)
+#define DP83822_IOCTRL1_GPIO1_CTRL GENMASK(2, 0)
+#define DP83822_IOCTRL1_GPIO1_CTRL_LED_1 BIT(0)
+
/* IOCTRL2 bits */
#define DP83822_IOCTRL2_GPIO2_CLK_SRC GENMASK(6, 4)
#define DP83822_IOCTRL2_GPIO2_CTRL GENMASK(2, 0)
#define DP83822_IOCTRL2_GPIO2_CTRL_CLK_REF GENMASK(1, 0)
+#define DP83822_IOCTRL2_GPIO2_CTRL_MLED BIT(0)
#define DP83822_CLK_SRC_MAC_IF 0x0
#define DP83822_CLK_SRC_XI 0x1
@@ -117,6 +136,22 @@
#define DP83822_CLK_SRC_FREE_RUNNING 0x6
#define DP83822_CLK_SRC_RECOVERED 0x7
+#define DP83822_LED_FN_LINK 0x0 /* Link established */
+#define DP83822_LED_FN_RX_TX 0x1 /* Receive or Transmit activity */
+#define DP83822_LED_FN_TX 0x2 /* Transmit activity */
+#define DP83822_LED_FN_RX 0x3 /* Receive activity */
+#define DP83822_LED_FN_COLLISION 0x4 /* Collision detected */
+#define DP83822_LED_FN_LINK_100_BTX 0x5 /* 100 BTX link established */
+#define DP83822_LED_FN_LINK_10_BT 0x6 /* 10BT link established */
+#define DP83822_LED_FN_FULL_DUPLEX 0x7 /* Full duplex */
+#define DP83822_LED_FN_LINK_RX_TX 0x8 /* Link established, blink for rx or tx activity */
+#define DP83822_LED_FN_ACTIVE_STRETCH 0x9 /* Active Stretch Signal */
+#define DP83822_LED_FN_MII_LINK 0xa /* MII LINK (100BT+FD) */
+#define DP83822_LED_FN_LPI_MODE 0xb /* LPI Mode (EEE) */
+#define DP83822_LED_FN_RX_TX_ERR 0xc /* TX/RX MII Error */
+#define DP83822_LED_FN_LINK_LOST 0xd /* Link Lost */
+#define DP83822_LED_FN_PRBS_ERR 0xe /* Blink for PRBS error */
+
/* SOR1 mode */
#define DP83822_STRAP_MODE1 0
#define DP83822_STRAP_MODE2 BIT(0)
@@ -145,6 +180,13 @@
ADVERTISED_FIBRE | \
ADVERTISED_Pause | ADVERTISED_Asym_Pause)
+#define DP83822_MAX_LED_PINS 4
+
+#define DP83822_LED_INDEX_LED_0 0
+#define DP83822_LED_INDEX_LED_1_GPIO1 1
+#define DP83822_LED_INDEX_COL_GPIO2 2
+#define DP83822_LED_INDEX_RX_D3_GPIO3 3
+
struct dp83822_private {
bool fx_signal_det_low;
int fx_enabled;
@@ -154,6 +196,7 @@ struct dp83822_private {
struct ethtool_wolinfo wol;
bool set_gpio2_clk_out;
u32 gpio2_clk_out;
+ bool led_pin_enable[DP83822_MAX_LED_PINS];
};
static int dp83822_config_wol(struct phy_device *phydev,
@@ -418,6 +461,48 @@ static int dp83822_read_status(struct phy_device *phydev)
return 0;
}
+static int dp83822_config_init_leds(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822 = phydev->priv;
+ int ret;
+
+ if (dp83822->led_pin_enable[DP83822_LED_INDEX_LED_0]) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_MLEDCR,
+ DP83822_MLEDCR_ROUTE,
+ FIELD_PREP(DP83822_MLEDCR_ROUTE,
+ DP83822_MLEDCR_ROUTE_LED_0));
+ if (ret)
+ return ret;
+ } else if (dp83822->led_pin_enable[DP83822_LED_INDEX_COL_GPIO2]) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_IOCTRL2,
+ DP83822_IOCTRL2_GPIO2_CTRL,
+ FIELD_PREP(DP83822_IOCTRL2_GPIO2_CTRL,
+ DP83822_IOCTRL2_GPIO2_CTRL_MLED));
+ if (ret)
+ return ret;
+ }
+
+ if (dp83822->led_pin_enable[DP83822_LED_INDEX_LED_1_GPIO1]) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_IOCTRL1,
+ DP83822_IOCTRL1_GPIO1_CTRL,
+ FIELD_PREP(DP83822_IOCTRL1_GPIO1_CTRL,
+ DP83822_IOCTRL1_GPIO1_CTRL_LED_1));
+ if (ret)
+ return ret;
+ }
+
+ if (dp83822->led_pin_enable[DP83822_LED_INDEX_RX_D3_GPIO3]) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_IOCTRL1,
+ DP83822_IOCTRL1_GPIO3_CTRL,
+ FIELD_PREP(DP83822_IOCTRL1_GPIO3_CTRL,
+ DP83822_IOCTRL1_GPIO3_CTRL_LED3));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int dp83822_config_init(struct phy_device *phydev)
{
struct dp83822_private *dp83822 = phydev->priv;
@@ -437,6 +522,10 @@ static int dp83822_config_init(struct phy_device *phydev)
FIELD_PREP(DP83822_IOCTRL2_GPIO2_CLK_SRC,
dp83822->gpio2_clk_out));
+ err = dp83822_config_init_leds(phydev);
+ if (err)
+ return err;
+
if (phy_interface_is_rgmii(phydev)) {
rx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0,
true);
@@ -631,6 +720,61 @@ static int dp83822_phy_reset(struct phy_device *phydev)
}
#ifdef CONFIG_OF_MDIO
+static int dp83822_of_init_leds(struct phy_device *phydev)
+{
+ struct device_node *node = phydev->mdio.dev.of_node;
+ struct dp83822_private *dp83822 = phydev->priv;
+ struct device_node *leds;
+ u32 index;
+ int err;
+
+ if (!node)
+ return 0;
+
+ leds = of_get_child_by_name(node, "leds");
+ if (!leds)
+ return 0;
+
+ for_each_available_child_of_node_scoped(leds, led) {
+ err = of_property_read_u32(led, "reg", &index);
+ if (err) {
+ of_node_put(leds);
+ return err;
+ }
+
+ if (index <= DP83822_LED_INDEX_RX_D3_GPIO3) {
+ dp83822->led_pin_enable[index] = true;
+ } else {
+ of_node_put(leds);
+ return -EINVAL;
+ }
+ }
+
+ of_node_put(leds);
+ /* LED_0 and COL(GPIO2) use the MLED function. MLED can be routed to
+ * only one of these two pins at a time.
+ */
+ if (dp83822->led_pin_enable[DP83822_LED_INDEX_LED_0] &&
+ dp83822->led_pin_enable[DP83822_LED_INDEX_COL_GPIO2]) {
+ phydev_err(phydev, "LED_0 and COL(GPIO2) cannot be used as LED output at the same time\n");
+ return -EINVAL;
+ }
+
+ if (dp83822->led_pin_enable[DP83822_LED_INDEX_COL_GPIO2] &&
+ dp83822->set_gpio2_clk_out) {
+ phydev_err(phydev, "COL(GPIO2) cannot be used as LED outout, already used as clock output\n");
+ return -EINVAL;
+ }
+
+ if (dp83822->led_pin_enable[DP83822_LED_INDEX_RX_D3_GPIO3] &&
+ phydev->interface != PHY_INTERFACE_MODE_RMII) {
+ phydev_err(phydev, "RX_D3 can only be used as LED output when in RMII mode\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int dp83822_of_init(struct phy_device *phydev)
{
struct dp83822_private *dp83822 = phydev->priv;
@@ -671,7 +815,7 @@ static int dp83822_of_init(struct phy_device *phydev)
dp83822->set_gpio2_clk_out = true;
}
- return 0;
+ return dp83822_of_init_leds(phydev);
}
static int dp83826_to_dac_minus_one_regval(int percent)
@@ -769,7 +913,9 @@ static int dp83822_probe(struct phy_device *phydev)
if (ret)
return ret;
- dp83822_of_init(phydev);
+ ret = dp83822_of_init(phydev);
+ if (ret)
+ return ret;
if (dp83822->fx_enabled)
phydev->port = PORT_FIBRE;
@@ -816,6 +962,130 @@ static int dp83822_resume(struct phy_device *phydev)
return 0;
}
+static int dp83822_led_mode(u8 index, unsigned long rules)
+{
+ switch (rules) {
+ case BIT(TRIGGER_NETDEV_LINK):
+ return DP83822_LED_FN_LINK;
+ case BIT(TRIGGER_NETDEV_LINK_10):
+ return DP83822_LED_FN_LINK_10_BT;
+ case BIT(TRIGGER_NETDEV_LINK_100):
+ return DP83822_LED_FN_LINK_100_BTX;
+ case BIT(TRIGGER_NETDEV_FULL_DUPLEX):
+ return DP83822_LED_FN_FULL_DUPLEX;
+ case BIT(TRIGGER_NETDEV_TX):
+ return DP83822_LED_FN_TX;
+ case BIT(TRIGGER_NETDEV_RX):
+ return DP83822_LED_FN_RX;
+ case BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX):
+ return DP83822_LED_FN_RX_TX;
+ case BIT(TRIGGER_NETDEV_TX_ERR) | BIT(TRIGGER_NETDEV_RX_ERR):
+ return DP83822_LED_FN_RX_TX_ERR;
+ case BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX):
+ return DP83822_LED_FN_LINK_RX_TX;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int dp83822_led_hw_is_supported(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ int mode;
+
+ mode = dp83822_led_mode(index, rules);
+ if (mode < 0)
+ return mode;
+
+ return 0;
+}
+
+static int dp83822_led_hw_control_set(struct phy_device *phydev, u8 index,
+ unsigned long rules)
+{
+ int mode;
+
+ mode = dp83822_led_mode(index, rules);
+ if (mode < 0)
+ return mode;
+
+ if (index == DP83822_LED_INDEX_LED_0 || index == DP83822_LED_INDEX_COL_GPIO2)
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND2,
+ MII_DP83822_MLEDCR, DP83822_MLEDCR_CFG,
+ FIELD_PREP(DP83822_MLEDCR_CFG, mode));
+ else if (index == DP83822_LED_INDEX_LED_1_GPIO1)
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND2,
+ MII_DP83822_LEDCFG1,
+ DP83822_LEDCFG1_LED1_CTRL,
+ FIELD_PREP(DP83822_LEDCFG1_LED1_CTRL,
+ mode));
+ else
+ return phy_modify_mmd(phydev, MDIO_MMD_VEND2,
+ MII_DP83822_LEDCFG1,
+ DP83822_LEDCFG1_LED3_CTRL,
+ FIELD_PREP(DP83822_LEDCFG1_LED3_CTRL,
+ mode));
+}
+
+static int dp83822_led_hw_control_get(struct phy_device *phydev, u8 index,
+ unsigned long *rules)
+{
+ int val;
+
+ if (index == DP83822_LED_INDEX_LED_0 || index == DP83822_LED_INDEX_COL_GPIO2) {
+ val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_MLEDCR);
+ if (val < 0)
+ return val;
+
+ val = FIELD_GET(DP83822_MLEDCR_CFG, val);
+ } else {
+ val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_LEDCFG1);
+ if (val < 0)
+ return val;
+
+ if (index == DP83822_LED_INDEX_LED_1_GPIO1)
+ val = FIELD_GET(DP83822_LEDCFG1_LED1_CTRL, val);
+ else
+ val = FIELD_GET(DP83822_LEDCFG1_LED3_CTRL, val);
+ }
+
+ switch (val) {
+ case DP83822_LED_FN_LINK:
+ *rules = BIT(TRIGGER_NETDEV_LINK);
+ break;
+ case DP83822_LED_FN_LINK_10_BT:
+ *rules = BIT(TRIGGER_NETDEV_LINK_10);
+ break;
+ case DP83822_LED_FN_LINK_100_BTX:
+ *rules = BIT(TRIGGER_NETDEV_LINK_100);
+ break;
+ case DP83822_LED_FN_FULL_DUPLEX:
+ *rules = BIT(TRIGGER_NETDEV_FULL_DUPLEX);
+ break;
+ case DP83822_LED_FN_TX:
+ *rules = BIT(TRIGGER_NETDEV_TX);
+ break;
+ case DP83822_LED_FN_RX:
+ *rules = BIT(TRIGGER_NETDEV_RX);
+ break;
+ case DP83822_LED_FN_RX_TX:
+ *rules = BIT(TRIGGER_NETDEV_TX) | BIT(TRIGGER_NETDEV_RX);
+ break;
+ case DP83822_LED_FN_RX_TX_ERR:
+ *rules = BIT(TRIGGER_NETDEV_TX_ERR) | BIT(TRIGGER_NETDEV_RX_ERR);
+ break;
+ case DP83822_LED_FN_LINK_RX_TX:
+ *rules = BIT(TRIGGER_NETDEV_LINK) | BIT(TRIGGER_NETDEV_TX) |
+ BIT(TRIGGER_NETDEV_RX);
+ break;
+ default:
+ *rules = 0;
+ break;
+ }
+
+ return 0;
+}
+
#define DP83822_PHY_DRIVER(_id, _name) \
{ \
PHY_ID_MATCH_MODEL(_id), \
@@ -831,6 +1101,9 @@ static int dp83822_resume(struct phy_device *phydev)
.handle_interrupt = dp83822_handle_interrupt, \
.suspend = dp83822_suspend, \
.resume = dp83822_resume, \
+ .led_hw_is_supported = dp83822_led_hw_is_supported, \
+ .led_hw_control_set = dp83822_led_hw_control_set, \
+ .led_hw_control_get = dp83822_led_hw_control_get, \
}
#define DP83825_PHY_DRIVER(_id, _name) \
diff --git a/drivers/net/phy/dp83td510.c b/drivers/net/phy/dp83td510.c
index 92aa3a2b9744..1abeacca7441 100644
--- a/drivers/net/phy/dp83td510.c
+++ b/drivers/net/phy/dp83td510.c
@@ -34,6 +34,29 @@
#define DP83TD510E_CTRL_HW_RESET BIT(15)
#define DP83TD510E_CTRL_SW_RESET BIT(14)
+/*
+ * DP83TD510E_PKT_STAT_x registers correspond to similarly named registers
+ * in the datasheet (PKT_STAT_1 through PKT_STAT_6). These registers store
+ * 32-bit or 16-bit counters for TX and RX statistics and must be read in
+ * sequence to ensure the counters are cleared correctly.
+ *
+ * - DP83TD510E_PKT_STAT_1: Contains TX packet count bits [15:0].
+ * - DP83TD510E_PKT_STAT_2: Contains TX packet count bits [31:16].
+ * - DP83TD510E_PKT_STAT_3: Contains TX error packet count.
+ * - DP83TD510E_PKT_STAT_4: Contains RX packet count bits [15:0].
+ * - DP83TD510E_PKT_STAT_5: Contains RX packet count bits [31:16].
+ * - DP83TD510E_PKT_STAT_6: Contains RX error packet count.
+ *
+ * Keeping the register names as defined in the datasheet helps maintain
+ * clarity and alignment with the documentation.
+ */
+#define DP83TD510E_PKT_STAT_1 0x12b
+#define DP83TD510E_PKT_STAT_2 0x12c
+#define DP83TD510E_PKT_STAT_3 0x12d
+#define DP83TD510E_PKT_STAT_4 0x12e
+#define DP83TD510E_PKT_STAT_5 0x12f
+#define DP83TD510E_PKT_STAT_6 0x130
+
#define DP83TD510E_AN_STAT_1 0x60c
#define DP83TD510E_MASTER_SLAVE_RESOL_FAIL BIT(15)
@@ -58,8 +81,16 @@ static const u16 dp83td510_mse_sqi_map[] = {
0x0000 /* 24dB =< SNR */
};
+struct dp83td510_stats {
+ u64 tx_pkt_cnt;
+ u64 tx_err_pkt_cnt;
+ u64 rx_pkt_cnt;
+ u64 rx_err_pkt_cnt;
+};
+
struct dp83td510_priv {
bool alcd_test_active;
+ struct dp83td510_stats stats;
};
/* Time Domain Reflectometry (TDR) Functionality of DP83TD510 PHY
@@ -177,6 +208,85 @@ struct dp83td510_priv {
#define DP83TD510E_ALCD_COMPLETE BIT(15)
#define DP83TD510E_ALCD_CABLE_LENGTH GENMASK(10, 0)
+/**
+ * dp83td510_update_stats - Update the PHY statistics for the DP83TD510 PHY.
+ * @phydev: Pointer to the phy_device structure.
+ *
+ * The function reads the PHY statistics registers and updates the statistics
+ * structure.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static int dp83td510_update_stats(struct phy_device *phydev)
+{
+ struct dp83td510_priv *priv = phydev->priv;
+ u32 count;
+ int ret;
+
+ /* The DP83TD510E_PKT_STAT registers are divided into two groups:
+ * - Group 1 (TX stats): DP83TD510E_PKT_STAT_1 to DP83TD510E_PKT_STAT_3
+ * - Group 2 (RX stats): DP83TD510E_PKT_STAT_4 to DP83TD510E_PKT_STAT_6
+ *
+ * Registers in each group are cleared only after reading them in a
+ * plain sequence (e.g., 1, 2, 3 for Group 1 or 4, 5, 6 for Group 2).
+ * Any deviation from the sequence, such as reading 1, 2, 1, 2, 3, will
+ * prevent the group from being cleared. Additionally, the counters
+ * for a group are frozen as soon as the first register in that group
+ * is accessed.
+ */
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PKT_STAT_1);
+ if (ret < 0)
+ return ret;
+ /* tx_pkt_cnt_15_0 */
+ count = ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PKT_STAT_2);
+ if (ret < 0)
+ return ret;
+ /* tx_pkt_cnt_31_16 */
+ count |= ret << 16;
+ priv->stats.tx_pkt_cnt += count;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PKT_STAT_3);
+ if (ret < 0)
+ return ret;
+ /* tx_err_pkt_cnt */
+ priv->stats.tx_err_pkt_cnt += ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PKT_STAT_4);
+ if (ret < 0)
+ return ret;
+ /* rx_pkt_cnt_15_0 */
+ count = ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PKT_STAT_5);
+ if (ret < 0)
+ return ret;
+ /* rx_pkt_cnt_31_16 */
+ count |= ret << 16;
+ priv->stats.rx_pkt_cnt += count;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TD510E_PKT_STAT_6);
+ if (ret < 0)
+ return ret;
+ /* rx_err_pkt_cnt */
+ priv->stats.rx_err_pkt_cnt += ret;
+
+ return 0;
+}
+
+static void dp83td510_get_phy_stats(struct phy_device *phydev,
+ struct ethtool_eth_phy_stats *eth_stats,
+ struct ethtool_phy_stats *stats)
+{
+ struct dp83td510_priv *priv = phydev->priv;
+
+ stats->tx_packets = priv->stats.tx_pkt_cnt;
+ stats->tx_errors = priv->stats.tx_err_pkt_cnt;
+ stats->rx_packets = priv->stats.rx_pkt_cnt;
+ stats->rx_errors = priv->stats.rx_err_pkt_cnt;
+}
+
static int dp83td510_config_intr(struct phy_device *phydev)
{
int ret;
@@ -599,6 +709,8 @@ static struct phy_driver dp83td510_driver[] = {
.get_sqi_max = dp83td510_get_sqi_max,
.cable_test_start = dp83td510_cable_test_start,
.cable_test_get_status = dp83td510_cable_test_get_status,
+ .get_phy_stats = dp83td510_get_phy_stats,
+ .update_stats = dp83td510_update_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
diff --git a/drivers/net/phy/dp83tg720.c b/drivers/net/phy/dp83tg720.c
index 0ef4d7dba065..4ea752131b8c 100644
--- a/drivers/net/phy/dp83tg720.c
+++ b/drivers/net/phy/dp83tg720.c
@@ -51,6 +51,9 @@
/* Register 0x0405: Unknown Register */
#define DP83TG720S_UNKNOWN_0405 0x405
+#define DP83TG720S_LINK_QUAL_3 0x547
+#define DP83TG720S_LINK_LOSS_CNT_MASK GENMASK(15, 10)
+
/* Register 0x0576: TDR Master Link Down Control */
#define DP83TG720S_TDR_MASTER_LINK_DOWN 0x576
@@ -60,6 +63,29 @@
/* In RGMII mode, Enable or disable the internal delay for TXD */
#define DP83TG720S_RGMII_TX_CLK_SEL BIT(0)
+/*
+ * DP83TG720S_PKT_STAT_x registers correspond to similarly named registers
+ * in the datasheet (PKT_STAT_1 through PKT_STAT_6). These registers store
+ * 32-bit or 16-bit counters for TX and RX statistics and must be read in
+ * sequence to ensure the counters are cleared correctly.
+ *
+ * - DP83TG720S_PKT_STAT_1: Contains TX packet count bits [15:0].
+ * - DP83TG720S_PKT_STAT_2: Contains TX packet count bits [31:16].
+ * - DP83TG720S_PKT_STAT_3: Contains TX error packet count.
+ * - DP83TG720S_PKT_STAT_4: Contains RX packet count bits [15:0].
+ * - DP83TG720S_PKT_STAT_5: Contains RX packet count bits [31:16].
+ * - DP83TG720S_PKT_STAT_6: Contains RX error packet count.
+ *
+ * Keeping the register names as defined in the datasheet helps maintain
+ * clarity and alignment with the documentation.
+ */
+#define DP83TG720S_PKT_STAT_1 0x639
+#define DP83TG720S_PKT_STAT_2 0x63a
+#define DP83TG720S_PKT_STAT_3 0x63b
+#define DP83TG720S_PKT_STAT_4 0x63c
+#define DP83TG720S_PKT_STAT_5 0x63d
+#define DP83TG720S_PKT_STAT_6 0x63e
+
/* Register 0x083F: Unknown Register */
#define DP83TG720S_UNKNOWN_083F 0x83f
@@ -69,6 +95,113 @@
#define DP83TG720_SQI_MAX 7
+struct dp83tg720_stats {
+ u64 link_loss_cnt;
+ u64 tx_pkt_cnt;
+ u64 tx_err_pkt_cnt;
+ u64 rx_pkt_cnt;
+ u64 rx_err_pkt_cnt;
+};
+
+struct dp83tg720_priv {
+ struct dp83tg720_stats stats;
+};
+
+/**
+ * dp83tg720_update_stats - Update the PHY statistics for the DP83TD510 PHY.
+ * @phydev: Pointer to the phy_device structure.
+ *
+ * The function reads the PHY statistics registers and updates the statistics
+ * structure.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+static int dp83tg720_update_stats(struct phy_device *phydev)
+{
+ struct dp83tg720_priv *priv = phydev->priv;
+ u32 count;
+ int ret;
+
+ /* Read the link loss count */
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_LINK_QUAL_3);
+ if (ret < 0)
+ return ret;
+ /* link_loss_cnt */
+ count = FIELD_GET(DP83TG720S_LINK_LOSS_CNT_MASK, ret);
+ priv->stats.link_loss_cnt += count;
+
+ /* The DP83TG720S_PKT_STAT registers are divided into two groups:
+ * - Group 1 (TX stats): DP83TG720S_PKT_STAT_1 to DP83TG720S_PKT_STAT_3
+ * - Group 2 (RX stats): DP83TG720S_PKT_STAT_4 to DP83TG720S_PKT_STAT_6
+ *
+ * Registers in each group are cleared only after reading them in a
+ * plain sequence (e.g., 1, 2, 3 for Group 1 or 4, 5, 6 for Group 2).
+ * Any deviation from the sequence, such as reading 1, 2, 1, 2, 3, will
+ * prevent the group from being cleared. Additionally, the counters
+ * for a group are frozen as soon as the first register in that group
+ * is accessed.
+ */
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_PKT_STAT_1);
+ if (ret < 0)
+ return ret;
+ /* tx_pkt_cnt_15_0 */
+ count = ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_PKT_STAT_2);
+ if (ret < 0)
+ return ret;
+ /* tx_pkt_cnt_31_16 */
+ count |= ret << 16;
+ priv->stats.tx_pkt_cnt += count;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_PKT_STAT_3);
+ if (ret < 0)
+ return ret;
+ /* tx_err_pkt_cnt */
+ priv->stats.tx_err_pkt_cnt += ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_PKT_STAT_4);
+ if (ret < 0)
+ return ret;
+ /* rx_pkt_cnt_15_0 */
+ count = ret;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_PKT_STAT_5);
+ if (ret < 0)
+ return ret;
+ /* rx_pkt_cnt_31_16 */
+ count |= ret << 16;
+ priv->stats.rx_pkt_cnt += count;
+
+ ret = phy_read_mmd(phydev, MDIO_MMD_VEND2, DP83TG720S_PKT_STAT_6);
+ if (ret < 0)
+ return ret;
+ /* rx_err_pkt_cnt */
+ priv->stats.rx_err_pkt_cnt += ret;
+
+ return 0;
+}
+
+static void dp83tg720_get_link_stats(struct phy_device *phydev,
+ struct ethtool_link_ext_stats *link_stats)
+{
+ struct dp83tg720_priv *priv = phydev->priv;
+
+ link_stats->link_down_events = priv->stats.link_loss_cnt;
+}
+
+static void dp83tg720_get_phy_stats(struct phy_device *phydev,
+ struct ethtool_eth_phy_stats *eth_stats,
+ struct ethtool_phy_stats *stats)
+{
+ struct dp83tg720_priv *priv = phydev->priv;
+
+ stats->tx_packets = priv->stats.tx_pkt_cnt;
+ stats->tx_errors = priv->stats.tx_err_pkt_cnt;
+ stats->rx_packets = priv->stats.rx_pkt_cnt;
+ stats->rx_errors = priv->stats.rx_err_pkt_cnt;
+}
+
/**
* dp83tg720_cable_test_start - Start the cable test for the DP83TG720 PHY.
* @phydev: Pointer to the phy_device structure.
@@ -182,6 +315,11 @@ static int dp83tg720_cable_test_get_status(struct phy_device *phydev,
ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A, stat);
+ /* save the current stats before resetting the PHY */
+ ret = dp83tg720_update_stats(phydev);
+ if (ret)
+ return ret;
+
return phy_init_hw(phydev);
}
@@ -217,6 +355,11 @@ static int dp83tg720_read_status(struct phy_device *phydev)
phy_sts = phy_read(phydev, DP83TG720S_MII_REG_10);
phydev->link = !!(phy_sts & DP83TG720S_LINK_STATUS);
if (!phydev->link) {
+ /* save the current stats before resetting the PHY */
+ ret = dp83tg720_update_stats(phydev);
+ if (ret)
+ return ret;
+
/* According to the "DP83TC81x, DP83TG72x Software
* Implementation Guide", the PHY needs to be reset after a
* link loss or if no link is created after at least 100ms.
@@ -341,12 +484,27 @@ static int dp83tg720_config_init(struct phy_device *phydev)
return genphy_c45_pma_baset1_read_master_slave(phydev);
}
+static int dp83tg720_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct dp83tg720_priv *priv;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ phydev->priv = priv;
+
+ return 0;
+}
+
static struct phy_driver dp83tg720_driver[] = {
{
PHY_ID_MATCH_MODEL(DP83TG720S_PHY_ID),
.name = "TI DP83TG720S",
.flags = PHY_POLL_CABLE_TEST,
+ .probe = dp83tg720_probe,
.config_aneg = dp83tg720_config_aneg,
.read_status = dp83tg720_read_status,
.get_features = genphy_c45_pma_read_ext_abilities,
@@ -355,6 +513,9 @@ static struct phy_driver dp83tg720_driver[] = {
.get_sqi_max = dp83tg720_get_sqi_max,
.cable_test_start = dp83tg720_cable_test_start,
.cable_test_get_status = dp83tg720_cable_test_get_status,
+ .get_link_stats = dp83tg720_get_link_stats,
+ .get_phy_stats = dp83tg720_get_phy_stats,
+ .update_stats = dp83tg720_update_stats,
.suspend = genphy_suspend,
.resume = genphy_resume,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index eeb33eb181ac..0fd1cb676cd5 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -2006,7 +2006,7 @@ static int ksz9477_config_init(struct phy_device *phydev)
* in this switch shall be regarded as broken.
*/
if (phydev->dev_flags & MICREL_NO_EEE)
- linkmode_fill(phydev->eee_broken_modes);
+ phy_disable_eee(phydev);
return kszphy_config_init(phydev);
}
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e4b04cdaa995..c008fe050245 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -616,6 +616,49 @@ int phy_ethtool_get_stats(struct phy_device *phydev,
EXPORT_SYMBOL(phy_ethtool_get_stats);
/**
+ * __phy_ethtool_get_phy_stats - Retrieve standardized PHY statistics
+ * @phydev: Pointer to the PHY device
+ * @phy_stats: Pointer to ethtool_eth_phy_stats structure
+ * @phydev_stats: Pointer to ethtool_phy_stats structure
+ *
+ * Fetches PHY statistics using a kernel-defined interface for consistent
+ * diagnostics. Unlike phy_ethtool_get_stats(), which allows custom stats,
+ * this function enforces a standardized format for better interoperability.
+ */
+void __phy_ethtool_get_phy_stats(struct phy_device *phydev,
+ struct ethtool_eth_phy_stats *phy_stats,
+ struct ethtool_phy_stats *phydev_stats)
+{
+ if (!phydev->drv || !phydev->drv->get_phy_stats)
+ return;
+
+ mutex_lock(&phydev->lock);
+ phydev->drv->get_phy_stats(phydev, phy_stats, phydev_stats);
+ mutex_unlock(&phydev->lock);
+}
+
+/**
+ * __phy_ethtool_get_link_ext_stats - Retrieve extended link statistics for a PHY
+ * @phydev: Pointer to the PHY device
+ * @link_stats: Pointer to the structure to store extended link statistics
+ *
+ * Populates the ethtool_link_ext_stats structure with link down event counts
+ * and additional driver-specific link statistics, if available.
+ */
+void __phy_ethtool_get_link_ext_stats(struct phy_device *phydev,
+ struct ethtool_link_ext_stats *link_stats)
+{
+ link_stats->link_down_events = READ_ONCE(phydev->link_down_events);
+
+ if (!phydev->drv || !phydev->drv->get_link_stats)
+ return;
+
+ mutex_lock(&phydev->lock);
+ phydev->drv->get_link_stats(phydev, link_stats);
+ mutex_unlock(&phydev->lock);
+}
+
+/**
* phy_ethtool_get_plca_cfg - Get PLCA RS configuration
* @phydev: the phy_device struct
* @plca_cfg: where to store the retrieved configuration
@@ -1399,6 +1442,23 @@ static int phy_enable_interrupts(struct phy_device *phydev)
}
/**
+ * phy_update_stats - Update PHY device statistics if supported.
+ * @phydev: Pointer to the PHY device structure.
+ *
+ * If the PHY driver provides an update_stats callback, this function
+ * invokes it to update the PHY statistics. If not, it returns 0.
+ *
+ * Return: 0 on success, or a negative error code if the callback fails.
+ */
+static int phy_update_stats(struct phy_device *phydev)
+{
+ if (!phydev->drv->update_stats)
+ return 0;
+
+ return phydev->drv->update_stats(phydev);
+}
+
+/**
* phy_request_interrupt - request and enable interrupt for a PHY device
* @phydev: target phy_device struct
*
@@ -1467,6 +1527,9 @@ static enum phy_state_work _phy_state_machine(struct phy_device *phydev)
case PHY_RUNNING:
err = phy_check_link_status(phydev);
func = &phy_check_link_status;
+
+ if (!err)
+ err = phy_update_stats(phydev);
break;
case PHY_CABLETEST:
err = phydev->drv->cable_test_get_status(phydev, &finished);
@@ -1641,6 +1704,27 @@ void phy_mac_interrupt(struct phy_device *phydev)
EXPORT_SYMBOL(phy_mac_interrupt);
/**
+ * phy_eee_rx_clock_stop() - configure PHY receive clock in LPI
+ * @phydev: target phy_device struct
+ * @clk_stop_enable: flag to indicate whether the clock can be stopped
+ *
+ * Configure whether the PHY can disable its receive clock during LPI mode,
+ * See IEEE 802.3 sections 22.2.2.2, 35.2.2.10, and 45.2.3.1.4.
+ *
+ * Returns: 0 or negative error.
+ */
+int phy_eee_rx_clock_stop(struct phy_device *phydev, bool clk_stop_enable)
+{
+ /* Configure the PHY to stop receiving xMII
+ * clock while it is signaling LPI.
+ */
+ return phy_modify_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
+ MDIO_PCS_CTRL1_CLKSTOP_EN,
+ clk_stop_enable ? MDIO_PCS_CTRL1_CLKSTOP_EN : 0);
+}
+EXPORT_SYMBOL_GPL(phy_eee_rx_clock_stop);
+
+/**
* phy_init_eee - init and check the EEE feature
* @phydev: target phy_device struct
* @clk_stop_enable: PHY may stop the clock during LPI
@@ -1664,11 +1748,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable)
return -EPROTONOSUPPORT;
if (clk_stop_enable)
- /* Configure the PHY to stop receiving xMII
- * clock while it is signaling LPI.
- */
- ret = phy_set_bits_mmd(phydev, MDIO_MMD_PCS, MDIO_CTRL1,
- MDIO_PCS_CTRL1_CLKSTOP_EN);
+ ret = phy_eee_rx_clock_stop(phydev, true);
return ret < 0 ? ret : 0;
}
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index bdc997f59779..5b34d39d1d52 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3800,6 +3800,8 @@ static const struct ethtool_phy_ops phy_ethtool_phy_ops = {
static const struct phylib_stubs __phylib_stubs = {
.hwtstamp_get = __phy_hwtstamp_get,
.hwtstamp_set = __phy_hwtstamp_set,
+ .get_phy_stats = __phy_ethtool_get_phy_stats,
+ .get_link_ext_stats = __phy_ethtool_get_link_ext_stats,
};
static void phylib_register_stubs(void)
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 6d50c2fdb190..31754d5fd659 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -691,6 +691,17 @@ static int phylink_validate_mac_and_pcs(struct phylink *pl,
return -EINVAL;
}
+ /* Ensure that this PCS supports the interface which the MAC
+ * returned it for. It is an error for the MAC to return a PCS
+ * that does not support the interface mode.
+ */
+ if (!phy_interface_empty(pcs->supported_interfaces) &&
+ !test_bit(state->interface, pcs->supported_interfaces)) {
+ phylink_err(pl, "MAC returned PCS which does not support %s\n",
+ phy_modes(state->interface));
+ return -EINVAL;
+ }
+
/* Validate the link parameters with the PCS */
if (pcs->ops->pcs_validate) {
ret = pcs->ops->pcs_validate(pcs, supported, state);
diff --git a/drivers/nfc/st21nfca/dep.c b/drivers/nfc/st21nfca/dep.c
index 1ec651e31064..3425b68f0ddc 100644
--- a/drivers/nfc/st21nfca/dep.c
+++ b/drivers/nfc/st21nfca/dep.c
@@ -116,18 +116,16 @@ static void st21nfca_tx_work(struct work_struct *work)
struct nfc_dev *dev;
struct sk_buff *skb;
- if (info) {
- dev = info->hdev->ndev;
- skb = info->dep_info.tx_pending;
+ dev = info->hdev->ndev;
+ skb = info->dep_info.tx_pending;
- device_lock(&dev->dev);
+ device_lock(&dev->dev);
- nfc_hci_send_cmd_async(info->hdev, ST21NFCA_RF_READER_F_GATE,
- ST21NFCA_WR_XCHG_DATA, skb->data, skb->len,
- info->async_cb, info);
- device_unlock(&dev->dev);
- kfree_skb(skb);
- }
+ nfc_hci_send_cmd_async(info->hdev, ST21NFCA_RF_READER_F_GATE,
+ ST21NFCA_WR_XCHG_DATA, skb->data, skb->len,
+ info->async_cb, info);
+ device_unlock(&dev->dev);
+ kfree_skb(skb);
}
static void st21nfca_im_send_pdu(struct st21nfca_hci_info *info,
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 611b02c8a8b3..c4bb8dfe1a45 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -173,6 +173,11 @@ enum nvme_quirks {
* MSI (but not MSI-X) interrupts are broken and never fire.
*/
NVME_QUIRK_BROKEN_MSI = (1 << 21),
+
+ /*
+ * Align dma pool segment size to 512 bytes
+ */
+ NVME_QUIRK_DMAPOOL_ALIGN_512 = (1 << 22),
};
/*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 1a5ba80f1811..e2634f437f33 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2834,15 +2834,20 @@ static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
+ size_t small_align = 256;
+
dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
NVME_CTRL_PAGE_SIZE,
NVME_CTRL_PAGE_SIZE, 0);
if (!dev->prp_page_pool)
return -ENOMEM;
+ if (dev->ctrl.quirks & NVME_QUIRK_DMAPOOL_ALIGN_512)
+ small_align = 512;
+
/* Optimisation for I/Os between 4k and 128k */
dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
- 256, 256, 0);
+ 256, small_align, 0);
if (!dev->prp_small_pool) {
dma_pool_destroy(dev->prp_page_pool);
return -ENOMEM;
@@ -3607,7 +3612,7 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1217, 0x8760), /* O2 Micro 64GB Steam Deck */
- .driver_data = NVME_QUIRK_QDEPTH_ONE },
+ .driver_data = NVME_QUIRK_DMAPOOL_ALIGN_512, },
{ PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
NVME_QUIRK_BOGUS_NID, },
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 28c76a3e1bd2..b127d41dbbfe 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2024,14 +2024,6 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
return __nvme_tcp_alloc_io_queues(ctrl);
}
-static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
-{
- nvme_tcp_stop_io_queues(ctrl);
- if (remove)
- nvme_remove_io_tag_set(ctrl);
- nvme_tcp_free_io_queues(ctrl);
-}
-
static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
{
int ret, nr_queues;
@@ -2176,9 +2168,11 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
nvme_cancel_tagset(ctrl);
- if (remove)
+ if (remove) {
nvme_unquiesce_io_queues(ctrl);
- nvme_tcp_destroy_io_queues(ctrl, remove);
+ nvme_remove_io_tag_set(ctrl);
+ }
+ nvme_tcp_free_io_queues(ctrl);
}
static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl,
@@ -2267,7 +2261,9 @@ destroy_io:
nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
nvme_cancel_tagset(ctrl);
- nvme_tcp_destroy_io_queues(ctrl, new);
+ if (new)
+ nvme_remove_io_tag_set(ctrl);
+ nvme_tcp_free_io_queues(ctrl);
}
destroy_admin:
nvme_stop_keep_alive(ctrl);
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 2962794ce881..fa89b0549c36 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -139,7 +139,7 @@ static u16 nvmet_get_smart_log_all(struct nvmet_req *req,
unsigned long idx;
ctrl = req->sq->ctrl;
- xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
+ nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) {
/* we don't have the right data for file backed ns */
if (!ns->bdev)
continue;
@@ -331,9 +331,10 @@ static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
u32 count = 0;
if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
- xa_for_each(&ctrl->subsys->namespaces, idx, ns)
+ nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) {
if (ns->anagrpid == grpid)
desc->nsids[count++] = cpu_to_le32(ns->nsid);
+ }
}
desc->grpid = cpu_to_le32(grpid);
@@ -772,7 +773,7 @@ static void nvmet_execute_identify_endgrp_list(struct nvmet_req *req)
goto out;
}
- xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
+ nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) {
if (ns->nsid <= min_endgid)
continue;
@@ -815,7 +816,7 @@ static void nvmet_execute_identify_nslist(struct nvmet_req *req, bool match_css)
goto out;
}
- xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
+ nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) {
if (ns->nsid <= min_nsid)
continue;
if (match_css && req->ns->csi != req->cmd->identify.csi)
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index eeee9e9b854c..2b030f0efc38 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -810,18 +810,6 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
NULL,
};
-bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid)
-{
- struct config_item *ns_item;
- char name[12];
-
- snprintf(name, sizeof(name), "%u", nsid);
- mutex_lock(&subsys->namespaces_group.cg_subsys->su_mutex);
- ns_item = config_group_find_item(&subsys->namespaces_group, name);
- mutex_unlock(&subsys->namespaces_group.cg_subsys->su_mutex);
- return ns_item != NULL;
-}
-
static void nvmet_ns_release(struct config_item *item)
{
struct nvmet_ns *ns = to_nvmet_ns(item);
@@ -2254,12 +2242,17 @@ static ssize_t nvmet_root_discovery_nqn_store(struct config_item *item,
const char *page, size_t count)
{
struct list_head *entry;
+ char *old_nqn, *new_nqn;
size_t len;
len = strcspn(page, "\n");
if (!len || len > NVMF_NQN_FIELD_LEN - 1)
return -EINVAL;
+ new_nqn = kstrndup(page, len, GFP_KERNEL);
+ if (!new_nqn)
+ return -ENOMEM;
+
down_write(&nvmet_config_sem);
list_for_each(entry, &nvmet_subsystems_group.cg_children) {
struct config_item *item =
@@ -2268,13 +2261,15 @@ static ssize_t nvmet_root_discovery_nqn_store(struct config_item *item,
if (!strncmp(config_item_name(item), page, len)) {
pr_err("duplicate NQN %s\n", config_item_name(item));
up_write(&nvmet_config_sem);
+ kfree(new_nqn);
return -EINVAL;
}
}
- memset(nvmet_disc_subsys->subsysnqn, 0, NVMF_NQN_FIELD_LEN);
- memcpy(nvmet_disc_subsys->subsysnqn, page, len);
+ old_nqn = nvmet_disc_subsys->subsysnqn;
+ nvmet_disc_subsys->subsysnqn = new_nqn;
up_write(&nvmet_config_sem);
+ kfree(old_nqn);
return len;
}
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 1f4e9989663b..fde6c555af61 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -127,7 +127,7 @@ static u32 nvmet_max_nsid(struct nvmet_subsys *subsys)
unsigned long idx;
u32 nsid = 0;
- xa_for_each(&subsys->namespaces, idx, cur)
+ nvmet_for_each_enabled_ns(&subsys->namespaces, idx, cur)
nsid = cur->nsid;
return nsid;
@@ -441,11 +441,14 @@ u16 nvmet_req_find_ns(struct nvmet_req *req)
struct nvmet_subsys *subsys = nvmet_req_subsys(req);
req->ns = xa_load(&subsys->namespaces, nsid);
- if (unlikely(!req->ns)) {
+ if (unlikely(!req->ns || !req->ns->enabled)) {
req->error_loc = offsetof(struct nvme_common_command, nsid);
- if (nvmet_subsys_nsid_exists(subsys, nsid))
- return NVME_SC_INTERNAL_PATH_ERROR;
- return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
+ if (!req->ns) /* ns doesn't exist! */
+ return NVME_SC_INVALID_NS | NVME_STATUS_DNR;
+
+ /* ns exists but it's disabled */
+ req->ns = NULL;
+ return NVME_SC_INTERNAL_PATH_ERROR;
}
percpu_ref_get(&req->ns->ref);
@@ -583,8 +586,6 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
goto out_unlock;
ret = -EMFILE;
- if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
- goto out_unlock;
ret = nvmet_bdev_ns_enable(ns);
if (ret == -ENOTBLK)
@@ -599,38 +600,19 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
nvmet_p2pmem_ns_add_p2p(ctrl, ns);
- ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
- 0, GFP_KERNEL);
- if (ret)
- goto out_dev_put;
-
- if (ns->nsid > subsys->max_nsid)
- subsys->max_nsid = ns->nsid;
-
- ret = xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL);
- if (ret)
- goto out_restore_subsys_maxnsid;
-
if (ns->pr.enable) {
ret = nvmet_pr_init_ns(ns);
if (ret)
- goto out_remove_from_subsys;
+ goto out_dev_put;
}
- subsys->nr_namespaces++;
-
nvmet_ns_changed(subsys, ns->nsid);
ns->enabled = true;
+ xa_set_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
ret = 0;
out_unlock:
mutex_unlock(&subsys->lock);
return ret;
-
-out_remove_from_subsys:
- xa_erase(&subsys->namespaces, ns->nsid);
-out_restore_subsys_maxnsid:
- subsys->max_nsid = nvmet_max_nsid(subsys);
- percpu_ref_exit(&ns->ref);
out_dev_put:
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
@@ -649,15 +631,37 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
goto out_unlock;
ns->enabled = false;
- xa_erase(&ns->subsys->namespaces, ns->nsid);
- if (ns->nsid == subsys->max_nsid)
- subsys->max_nsid = nvmet_max_nsid(subsys);
+ xa_clear_mark(&subsys->namespaces, ns->nsid, NVMET_NS_ENABLED);
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
mutex_unlock(&subsys->lock);
+ if (ns->pr.enable)
+ nvmet_pr_exit_ns(ns);
+
+ mutex_lock(&subsys->lock);
+ nvmet_ns_changed(subsys, ns->nsid);
+ nvmet_ns_dev_disable(ns);
+out_unlock:
+ mutex_unlock(&subsys->lock);
+}
+
+void nvmet_ns_free(struct nvmet_ns *ns)
+{
+ struct nvmet_subsys *subsys = ns->subsys;
+
+ nvmet_ns_disable(ns);
+
+ mutex_lock(&subsys->lock);
+
+ xa_erase(&subsys->namespaces, ns->nsid);
+ if (ns->nsid == subsys->max_nsid)
+ subsys->max_nsid = nvmet_max_nsid(subsys);
+
+ mutex_unlock(&subsys->lock);
+
/*
* Now that we removed the namespaces from the lookup list, we
* can kill the per_cpu ref and wait for any remaining references
@@ -671,21 +675,9 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
wait_for_completion(&ns->disable_done);
percpu_ref_exit(&ns->ref);
- if (ns->pr.enable)
- nvmet_pr_exit_ns(ns);
-
mutex_lock(&subsys->lock);
-
subsys->nr_namespaces--;
- nvmet_ns_changed(subsys, ns->nsid);
- nvmet_ns_dev_disable(ns);
-out_unlock:
mutex_unlock(&subsys->lock);
-}
-
-void nvmet_ns_free(struct nvmet_ns *ns)
-{
- nvmet_ns_disable(ns);
down_write(&nvmet_ana_sem);
nvmet_ana_group_enabled[ns->anagrpid]--;
@@ -699,15 +691,33 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
{
struct nvmet_ns *ns;
+ mutex_lock(&subsys->lock);
+
+ if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
+ goto out_unlock;
+
ns = kzalloc(sizeof(*ns), GFP_KERNEL);
if (!ns)
- return NULL;
+ goto out_unlock;
init_completion(&ns->disable_done);
ns->nsid = nsid;
ns->subsys = subsys;
+ if (percpu_ref_init(&ns->ref, nvmet_destroy_namespace, 0, GFP_KERNEL))
+ goto out_free;
+
+ if (ns->nsid > subsys->max_nsid)
+ subsys->max_nsid = nsid;
+
+ if (xa_insert(&subsys->namespaces, ns->nsid, ns, GFP_KERNEL))
+ goto out_exit;
+
+ subsys->nr_namespaces++;
+
+ mutex_unlock(&subsys->lock);
+
down_write(&nvmet_ana_sem);
ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
nvmet_ana_group_enabled[ns->anagrpid]++;
@@ -718,6 +728,14 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
ns->csi = NVME_CSI_NVM;
return ns;
+out_exit:
+ subsys->max_nsid = nvmet_max_nsid(subsys);
+ percpu_ref_exit(&ns->ref);
+out_free:
+ kfree(ns);
+out_unlock:
+ mutex_unlock(&subsys->lock);
+ return NULL;
}
static void nvmet_update_sq_head(struct nvmet_req *req)
@@ -1394,7 +1412,7 @@ static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
ctrl->p2p_client = get_device(req->p2p_client);
- xa_for_each(&ctrl->subsys->namespaces, idx, ns)
+ nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns)
nvmet_p2pmem_ns_add_p2p(ctrl, ns);
}
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 0bda83d0fc3e..eaf31c823cbe 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
*/
id->nsfeat |= 1 << 4;
/* NPWG = Namespace Preferred Write Granularity. 0's based */
- id->npwg = lpp0b;
+ id->npwg = to0based(bdev_io_min(bdev) / bdev_logical_block_size(bdev));
/* NPWA = Namespace Preferred Write Alignment. 0's based */
id->npwa = id->npwg;
/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 58328b35dc96..7233549f7c8a 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -24,6 +24,7 @@
#define NVMET_DEFAULT_VS NVME_VS(2, 1, 0)
+#define NVMET_NS_ENABLED XA_MARK_1
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
#define NVMET_NO_ERROR_LOC ((u16)-1)
@@ -33,6 +34,12 @@
#define NVMET_FR_MAX_SIZE 8
#define NVMET_PR_LOG_QUEUE_SIZE 64
+#define nvmet_for_each_ns(xa, index, entry) \
+ xa_for_each(xa, index, entry)
+
+#define nvmet_for_each_enabled_ns(xa, index, entry) \
+ xa_for_each_marked(xa, index, entry, NVMET_NS_ENABLED)
+
/*
* Supported optional AENs:
*/
diff --git a/drivers/nvme/target/pr.c b/drivers/nvme/target/pr.c
index 90e9f5bbe581..cd22d8333314 100644
--- a/drivers/nvme/target/pr.c
+++ b/drivers/nvme/target/pr.c
@@ -60,7 +60,7 @@ u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask)
goto success;
}
- xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
+ nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) {
if (ns->pr.enable)
WRITE_ONCE(ns->pr.notify_mask, mask);
}
@@ -1056,7 +1056,7 @@ int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl)
* nvmet_pr_init_ns(), see more details in nvmet_ns_enable().
* So just check ns->pr.enable.
*/
- xa_for_each(&subsys->namespaces, idx, ns) {
+ nvmet_for_each_enabled_ns(&subsys->namespaces, idx, ns) {
if (ns->pr.enable) {
ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid,
&ctrl->hostid);
@@ -1067,7 +1067,7 @@ int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl)
return 0;
free_per_ctrl_refs:
- xa_for_each(&subsys->namespaces, idx, ns) {
+ nvmet_for_each_enabled_ns(&subsys->namespaces, idx, ns) {
if (ns->pr.enable) {
pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid);
if (pc_ref)
@@ -1087,7 +1087,7 @@ void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl)
kfifo_free(&ctrl->pr_log_mgr.log_queue);
mutex_destroy(&ctrl->pr_log_mgr.lock);
- xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
+ nvmet_for_each_enabled_ns(&ctrl->subsys->namespaces, idx, ns) {
if (ns->pr.enable) {
pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid);
if (pc_ref)
diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
index 9ffd94ddf8c7..786f20ef2238 100644
--- a/drivers/ufs/core/ufshcd-priv.h
+++ b/drivers/ufs/core/ufshcd-priv.h
@@ -237,12 +237,6 @@ static inline void ufshcd_vops_config_scaling_param(struct ufs_hba *hba,
hba->vops->config_scaling_param(hba, p, data);
}
-static inline void ufshcd_vops_reinit_notify(struct ufs_hba *hba)
-{
- if (hba->vops && hba->vops->reinit_notify)
- hba->vops->reinit_notify(hba);
-}
-
static inline int ufshcd_vops_mcq_config_resource(struct ufs_hba *hba)
{
if (hba->vops && hba->vops->mcq_config_resource)
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 8a01e4393159..9c26e8767515 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8858,7 +8858,6 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params)
ufshcd_device_reset(hba);
ufs_put_device_desc(hba);
ufshcd_hba_stop(hba);
- ufshcd_vops_reinit_notify(hba);
ret = ufshcd_hba_enable(hba);
if (ret) {
dev_err(hba->dev, "Host controller enable failed\n");
@@ -10591,14 +10590,17 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
}
/*
- * Set the default power management level for runtime and system PM.
+ * Set the default power management level for runtime and system PM if
+ * not set by the host controller drivers.
* Default power saving mode is to keep UFS link in Hibern8 state
* and UFS device in sleep state.
*/
- hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state(
+ if (!hba->rpm_lvl)
+ hba->rpm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state(
UFS_SLEEP_PWR_MODE,
UIC_LINK_HIBERN8_STATE);
- hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state(
+ if (!hba->spm_lvl)
+ hba->spm_lvl = ufs_get_desired_pm_lvl_for_dev_link_state(
UFS_SLEEP_PWR_MODE,
UIC_LINK_HIBERN8_STATE);
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 68040b2ab5f8..91e94fe990b4 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -368,6 +368,11 @@ static int ufs_qcom_power_up_sequence(struct ufs_hba *hba)
if (ret)
return ret;
+ if (phy->power_count) {
+ phy_power_off(phy);
+ phy_exit(phy);
+ }
+
/* phy initialization - calibrate the phy */
ret = phy_init(phy);
if (ret) {
@@ -866,6 +871,7 @@ static u32 ufs_qcom_get_ufs_hci_version(struct ufs_hba *hba)
*/
static void ufs_qcom_advertise_quirks(struct ufs_hba *hba)
{
+ const struct ufs_qcom_drvdata *drvdata = of_device_get_match_data(hba->dev);
struct ufs_qcom_host *host = ufshcd_get_variant(hba);
if (host->hw_ver.major == 0x2)
@@ -874,9 +880,8 @@ static void ufs_qcom_advertise_quirks(struct ufs_hba *hba)
if (host->hw_ver.major > 0x3)
hba->quirks |= UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH;
- if (of_device_is_compatible(hba->dev->of_node, "qcom,sm8550-ufshc") ||
- of_device_is_compatible(hba->dev->of_node, "qcom,sm8650-ufshc"))
- hba->quirks |= UFSHCD_QUIRK_BROKEN_LSDBS_CAP;
+ if (drvdata && drvdata->quirks)
+ hba->quirks |= drvdata->quirks;
}
static void ufs_qcom_set_phy_gear(struct ufs_qcom_host *host)
@@ -1064,6 +1069,7 @@ static int ufs_qcom_init(struct ufs_hba *hba)
struct device *dev = hba->dev;
struct ufs_qcom_host *host;
struct ufs_clk_info *clki;
+ const struct ufs_qcom_drvdata *drvdata = of_device_get_match_data(hba->dev);
host = devm_kzalloc(dev, sizeof(*host), GFP_KERNEL);
if (!host)
@@ -1143,6 +1149,9 @@ static int ufs_qcom_init(struct ufs_hba *hba)
dev_warn(dev, "%s: failed to configure the testbus %d\n",
__func__, err);
+ if (drvdata && drvdata->no_phy_retention)
+ hba->spm_lvl = UFS_PM_LVL_5;
+
return 0;
out_variant_clear:
@@ -1579,13 +1588,6 @@ static void ufs_qcom_config_scaling_param(struct ufs_hba *hba,
}
#endif
-static void ufs_qcom_reinit_notify(struct ufs_hba *hba)
-{
- struct ufs_qcom_host *host = ufshcd_get_variant(hba);
-
- phy_power_off(host->generic_phy);
-}
-
/* Resources */
static const struct ufshcd_res_info ufs_res_info[RES_MAX] = {
{.name = "ufs_mem",},
@@ -1825,7 +1827,6 @@ static const struct ufs_hba_variant_ops ufs_hba_qcom_vops = {
.device_reset = ufs_qcom_device_reset,
.config_scaling_param = ufs_qcom_config_scaling_param,
.program_key = ufs_qcom_ice_program_key,
- .reinit_notify = ufs_qcom_reinit_notify,
.mcq_config_resource = ufs_qcom_mcq_config_resource,
.get_hba_mac = ufs_qcom_get_hba_mac,
.op_runtime_config = ufs_qcom_op_runtime_config,
@@ -1868,9 +1869,15 @@ static void ufs_qcom_remove(struct platform_device *pdev)
platform_device_msi_free_irqs_all(hba->dev);
}
+static const struct ufs_qcom_drvdata ufs_qcom_sm8550_drvdata = {
+ .quirks = UFSHCD_QUIRK_BROKEN_LSDBS_CAP,
+ .no_phy_retention = true,
+};
+
static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = {
{ .compatible = "qcom,ufshc" },
- { .compatible = "qcom,sm8550-ufshc" },
+ { .compatible = "qcom,sm8550-ufshc", .data = &ufs_qcom_sm8550_drvdata },
+ { .compatible = "qcom,sm8650-ufshc", .data = &ufs_qcom_sm8550_drvdata },
{},
};
MODULE_DEVICE_TABLE(of, ufs_qcom_of_match);
diff --git a/drivers/ufs/host/ufs-qcom.h b/drivers/ufs/host/ufs-qcom.h
index b9de170983c9..919f53682beb 100644
--- a/drivers/ufs/host/ufs-qcom.h
+++ b/drivers/ufs/host/ufs-qcom.h
@@ -217,6 +217,11 @@ struct ufs_qcom_host {
bool esi_enabled;
};
+struct ufs_qcom_drvdata {
+ enum ufshcd_quirks quirks;
+ bool no_phy_retention;
+};
+
static inline u32
ufs_qcom_get_debug_reg_offset(struct ufs_qcom_host *host, u32 reg)
{
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 1ab58da9f38a..1a4ed5a357d3 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1661,14 +1661,15 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff;
vm_fault_t ret = VM_FAULT_SIGBUS;
- if (order && (vmf->address & ((PAGE_SIZE << order) - 1) ||
+ pfn = vma_to_pfn(vma) + pgoff;
+
+ if (order && (pfn & ((1 << order) - 1) ||
+ vmf->address & ((PAGE_SIZE << order) - 1) ||
vmf->address + (PAGE_SIZE << order) > vma->vm_end)) {
ret = VM_FAULT_FALLBACK;
goto out;
}
- pfn = vma_to_pfn(vma);
-
down_read(&vdev->memory_lock);
if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
@@ -1676,18 +1677,18 @@ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
switch (order) {
case 0:
- ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
+ ret = vmf_insert_pfn(vma, vmf->address, pfn);
break;
#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
case PMD_ORDER:
- ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff,
- PFN_DEV), false);
+ ret = vmf_insert_pfn_pmd(vmf,
+ __pfn_to_pfn_t(pfn, PFN_DEV), false);
break;
#endif
#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
case PUD_ORDER:
- ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff,
- PFN_DEV), false);
+ ret = vmf_insert_pfn_pud(vmf,
+ __pfn_to_pfn_t(pfn, PFN_DEV), false);
break;
#endif
default:
diff --git a/drivers/watchdog/stm32_iwdg.c b/drivers/watchdog/stm32_iwdg.c
index d700e0d49bb9..8ad06b54c5ad 100644
--- a/drivers/watchdog/stm32_iwdg.c
+++ b/drivers/watchdog/stm32_iwdg.c
@@ -286,7 +286,7 @@ static int stm32_iwdg_irq_init(struct platform_device *pdev,
if (!wdt->data->has_early_wakeup)
return 0;
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq <= 0)
return 0;
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 819c75233235..3bc9ce6c575e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -57,6 +57,8 @@ static void v9fs_issue_write(struct netfs_io_subrequest *subreq)
int err, len;
len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err);
+ if (len > 0)
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
netfs_write_subrequest_terminated(subreq, len ?: err, false);
}
@@ -80,8 +82,10 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
if (pos + total >= i_size_read(rreq->inode))
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
- if (!err)
+ if (!err) {
subreq->transferred += total;
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ }
netfs_read_subreq_terminated(subreq, err, false);
}
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 34107b55f834..ccb6aa8027c5 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -122,7 +122,7 @@ static void afs_issue_write_worker(struct work_struct *work)
if (subreq->debug_index == 3)
return netfs_write_subrequest_terminated(subreq, -ENOANO, false);
- if (!test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) {
+ if (!subreq->retry_count) {
set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
}
@@ -149,6 +149,9 @@ static void afs_issue_write_worker(struct work_struct *work)
afs_wait_for_operation(op);
ret = afs_put_operation(op);
switch (ret) {
+ case 0:
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ break;
case -EACCES:
case -EPERM:
case -ENOKEY:
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3af8bb0c8d75..4d9305fa37a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4878,25 +4878,29 @@ out_fail:
return ret;
}
+struct btrfs_uring_encoded_data {
+ struct btrfs_ioctl_encoded_io_args args;
+ struct iovec iovstack[UIO_FASTIOV];
+ struct iovec *iov;
+ struct iov_iter iter;
+};
+
static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags);
size_t copy_end;
- struct btrfs_ioctl_encoded_io_args args = { 0 };
int ret;
u64 disk_bytenr, disk_io_size;
struct file *file;
struct btrfs_inode *inode;
struct btrfs_fs_info *fs_info;
struct extent_io_tree *io_tree;
- struct iovec iovstack[UIO_FASTIOV];
- struct iovec *iov = iovstack;
- struct iov_iter iter;
loff_t pos;
struct kiocb kiocb;
struct extent_state *cached_state = NULL;
u64 start, lockend;
void __user *sqe_addr;
+ struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data;
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
@@ -4910,43 +4914,64 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
if (issue_flags & IO_URING_F_COMPAT) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
- struct btrfs_ioctl_encoded_io_args_32 args32;
-
copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags);
- if (copy_from_user(&args32, sqe_addr, copy_end)) {
- ret = -EFAULT;
- goto out_acct;
- }
- args.iov = compat_ptr(args32.iov);
- args.iovcnt = args32.iovcnt;
- args.offset = args32.offset;
- args.flags = args32.flags;
#else
return -ENOTTY;
#endif
} else {
copy_end = copy_end_kernel;
- if (copy_from_user(&args, sqe_addr, copy_end)) {
- ret = -EFAULT;
+ }
+
+ if (!data) {
+ data = kzalloc(sizeof(*data), GFP_NOFS);
+ if (!data) {
+ ret = -ENOMEM;
goto out_acct;
}
- }
- if (args.flags != 0)
- return -EINVAL;
+ io_uring_cmd_get_async_data(cmd)->op_data = data;
- ret = import_iovec(ITER_DEST, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
- &iov, &iter);
- if (ret < 0)
- goto out_acct;
+ if (issue_flags & IO_URING_F_COMPAT) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+ struct btrfs_ioctl_encoded_io_args_32 args32;
- if (iov_iter_count(&iter) == 0) {
- ret = 0;
- goto out_free;
+ if (copy_from_user(&args32, sqe_addr, copy_end)) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+
+ data->args.iov = compat_ptr(args32.iov);
+ data->args.iovcnt = args32.iovcnt;
+ data->args.offset = args32.offset;
+ data->args.flags = args32.flags;
+#endif
+ } else {
+ if (copy_from_user(&data->args, sqe_addr, copy_end)) {
+ ret = -EFAULT;
+ goto out_acct;
+ }
+ }
+
+ if (data->args.flags != 0) {
+ ret = -EINVAL;
+ goto out_acct;
+ }
+
+ data->iov = data->iovstack;
+ ret = import_iovec(ITER_DEST, data->args.iov, data->args.iovcnt,
+ ARRAY_SIZE(data->iovstack), &data->iov,
+ &data->iter);
+ if (ret < 0)
+ goto out_acct;
+
+ if (iov_iter_count(&data->iter) == 0) {
+ ret = 0;
+ goto out_free;
+ }
}
- pos = args.offset;
- ret = rw_verify_area(READ, file, &pos, args.len);
+ pos = data->args.offset;
+ ret = rw_verify_area(READ, file, &pos, data->args.len);
if (ret < 0)
goto out_free;
@@ -4959,15 +4984,16 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
start = ALIGN_DOWN(pos, fs_info->sectorsize);
lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
- ret = btrfs_encoded_read(&kiocb, &iter, &args, &cached_state,
+ ret = btrfs_encoded_read(&kiocb, &data->iter, &data->args, &cached_state,
&disk_bytenr, &disk_io_size);
if (ret < 0 && ret != -EIOCBQUEUED)
goto out_free;
file_accessed(file);
- if (copy_to_user(sqe_addr + copy_end, (const char *)&args + copy_end_kernel,
- sizeof(args) - copy_end_kernel)) {
+ if (copy_to_user(sqe_addr + copy_end,
+ (const char *)&data->args + copy_end_kernel,
+ sizeof(data->args) - copy_end_kernel)) {
if (ret == -EIOCBQUEUED) {
unlock_extent(io_tree, start, lockend, &cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
@@ -4977,40 +5003,22 @@ static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue
}
if (ret == -EIOCBQUEUED) {
- u64 count;
-
- /*
- * If we've optimized things by storing the iovecs on the stack,
- * undo this.
- */
- if (!iov) {
- iov = kmalloc(sizeof(struct iovec) * args.iovcnt, GFP_NOFS);
- if (!iov) {
- unlock_extent(io_tree, start, lockend, &cached_state);
- btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
- ret = -ENOMEM;
- goto out_acct;
- }
-
- memcpy(iov, iovstack, sizeof(struct iovec) * args.iovcnt);
- }
-
- count = min_t(u64, iov_iter_count(&iter), disk_io_size);
+ u64 count = min_t(u64, iov_iter_count(&data->iter), disk_io_size);
/* Match ioctl by not returning past EOF if uncompressed. */
- if (!args.compression)
- count = min_t(u64, count, args.len);
+ if (!data->args.compression)
+ count = min_t(u64, count, data->args.len);
- ret = btrfs_uring_read_extent(&kiocb, &iter, start, lockend,
- cached_state, disk_bytenr,
- disk_io_size, count,
- args.compression, iov, cmd);
+ ret = btrfs_uring_read_extent(&kiocb, &data->iter, start, lockend,
+ cached_state, disk_bytenr, disk_io_size,
+ count, data->args.compression,
+ data->iov, cmd);
goto out_acct;
}
out_free:
- kfree(iov);
+ kfree(data->iov);
out_acct:
if (ret > 0)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 204c928beaf9..531312efee8d 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -1541,6 +1541,10 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
u64 extent_gen;
int ret;
+ if (unlikely(!extent_root)) {
+ btrfs_err(fs_info, "no valid extent root for scrub");
+ return -EUCLEAN;
+ }
memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) *
stripe->nr_sectors);
scrub_stripe_reset_bitmaps(stripe);
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index ddf0d5a448a7..c9e92c6941ec 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -174,10 +174,10 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
copy_page(workspace->buf + i * PAGE_SIZE,
data_in);
start += PAGE_SIZE;
- workspace->strm.avail_in =
- (in_buf_folios << PAGE_SHIFT);
}
workspace->strm.next_in = workspace->buf;
+ workspace->strm.avail_in = min(bytes_left,
+ in_buf_folios << PAGE_SHIFT);
} else {
unsigned int pg_off;
unsigned int cur_len;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 11ed523e528e..df905ae82929 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -748,8 +748,9 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
(u64)lim->max_segments << PAGE_SHIFT),
fs_info->sectorsize);
fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
- if (fs_info->max_zone_append_size < fs_info->max_extent_size)
- fs_info->max_extent_size = fs_info->max_zone_append_size;
+
+ fs_info->max_extent_size = min_not_zero(fs_info->max_extent_size,
+ fs_info->max_zone_append_size);
/*
* Check mount options here, because we might change fs_info->zoned
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 89b11336a836..1806bff8e59b 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -15,6 +15,7 @@
#include <linux/namei.h>
#include <linux/poll.h>
#include <linux/mount.h>
+#include <linux/security.h>
#include <linux/statfs.h>
#include <linux/ctype.h>
#include <linux/string.h>
@@ -576,7 +577,7 @@ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args)
*/
static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args)
{
- char *secctx;
+ int err;
_enter(",%s", args);
@@ -585,16 +586,16 @@ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args)
return -EINVAL;
}
- if (cache->secctx) {
+ if (cache->have_secid) {
pr_err("Second security context specified\n");
return -EINVAL;
}
- secctx = kstrdup(args, GFP_KERNEL);
- if (!secctx)
- return -ENOMEM;
+ err = security_secctx_to_secid(args, strlen(args), &cache->secid);
+ if (err)
+ return err;
- cache->secctx = secctx;
+ cache->have_secid = true;
return 0;
}
@@ -820,7 +821,6 @@ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
put_cred(cache->cache_cred);
kfree(cache->rootdirname);
- kfree(cache->secctx);
kfree(cache->tag);
_leave("");
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 7b99bd98de75..38c236e38cef 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -122,7 +122,6 @@ struct cachefiles_cache {
#define CACHEFILES_STATE_CHANGED 3 /* T if state changed (poll trigger) */
#define CACHEFILES_ONDEMAND_MODE 4 /* T if in on-demand read mode */
char *rootdirname; /* name of cache root directory */
- char *secctx; /* LSM security context */
char *tag; /* cache binding tag */
refcount_t unbind_pincount;/* refcount to do daemon unbind */
struct xarray reqs; /* xarray of pending on-demand requests */
@@ -130,6 +129,8 @@ struct cachefiles_cache {
struct xarray ondemand_ids; /* xarray for ondemand_id allocation */
u32 ondemand_id_next;
u32 msg_id_next;
+ u32 secid; /* LSM security id */
+ bool have_secid; /* whether "secid" was set */
};
static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache)
diff --git a/fs/cachefiles/security.c b/fs/cachefiles/security.c
index fe777164f1d8..fc6611886b3b 100644
--- a/fs/cachefiles/security.c
+++ b/fs/cachefiles/security.c
@@ -18,7 +18,7 @@ int cachefiles_get_security_ID(struct cachefiles_cache *cache)
struct cred *new;
int ret;
- _enter("{%s}", cache->secctx);
+ _enter("{%u}", cache->have_secid ? cache->secid : 0);
new = prepare_kernel_cred(current);
if (!new) {
@@ -26,8 +26,8 @@ int cachefiles_get_security_ID(struct cachefiles_cache *cache)
goto error;
}
- if (cache->secctx) {
- ret = set_security_override_from_ctx(new, cache->secctx);
+ if (cache->have_secid) {
+ ret = set_security_override(new, cache->secid);
if (ret < 0) {
put_cred(new);
pr_err("Security denies permission to nominate security context: error %d\n",
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index fe0a9b8a0cd0..3103b932b674 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -122,7 +122,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
type = exfat_get_entry_type(ep);
if (type == TYPE_UNUSED) {
brelse(bh);
- break;
+ goto out;
}
if (type != TYPE_FILE && type != TYPE_DIR) {
@@ -170,6 +170,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
}
}
+out:
dir_entry->namebuf.lfn[0] = '\0';
*cpos = EXFAT_DEN_TO_B(dentry);
return 0;
diff --git a/fs/exfat/fatent.c b/fs/exfat/fatent.c
index 773c320d68f3..9e5492ac409b 100644
--- a/fs/exfat/fatent.c
+++ b/fs/exfat/fatent.c
@@ -216,6 +216,16 @@ static int __exfat_free_cluster(struct inode *inode, struct exfat_chain *p_chain
if (err)
goto dec_used_clus;
+
+ if (num_clusters >= sbi->num_clusters - EXFAT_FIRST_CLUSTER) {
+ /*
+ * The cluster chain includes a loop, scan the
+ * bitmap to get the number of used clusters.
+ */
+ exfat_count_used_clusters(sb, &sbi->used_clusters);
+
+ return 0;
+ }
} while (clu != EXFAT_EOF_CLUSTER);
}
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index fb38769c3e39..05b51e721783 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -545,6 +545,7 @@ static int exfat_extend_valid_size(struct file *file, loff_t new_valid_size)
while (pos < new_valid_size) {
u32 len;
struct folio *folio;
+ unsigned long off;
len = PAGE_SIZE - (pos & (PAGE_SIZE - 1));
if (pos + len > new_valid_size)
@@ -554,6 +555,9 @@ static int exfat_extend_valid_size(struct file *file, loff_t new_valid_size)
if (err)
goto out;
+ off = offset_in_folio(folio, pos);
+ folio_zero_new_buffers(folio, off, off + len);
+
err = ops->write_end(file, mapping, pos, len, len, folio, NULL);
if (err < 0)
goto out;
@@ -563,6 +567,8 @@ static int exfat_extend_valid_size(struct file *file, loff_t new_valid_size)
cond_resched();
}
+ return 0;
+
out:
return err;
}
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 97d2774760fe..099f80645072 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -330,8 +330,8 @@ static int exfat_find_empty_entry(struct inode *inode,
while ((dentry = exfat_search_empty_slot(sb, &hint_femp, p_dir,
num_entries, es)) < 0) {
- if (dentry == -EIO)
- break;
+ if (dentry != -ENOSPC)
+ return dentry;
if (exfat_check_max_dentries(inode))
return -ENOSPC;
diff --git a/fs/file.c b/fs/file.c
index fb1011cf6b4a..25c6e53b03f8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -22,6 +22,7 @@
#include <linux/close_range.h>
#include <linux/file_ref.h>
#include <net/sock.h>
+#include <linux/init_task.h>
#include "internal.h"
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 494ac372ace0..e540d05549ff 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1681,6 +1681,8 @@ static int fuse_dir_open(struct inode *inode, struct file *file)
*/
if (ff->open_flags & (FOPEN_STREAM | FOPEN_NONSEEKABLE))
nonseekable_open(inode, file);
+ if (!(ff->open_flags & FOPEN_KEEP_CACHE))
+ invalidate_inode_pages2(inode->i_mapping);
}
return err;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 3bee9b5dba5e..fe09c2093a93 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -349,11 +349,13 @@ static int hfs_fill_super(struct super_block *sb, struct fs_context *fc)
goto bail_no_root;
res = hfs_cat_find_brec(sb, HFS_ROOT_CNID, &fd);
if (!res) {
- if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) {
+ if (fd.entrylength != sizeof(rec.dir)) {
res = -EIO;
goto bail_hfs_find;
}
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength);
+ if (rec.type != HFS_CDR_DIR)
+ res = -EIO;
}
if (res)
goto bail_hfs_find;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 955f19e27e47..54dc27d92781 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1774,7 +1774,8 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
*/
static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
struct writeback_control *wbc, struct folio *folio,
- struct inode *inode, loff_t pos, unsigned len)
+ struct inode *inode, loff_t pos, loff_t end_pos,
+ unsigned len)
{
struct iomap_folio_state *ifs = folio->private;
size_t poff = offset_in_folio(folio, pos);
@@ -1793,15 +1794,60 @@ new_ioend:
if (ifs)
atomic_add(len, &ifs->write_bytes_pending);
+
+ /*
+ * Clamp io_offset and io_size to the incore EOF so that ondisk
+ * file size updates in the ioend completion are byte-accurate.
+ * This avoids recovering files with zeroed tail regions when
+ * writeback races with appending writes:
+ *
+ * Thread 1: Thread 2:
+ * ------------ -----------
+ * write [A, A+B]
+ * update inode size to A+B
+ * submit I/O [A, A+BS]
+ * write [A+B, A+B+C]
+ * update inode size to A+B+C
+ * <I/O completes, updates disk size to min(A+B+C, A+BS)>
+ * <power failure>
+ *
+ * After reboot:
+ * 1) with A+B+C < A+BS, the file has zero padding in range
+ * [A+B, A+B+C]
+ *
+ * |< Block Size (BS) >|
+ * |DDDDDDDDDDDD0000000000000|
+ * ^ ^ ^
+ * A A+B A+B+C
+ * (EOF)
+ *
+ * 2) with A+B+C > A+BS, the file has zero padding in range
+ * [A+B, A+BS]
+ *
+ * |< Block Size (BS) >|< Block Size (BS) >|
+ * |DDDDDDDDDDDD0000000000000|00000000000000000000000000|
+ * ^ ^ ^ ^
+ * A A+B A+BS A+B+C
+ * (EOF)
+ *
+ * D = Valid Data
+ * 0 = Zero Padding
+ *
+ * Note that this defeats the ability to chain the ioends of
+ * appending writes.
+ */
wpc->ioend->io_size += len;
+ if (wpc->ioend->io_offset + wpc->ioend->io_size > end_pos)
+ wpc->ioend->io_size = end_pos - wpc->ioend->io_offset;
+
wbc_account_cgroup_owner(wbc, folio, len);
return 0;
}
static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
struct writeback_control *wbc, struct folio *folio,
- struct inode *inode, u64 pos, unsigned dirty_len,
- unsigned *count)
+ struct inode *inode, u64 pos, u64 end_pos,
+ unsigned dirty_len, unsigned *count)
{
int error;
@@ -1826,7 +1872,7 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
break;
default:
error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
- map_len);
+ end_pos, map_len);
if (!error)
(*count)++;
break;
@@ -1897,11 +1943,11 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
* remaining memory is zeroed when mapped, and writes to that
* region are not written out to the file.
*
- * Also adjust the writeback range to skip all blocks entirely
- * beyond i_size.
+ * Also adjust the end_pos to the end of file and skip writeback
+ * for all blocks entirely beyond i_size.
*/
folio_zero_segment(folio, poff, folio_size(folio));
- *end_pos = round_up(isize, i_blocksize(inode));
+ *end_pos = isize;
}
return true;
@@ -1914,6 +1960,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
struct inode *inode = folio->mapping->host;
u64 pos = folio_pos(folio);
u64 end_pos = pos + folio_size(folio);
+ u64 end_aligned = 0;
unsigned count = 0;
int error = 0;
u32 rlen;
@@ -1955,9 +2002,10 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
/*
* Walk through the folio to find dirty areas to write back.
*/
- while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) {
+ end_aligned = round_up(end_pos, i_blocksize(inode));
+ while ((rlen = iomap_find_dirty_range(folio, &pos, end_aligned))) {
error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
- pos, rlen, &count);
+ pos, end_pos, rlen, &count);
if (error)
break;
pos += rlen;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 9153ff3a08e7..e8e80761ac73 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -772,9 +772,9 @@ start_journal_io:
/*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
- * the commit record
+ * the commit record and update the journal tail sequence.
*/
- if (commit_transaction->t_need_data_flush &&
+ if ((commit_transaction->t_need_data_flush || update_tail) &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(journal->j_fs_dev);
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 4556e4689024..ce63d5fde9c3 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -654,7 +654,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(descriptor);
BUFFER_TRACE(descriptor, "write");
set_buffer_dirty(descriptor);
- write_dirty_buffer(descriptor, REQ_SYNC);
+ write_dirty_buffer(descriptor, JBD2_JOURNAL_REQ_FLAGS);
}
#endif
diff --git a/fs/namespace.c b/fs/namespace.c
index 23e81c2a1e3f..6eec7794f707 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2055,9 +2055,15 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
static bool is_mnt_ns_file(struct dentry *dentry)
{
+ struct ns_common *ns;
+
/* Is this a proxy for a mount namespace? */
- return dentry->d_op == &ns_dentry_operations &&
- dentry->d_fsdata == &mntns_operations;
+ if (dentry->d_op != &ns_dentry_operations)
+ return false;
+
+ ns = d_inode(dentry)->i_private;
+
+ return ns->ops == &mntns_operations;
}
struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 7ac34550c403..4dc9b8286355 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -275,22 +275,14 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
netfs_stat(&netfs_n_rh_download);
if (rreq->netfs_ops->prepare_read) {
ret = rreq->netfs_ops->prepare_read(subreq);
- if (ret < 0) {
- atomic_dec(&rreq->nr_outstanding);
- netfs_put_subrequest(subreq, false,
- netfs_sreq_trace_put_cancel);
- break;
- }
+ if (ret < 0)
+ goto prep_failed;
trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
}
slice = netfs_prepare_read_iterator(subreq);
- if (slice < 0) {
- atomic_dec(&rreq->nr_outstanding);
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
- ret = slice;
- break;
- }
+ if (slice < 0)
+ goto prep_iter_failed;
rreq->netfs_ops->issue_read(subreq);
goto done;
@@ -302,6 +294,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
netfs_stat(&netfs_n_rh_zero);
slice = netfs_prepare_read_iterator(subreq);
+ if (slice < 0)
+ goto prep_iter_failed;
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
netfs_read_subreq_terminated(subreq, 0, false);
goto done;
@@ -310,6 +304,8 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
if (source == NETFS_READ_FROM_CACHE) {
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
slice = netfs_prepare_read_iterator(subreq);
+ if (slice < 0)
+ goto prep_iter_failed;
netfs_read_cache_to_pagecache(rreq, subreq);
goto done;
}
@@ -318,6 +314,14 @@ static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
WARN_ON_ONCE(1);
break;
+ prep_iter_failed:
+ ret = slice;
+ prep_failed:
+ subreq->error = ret;
+ atomic_dec(&rreq->nr_outstanding);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ break;
+
done:
size -= slice;
start += slice;
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 88f2adfab75e..173e8b5e6a93 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -104,7 +104,6 @@ ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
TASK_UNINTERRUPTIBLE);
- smp_rmb(); /* Read error/transferred after RIP flag */
ret = wreq->error;
if (ret == 0) {
ret = wreq->transferred;
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 3cbb289535a8..e8624f5c7fcc 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -62,10 +62,14 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
} else {
trace_netfs_folio(folio, netfs_folio_trace_read_done);
}
+
+ folioq_clear(folioq, slot);
} else {
// TODO: Use of PG_private_2 is deprecated.
if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags))
netfs_pgpriv2_mark_copy_to_cache(subreq, rreq, folioq, slot);
+ else
+ folioq_clear(folioq, slot);
}
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
@@ -77,8 +81,6 @@ static void netfs_unlock_read_folio(struct netfs_io_subrequest *subreq,
folio_unlock(folio);
}
}
-
- folioq_clear(folioq, slot);
}
/*
@@ -247,16 +249,17 @@ donation_changed:
/* Deal with the trickiest case: that this subreq is in the middle of a
* folio, not touching either edge, but finishes first. In such a
- * case, we donate to the previous subreq, if there is one, so that the
- * donation is only handled when that completes - and remove this
- * subreq from the list.
+ * case, we donate to the previous subreq, if there is one and if it is
+ * contiguous, so that the donation is only handled when that completes
+ * - and remove this subreq from the list.
*
* If the previous subreq finished first, we will have acquired their
* donation and should be able to unlock folios and/or donate nextwards.
*/
if (!subreq->consumed &&
!prev_donated &&
- !list_is_first(&subreq->rreq_link, &rreq->subrequests)) {
+ !list_is_first(&subreq->rreq_link, &rreq->subrequests) &&
+ subreq->start == prev->start + prev->len) {
prev = list_prev_entry(subreq, rreq_link);
WRITE_ONCE(prev->next_donated, prev->next_donated + subreq->len);
subreq->start += subreq->len;
@@ -378,8 +381,7 @@ static void netfs_rreq_assess(struct netfs_io_request *rreq)
task_io_account_read(rreq->transferred);
trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
- clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
+ clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
trace_netfs_rreq(rreq, netfs_rreq_trace_done);
netfs_clear_subrequests(rreq, false);
@@ -438,7 +440,7 @@ void netfs_read_subreq_progress(struct netfs_io_subrequest *subreq,
rreq->origin == NETFS_READPAGE ||
rreq->origin == NETFS_READ_FOR_WRITE)) {
netfs_consume_read_data(subreq, was_async);
- __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
}
}
EXPORT_SYMBOL(netfs_read_subreq_progress);
@@ -497,7 +499,7 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
rreq->origin == NETFS_READPAGE ||
rreq->origin == NETFS_READ_FOR_WRITE)) {
netfs_consume_read_data(subreq, was_async);
- __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
}
rreq->transferred += subreq->transferred;
}
@@ -511,10 +513,13 @@ void netfs_read_subreq_terminated(struct netfs_io_subrequest *subreq,
} else {
trace_netfs_sreq(subreq, netfs_sreq_trace_short);
if (subreq->transferred > subreq->consumed) {
- __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
- set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
- } else if (!__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
+ /* If we didn't read new data, abandon retry. */
+ if (subreq->retry_count &&
+ test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
+ __set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
+ }
+ } else if (test_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags)) {
__set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
set_bit(NETFS_RREQ_NEED_RETRY, &rreq->flags);
} else {
diff --git a/fs/netfs/read_pgpriv2.c b/fs/netfs/read_pgpriv2.c
index ba5af89d37fa..54d5004fec18 100644
--- a/fs/netfs/read_pgpriv2.c
+++ b/fs/netfs/read_pgpriv2.c
@@ -170,6 +170,10 @@ void netfs_pgpriv2_write_to_the_cache(struct netfs_io_request *rreq)
trace_netfs_write(wreq, netfs_write_trace_copy_to_cache);
netfs_stat(&netfs_n_wh_copy_to_cache);
+ if (!wreq->io_streams[1].avail) {
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ goto couldnt_start;
+ }
for (;;) {
error = netfs_pgpriv2_copy_folio(wreq, folio);
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c
index 0350592ea804..21b4a54e545e 100644
--- a/fs/netfs/read_retry.c
+++ b/fs/netfs/read_retry.c
@@ -49,13 +49,15 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
* up to the first permanently failed one.
*/
if (!rreq->netfs_ops->prepare_read &&
- !test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) {
+ !rreq->cache_resources.ops) {
struct netfs_io_subrequest *subreq;
list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
break;
if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+ __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ subreq->retry_count++;
netfs_reset_iter(subreq);
netfs_reissue_read(rreq, subreq);
}
@@ -137,7 +139,8 @@ static void netfs_retry_read_subrequests(struct netfs_io_request *rreq)
stream0->sreq_max_len = subreq->len;
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
+ __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
+ subreq->retry_count++;
spin_lock_bh(&rreq->lock);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
@@ -213,7 +216,6 @@ abandon:
subreq->error = -ENOMEM;
__clear_bit(NETFS_SREQ_FAILED, &subreq->flags);
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- __clear_bit(NETFS_SREQ_RETRYING, &subreq->flags);
}
spin_lock_bh(&rreq->lock);
list_splice_tail_init(&queue, &rreq->subrequests);
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
index 1d438be2e1b4..ca3a11ed9b54 100644
--- a/fs/netfs/write_collect.c
+++ b/fs/netfs/write_collect.c
@@ -179,7 +179,6 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
struct iov_iter source = subreq->io_iter;
iov_iter_revert(&source, subreq->len - source.count);
- __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
netfs_reissue_write(stream, subreq, &source);
}
@@ -234,7 +233,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
/* Renegotiate max_len (wsize) */
trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
- __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
+ subreq->retry_count++;
stream->prepare_write(subreq);
part = min(len, stream->sreq_max_len);
@@ -279,7 +278,7 @@ static void netfs_retry_write_stream(struct netfs_io_request *wreq,
subreq->start = start;
subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
subreq->stream_nr = to->stream_nr;
- __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
+ subreq->retry_count = 1;
trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
refcount_read(&subreq->ref),
@@ -501,8 +500,7 @@ reassess_streams:
goto need_retry;
if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
- clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
- wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE);
+ clear_and_wake_up_bit(NETFS_RREQ_PAUSE, &wreq->flags);
}
if (notes & NEED_REASSESS) {
@@ -605,8 +603,7 @@ void netfs_write_collection_worker(struct work_struct *work)
_debug("finished");
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
- clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
- wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
+ clear_and_wake_up_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
if (wreq->iocb) {
size_t written = min(wreq->transferred, wreq->len);
@@ -714,8 +711,7 @@ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
- clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
- wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS);
+ clear_and_wake_up_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
/* If we are at the head of the queue, wake up the collector,
* transferring a ref to it if we were the ones to do so.
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index bf6d507578e5..ff0e82505a0b 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -244,6 +244,8 @@ void netfs_reissue_write(struct netfs_io_stream *stream,
iov_iter_advance(source, size);
iov_iter_truncate(&subreq->io_iter, size);
+ subreq->retry_count++;
+ __clear_bit(NETFS_SREQ_MADE_PROGRESS, &subreq->flags);
__set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
netfs_do_issue_write(stream, subreq);
}
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 810269ee0a50..d49e4ce27999 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -263,6 +263,12 @@ int nfs_netfs_readahead(struct readahead_control *ractl)
static atomic_t nfs_netfs_debug_id;
static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *file)
{
+ if (!file) {
+ if (WARN_ON_ONCE(rreq->origin != NETFS_PGPRIV2_COPY_TO_CACHE))
+ return -EIO;
+ return 0;
+ }
+
rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file));
rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id);
/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
@@ -274,7 +280,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi
static void nfs_netfs_free_request(struct netfs_io_request *rreq)
{
- put_nfs_open_context(rreq->netfs_priv);
+ if (rreq->netfs_priv)
+ put_nfs_open_context(rreq->netfs_priv);
}
static struct nfs_netfs_io_data *nfs_netfs_alloc(struct netfs_io_subrequest *sreq)
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index dec553034027..e933f9c65d90 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -47,10 +47,8 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
size = f->handle_bytes >> 2;
ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size);
- if ((ret == FILEID_INVALID) || (ret < 0)) {
- WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
+ if ((ret == FILEID_INVALID) || (ret < 0))
return;
- }
f->handle_type = ret;
f->handle_bytes = size * sizeof(u32);
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 2b0daced98eb..3404e7a30c33 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -893,7 +893,7 @@ static int ocfs2_get_next_id(struct super_block *sb, struct kqid *qid)
int status = 0;
trace_ocfs2_get_next_id(from_kqid(&init_user_ns, *qid), type);
- if (!sb_has_quota_loaded(sb, type)) {
+ if (!sb_has_quota_active(sb, type)) {
status = -ESRCH;
goto out;
}
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 73d3367c533b..2956d888c131 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -867,6 +867,7 @@ out:
brelse(oinfo->dqi_libh);
brelse(oinfo->dqi_lqi_bh);
kfree(oinfo);
+ info->dqi_priv = NULL;
return status;
}
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 3601ddfeddc2..0c28e5fa3407 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -415,13 +415,13 @@ int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upperdentry,
return err;
}
-struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode,
bool is_upper)
{
struct ovl_fh *fh;
int fh_type, dwords;
int buflen = MAX_HANDLE_SZ;
- uuid_t *uuid = &real->d_sb->s_uuid;
+ uuid_t *uuid = &realinode->i_sb->s_uuid;
int err;
/* Make sure the real fid stays 32bit aligned */
@@ -438,13 +438,13 @@ struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
* the price or reconnecting the dentry.
*/
dwords = buflen >> 2;
- fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
+ fh_type = exportfs_encode_inode_fh(realinode, (void *)fh->fb.fid,
+ &dwords, NULL, 0);
buflen = (dwords << 2);
err = -EIO;
- if (WARN_ON(fh_type < 0) ||
- WARN_ON(buflen > MAX_HANDLE_SZ) ||
- WARN_ON(fh_type == FILEID_INVALID))
+ if (fh_type < 0 || fh_type == FILEID_INVALID ||
+ WARN_ON(buflen > MAX_HANDLE_SZ))
goto out_err;
fh->fb.version = OVL_FH_VERSION;
@@ -480,7 +480,7 @@ struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin)
if (!ovl_can_decode_fh(origin->d_sb))
return NULL;
- return ovl_encode_real_fh(ofs, origin, false);
+ return ovl_encode_real_fh(ofs, d_inode(origin), false);
}
int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
@@ -505,7 +505,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
const struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(ofs, upper, true);
+ fh = ovl_encode_real_fh(ofs, d_inode(upper), true);
if (IS_ERR(fh))
return PTR_ERR(fh);
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 5868cb222955..444aeeccb6da 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -176,35 +176,37 @@ static int ovl_connect_layer(struct dentry *dentry)
*
* Return 0 for upper file handle, > 0 for lower file handle or < 0 on error.
*/
-static int ovl_check_encode_origin(struct dentry *dentry)
+static int ovl_check_encode_origin(struct inode *inode)
{
- struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
+ struct ovl_fs *ofs = OVL_FS(inode->i_sb);
bool decodable = ofs->config.nfs_export;
+ struct dentry *dentry;
+ int err;
/* No upper layer? */
if (!ovl_upper_mnt(ofs))
return 1;
/* Lower file handle for non-upper non-decodable */
- if (!ovl_dentry_upper(dentry) && !decodable)
+ if (!ovl_inode_upper(inode) && !decodable)
return 1;
/* Upper file handle for pure upper */
- if (!ovl_dentry_lower(dentry))
+ if (!ovl_inode_lower(inode))
return 0;
/*
* Root is never indexed, so if there's an upper layer, encode upper for
* root.
*/
- if (dentry == dentry->d_sb->s_root)
+ if (inode == d_inode(inode->i_sb->s_root))
return 0;
/*
* Upper decodable file handle for non-indexed upper.
*/
- if (ovl_dentry_upper(dentry) && decodable &&
- !ovl_test_flag(OVL_INDEX, d_inode(dentry)))
+ if (ovl_inode_upper(inode) && decodable &&
+ !ovl_test_flag(OVL_INDEX, inode))
return 0;
/*
@@ -213,14 +215,23 @@ static int ovl_check_encode_origin(struct dentry *dentry)
* ovl_connect_layer() will try to make origin's layer "connected" by
* copying up a "connectable" ancestor.
*/
- if (d_is_dir(dentry) && decodable)
- return ovl_connect_layer(dentry);
+ if (!decodable || !S_ISDIR(inode->i_mode))
+ return 1;
+
+ dentry = d_find_any_alias(inode);
+ if (!dentry)
+ return -ENOENT;
+
+ err = ovl_connect_layer(dentry);
+ dput(dentry);
+ if (err < 0)
+ return err;
/* Lower file handle for indexed and non-upper dir/non-dir */
return 1;
}
-static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
+static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct inode *inode,
u32 *fid, int buflen)
{
struct ovl_fh *fh = NULL;
@@ -231,13 +242,13 @@ static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
* Check if we should encode a lower or upper file handle and maybe
* copy up an ancestor to make lower file handle connectable.
*/
- err = enc_lower = ovl_check_encode_origin(dentry);
+ err = enc_lower = ovl_check_encode_origin(inode);
if (enc_lower < 0)
goto fail;
/* Encode an upper or lower file handle */
- fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) :
- ovl_dentry_upper(dentry), !enc_lower);
+ fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_inode_lower(inode) :
+ ovl_inode_upper(inode), !enc_lower);
if (IS_ERR(fh))
return PTR_ERR(fh);
@@ -251,8 +262,8 @@ out:
return err;
fail:
- pr_warn_ratelimited("failed to encode file handle (%pd2, err=%i)\n",
- dentry, err);
+ pr_warn_ratelimited("failed to encode file handle (ino=%lu, err=%i)\n",
+ inode->i_ino, err);
goto out;
}
@@ -260,19 +271,13 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
struct ovl_fs *ofs = OVL_FS(inode->i_sb);
- struct dentry *dentry;
int bytes, buflen = *max_len << 2;
/* TODO: encode connectable file handles */
if (parent)
return FILEID_INVALID;
- dentry = d_find_any_alias(inode);
- if (!dentry)
- return FILEID_INVALID;
-
- bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen);
- dput(dentry);
+ bytes = ovl_dentry_to_fid(ofs, inode, fid, buflen);
if (bytes <= 0)
return FILEID_INVALID;
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 7e27b7d4adee..cea820cb3b55 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -542,7 +542,7 @@ int ovl_verify_origin_xattr(struct ovl_fs *ofs, struct dentry *dentry,
struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(ofs, real, is_upper);
+ fh = ovl_encode_real_fh(ofs, d_inode(real), is_upper);
err = PTR_ERR(fh);
if (IS_ERR(fh)) {
fh = NULL;
@@ -738,7 +738,7 @@ int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
struct ovl_fh *fh;
int err;
- fh = ovl_encode_real_fh(ofs, origin, false);
+ fh = ovl_encode_real_fh(ofs, d_inode(origin), false);
if (IS_ERR(fh))
return PTR_ERR(fh);
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index b361f35762be..0021e2025020 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -865,7 +865,7 @@ int ovl_copy_up_with_data(struct dentry *dentry);
int ovl_maybe_copy_up(struct dentry *dentry, int flags);
int ovl_copy_xattr(struct super_block *sb, const struct path *path, struct dentry *new);
int ovl_set_attr(struct ovl_fs *ofs, struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct inode *realinode,
bool is_upper);
struct ovl_fh *ovl_get_origin_fh(struct ovl_fs *ofs, struct dentry *origin);
int ovl_set_origin_fh(struct ovl_fs *ofs, const struct ovl_fh *fh,
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 38a5a3e9cba2..f02cd362309a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1810,7 +1810,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
}
for (; addr != end; addr += PAGE_SIZE, idx++) {
- unsigned long cur_flags = flags;
+ u64 cur_flags = flags;
pagemap_entry_t pme;
if (folio && (flags & PM_PRESENT) &&
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 85925ec0051a..3310d1ad4d0e 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -179,8 +179,7 @@ static int qnx6_statfs(struct dentry *dentry, struct kstatfs *buf)
*/
static const char *qnx6_checkroot(struct super_block *s)
{
- static char match_root[2][3] = {".\0\0", "..\0"};
- int i, error = 0;
+ int error = 0;
struct qnx6_dir_entry *dir_entry;
struct inode *root = d_inode(s->s_root);
struct address_space *mapping = root->i_mapping;
@@ -189,11 +188,9 @@ static const char *qnx6_checkroot(struct super_block *s)
if (IS_ERR(folio))
return "error reading root directory";
dir_entry = kmap_local_folio(folio, 0);
- for (i = 0; i < 2; i++) {
- /* maximum 3 bytes - due to match_root limitation */
- if (strncmp(dir_entry[i].de_fname, match_root[i], 3))
- error = 1;
- }
+ if (memcmp(dir_entry[0].de_fname, ".", 2) ||
+ memcmp(dir_entry[1].de_fname, "..", 3))
+ error = 1;
folio_release_kmap(folio, dir_entry);
if (error)
return "error reading root directory.";
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index bd42a419458e..6cb1e81993f8 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1319,14 +1319,16 @@ cifs_readv_callback(struct mid_q_entry *mid)
}
if (rdata->result == -ENODATA) {
- __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0;
+ __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
} else {
size_t trans = rdata->subreq.transferred + rdata->got_bytes;
if (trans < rdata->subreq.len &&
rdata->subreq.start + trans == ictx->remote_i_size) {
- __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0;
+ __set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
+ } else if (rdata->got_bytes > 0) {
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
}
}
@@ -1670,10 +1672,13 @@ cifs_writev_callback(struct mid_q_entry *mid)
if (written > wdata->subreq.len)
written &= 0xFFFF;
- if (written < wdata->subreq.len)
+ if (written < wdata->subreq.len) {
result = -ENOSPC;
- else
+ } else {
result = written;
+ if (written > 0)
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &wdata->subreq.flags);
+ }
break;
case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED:
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index 959359301250..0577556f0a41 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -4615,6 +4615,7 @@ smb2_readv_callback(struct mid_q_entry *mid)
__set_bit(NETFS_SREQ_HIT_EOF, &rdata->subreq.flags);
rdata->result = 0;
}
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
}
trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value,
server->credits, server->in_flight,
@@ -4842,10 +4843,12 @@ smb2_writev_callback(struct mid_q_entry *mid)
cifs_stats_bytes_written(tcon, written);
- if (written < wdata->subreq.len)
+ if (written < wdata->subreq.len) {
wdata->result = -ENOSPC;
- else
+ } else if (written > 0) {
wdata->subreq.len = written;
+ __set_bit(NETFS_SREQ_MADE_PROGRESS, &wdata->subreq.flags);
+ }
break;
case MID_REQUEST_SUBMITTED:
case MID_RETRY_NEEDED:
@@ -5014,7 +5017,7 @@ smb2_async_writev(struct cifs_io_subrequest *wdata)
}
#endif
- if (test_bit(NETFS_SREQ_RETRYING, &wdata->subreq.flags))
+ if (wdata->subreq.retry_count > 0)
smb2_set_replay(server, &rqst);
cifs_dbg(FYI, "async write at %llu %u bytes iter=%zx\n",
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index f711bfd75c4d..4bf70cfec826 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -412,6 +412,29 @@ struct ethtool_eth_phy_stats {
);
};
+/**
+ * struct ethtool_phy_stats - PHY-level statistics counters
+ * @rx_packets: Total successfully received frames
+ * @rx_bytes: Total successfully received bytes
+ * @rx_errors: Total received frames with errors (e.g., CRC errors)
+ * @tx_packets: Total successfully transmitted frames
+ * @tx_bytes: Total successfully transmitted bytes
+ * @tx_errors: Total transmitted frames with errors
+ *
+ * This structure provides a standardized interface for reporting
+ * PHY-level statistics counters. It is designed to expose statistics
+ * commonly provided by PHYs but not explicitly defined in the IEEE
+ * 802.3 standard.
+ */
+struct ethtool_phy_stats {
+ u64 rx_packets;
+ u64 rx_bytes;
+ u64 rx_errors;
+ u64 tx_packets;
+ u64 tx_bytes;
+ u64 tx_errors;
+};
+
/* Basic IEEE 802.3 MAC Ctrl statistics (30.3.3.*), not otherwise exposed
* via a more targeted API.
*/
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index d495cbdb52cb..38456b42cdb5 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -176,6 +176,7 @@ struct netpoll;
* @real_dev_addr: address of underlying netdevice
* @dent: proc dir entry
* @vlan_pcpu_stats: ptr to percpu rx stats
+ * @netpoll: netpoll instance "propagated" down to @real_dev
*/
struct vlan_dev_priv {
unsigned int nr_ingress_mappings;
@@ -414,6 +415,8 @@ static inline int __vlan_insert_tag(struct sk_buff *skb,
* doesn't have to worry about freeing the original skb.
*
* Does not change skb->protocol so this function can be used during receive.
+ *
+ * Return: modified @skb on success, NULL on error (@skb is freed).
*/
static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb,
__be16 vlan_proto,
@@ -443,6 +446,8 @@ static inline struct sk_buff *vlan_insert_inner_tag(struct sk_buff *skb,
* doesn't have to worry about freeing the original skb.
*
* Does not change skb->protocol so this function can be used during receive.
+ *
+ * Return: modified @skb on success, NULL on error (@skb is freed).
*/
static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
__be16 vlan_proto, u16 vlan_tci)
@@ -461,6 +466,8 @@ static inline struct sk_buff *vlan_insert_tag(struct sk_buff *skb,
*
* Following the skb_unshare() example, in case of error, the calling function
* doesn't have to worry about freeing the original skb.
+ *
+ * Return: modified @skb on success, NULL on error (@skb is freed).
*/
static inline struct sk_buff *vlan_insert_tag_set_proto(struct sk_buff *skb,
__be16 vlan_proto,
@@ -582,7 +589,7 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci)
}
/**
- * vlan_get_protocol - get protocol EtherType.
+ * __vlan_get_protocol_offset() - get protocol EtherType.
* @skb: skbuff to query
* @type: first vlan protocol
* @mac_offset: MAC offset
@@ -808,9 +815,11 @@ static inline netdev_features_t vlan_features_check(struct sk_buff *skb,
* @h1: Pointer to vlan header
* @h2: Pointer to vlan header
*
- * Compare two vlan headers, returns 0 if equal.
+ * Compare two vlan headers.
*
* Please note that alignment of h1 & h2 are only guaranteed to be 16 bits.
+ *
+ * Return: 0 if equal, arbitrary non-zero value if not equal.
*/
static inline unsigned long compare_vlan_header(const struct vlan_hdr *h1,
const struct vlan_hdr *h2)
diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h
index 0d5448c0b86c..a3ce553413de 100644
--- a/include/linux/io_uring/cmd.h
+++ b/include/linux/io_uring/cmd.h
@@ -18,6 +18,11 @@ struct io_uring_cmd {
u8 pdu[32]; /* available inline for free use */
};
+struct io_uring_cmd_data {
+ struct io_uring_sqe sqes[2];
+ void *op_data;
+};
+
static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe)
{
return sqe->cmd;
@@ -113,4 +118,9 @@ static inline struct task_struct *io_uring_cmd_get_task(struct io_uring_cmd *cmd
return cmd_to_io_kiocb(cmd)->tctx->task;
}
+static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_uring_cmd *cmd)
+{
+ return cmd_to_io_kiocb(cmd)->async_data;
+}
+
#endif /* _LINUX_IO_URING_CMD_H */
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 5675af6b740c..75bf54e76f3b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -335,7 +335,7 @@ struct iomap_ioend {
u16 io_type;
u16 io_flags; /* IOMAP_F_* */
struct inode *io_inode; /* file being written to */
- size_t io_size; /* size of the extent */
+ size_t io_size; /* size of data within eof */
loff_t io_offset; /* offset in the file */
sector_t io_sector; /* start sector of ioend */
struct bio io_bio; /* MUST BE LAST! */
diff --git a/include/linux/memfd.h b/include/linux/memfd.h
index 3f2cf339ceaf..d437e3070850 100644
--- a/include/linux/memfd.h
+++ b/include/linux/memfd.h
@@ -7,6 +7,7 @@
#ifdef CONFIG_MEMFD_CREATE
extern long memfd_fcntl(struct file *file, unsigned int cmd, unsigned int arg);
struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx);
+unsigned int *memfd_file_seals_ptr(struct file *file);
#else
static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a)
{
@@ -16,6 +17,19 @@ static inline struct folio *memfd_alloc_folio(struct file *memfd, pgoff_t idx)
{
return ERR_PTR(-EINVAL);
}
+
+static inline unsigned int *memfd_file_seals_ptr(struct file *file)
+{
+ return NULL;
+}
#endif
+/* Retrieve memfd seals associated with the file, if any. */
+static inline unsigned int memfd_file_seals(struct file *file)
+{
+ unsigned int *sealsp = memfd_file_seals_ptr(file);
+
+ return sealsp ? *sealsp : 0;
+}
+
#endif /* __LINUX_MEMFD_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index ea48eb879a0f..fed666c5bd16 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -691,7 +691,6 @@ struct mlx5_timer {
struct timecounter tc;
u32 nominal_c_mult;
unsigned long overflow_period;
- struct delayed_work overflow_work;
};
struct mlx5_clock {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 370f533da107..bb99a35fc6a2 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -7025,6 +7025,7 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
enum {
MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START = 0x1,
+ MLX5_REFORMAT_CONTEXT_ANCHOR_VLAN_START = 0x2,
MLX5_REFORMAT_CONTEXT_ANCHOR_IP_START = 0x7,
MLX5_REFORMAT_CONTEXT_ANCHOR_TCP_UDP_START = 0x9,
};
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 338a76ce9083..b1c3db9cf355 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3125,6 +3125,7 @@ static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc)
if (!pmd_ptlock_init(ptdesc))
return false;
__folio_set_pgtable(folio);
+ ptdesc_pmd_pts_init(ptdesc);
lruvec_stat_add_folio(folio, NR_PAGETABLE);
return true;
}
@@ -4101,6 +4102,37 @@ void mem_dump_obj(void *object);
static inline void mem_dump_obj(void *object) {}
#endif
+static inline bool is_write_sealed(int seals)
+{
+ return seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE);
+}
+
+/**
+ * is_readonly_sealed - Checks whether write-sealed but mapped read-only,
+ * in which case writes should be disallowing moving
+ * forwards.
+ * @seals: the seals to check
+ * @vm_flags: the VMA flags to check
+ *
+ * Returns whether readonly sealed, in which case writess should be disallowed
+ * going forward.
+ */
+static inline bool is_readonly_sealed(int seals, vm_flags_t vm_flags)
+{
+ /*
+ * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as
+ * MAP_SHARED and read-only, take care to not allow mprotect to
+ * revert protections on such mappings. Do this only for shared
+ * mappings. For private mappings, don't need to mask
+ * VM_MAYWRITE as we still want them to be COW-writable.
+ */
+ if (is_write_sealed(seals) &&
+ ((vm_flags & (VM_SHARED | VM_WRITE)) == VM_SHARED))
+ return true;
+
+ return false;
+}
+
/**
* seal_check_write - Check for F_SEAL_WRITE or F_SEAL_FUTURE_WRITE flags and
* handle them.
@@ -4112,24 +4144,15 @@ static inline void mem_dump_obj(void *object) {}
*/
static inline int seal_check_write(int seals, struct vm_area_struct *vma)
{
- if (seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
- /*
- * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
- * write seals are active.
- */
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
- return -EPERM;
-
- /*
- * Since an F_SEAL_[FUTURE_]WRITE sealed memfd can be mapped as
- * MAP_SHARED and read-only, take care to not allow mprotect to
- * revert protections on such mappings. Do this only for shared
- * mappings. For private mappings, don't need to mask
- * VM_MAYWRITE as we still want them to be COW-writable.
- */
- if (vma->vm_flags & VM_SHARED)
- vm_flags_clear(vma, VM_MAYWRITE);
- }
+ if (!is_write_sealed(seals))
+ return 0;
+
+ /*
+ * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+ * write seals are active.
+ */
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+ return -EPERM;
return 0;
}
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7361a8f3ab68..332cee285662 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -445,6 +445,7 @@ FOLIO_MATCH(compound_head, _head_2a);
* @pt_index: Used for s390 gmap.
* @pt_mm: Used for x86 pgds.
* @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
+ * @pt_share_count: Used for HugeTLB PMD page table share count.
* @_pt_pad_2: Padding to ensure proper alignment.
* @ptl: Lock for the page table.
* @__page_type: Same as page->page_type. Unused for page tables.
@@ -471,6 +472,9 @@ struct ptdesc {
pgoff_t pt_index;
struct mm_struct *pt_mm;
atomic_t pt_frag_refcount;
+#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
+ atomic_t pt_share_count;
+#endif
};
union {
@@ -516,6 +520,32 @@ static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
const struct page *: (const struct ptdesc *)(p), \
struct page *: (struct ptdesc *)(p)))
+#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
+static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
+{
+ atomic_set(&ptdesc->pt_share_count, 0);
+}
+
+static inline void ptdesc_pmd_pts_inc(struct ptdesc *ptdesc)
+{
+ atomic_inc(&ptdesc->pt_share_count);
+}
+
+static inline void ptdesc_pmd_pts_dec(struct ptdesc *ptdesc)
+{
+ atomic_dec(&ptdesc->pt_share_count);
+}
+
+static inline int ptdesc_pmd_pts_count(struct ptdesc *ptdesc)
+{
+ return atomic_read(&ptdesc->pt_share_count);
+}
+#else
+static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
+{
+}
+#endif
+
/*
* Used for sizing the vmemmap region on some architectures
*/
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2593019ad5b1..dd8f6f8991fe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2261,7 +2261,7 @@ struct net_device {
void *atalk_ptr;
#endif
#if IS_ENABLED(CONFIG_AX25)
- void *ax25_ptr;
+ struct ax25_dev __rcu *ax25_ptr;
#endif
#if IS_ENABLED(CONFIG_CFG80211)
struct wireless_dev *ieee80211_ptr;
@@ -3238,7 +3238,6 @@ static inline void unregister_netdevice(struct net_device *dev)
int netdev_refcnt_read(const struct net_device *dev);
void free_netdev(struct net_device *dev);
-void init_dummy_netdev(struct net_device *dev);
struct net_device *netdev_get_xmit_slave(struct net_device *dev,
struct sk_buff *skb,
@@ -3252,7 +3251,6 @@ struct net_device *netdev_get_by_index(struct net *net, int ifindex,
struct net_device *netdev_get_by_name(struct net *net, const char *name,
netdevice_tracker *tracker, gfp_t gfp);
struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
-struct net_device *dev_get_by_napi_id(unsigned int napi_id);
void netdev_copy_name(struct net_device *dev, char *name);
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,
@@ -4295,7 +4293,7 @@ static inline bool netif_carrier_ok(const struct net_device *dev)
unsigned long dev_trans_start(struct net_device *dev);
-void __netdev_watchdog_up(struct net_device *dev);
+void netdev_watchdog_up(struct net_device *dev);
void netif_carrier_on(struct net_device *dev);
void netif_carrier_off(struct net_device *dev);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 5eaceef41e6c..ecdd5ced16a8 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -185,6 +185,7 @@ struct netfs_io_subrequest {
short error; /* 0 or error that occurred */
unsigned short debug_index; /* Index in list (for debugging output) */
unsigned int nr_segs; /* Number of segs in io_iter */
+ u8 retry_count; /* The number of retries (0 on initial pass) */
enum netfs_io_source source; /* Where to read from/write to */
unsigned char stream_nr; /* I/O stream this belongs to */
unsigned char curr_folioq_slot; /* Folio currently being read */
@@ -194,14 +195,13 @@ struct netfs_io_subrequest {
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
-#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
+#define NETFS_SREQ_MADE_PROGRESS 4 /* Set if we transferred at least some data */
#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
#define NETFS_SREQ_HIT_EOF 7 /* Set if short due to EOF */
#define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */
#define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */
-#define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */
-#define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */
+#define NETFS_SREQ_FAILED 10 /* Set if the subreq failed unretryably */
};
enum netfs_io_origin {
@@ -269,7 +269,6 @@ struct netfs_io_request {
size_t prev_donated; /* Fallback for subreq->prev_donated */
refcount_t ref;
unsigned long flags;
-#define NETFS_RREQ_COPY_TO_CACHE 1 /* Need to write to the cache */
#define NETFS_RREQ_NO_UNLOCK_FOLIO 2 /* Don't unlock no_unlock_folio on completion */
#define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */
#define NETFS_RREQ_FAILED 4 /* The request failed */
diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h
index b5b5d17998b8..733f4ddd2ef1 100644
--- a/include/linux/pcs/pcs-xpcs.h
+++ b/include/linux/pcs/pcs-xpcs.h
@@ -50,7 +50,6 @@ struct dw_xpcs;
struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs);
int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface);
-void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces);
int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns,
int enable);
struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr);
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 35842d1e3879..5b520fe86b60 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -221,10 +221,7 @@ do { \
} while (0)
#define PERCPU_PTR(__p) \
-({ \
- unsigned long __pcpu_ptr = (__force unsigned long)(__p); \
- (typeof(*(__p)) __force __kernel *)(__pcpu_ptr); \
-})
+ (typeof(*(__p)) __force __kernel *)((__force unsigned long)(__p))
#ifdef CONFIG_SMP
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5bc71d59910c..afaae74d0949 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1144,6 +1144,53 @@ struct phy_driver {
int (*cable_test_get_status)(struct phy_device *dev, bool *finished);
/* Get statistics from the PHY using ethtool */
+ /**
+ * @get_phy_stats: Retrieve PHY statistics.
+ * @dev: The PHY device for which the statistics are retrieved.
+ * @eth_stats: structure where Ethernet PHY stats will be stored.
+ * @stats: structure where additional PHY-specific stats will be stored.
+ *
+ * Retrieves the supported PHY statistics and populates the provided
+ * structures. The input structures are pre-initialized with
+ * `ETHTOOL_STAT_NOT_SET`, and the driver must only modify members
+ * corresponding to supported statistics. Unmodified members will remain
+ * set to `ETHTOOL_STAT_NOT_SET` and will not be returned to userspace.
+ */
+ void (*get_phy_stats)(struct phy_device *dev,
+ struct ethtool_eth_phy_stats *eth_stats,
+ struct ethtool_phy_stats *stats);
+
+ /**
+ * @get_link_stats: Retrieve link statistics.
+ * @dev: The PHY device for which the statistics are retrieved.
+ * @link_stats: structure where link-specific stats will be stored.
+ *
+ * Retrieves link-related statistics for the given PHY device. The input
+ * structure is pre-initialized with `ETHTOOL_STAT_NOT_SET`, and the
+ * driver must only modify members corresponding to supported
+ * statistics. Unmodified members will remain set to
+ * `ETHTOOL_STAT_NOT_SET` and will not be returned to userspace.
+ */
+ void (*get_link_stats)(struct phy_device *dev,
+ struct ethtool_link_ext_stats *link_stats);
+
+ /**
+ * @update_stats: Trigger periodic statistics updates.
+ * @dev: The PHY device for which statistics updates are triggered.
+ *
+ * Periodically gathers statistics from the PHY device to update locally
+ * maintained 64-bit counters. This is necessary for PHYs that implement
+ * reduced-width counters (e.g., 16-bit or 32-bit) which can overflow
+ * more frequently compared to 64-bit counters. By invoking this
+ * callback, drivers can fetch the current counter values, handle
+ * overflow detection, and accumulate the results into local 64-bit
+ * counters for accurate reporting through the `get_phy_stats` and
+ * `get_link_stats` interfaces.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+ int (*update_stats)(struct phy_device *dev);
+
/** @get_sset_count: Number of statistic counters */
int (*get_sset_count)(struct phy_device *dev);
/** @get_strings: Names of the statistic counters */
@@ -1634,6 +1681,9 @@ static inline bool phy_polling_mode(struct phy_device *phydev)
if (phydev->drv->flags & PHY_POLL_CABLE_TEST)
return true;
+ if (phydev->drv->update_stats)
+ return true;
+
return phydev->irq == PHY_POLL;
}
@@ -2096,6 +2146,7 @@ int phy_unregister_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask);
int phy_unregister_fixup_for_id(const char *bus_id);
int phy_unregister_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask);
+int phy_eee_rx_clock_stop(struct phy_device *phydev, bool clk_stop_enable);
int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable);
int phy_get_eee_err(struct phy_device *phydev);
int phy_ethtool_set_eee(struct phy_device *phydev, struct ethtool_keee *data);
@@ -2123,6 +2174,13 @@ int phy_ethtool_get_strings(struct phy_device *phydev, u8 *data);
int phy_ethtool_get_sset_count(struct phy_device *phydev);
int phy_ethtool_get_stats(struct phy_device *phydev,
struct ethtool_stats *stats, u64 *data);
+
+void __phy_ethtool_get_phy_stats(struct phy_device *phydev,
+ struct ethtool_eth_phy_stats *phy_stats,
+ struct ethtool_phy_stats *phydev_stats);
+void __phy_ethtool_get_link_ext_stats(struct phy_device *phydev,
+ struct ethtool_link_ext_stats *link_stats);
+
int phy_ethtool_get_plca_cfg(struct phy_device *phydev,
struct phy_plca_cfg *plca_cfg);
int phy_ethtool_set_plca_cfg(struct phy_device *phydev,
diff --git a/include/linux/phylib_stubs.h b/include/linux/phylib_stubs.h
index 1279f48c8a70..9d2d6090c86d 100644
--- a/include/linux/phylib_stubs.h
+++ b/include/linux/phylib_stubs.h
@@ -5,6 +5,9 @@
#include <linux/rtnetlink.h>
+struct ethtool_eth_phy_stats;
+struct ethtool_link_ext_stats;
+struct ethtool_phy_stats;
struct kernel_hwtstamp_config;
struct netlink_ext_ack;
struct phy_device;
@@ -19,6 +22,11 @@ struct phylib_stubs {
int (*hwtstamp_set)(struct phy_device *phydev,
struct kernel_hwtstamp_config *config,
struct netlink_ext_ack *extack);
+ void (*get_phy_stats)(struct phy_device *phydev,
+ struct ethtool_eth_phy_stats *phy_stats,
+ struct ethtool_phy_stats *phydev_stats);
+ void (*get_link_ext_stats)(struct phy_device *phydev,
+ struct ethtool_link_ext_stats *link_stats);
};
static inline int phy_hwtstamp_get(struct phy_device *phydev,
@@ -50,6 +58,29 @@ static inline int phy_hwtstamp_set(struct phy_device *phydev,
return phylib_stubs->hwtstamp_set(phydev, config, extack);
}
+static inline void phy_ethtool_get_phy_stats(struct phy_device *phydev,
+ struct ethtool_eth_phy_stats *phy_stats,
+ struct ethtool_phy_stats *phydev_stats)
+{
+ ASSERT_RTNL();
+
+ if (!phylib_stubs)
+ return;
+
+ phylib_stubs->get_phy_stats(phydev, phy_stats, phydev_stats);
+}
+
+static inline void phy_ethtool_get_link_ext_stats(struct phy_device *phydev,
+ struct ethtool_link_ext_stats *link_stats)
+{
+ ASSERT_RTNL();
+
+ if (!phylib_stubs)
+ return;
+
+ phylib_stubs->get_link_ext_stats(phydev, link_stats);
+}
+
#else
static inline int phy_hwtstamp_get(struct phy_device *phydev,
@@ -65,4 +96,15 @@ static inline int phy_hwtstamp_set(struct phy_device *phydev,
return -EOPNOTSUPP;
}
+static inline void phy_ethtool_get_phy_stats(struct phy_device *phydev,
+ struct ethtool_eth_phy_stats *phy_stats,
+ struct ethtool_phy_stats *phydev_stats)
+{
+}
+
+static inline void phy_ethtool_get_link_ext_stats(struct phy_device *phydev,
+ struct ethtool_link_ext_stats *link_stats)
+{
+}
+
#endif
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 5462cc6a37dc..4b7a20620b49 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -393,6 +393,8 @@ struct phylink_pcs_ops;
/**
* struct phylink_pcs - PHYLINK PCS instance
+ * @supported_interfaces: describing which PHY_INTERFACE_MODE_xxx
+ * are supported by this PCS.
* @ops: a pointer to the &struct phylink_pcs_ops structure
* @phylink: pointer to &struct phylink_config
* @neg_mode: provide PCS neg mode via "mode" argument
@@ -409,6 +411,7 @@ struct phylink_pcs_ops;
* the PCS driver.
*/
struct phylink_pcs {
+ DECLARE_PHY_INTERFACE_MASK(supported_interfaces);
const struct phylink_pcs_ops *ops;
struct phylink *phylink;
bool neg_mode;
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 3b9d132cbc9e..4bc2ee0b10b0 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -102,6 +102,7 @@ void __rtnl_net_unlock(struct net *net);
void rtnl_net_lock(struct net *net);
void rtnl_net_unlock(struct net *net);
int rtnl_net_trylock(struct net *net);
+int rtnl_net_lock_killable(struct net *net);
int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
bool rtnl_net_is_locked(struct net *net);
@@ -138,6 +139,11 @@ static inline int rtnl_net_trylock(struct net *net)
return rtnl_trylock();
}
+static inline int rtnl_net_lock_killable(struct net *net)
+{
+ return rtnl_lock_killable();
+}
+
static inline void ASSERT_RTNL_NET(struct net *net)
{
ASSERT_RTNL();
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 58337898fa21..f8f91b2038ea 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -546,4 +546,7 @@ int inet6_fill_ifmcaddr(struct sk_buff *skb,
const struct ifmcaddr6 *ifmca,
struct inet6_fill_args *args);
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
+ struct inet6_fill_args *args);
#endif
diff --git a/include/net/ax25.h b/include/net/ax25.h
index cb622d84cd0c..4ee141aae0a2 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -231,6 +231,7 @@ typedef struct ax25_dev {
#endif
refcount_t refcount;
bool device_up;
+ struct rcu_head rcu;
} ax25_dev;
typedef struct ax25_cb {
@@ -290,9 +291,8 @@ static inline void ax25_dev_hold(ax25_dev *ax25_dev)
static inline void ax25_dev_put(ax25_dev *ax25_dev)
{
- if (refcount_dec_and_test(&ax25_dev->refcount)) {
- kfree(ax25_dev);
- }
+ if (refcount_dec_and_test(&ax25_dev->refcount))
+ kfree_rcu(ax25_dev, rcu);
}
static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
{
@@ -335,9 +335,9 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *);
extern spinlock_t ax25_dev_lock;
#if IS_ENABLED(CONFIG_AX25)
-static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev)
+static inline ax25_dev *ax25_dev_ax25dev(const struct net_device *dev)
{
- return dev->ax25_ptr;
+ return rcu_dereference_rtnl(dev->ax25_ptr);
}
#endif
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 4aeedb296d67..9640d5c67f56 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -991,8 +991,6 @@ struct dsa_switch_ops {
bool (*support_eee)(struct dsa_switch *ds, int port);
int (*set_mac_eee)(struct dsa_switch *ds, int port,
struct ethtool_keee *e);
- int (*get_mac_eee)(struct dsa_switch *ds, int port,
- struct ethtool_keee *e);
/* EEPROM access */
int (*get_eeprom_len)(struct dsa_switch *ds);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 3c82fad904d4..c7f42844c79a 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -282,7 +282,7 @@ static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
- return inet_csk_reqsk_queue_len(sk) >= READ_ONCE(sk->sk_max_ack_backlog);
+ return inet_csk_reqsk_queue_len(sk) > READ_ONCE(sk->sk_max_ack_backlog);
}
bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 3ccbad881d74..1086256549fa 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -19,6 +19,7 @@
#include <linux/netdevice.h>
#include <net/flow.h>
+#include <net/inet_dscp.h>
#include <net/sock.h>
#include <net/request_sock.h>
#include <net/netns/hash.h>
@@ -302,6 +303,11 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet)
return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL;
}
+static inline dscp_t inet_sk_dscp(const struct inet_sock *inet)
+{
+ return inet_dsfield_to_dscp(READ_ONCE(inet->tos));
+}
+
#define inet_test_bit(nr, sk) \
test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags)
#define inet_set_bit(nr, sk) \
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 32c09e85a64c..4b0677e48190 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -38,6 +38,7 @@
#define XFRM_PROTO_COMP 108
#define XFRM_PROTO_IPIP 4
#define XFRM_PROTO_IPV6 41
+#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG
#define XFRM_PROTO_ROUTING IPPROTO_ROUTING
#define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS
@@ -213,6 +214,7 @@ struct xfrm_state {
u16 family;
xfrm_address_t saddr;
int header_len;
+ int enc_hdr_len;
int trailer_len;
u32 extra_flags;
struct xfrm_mark smark;
@@ -303,6 +305,9 @@ struct xfrm_state {
* interpreted by xfrm_type methods. */
void *data;
u8 dir;
+
+ const struct xfrm_mode_cbs *mode_cbs;
+ void *mode_data;
};
static inline struct net *xs_net(struct xfrm_state *x)
@@ -460,6 +465,45 @@ struct xfrm_type_offload {
int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
+/**
+ * struct xfrm_mode_cbs - XFRM mode callbacks
+ * @owner: module owner or NULL
+ * @init_state: Add/init mode specific state in `xfrm_state *x`
+ * @clone_state: Copy mode specific values from `orig` to new state `x`
+ * @destroy_state: Cleanup mode specific state from `xfrm_state *x`
+ * @user_init: Process mode specific netlink attributes from user
+ * @copy_to_user: Add netlink attributes to `attrs` based on state in `x`
+ * @sa_len: Return space required to store mode specific netlink attributes
+ * @get_inner_mtu: Return avail payload space after removing encap overhead
+ * @input: Process received packet from SA using mode
+ * @output: Output given packet using mode
+ * @prepare_output: Add mode specific encapsulation to packet in skb. On return
+ * `transport_header` should point at ESP header, `network_header` should
+ * point at outer IP header and `mac_header` should opint at the
+ * protocol/nexthdr field of the outer IP.
+ *
+ * One should examine and understand the specific uses of these callbacks in
+ * xfrm for further detail on how and when these functions are called. RTSL.
+ */
+struct xfrm_mode_cbs {
+ struct module *owner;
+ int (*init_state)(struct xfrm_state *x);
+ int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig);
+ void (*destroy_state)(struct xfrm_state *x);
+ int (*user_init)(struct net *net, struct xfrm_state *x,
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack);
+ int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb);
+ unsigned int (*sa_len)(const struct xfrm_state *x);
+ u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu);
+ int (*input)(struct xfrm_state *x, struct sk_buff *skb);
+ int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
+ int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb);
+};
+
+int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs);
+void xfrm_unregister_mode_cbs(u8 mode);
+
static inline int xfrm_af2proto(unsigned int family)
{
switch(family) {
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 7e1b3820f91f..d1089b88efc7 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -681,6 +681,7 @@ enum ethtool_link_ext_substate_module {
* @ETH_SS_STATS_ETH_MAC: names of IEEE 802.3 MAC statistics
* @ETH_SS_STATS_ETH_CTRL: names of IEEE 802.3 MAC Control statistics
* @ETH_SS_STATS_RMON: names of RMON statistics
+ * @ETH_SS_STATS_PHY: names of PHY(dev) statistics
*
* @ETH_SS_COUNT: number of defined string sets
*/
@@ -706,6 +707,7 @@ enum ethtool_stringset {
ETH_SS_STATS_ETH_MAC,
ETH_SS_STATS_ETH_CTRL,
ETH_SS_STATS_RMON,
+ ETH_SS_STATS_PHY,
/* add new constants above here */
ETH_SS_COUNT
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 9c909ce733a5..9ff72cfb2e98 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -99,6 +99,7 @@ enum {
ETHTOOL_STATS_ETH_MAC,
ETHTOOL_STATS_ETH_CTRL,
ETHTOOL_STATS_RMON,
+ ETHTOOL_STATS_PHY,
/* add new constants above here */
__ETHTOOL_STATS_CNT
@@ -193,6 +194,19 @@ enum {
ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1)
};
+enum {
+ /* Basic packet counters if PHY has separate counters from the MAC */
+ ETHTOOL_A_STATS_PHY_RX_PKTS,
+ ETHTOOL_A_STATS_PHY_RX_BYTES,
+ ETHTOOL_A_STATS_PHY_RX_ERRORS,
+ ETHTOOL_A_STATS_PHY_TX_PKTS,
+ ETHTOOL_A_STATS_PHY_TX_BYTES,
+ ETHTOOL_A_STATS_PHY_TX_ERRORS,
+
+ /* add new constants above here */
+ __ETHTOOL_A_STATS_PHY_CNT,
+ ETHTOOL_A_STATS_PHY_MAX = (__ETHTOOL_A_STATS_PHY_CNT - 1)
+};
/* generic netlink info */
#define ETHTOOL_GENL_NAME "ethtool"
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 77730c340c8f..bfe880fbbb24 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1315,6 +1315,8 @@ enum {
IFLA_NETKIT_MODE,
IFLA_NETKIT_SCRUB,
IFLA_NETKIT_PEER_SCRUB,
+ IFLA_NETKIT_HEADROOM,
+ IFLA_NETKIT_TAILROOM,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 5d32d53508d9..ced0fc3c3aa5 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -79,6 +79,8 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
+ IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */
+#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
IPPROTO_SMC = 256, /* Shared Memory Communications */
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index 283dec7e3645..5bd7ce934d74 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -137,6 +137,22 @@ struct ip_beet_phdr {
__u8 reserved;
};
+struct ip_iptfs_hdr {
+ __u8 subtype; /* 0*: basic, 1: CC */
+ __u8 flags;
+ __be16 block_offset;
+};
+
+struct ip_iptfs_cc_hdr {
+ __u8 subtype; /* 0: basic, 1*: CC */
+ __u8 flags;
+ __be16 block_offset;
+ __be32 loss_rate;
+ __be64 rtt_adelay_xdelay;
+ __be32 tval;
+ __be32 techo;
+};
+
/* index values for the variables in ipv4_devconf */
enum
{
diff --git a/include/uapi/linux/ipsec.h b/include/uapi/linux/ipsec.h
index 50d8ee1791e2..696b790f4346 100644
--- a/include/uapi/linux/ipsec.h
+++ b/include/uapi/linux/ipsec.h
@@ -14,7 +14,8 @@ enum {
IPSEC_MODE_ANY = 0, /* We do not support this for SA */
IPSEC_MODE_TRANSPORT = 1,
IPSEC_MODE_TUNNEL = 2,
- IPSEC_MODE_BEET = 3
+ IPSEC_MODE_BEET = 3,
+ IPSEC_MODE_IPTFS = 4
};
enum {
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 5ee94c511a28..66c3903d29cf 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -100,7 +100,11 @@ enum {
RTM_GETMULTICAST,
#define RTM_GETMULTICAST RTM_GETMULTICAST
- RTM_GETANYCAST = 62,
+ RTM_NEWANYCAST = 60,
+#define RTM_NEWANYCAST RTM_NEWANYCAST
+ RTM_DELANYCAST,
+#define RTM_DELANYCAST RTM_DELANYCAST
+ RTM_GETANYCAST,
#define RTM_GETANYCAST RTM_GETANYCAST
RTM_NEWNEIGHTBL = 64,
@@ -783,6 +787,8 @@ enum rtnetlink_groups {
#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR
RTNLGRP_IPV6_MCADDR,
#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR
+ RTNLGRP_IPV6_ACADDR,
+#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR
__RTNLGRP_MAX
};
#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 51da2e00112d..2e75674e7d4f 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -339,6 +339,8 @@ enum
LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */
LINUX_MIB_XFRMOUTSTATEDIRERROR, /* XfrmOutStateDirError */
LINUX_MIB_XFRMINSTATEDIRERROR, /* XfrmInStateDirError */
+ LINUX_MIB_XFRMINIPTFSERROR, /* XfrmInIptfsError */
+ LINUX_MIB_XFRMOUTNOQSPACE, /* XfrmOutNoQueueSpace */
__LINUX_MIB_XFRMMAX
};
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index d73a97e3030a..a23495c0e0a1 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -158,7 +158,8 @@ enum {
#define XFRM_MODE_ROUTEOPTIMIZATION 2
#define XFRM_MODE_IN_TRIGGER 3
#define XFRM_MODE_BEET 4
-#define XFRM_MODE_MAX 5
+#define XFRM_MODE_IPTFS 5
+#define XFRM_MODE_MAX 6
/* Netlink configuration messages. */
enum {
@@ -323,6 +324,12 @@ enum xfrm_attr_type_t {
XFRMA_SA_DIR, /* __u8 */
XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */
XFRMA_SA_PCPU, /* __u32 */
+ XFRMA_IPTFS_DROP_TIME, /* __u32 in: usec to wait for next seq */
+ XFRMA_IPTFS_REORDER_WINDOW, /* __u16 in: reorder window size (pkts) */
+ XFRMA_IPTFS_DONT_FRAG, /* out: don't use fragmentation */
+ XFRMA_IPTFS_INIT_DELAY, /* __u32 out: initial packet wait delay (usec) */
+ XFRMA_IPTFS_MAX_QSIZE, /* __u32 out: max ingress queue size (octets) */
+ XFRMA_IPTFS_PKT_SIZE, /* __u32 out: size of outer packet, 0 for PMTU */
__XFRMA_MAX
#define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index d650ae6b58d3..74e5b9960c54 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -329,7 +329,6 @@ struct ufs_pwr_mode_info {
* @program_key: program or evict an inline encryption key
* @fill_crypto_prdt: initialize crypto-related fields in the PRDT
* @event_notify: called to notify important events
- * @reinit_notify: called to notify reinit of UFSHCD during max gear switch
* @mcq_config_resource: called to configure MCQ platform resources
* @get_hba_mac: reports maximum number of outstanding commands supported by
* the controller. Should be implemented for UFSHCI 4.0 or later
@@ -381,7 +380,6 @@ struct ufs_hba_variant_ops {
void *prdt, unsigned int num_segments);
void (*event_notify)(struct ufs_hba *hba,
enum ufs_event_type evt, void *data);
- void (*reinit_notify)(struct ufs_hba *);
int (*mcq_config_resource)(struct ufs_hba *hba);
int (*get_hba_mac)(struct ufs_hba *hba);
int (*op_runtime_config)(struct ufs_hba *hba);
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index d3403c8216db..ff691f37462c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -320,7 +320,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
ret |= io_alloc_cache_init(&ctx->rw_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_async_rw));
ret |= io_alloc_cache_init(&ctx->uring_cache, IO_ALLOC_CACHE_MAX,
- sizeof(struct uring_cache));
+ sizeof(struct io_uring_cmd_data));
spin_lock_init(&ctx->msg_lock);
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
sizeof(struct io_kiocb));
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index d407576ddfb7..eec5eb7de843 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -139,6 +139,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
struct io_uring_buf_ring *br = bl->buf_ring;
__u16 tail, head = bl->head;
struct io_uring_buf *buf;
+ void __user *ret;
tail = smp_load_acquire(&br->tail);
if (unlikely(tail == head))
@@ -153,6 +154,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
req->buf_list = bl;
req->buf_index = buf->bid;
+ ret = u64_to_user_ptr(buf->addr);
if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
/*
@@ -168,7 +170,7 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len,
io_kbuf_commit(req, bl, *len, 1);
req->buf_list = NULL;
}
- return u64_to_user_ptr(buf->addr);
+ return ret;
}
void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
diff --git a/io_uring/net.c b/io_uring/net.c
index df1f7dc6f1c8..c6cd38cc5dc4 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -754,6 +754,7 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req)
if (req->opcode == IORING_OP_RECV) {
kmsg->msg.msg_name = NULL;
kmsg->msg.msg_namelen = 0;
+ kmsg->msg.msg_inq = 0;
kmsg->msg.msg_control = NULL;
kmsg->msg.msg_get_inq = 1;
kmsg->msg.msg_controllen = 0;
diff --git a/io_uring/opdef.c b/io_uring/opdef.c
index 3de75eca1c92..e8baef4e5146 100644
--- a/io_uring/opdef.c
+++ b/io_uring/opdef.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/io_uring.h>
+#include <linux/io_uring/cmd.h>
#include "io_uring.h"
#include "opdef.h"
@@ -414,7 +415,7 @@ const struct io_issue_def io_issue_defs[] = {
.plug = 1,
.iopoll = 1,
.iopoll_queue = 1,
- .async_size = 2 * sizeof(struct io_uring_sqe),
+ .async_size = sizeof(struct io_uring_cmd_data),
.prep = io_uring_cmd_prep,
.issue = io_uring_cmd,
},
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 0bcb83e4ce3c..29bb3010f9c0 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -983,6 +983,8 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
io_kbuf_recycle(req, issue_flags);
if (ret < 0)
req_set_fail(req);
+ } else if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
+ cflags = io_put_kbuf(req, ret, issue_flags);
} else {
/*
* Any successful return value will keep the multishot read
diff --git a/io_uring/timeout.c b/io_uring/timeout.c
index bbe58638eca7..362689b17ccc 100644
--- a/io_uring/timeout.c
+++ b/io_uring/timeout.c
@@ -85,7 +85,27 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
io_req_task_complete(req, ts);
}
-static bool io_kill_timeout(struct io_kiocb *req, int status)
+static __cold bool io_flush_killed_timeouts(struct list_head *list, int err)
+{
+ if (list_empty(list))
+ return false;
+
+ while (!list_empty(list)) {
+ struct io_timeout *timeout;
+ struct io_kiocb *req;
+
+ timeout = list_first_entry(list, struct io_timeout, list);
+ list_del_init(&timeout->list);
+ req = cmd_to_io_kiocb(timeout);
+ if (err)
+ req_set_fail(req);
+ io_req_queue_tw_complete(req, err);
+ }
+
+ return true;
+}
+
+static void io_kill_timeout(struct io_kiocb *req, struct list_head *list)
__must_hold(&req->ctx->timeout_lock)
{
struct io_timeout_data *io = req->async_data;
@@ -93,21 +113,17 @@ static bool io_kill_timeout(struct io_kiocb *req, int status)
if (hrtimer_try_to_cancel(&io->timer) != -1) {
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
- if (status)
- req_set_fail(req);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
- list_del_init(&timeout->list);
- io_req_queue_tw_complete(req, status);
- return true;
+ list_move_tail(&timeout->list, list);
}
- return false;
}
__cold void io_flush_timeouts(struct io_ring_ctx *ctx)
{
- u32 seq;
struct io_timeout *timeout, *tmp;
+ LIST_HEAD(list);
+ u32 seq;
raw_spin_lock_irq(&ctx->timeout_lock);
seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
@@ -131,10 +147,11 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
if (events_got < events_needed)
break;
- io_kill_timeout(req, 0);
+ io_kill_timeout(req, &list);
}
ctx->cq_last_tm_flush = seq;
raw_spin_unlock_irq(&ctx->timeout_lock);
+ io_flush_killed_timeouts(&list, 0);
}
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
@@ -661,7 +678,7 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx
bool cancel_all)
{
struct io_timeout *timeout, *tmp;
- int canceled = 0;
+ LIST_HEAD(list);
/*
* completion_lock is needed for io_match_task(). Take it before
@@ -672,11 +689,11 @@ __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, struct io_uring_task *tctx
list_for_each_entry_safe(timeout, tmp, &ctx->timeout_list, list) {
struct io_kiocb *req = cmd_to_io_kiocb(timeout);
- if (io_match_task(req, tctx, cancel_all) &&
- io_kill_timeout(req, -ECANCELED))
- canceled++;
+ if (io_match_task(req, tctx, cancel_all))
+ io_kill_timeout(req, &list);
}
raw_spin_unlock_irq(&ctx->timeout_lock);
spin_unlock(&ctx->completion_lock);
- return canceled != 0;
+
+ return io_flush_killed_timeouts(&list, -ECANCELED);
}
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index af842e9b4eb9..ce7726a04883 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -16,26 +16,35 @@
#include "rsrc.h"
#include "uring_cmd.h"
-static struct uring_cache *io_uring_async_get(struct io_kiocb *req)
+static struct io_uring_cmd_data *io_uring_async_get(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- struct uring_cache *cache;
+ struct io_uring_cmd_data *cache;
cache = io_alloc_cache_get(&ctx->uring_cache);
if (cache) {
+ cache->op_data = NULL;
req->flags |= REQ_F_ASYNC_DATA;
req->async_data = cache;
return cache;
}
- if (!io_alloc_async_data(req))
- return req->async_data;
+ if (!io_alloc_async_data(req)) {
+ cache = req->async_data;
+ cache->op_data = NULL;
+ return cache;
+ }
return NULL;
}
static void io_req_uring_cleanup(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- struct uring_cache *cache = req->async_data;
+ struct io_uring_cmd_data *cache = req->async_data;
+
+ if (cache->op_data) {
+ kfree(cache->op_data);
+ cache->op_data = NULL;
+ }
if (issue_flags & IO_URING_F_UNLOCKED)
return;
@@ -183,7 +192,7 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
const struct io_uring_sqe *sqe)
{
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
- struct uring_cache *cache;
+ struct io_uring_cmd_data *cache;
cache = io_uring_async_get(req);
if (unlikely(!cache))
@@ -260,7 +269,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
ret = file->f_op->uring_cmd(ioucmd, issue_flags);
if (ret == -EAGAIN) {
- struct uring_cache *cache = req->async_data;
+ struct io_uring_cmd_data *cache = req->async_data;
if (ioucmd->sqe != (void *) cache)
memcpy(cache, ioucmd->sqe, uring_sqe_size(req->ctx));
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index 7dba0f1efc58..f6837ee0955b 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -1,9 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-struct uring_cache {
- struct io_uring_sqe sqes[2];
-};
-
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh
index 383fd43ac612..7e1340da5aca 100755
--- a/kernel/gen_kheaders.sh
+++ b/kernel/gen_kheaders.sh
@@ -89,6 +89,7 @@ find $cpio_dir -type f -print0 |
# Create archive and try to normalize metadata for reproducibility.
tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \
+ --exclude=".__afs*" --exclude=".nfs*" \
--owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \
-I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null
diff --git a/kernel/kcov.c b/kernel/kcov.c
index 28a6be6e64fd..187ba1b80bda 100644
--- a/kernel/kcov.c
+++ b/kernel/kcov.c
@@ -166,7 +166,7 @@ static void kcov_remote_area_put(struct kcov_remote_area *area,
* Unlike in_serving_softirq(), this function returns false when called during
* a hardirq or an NMI that happened in the softirq context.
*/
-static inline bool in_softirq_really(void)
+static __always_inline bool in_softirq_really(void)
{
return in_serving_softirq() && !in_hardirq() && !in_nmi();
}
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 7fff1d045477..19d2699cf638 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4763,7 +4763,7 @@ static void scx_ops_bypass(bool bypass)
* sees scx_rq_bypassing() before moving tasks to SCX.
*/
if (!scx_enabled()) {
- rq_unlock_irqrestore(rq, &rf);
+ rq_unlock(rq, &rf);
continue;
}
@@ -7013,7 +7013,7 @@ __bpf_kfunc int bpf_iter_scx_dsq_new(struct bpf_iter_scx_dsq *it, u64 dsq_id,
return -ENOENT;
INIT_LIST_HEAD(&kit->cursor.node);
- kit->cursor.flags |= SCX_DSQ_LNODE_ITER_CURSOR | flags;
+ kit->cursor.flags = SCX_DSQ_LNODE_ITER_CURSOR | flags;
kit->cursor.priv = READ_ONCE(kit->dsq->seq);
return 0;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8b07576814a5..f7d8fc204579 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3680,23 +3680,27 @@ void workqueue_softirq_dead(unsigned int cpu)
* check_flush_dependency - check for flush dependency sanity
* @target_wq: workqueue being flushed
* @target_work: work item being flushed (NULL for workqueue flushes)
+ * @from_cancel: are we called from the work cancel path
*
* %current is trying to flush the whole @target_wq or @target_work on it.
- * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
- * reclaiming memory or running on a workqueue which doesn't have
- * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
- * a deadlock.
+ * If this is not the cancel path (which implies work being flushed is either
+ * already running, or will not be at all), check if @target_wq doesn't have
+ * %WQ_MEM_RECLAIM and verify that %current is not reclaiming memory or running
+ * on a workqueue which doesn't have %WQ_MEM_RECLAIM as that can break forward-
+ * progress guarantee leading to a deadlock.
*/
static void check_flush_dependency(struct workqueue_struct *target_wq,
- struct work_struct *target_work)
+ struct work_struct *target_work,
+ bool from_cancel)
{
- work_func_t target_func = target_work ? target_work->func : NULL;
+ work_func_t target_func;
struct worker *worker;
- if (target_wq->flags & WQ_MEM_RECLAIM)
+ if (from_cancel || target_wq->flags & WQ_MEM_RECLAIM)
return;
worker = current_wq_worker();
+ target_func = target_work ? target_work->func : NULL;
WARN_ONCE(current->flags & PF_MEMALLOC,
"workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%ps",
@@ -3980,7 +3984,7 @@ void __flush_workqueue(struct workqueue_struct *wq)
list_add_tail(&this_flusher.list, &wq->flusher_overflow);
}
- check_flush_dependency(wq, NULL);
+ check_flush_dependency(wq, NULL, false);
mutex_unlock(&wq->mutex);
@@ -4155,7 +4159,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
}
wq = pwq->wq;
- check_flush_dependency(wq, work);
+ check_flush_dependency(wq, work, from_cancel);
insert_wq_barrier(pwq, barr, work, worker);
raw_spin_unlock_irq(&pool->lock);
@@ -5641,6 +5645,7 @@ static void wq_adjust_max_active(struct workqueue_struct *wq)
} while (activated);
}
+__printf(1, 0)
static struct workqueue_struct *__alloc_workqueue(const char *fmt,
unsigned int flags,
int max_active, va_list args)
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index d0ae808f3a14..047397136f15 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -4354,6 +4354,7 @@ int mas_alloc_cyclic(struct ma_state *mas, unsigned long *startp,
ret = 1;
}
if (ret < 0 && range_lo > min) {
+ mas_reset(mas);
ret = mas_empty_area(mas, min, range_hi, 1);
if (ret == 0)
ret = 1;
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 8b8e2933dcd4..0776452a1abb 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -868,6 +868,11 @@ static int damon_commit_schemes(struct damon_ctx *dst, struct damon_ctx *src)
NUMA_NO_NODE);
if (!new_scheme)
return -ENOMEM;
+ err = damos_commit(new_scheme, src_scheme);
+ if (err) {
+ damon_destroy_scheme(new_scheme);
+ return err;
+ }
damon_add_scheme(dst, new_scheme);
}
return 0;
@@ -961,8 +966,11 @@ static int damon_commit_targets(
return -ENOMEM;
err = damon_commit_target(new_target, false,
src_target, damon_target_has_pid(src));
- if (err)
+ if (err) {
+ damon_destroy_target(new_target);
return err;
+ }
+ damon_add_target(dst, new_target);
}
return 0;
}
diff --git a/mm/filemap.c b/mm/filemap.c
index f61cf51c2238..33b60d448fca 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -124,15 +124,6 @@
* ->private_lock (zap_pte_range->block_dirty_folio)
*/
-static void mapping_set_update(struct xa_state *xas,
- struct address_space *mapping)
-{
- if (dax_mapping(mapping) || shmem_mapping(mapping))
- return;
- xas_set_update(xas, workingset_update_node);
- xas_set_lru(xas, &shadow_nodes);
-}
-
static void page_cache_delete(struct address_space *mapping,
struct folio *folio, void *shadow)
{
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index cec4b121193f..c498874a7170 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7211,7 +7211,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
spte = hugetlb_walk(svma, saddr,
vma_mmu_pagesize(svma));
if (spte) {
- get_page(virt_to_page(spte));
+ ptdesc_pmd_pts_inc(virt_to_ptdesc(spte));
break;
}
}
@@ -7226,7 +7226,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
(pmd_t *)((unsigned long)spte & PAGE_MASK));
mm_inc_nr_pmds(mm);
} else {
- put_page(virt_to_page(spte));
+ ptdesc_pmd_pts_dec(virt_to_ptdesc(spte));
}
spin_unlock(&mm->page_table_lock);
out:
@@ -7238,10 +7238,6 @@ out:
/*
* unmap huge page backed by shared pte.
*
- * Hugetlb pte page is ref counted at the time of mapping. If pte is shared
- * indicated by page_count > 1, unmap is achieved by clearing pud and
- * decrementing the ref count. If count == 1, the pte page is not shared.
- *
* Called with page table lock held.
*
* returns: 1 successfully unmapped a shared pte page
@@ -7250,18 +7246,20 @@ out:
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+ unsigned long sz = huge_page_size(hstate_vma(vma));
pgd_t *pgd = pgd_offset(mm, addr);
p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud = pud_offset(p4d, addr);
i_mmap_assert_write_locked(vma->vm_file->f_mapping);
hugetlb_vma_assert_locked(vma);
- BUG_ON(page_count(virt_to_page(ptep)) == 0);
- if (page_count(virt_to_page(ptep)) == 1)
+ if (sz != PMD_SIZE)
+ return 0;
+ if (!ptdesc_pmd_pts_count(virt_to_ptdesc(ptep)))
return 0;
pud_clear(pud);
- put_page(virt_to_page(ptep));
+ ptdesc_pmd_pts_dec(virt_to_ptdesc(ptep));
mm_dec_nr_pmds(mm);
return 1;
}
diff --git a/mm/internal.h b/mm/internal.h
index 3bd08bafad04..9826f7dce607 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1504,6 +1504,12 @@ static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
/* Only track the nodes of mappings with shadow entries */
void workingset_update_node(struct xa_node *node);
extern struct list_lru shadow_nodes;
+#define mapping_set_update(xas, mapping) do { \
+ if (!dax_mapping(mapping) && !shmem_mapping(mapping)) { \
+ xas_set_update(xas, workingset_update_node); \
+ xas_set_lru(xas, &shadow_nodes); \
+ } \
+} while (0)
/* mremap.c */
unsigned long move_page_tables(struct vm_area_struct *vma,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 6f8d46d107b4..653dbb1ff05c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -19,6 +19,7 @@
#include <linux/rcupdate_wait.h>
#include <linux/swapops.h>
#include <linux/shmem_fs.h>
+#include <linux/dax.h>
#include <linux/ksm.h>
#include <asm/tlb.h>
@@ -1837,6 +1838,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
if (result != SCAN_SUCCEED)
goto out;
+ mapping_set_update(&xas, mapping);
+
__folio_set_locked(new_folio);
if (is_shmem)
__folio_set_swapbacked(new_folio);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 2a945c07ae99..737af23f4f4e 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -373,7 +373,7 @@ static void print_unreferenced(struct seq_file *seq,
for (i = 0; i < nr_entries; i++) {
void *ptr = (void *)entries[i];
- warn_or_seq_printf(seq, " [<%pK>] %pS\n", ptr, ptr);
+ warn_or_seq_printf(seq, " %pS\n", ptr);
}
}
diff --git a/mm/list_lru.c b/mm/list_lru.c
index f93ada6a207b..7d69434c70e0 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -77,7 +77,6 @@ again:
spin_lock(&l->lock);
nr_items = READ_ONCE(l->nr_items);
if (likely(nr_items != LONG_MIN)) {
- WARN_ON(nr_items < 0);
rcu_read_unlock();
return l;
}
@@ -450,6 +449,7 @@ static void memcg_reparent_list_lru_one(struct list_lru *lru, int nid,
list_splice_init(&src->list, &dst->list);
if (src->nr_items) {
+ WARN_ON(src->nr_items < 0);
dst->nr_items += src->nr_items;
set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
}
diff --git a/mm/memfd.c b/mm/memfd.c
index c17c3ea701a1..35a370d75c9a 100644
--- a/mm/memfd.c
+++ b/mm/memfd.c
@@ -170,7 +170,7 @@ static int memfd_wait_for_pins(struct address_space *mapping)
return error;
}
-static unsigned int *memfd_file_seals_ptr(struct file *file)
+unsigned int *memfd_file_seals_ptr(struct file *file)
{
if (shmem_file(file))
return &SHMEM_I(file_inode(file))->seals;
diff --git a/mm/mmap.c b/mm/mmap.c
index d32b7e701058..aec208f90337 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -47,6 +47,7 @@
#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/ksm.h>
+#include <linux/memfd.h>
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
@@ -368,6 +369,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if (file) {
struct inode *inode = file_inode(file);
+ unsigned int seals = memfd_file_seals(file);
unsigned long flags_mask;
if (!file_mmap_ok(file, inode, pgoff, len))
@@ -408,6 +410,8 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags |= VM_SHARED | VM_MAYSHARE;
if (!(file->f_mode & FMODE_WRITE))
vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
+ else if (is_readonly_sealed(seals, vm_flags))
+ vm_flags &= ~VM_MAYWRITE;
fallthrough;
case MAP_PRIVATE:
if (!(file->f_mode & FMODE_READ))
@@ -888,7 +892,7 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
if (get_area) {
addr = get_area(file, addr, len, pgoff, flags);
- } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
+ } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && !file
&& !addr /* no hint */
&& IS_ALIGNED(len, PMD_SIZE)) {
/* Ensures that larger anonymous mappings are THP aligned. */
diff --git a/mm/readahead.c b/mm/readahead.c
index ea650b8b02fb..e151f4b13ca4 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -646,7 +646,11 @@ void page_cache_async_ra(struct readahead_control *ractl,
1UL << order);
if (index == expected) {
ra->start += ra->size;
- ra->size = get_next_ra_size(ra, max_pages);
+ /*
+ * In the case of MADV_HUGEPAGE, the actual size might exceed
+ * the readahead window.
+ */
+ ra->size = max(ra->size, get_next_ra_size(ra, max_pages));
ra->async_size = ra->size;
goto readit;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index f6fb053ac50d..ac58d4fb2e6f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1535,7 +1535,7 @@ try_split:
!shmem_falloc->waitq &&
index >= shmem_falloc->start &&
index < shmem_falloc->next)
- shmem_falloc->nr_unswapped++;
+ shmem_falloc->nr_unswapped += nr_pages;
else
shmem_falloc = NULL;
spin_unlock(&inode->i_lock);
@@ -1689,6 +1689,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
unsigned long mask = READ_ONCE(huge_shmem_orders_always);
unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
unsigned long vm_flags = vma ? vma->vm_flags : 0;
+ pgoff_t aligned_index;
bool global_huge;
loff_t i_size;
int order;
@@ -1723,9 +1724,9 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
/* Allow mTHP that will be fully within i_size. */
order = highest_order(within_size_orders);
while (within_size_orders) {
- index = round_up(index + 1, order);
+ aligned_index = round_up(index + 1, 1 << order);
i_size = round_up(i_size_read(inode), PAGE_SIZE);
- if (i_size >> PAGE_SHIFT >= index) {
+ if (i_size >> PAGE_SHIFT >= aligned_index) {
mask |= within_size_orders;
break;
}
diff --git a/mm/util.c b/mm/util.c
index c1c3b06ab4f9..60aa40f612b8 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -297,12 +297,7 @@ void *memdup_user_nul(const void __user *src, size_t len)
{
char *p;
- /*
- * Always use GFP_KERNEL, since copy_from_user() can sleep and
- * cause pagefault, which makes it pointless to use GFP_NOFS
- * or GFP_ATOMIC.
- */
- p = kmalloc_track_caller(len + 1, GFP_KERNEL);
+ p = kmem_buckets_alloc_track_caller(user_buckets, len + 1, GFP_USER | __GFP_NOWARN);
if (!p)
return ERR_PTR(-ENOMEM);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 76378bc257e3..9a859b7d18d7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -374,7 +374,14 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL))
nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
-
+ /*
+ * If there are no reclaimable file-backed or anonymous pages,
+ * ensure zones with sufficient free pages are not skipped.
+ * This prevents zones like DMA32 from being ignored in reclaim
+ * scenarios where they can still help alleviate memory pressure.
+ */
+ if (nr == 0)
+ nr = zone_page_state_snapshot(zone, NR_FREE_PAGES);
return nr;
}
diff --git a/mm/zswap.c b/mm/zswap.c
index f6316b66fb23..5a27af8d86ea 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -880,6 +880,18 @@ static int zswap_cpu_comp_dead(unsigned int cpu, struct hlist_node *node)
return 0;
}
+/* Prevent CPU hotplug from freeing up the per-CPU acomp_ctx resources */
+static struct crypto_acomp_ctx *acomp_ctx_get_cpu(struct crypto_acomp_ctx __percpu *acomp_ctx)
+{
+ cpus_read_lock();
+ return raw_cpu_ptr(acomp_ctx);
+}
+
+static void acomp_ctx_put_cpu(void)
+{
+ cpus_read_unlock();
+}
+
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
struct zswap_pool *pool)
{
@@ -893,8 +905,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
gfp_t gfp;
u8 *dst;
- acomp_ctx = raw_cpu_ptr(pool->acomp_ctx);
-
+ acomp_ctx = acomp_ctx_get_cpu(pool->acomp_ctx);
mutex_lock(&acomp_ctx->mutex);
dst = acomp_ctx->buffer;
@@ -950,6 +961,7 @@ unlock:
zswap_reject_alloc_fail++;
mutex_unlock(&acomp_ctx->mutex);
+ acomp_ctx_put_cpu();
return comp_ret == 0 && alloc_ret == 0;
}
@@ -960,7 +972,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
struct crypto_acomp_ctx *acomp_ctx;
u8 *src;
- acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
+ acomp_ctx = acomp_ctx_get_cpu(entry->pool->acomp_ctx);
mutex_lock(&acomp_ctx->mutex);
src = zpool_map_handle(zpool, entry->handle, ZPOOL_MM_RO);
@@ -990,6 +1002,7 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
if (src != acomp_ctx->buffer)
zpool_unmap_handle(zpool, entry->handle);
+ acomp_ctx_put_cpu();
}
/*********************************
diff --git a/net/802/psnap.c b/net/802/psnap.c
index fca9d454905f..389df460c8c4 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -55,11 +55,11 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
goto drop;
rcu_read_lock();
- proto = find_snap_client(skb_transport_header(skb));
+ proto = find_snap_client(skb->data);
if (proto) {
/* Pass the frame on. */
- skb->transport_header += 5;
skb_pull_rcsum(skb, 5);
+ skb_reset_transport_header(skb);
rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev);
}
rcu_read_unlock();
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index d6f9fae06a9d..aa6c714892ec 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -467,7 +467,7 @@ einval_put:
goto out_put;
}
-static void ax25_fillin_cb_from_dev(ax25_cb *ax25, ax25_dev *ax25_dev)
+static void ax25_fillin_cb_from_dev(ax25_cb *ax25, const ax25_dev *ax25_dev)
{
ax25->rtt = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]) / 2;
ax25->t1 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]);
@@ -677,22 +677,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
}
- rtnl_lock();
- dev = __dev_get_by_name(&init_net, devname);
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(&init_net, devname);
if (!dev) {
- rtnl_unlock();
+ rcu_read_unlock();
res = -ENODEV;
break;
}
ax25->ax25_dev = ax25_dev_ax25dev(dev);
if (!ax25->ax25_dev) {
- rtnl_unlock();
+ rcu_read_unlock();
res = -ENODEV;
break;
}
ax25_fillin_cb(ax25, ax25->ax25_dev);
- rtnl_unlock();
+ rcu_read_unlock();
break;
default:
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index 9efd6690b344..3733c0254a50 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -90,7 +90,7 @@ void ax25_dev_device_up(struct net_device *dev)
spin_lock_bh(&ax25_dev_lock);
list_add(&ax25_dev->list, &ax25_dev_list);
- dev->ax25_ptr = ax25_dev;
+ rcu_assign_pointer(dev->ax25_ptr, ax25_dev);
spin_unlock_bh(&ax25_dev_lock);
ax25_register_dev_sysctl(ax25_dev);
@@ -125,7 +125,7 @@ void ax25_dev_device_down(struct net_device *dev)
}
}
- dev->ax25_ptr = NULL;
+ RCU_INIT_POINTER(dev->ax25_ptr, NULL);
spin_unlock_bh(&ax25_dev_lock);
netdev_put(dev, &ax25_dev->dev_tracker);
ax25_dev_put(ax25_dev);
diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c
index 36249776c021..215d4ccf12b9 100644
--- a/net/ax25/ax25_ip.c
+++ b/net/ax25/ax25_ip.c
@@ -122,6 +122,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
if (dev == NULL)
dev = skb->dev;
+ rcu_read_lock();
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) {
kfree_skb(skb);
goto put;
@@ -202,7 +203,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb)
ax25_queue_xmit(skb, dev);
put:
-
+ rcu_read_unlock();
ax25_route_lock_unuse();
return NETDEV_TX_OK;
}
diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c
index 3db76d2470e9..8bca2ace98e5 100644
--- a/net/ax25/ax25_out.c
+++ b/net/ax25/ax25_out.c
@@ -39,10 +39,14 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *sr
* specified.
*/
if (paclen == 0) {
- if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+ rcu_read_lock();
+ ax25_dev = ax25_dev_ax25dev(dev);
+ if (!ax25_dev) {
+ rcu_read_unlock();
return NULL;
-
+ }
paclen = ax25_dev->values[AX25_VALUES_PACLEN];
+ rcu_read_unlock();
}
/*
@@ -53,13 +57,19 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *sr
return ax25; /* It already existed */
}
- if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
+ rcu_read_lock();
+ ax25_dev = ax25_dev_ax25dev(dev);
+ if (!ax25_dev) {
+ rcu_read_unlock();
return NULL;
+ }
- if ((ax25 = ax25_create_cb()) == NULL)
+ if ((ax25 = ax25_create_cb()) == NULL) {
+ rcu_read_unlock();
return NULL;
-
+ }
ax25_fillin_cb(ax25, ax25_dev);
+ rcu_read_unlock();
ax25->source_addr = *src;
ax25->dest_addr = *dest;
@@ -358,7 +368,9 @@ void ax25_queue_xmit(struct sk_buff *skb, struct net_device *dev)
{
unsigned char *ptr;
+ rcu_read_lock();
skb->protocol = ax25_type_trans(skb, ax25_fwd_dev(dev));
+ rcu_read_unlock();
ptr = skb_push(skb, 1);
*ptr = 0x00; /* KISS */
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index b7c4d656a94b..69de75db0c9c 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -406,6 +406,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
ax25_route_lock_unuse();
return -EHOSTUNREACH;
}
+ rcu_read_lock();
if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) {
err = -EHOSTUNREACH;
goto put;
@@ -442,6 +443,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
}
put:
+ rcu_read_unlock();
ax25_route_lock_unuse();
return err;
}
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index c86f4e42e69c..7b2b04d6b856 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -1031,9 +1031,9 @@ static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags)
static int hci_set_random_addr_sync(struct hci_dev *hdev, bdaddr_t *rpa)
{
- /* If we're advertising or initiating an LE connection we can't
- * go ahead and change the random address at this time. This is
- * because the eventual initiator address used for the
+ /* If a random_addr has been set we're advertising or initiating an LE
+ * connection we can't go ahead and change the random address at this
+ * time. This is because the eventual initiator address used for the
* subsequently created connection will be undefined (some
* controllers use the new address and others the one we had
* when the operation started).
@@ -1041,8 +1041,9 @@ static int hci_set_random_addr_sync(struct hci_dev *hdev, bdaddr_t *rpa)
* In this kind of scenario skip the update and let the random
* address be updated at the next cycle.
*/
- if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
- hci_lookup_le_connect(hdev)) {
+ if (bacmp(&hdev->random_addr, BDADDR_ANY) &&
+ (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
+ hci_lookup_le_connect(hdev))) {
bt_dev_dbg(hdev, "Deferring random address update");
hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
return 0;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index b31192d473d0..de47ad999d7b 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -7655,6 +7655,24 @@ static void device_added(struct sock *sk, struct hci_dev *hdev,
mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk);
}
+static void add_device_complete(struct hci_dev *hdev, void *data, int err)
+{
+ struct mgmt_pending_cmd *cmd = data;
+ struct mgmt_cp_add_device *cp = cmd->param;
+
+ if (!err) {
+ device_added(cmd->sk, hdev, &cp->addr.bdaddr, cp->addr.type,
+ cp->action);
+ device_flags_changed(NULL, hdev, &cp->addr.bdaddr,
+ cp->addr.type, hdev->conn_flags,
+ PTR_UINT(cmd->user_data));
+ }
+
+ mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ mgmt_status(err), &cp->addr, sizeof(cp->addr));
+ mgmt_pending_free(cmd);
+}
+
static int add_device_sync(struct hci_dev *hdev, void *data)
{
return hci_update_passive_scan_sync(hdev);
@@ -7663,6 +7681,7 @@ static int add_device_sync(struct hci_dev *hdev, void *data)
static int add_device(struct sock *sk, struct hci_dev *hdev,
void *data, u16 len)
{
+ struct mgmt_pending_cmd *cmd;
struct mgmt_cp_add_device *cp = data;
u8 auto_conn, addr_type;
struct hci_conn_params *params;
@@ -7743,9 +7762,24 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
current_flags = params->flags;
}
- err = hci_cmd_sync_queue(hdev, add_device_sync, NULL, NULL);
- if (err < 0)
+ cmd = mgmt_pending_new(sk, MGMT_OP_ADD_DEVICE, hdev, data, len);
+ if (!cmd) {
+ err = -ENOMEM;
goto unlock;
+ }
+
+ cmd->user_data = UINT_PTR(current_flags);
+
+ err = hci_cmd_sync_queue(hdev, add_device_sync, cmd,
+ add_device_complete);
+ if (err < 0) {
+ err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE,
+ MGMT_STATUS_FAILED, &cp->addr,
+ sizeof(cp->addr));
+ mgmt_pending_free(cmd);
+ }
+
+ goto unlock;
added:
device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index af80d599c337..21a5b5535ebc 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -201,14 +201,14 @@ static ssize_t address_show(struct device *tty_dev,
struct device_attribute *attr, char *buf)
{
struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
- return sprintf(buf, "%pMR\n", &dev->dst);
+ return sysfs_emit(buf, "%pMR\n", &dev->dst);
}
static ssize_t channel_show(struct device *tty_dev,
struct device_attribute *attr, char *buf)
{
struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
- return sprintf(buf, "%d\n", dev->channel);
+ return sysfs_emit(buf, "%d\n", dev->channel);
}
static DEVICE_ATTR_RO(address);
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index c7869a286df4..115a23054a58 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -229,7 +229,7 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
#endif
#if IS_ENABLED(CONFIG_IPV6)
-struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg)
+struct nd_msg *br_is_nd_neigh_msg(const struct sk_buff *skb, struct nd_msg *msg)
{
struct nd_msg *m;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 29d6ec45cf41..1054b8a88edc 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -2299,6 +2299,6 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
u16 vid, struct net_bridge_port *p);
void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
-struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
+struct nd_msg *br_is_nd_neigh_msg(const struct sk_buff *skb, struct nd_msg *m);
bool br_is_neigh_suppress_enabled(const struct net_bridge_port *p, u16 vid);
#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index e7223972b9aa..fda4e1039bf0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -753,6 +753,36 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
}
EXPORT_SYMBOL_GPL(dev_fill_forward_path);
+/* must be called under rcu_read_lock(), as we dont take a reference */
+static struct napi_struct *napi_by_id(unsigned int napi_id)
+{
+ unsigned int hash = napi_id % HASH_SIZE(napi_hash);
+ struct napi_struct *napi;
+
+ hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
+ if (napi->napi_id == napi_id)
+ return napi;
+
+ return NULL;
+}
+
+/* must be called under rcu_read_lock(), as we dont take a reference */
+struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
+{
+ struct napi_struct *napi;
+
+ napi = napi_by_id(napi_id);
+ if (!napi)
+ return NULL;
+
+ if (WARN_ON_ONCE(!napi->dev))
+ return NULL;
+ if (!net_eq(net, dev_net(napi->dev)))
+ return NULL;
+
+ return napi;
+}
+
/**
* __dev_get_by_name - find a device by its name
* @net: the applicable net namespace
@@ -927,7 +957,6 @@ EXPORT_SYMBOL(netdev_get_by_index);
* its reference counter increased so the caller must be careful
* about locking. The caller must hold RCU lock.
*/
-
struct net_device *dev_get_by_napi_id(unsigned int napi_id)
{
struct napi_struct *napi;
@@ -941,7 +970,6 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
return napi ? napi->dev : NULL;
}
-EXPORT_SYMBOL(dev_get_by_napi_id);
static DEFINE_SEQLOCK(netdev_rename_lock);
@@ -1769,14 +1797,19 @@ int register_netdevice_notifier(struct notifier_block *nb)
/* Close race with setup_net() and cleanup_net() */
down_write(&pernet_ops_rwsem);
+
+ /* When RTNL is removed, we need protection for netdev_chain. */
rtnl_lock();
+
err = raw_notifier_chain_register(&netdev_chain, nb);
if (err)
goto unlock;
if (dev_boot_phase)
goto unlock;
for_each_net(net) {
+ __rtnl_net_lock(net);
err = call_netdevice_register_net_notifiers(nb, net);
+ __rtnl_net_unlock(net);
if (err)
goto rollback;
}
@@ -1787,8 +1820,11 @@ unlock:
return err;
rollback:
- for_each_net_continue_reverse(net)
+ for_each_net_continue_reverse(net) {
+ __rtnl_net_lock(net);
call_netdevice_unregister_net_notifiers(nb, net);
+ __rtnl_net_unlock(net);
+ }
raw_notifier_chain_unregister(&netdev_chain, nb);
goto unlock;
@@ -1821,8 +1857,11 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
if (err)
goto unlock;
- for_each_net(net)
+ for_each_net(net) {
+ __rtnl_net_lock(net);
call_netdevice_unregister_net_notifiers(nb, net);
+ __rtnl_net_unlock(net);
+ }
unlock:
rtnl_unlock();
@@ -1886,9 +1925,10 @@ int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
{
int err;
- rtnl_lock();
+ rtnl_net_lock(net);
err = __register_netdevice_notifier_net(net, nb, false);
- rtnl_unlock();
+ rtnl_net_unlock(net);
+
return err;
}
EXPORT_SYMBOL(register_netdevice_notifier_net);
@@ -1914,9 +1954,10 @@ int unregister_netdevice_notifier_net(struct net *net,
{
int err;
- rtnl_lock();
+ rtnl_net_lock(net);
err = __unregister_netdevice_notifier_net(net, nb);
- rtnl_unlock();
+ rtnl_net_unlock(net);
+
return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier_net);
@@ -1933,15 +1974,17 @@ int register_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn)
{
+ struct net *net = dev_net(dev);
int err;
- rtnl_lock();
- err = __register_netdevice_notifier_net(dev_net(dev), nb, false);
+ rtnl_net_lock(net);
+ err = __register_netdevice_notifier_net(net, nb, false);
if (!err) {
nn->nb = nb;
list_add(&nn->list, &dev->net_notifier_list);
}
- rtnl_unlock();
+ rtnl_net_unlock(net);
+
return err;
}
EXPORT_SYMBOL(register_netdevice_notifier_dev_net);
@@ -1950,12 +1993,14 @@ int unregister_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn)
{
+ struct net *net = dev_net(dev);
int err;
- rtnl_lock();
+ rtnl_net_lock(net);
list_del(&nn->list);
- err = __unregister_netdevice_notifier_net(dev_net(dev), nb);
- rtnl_unlock();
+ err = __unregister_netdevice_notifier_net(net, nb);
+ rtnl_net_unlock(net);
+
return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier_dev_net);
@@ -3233,7 +3278,7 @@ void netif_device_attach(struct net_device *dev)
if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
netif_running(dev)) {
netif_tx_wake_all_queues(dev);
- __netdev_watchdog_up(dev);
+ netdev_watchdog_up(dev);
}
}
EXPORT_SYMBOL(netif_device_attach);
@@ -5476,8 +5521,14 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
orig_dev = skb->dev;
skb_reset_network_header(skb);
+#if !defined(CONFIG_DEBUG_NET)
+ /* We plan to no longer reset the transport header here.
+ * Give some time to fuzzers and dev build to catch bugs
+ * in network stacks.
+ */
if (!skb_transport_header_was_set(skb))
skb_reset_transport_header(skb);
+#endif
skb_reset_mac_len(skb);
pt_prev = NULL;
@@ -6293,19 +6344,6 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
}
EXPORT_SYMBOL(napi_complete_done);
-/* must be called under rcu_read_lock(), as we dont take a reference */
-struct napi_struct *napi_by_id(unsigned int napi_id)
-{
- unsigned int hash = napi_id % HASH_SIZE(napi_hash);
- struct napi_struct *napi;
-
- hlist_for_each_entry_rcu(napi, &napi_hash[hash], napi_hash_node)
- if (napi->napi_id == napi_id)
- return napi;
-
- return NULL;
-}
-
static void skb_defer_free_flush(struct softnet_data *sd)
{
struct sk_buff *skb, *next;
@@ -6713,13 +6751,14 @@ static void napi_restore_config(struct napi_struct *n)
n->gro_flush_timeout = n->config->gro_flush_timeout;
n->irq_suspend_timeout = n->config->irq_suspend_timeout;
/* a NAPI ID might be stored in the config, if so use it. if not, use
- * napi_hash_add to generate one for us. It will be saved to the config
- * in napi_disable.
+ * napi_hash_add to generate one for us.
*/
- if (n->config->napi_id)
+ if (n->config->napi_id) {
napi_hash_add_with_id(n, n->config->napi_id);
- else
+ } else {
napi_hash_add(n);
+ n->config->napi_id = n->napi_id;
+ }
}
static void napi_save_config(struct napi_struct *n)
@@ -6727,10 +6766,39 @@ static void napi_save_config(struct napi_struct *n)
n->config->defer_hard_irqs = n->defer_hard_irqs;
n->config->gro_flush_timeout = n->gro_flush_timeout;
n->config->irq_suspend_timeout = n->irq_suspend_timeout;
- n->config->napi_id = n->napi_id;
napi_hash_del(n);
}
+/* Netlink wants the NAPI list to be sorted by ID, if adding a NAPI which will
+ * inherit an existing ID try to insert it at the right position.
+ */
+static void
+netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
+{
+ unsigned int new_id, pos_id;
+ struct list_head *higher;
+ struct napi_struct *pos;
+
+ new_id = UINT_MAX;
+ if (napi->config && napi->config->napi_id)
+ new_id = napi->config->napi_id;
+
+ higher = &dev->napi_list;
+ list_for_each_entry(pos, &dev->napi_list, dev_list) {
+ if (pos->napi_id >= MIN_NAPI_ID)
+ pos_id = pos->napi_id;
+ else if (pos->config)
+ pos_id = pos->config->napi_id;
+ else
+ pos_id = UINT_MAX;
+
+ if (pos_id <= new_id)
+ break;
+ higher = &pos->dev_list;
+ }
+ list_add_rcu(&napi->dev_list, higher); /* adds after higher */
+}
+
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6757,7 +6825,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
napi->list_owner = -1;
set_bit(NAPI_STATE_SCHED, &napi->state);
set_bit(NAPI_STATE_NPSVC, &napi->state);
- list_add_rcu(&napi->dev_list, &dev->napi_list);
+ netif_napi_dev_list_add(dev, napi);
/* default settings from sysfs are applied to all NAPIs. any per-NAPI
* configuration will be loaded in napi_enable
@@ -10668,26 +10736,20 @@ err_free_name:
EXPORT_SYMBOL(register_netdevice);
/* Initialize the core of a dummy net device.
- * This is useful if you are calling this function after alloc_netdev(),
- * since it does not memset the net_device fields.
+ * The setup steps dummy netdevs need which normal netdevs get by going
+ * through register_netdevice().
*/
-static void init_dummy_netdev_core(struct net_device *dev)
+static void init_dummy_netdev(struct net_device *dev)
{
/* make sure we BUG if trying to hit standard
* register/unregister code path
*/
dev->reg_state = NETREG_DUMMY;
- /* NAPI wants this */
- INIT_LIST_HEAD(&dev->napi_list);
-
/* a dummy interface is started by default */
set_bit(__LINK_STATE_PRESENT, &dev->state);
set_bit(__LINK_STATE_START, &dev->state);
- /* napi_busy_loop stats accounting wants this */
- dev_net_set(dev, &init_net);
-
/* Note : We dont allocate pcpu_refcnt for dummy devices,
* because users of this 'device' dont need to change
* its refcount.
@@ -10695,28 +10757,6 @@ static void init_dummy_netdev_core(struct net_device *dev)
}
/**
- * init_dummy_netdev - init a dummy network device for NAPI
- * @dev: device to init
- *
- * This takes a network device structure and initializes the minimum
- * amount of fields so it can be used to schedule NAPI polls without
- * registering a full blown interface. This is to be used by drivers
- * that need to tie several hardware interfaces to a single NAPI
- * poll scheduler due to HW limitations.
- */
-void init_dummy_netdev(struct net_device *dev)
-{
- /* Clear everything. Note we don't initialize spinlocks
- * as they aren't supposed to be taken by any of the
- * NAPI code and this dummy netdev is supposed to be
- * only ever used for NAPI polls
- */
- memset(dev, 0, sizeof(struct net_device));
- init_dummy_netdev_core(dev);
-}
-EXPORT_SYMBOL_GPL(init_dummy_netdev);
-
-/**
* register_netdev - register a network device
* @dev: device to register
*
@@ -10731,12 +10771,16 @@ EXPORT_SYMBOL_GPL(init_dummy_netdev);
*/
int register_netdev(struct net_device *dev)
{
+ struct net *net = dev_net(dev);
int err;
- if (rtnl_lock_killable())
+ if (rtnl_net_lock_killable(net))
return -EINTR;
+
err = register_netdevice(dev);
- rtnl_unlock();
+
+ rtnl_net_unlock(net);
+
return err;
}
EXPORT_SYMBOL(register_netdev);
@@ -11390,7 +11434,7 @@ EXPORT_SYMBOL(free_netdev);
struct net_device *alloc_netdev_dummy(int sizeof_priv)
{
return alloc_netdev(sizeof_priv, "dummy#", NET_NAME_UNKNOWN,
- init_dummy_netdev_core);
+ init_dummy_netdev);
}
EXPORT_SYMBOL_GPL(alloc_netdev_dummy);
@@ -11606,9 +11650,11 @@ EXPORT_SYMBOL(unregister_netdevice_many);
*/
void unregister_netdev(struct net_device *dev)
{
- rtnl_lock();
+ struct net *net = dev_net(dev);
+
+ rtnl_net_lock(net);
unregister_netdevice(dev);
- rtnl_unlock();
+ rtnl_net_unlock(net);
}
EXPORT_SYMBOL(unregister_netdev);
diff --git a/net/core/dev.h b/net/core/dev.h
index aa91eed55a40..d8966847794c 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -22,6 +22,9 @@ struct sd_flow_limit {
extern int netdev_flow_limit_table_len;
+struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id);
+struct net_device *dev_get_by_napi_id(unsigned int napi_id);
+
#ifdef CONFIG_PROC_FS
int __init dev_proc_init(void);
#else
@@ -269,7 +272,6 @@ void xdp_do_check_flushed(struct napi_struct *napi);
static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
-struct napi_struct *napi_by_id(unsigned int napi_id);
void kick_defer_list_purge(struct softnet_data *sd, unsigned int cpu);
#define XMIT_RECURSION_LIMIT 8
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 1b4d39e38084..cb04ef2b9807 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -42,14 +42,18 @@ static unsigned int default_operstate(const struct net_device *dev)
* first check whether lower is indeed the source of its down state.
*/
if (!netif_carrier_ok(dev)) {
- int iflink = dev_get_iflink(dev);
struct net_device *peer;
+ int iflink;
/* If called from netdev_run_todo()/linkwatch_sync_dev(),
* dev_net(dev) can be already freed, and RTNL is not held.
*/
- if (dev->reg_state == NETREG_UNREGISTERED ||
- iflink == dev->ifindex)
+ if (dev->reg_state <= NETREG_REGISTERED)
+ iflink = dev_get_iflink(dev);
+ else
+ iflink = dev->ifindex;
+
+ if (iflink == dev->ifindex)
return IF_OPER_DOWN;
ASSERT_RTNL();
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index b0772d135efb..c59619a2ec23 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -167,8 +167,6 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
void *hdr;
pid_t pid;
- if (WARN_ON_ONCE(!napi->dev))
- return -EINVAL;
if (!(napi->dev->flags & IFF_UP))
return 0;
@@ -176,8 +174,7 @@ netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
if (!hdr)
return -EMSGSIZE;
- if (napi->napi_id >= MIN_NAPI_ID &&
- nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
+ if (nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
goto nla_put_failure;
if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
@@ -235,7 +232,7 @@ int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
rcu_read_lock();
- napi = napi_by_id(napi_id);
+ napi = netdev_napi_by_id(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_fill_one(rsp, napi, info);
} else {
@@ -266,12 +263,21 @@ netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
struct netdev_nl_dump_ctx *ctx)
{
struct napi_struct *napi;
+ unsigned int prev_id;
int err = 0;
if (!(netdev->flags & IFF_UP))
return err;
+ prev_id = UINT_MAX;
list_for_each_entry(napi, &netdev->napi_list, dev_list) {
+ if (napi->napi_id < MIN_NAPI_ID)
+ continue;
+
+ /* Dump continuation below depends on the list being sorted */
+ WARN_ON_ONCE(napi->napi_id >= prev_id);
+ prev_id = napi->napi_id;
+
if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
continue;
@@ -354,7 +360,7 @@ int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
rcu_read_lock();
- napi = napi_by_id(napi_id);
+ napi = netdev_napi_by_id(genl_info_net(info), napi_id);
if (napi) {
err = netdev_nl_napi_set_config(napi, info);
} else {
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index e217a5838c87..db82786fa0c4 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -79,3 +79,4 @@ err_free_new_mem:
return err;
}
+EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6b745096809d..1f4d4b5570ab 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -84,7 +84,6 @@ int rtnl_lock_killable(void)
{
return mutex_lock_killable(&rtnl_mutex);
}
-EXPORT_SYMBOL(rtnl_lock_killable);
static struct sk_buff *defer_kfree_skb_list;
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
@@ -221,6 +220,16 @@ int rtnl_net_trylock(struct net *net)
}
EXPORT_SYMBOL(rtnl_net_trylock);
+int rtnl_net_lock_killable(struct net *net)
+{
+ int ret = rtnl_lock_killable();
+
+ if (!ret)
+ __rtnl_net_lock(net);
+
+ return ret;
+}
+
static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
{
if (net_eq(net_a, net_b))
diff --git a/net/dsa/user.c b/net/dsa/user.c
index 4a8de48a6f24..c74f2b2b92de 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -1251,7 +1251,6 @@ static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e)
{
struct dsa_port *dp = dsa_user_to_port(dev);
struct dsa_switch *ds = dp->ds;
- int ret;
/* Check whether the switch supports EEE */
if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index))
@@ -1261,13 +1260,6 @@ static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e)
if (!dev->phydev)
return -ENODEV;
- if (!ds->ops->get_mac_eee)
- return -EOPNOTSUPP;
-
- ret = ds->ops->get_mac_eee(ds, dp->index, e);
- if (ret)
- return ret;
-
return phylink_ethtool_get_eee(dp->pl, e);
}
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 2607aea1fbfb..2bd77c94f9f1 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -869,6 +869,7 @@ int __ethtool_get_ts_info(struct net_device *dev,
struct kernel_ethtool_ts_info *info)
{
struct hwtstamp_provider *hwprov;
+ int err = 0;
rcu_read_lock();
hwprov = rcu_dereference(dev->hwprov);
@@ -876,7 +877,6 @@ int __ethtool_get_ts_info(struct net_device *dev,
if (!hwprov) {
const struct ethtool_ops *ops = dev->ethtool_ops;
struct phy_device *phydev = dev->phydev;
- int err = 0;
ethtool_init_tsinfo(info);
if (phy_is_default_hwtstamp(phydev) &&
@@ -892,8 +892,9 @@ int __ethtool_get_ts_info(struct net_device *dev,
return err;
}
+ err = ethtool_get_ts_info_by_phc(dev, info, &hwprov->desc);
rcu_read_unlock();
- return ethtool_get_ts_info_by_phc(dev, info, &hwprov->desc);
+ return err;
}
bool net_support_hwtstamp_qualifier(struct net_device *dev,
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 34d76e87847d..af19e1bed303 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -3,6 +3,7 @@
#include "netlink.h"
#include "common.h"
#include <linux/phy.h>
+#include <linux/phylib_stubs.h>
struct linkstate_req_info {
struct ethnl_req_info base;
@@ -26,9 +27,8 @@ const struct nla_policy ethnl_linkstate_get_policy[] = {
NLA_POLICY_NESTED(ethnl_header_policy_stats),
};
-static int linkstate_get_sqi(struct net_device *dev)
+static int linkstate_get_sqi(struct phy_device *phydev)
{
- struct phy_device *phydev = dev->phydev;
int ret;
if (!phydev)
@@ -46,9 +46,8 @@ static int linkstate_get_sqi(struct net_device *dev)
return ret;
}
-static int linkstate_get_sqi_max(struct net_device *dev)
+static int linkstate_get_sqi_max(struct phy_device *phydev)
{
- struct phy_device *phydev = dev->phydev;
int ret;
if (!phydev)
@@ -100,19 +99,28 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
{
struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_LINKSTATE_HEADER],
+ info->extack);
+ if (IS_ERR(phydev)) {
+ ret = PTR_ERR(phydev);
+ goto out;
+ }
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
data->link = __ethtool_get_link(dev);
- ret = linkstate_get_sqi(dev);
+ ret = linkstate_get_sqi(phydev);
if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi = ret;
- ret = linkstate_get_sqi_max(dev);
+ ret = linkstate_get_sqi_max(phydev);
if (linkstate_sqi_critical_error(ret))
goto out;
data->sqi_max = ret;
@@ -127,9 +135,9 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
sizeof(data->link_stats) / 8);
if (req_base->flags & ETHTOOL_FLAG_STATS) {
- if (dev->phydev)
- data->link_stats.link_down_events =
- READ_ONCE(dev->phydev->link_down_events);
+ if (phydev)
+ phy_ethtool_get_link_ext_stats(phydev,
+ &data->link_stats);
if (dev->ethtool_ops->get_link_ext_stats)
dev->ethtool_ops->get_link_ext_stats(dev,
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 0a09298fff92..1ce0a3de1430 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -511,5 +511,6 @@ extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING
extern const char stats_eth_mac_names[__ETHTOOL_A_STATS_ETH_MAC_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_ctrl_names[__ETHTOOL_A_STATS_ETH_CTRL_CNT][ETH_GSTRING_LEN];
extern const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN];
+extern const char stats_phy_names[__ETHTOOL_A_STATS_PHY_CNT][ETH_GSTRING_LEN];
#endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index 912f0c4fff2f..038a2558f052 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/phy.h>
+#include <linux/phylib_stubs.h>
+
#include "netlink.h"
#include "common.h"
#include "bitset.h"
@@ -20,6 +23,7 @@ struct stats_reply_data {
struct ethtool_eth_mac_stats mac_stats;
struct ethtool_eth_ctrl_stats ctrl_stats;
struct ethtool_rmon_stats rmon_stats;
+ struct ethtool_phy_stats phydev_stats;
);
const struct ethtool_rmon_hist_range *rmon_ranges;
};
@@ -32,6 +36,7 @@ const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN] = {
[ETHTOOL_STATS_ETH_MAC] = "eth-mac",
[ETHTOOL_STATS_ETH_CTRL] = "eth-ctrl",
[ETHTOOL_STATS_RMON] = "rmon",
+ [ETHTOOL_STATS_PHY] = "phydev",
};
const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN] = {
@@ -76,6 +81,15 @@ const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN] = {
[ETHTOOL_A_STATS_RMON_JABBER] = "etherStatsJabbers",
};
+const char stats_phy_names[__ETHTOOL_A_STATS_PHY_CNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_A_STATS_PHY_RX_PKTS] = "RxFrames",
+ [ETHTOOL_A_STATS_PHY_RX_BYTES] = "RxOctets",
+ [ETHTOOL_A_STATS_PHY_RX_ERRORS] = "RxErrors",
+ [ETHTOOL_A_STATS_PHY_TX_PKTS] = "TxFrames",
+ [ETHTOOL_A_STATS_PHY_TX_BYTES] = "TxOctets",
+ [ETHTOOL_A_STATS_PHY_TX_ERRORS] = "TxErrors",
+};
+
const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1] = {
[ETHTOOL_A_STATS_HEADER] =
NLA_POLICY_NESTED(ethnl_header_policy),
@@ -120,8 +134,15 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
struct stats_reply_data *data = STATS_REPDATA(reply_base);
enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
int ret;
+ phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_STATS_HEADER],
+ info->extack);
+ if (IS_ERR(phydev))
+ return PTR_ERR(phydev);
+
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
@@ -145,6 +166,14 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
data->ctrl_stats.src = src;
data->rmon_stats.src = src;
+ if ((test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask) ||
+ test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) &&
+ src == ETHTOOL_MAC_STATS_SRC_AGGREGATE) {
+ if (phydev)
+ phy_ethtool_get_phy_stats(phydev, &data->phy_stats,
+ &data->phydev_stats);
+ }
+
if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
dev->ethtool_ops->get_eth_phy_stats)
dev->ethtool_ops->get_eth_phy_stats(dev, &data->phy_stats);
@@ -194,6 +223,10 @@ static int stats_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(4)) * /* _A_STATS_GRP_HIST_BKT_HI */
ETHTOOL_RMON_HIST_MAX * 2;
}
+ if (test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask)) {
+ n_stats += sizeof(struct ethtool_phy_stats) / sizeof(u64);
+ n_grps++;
+ }
len += n_grps * (nla_total_size(0) + /* _A_STATS_GRP */
nla_total_size(4) + /* _A_STATS_GRP_ID */
@@ -247,6 +280,25 @@ static int stats_put_phy_stats(struct sk_buff *skb,
return 0;
}
+static int stats_put_phydev_stats(struct sk_buff *skb,
+ const struct stats_reply_data *data)
+{
+ if (stat_put(skb, ETHTOOL_A_STATS_PHY_RX_PKTS,
+ data->phydev_stats.rx_packets) ||
+ stat_put(skb, ETHTOOL_A_STATS_PHY_RX_BYTES,
+ data->phydev_stats.rx_bytes) ||
+ stat_put(skb, ETHTOOL_A_STATS_PHY_RX_ERRORS,
+ data->phydev_stats.rx_errors) ||
+ stat_put(skb, ETHTOOL_A_STATS_PHY_TX_PKTS,
+ data->phydev_stats.tx_packets) ||
+ stat_put(skb, ETHTOOL_A_STATS_PHY_TX_BYTES,
+ data->phydev_stats.tx_bytes) ||
+ stat_put(skb, ETHTOOL_A_STATS_PHY_TX_ERRORS,
+ data->phydev_stats.tx_errors))
+ return -EMSGSIZE;
+ return 0;
+}
+
static int stats_put_mac_stats(struct sk_buff *skb,
const struct stats_reply_data *data)
{
@@ -423,6 +475,9 @@ static int stats_fill_reply(struct sk_buff *skb,
if (!ret && test_bit(ETHTOOL_STATS_RMON, req_info->stat_mask))
ret = stats_put_stats(skb, data, ETHTOOL_STATS_RMON,
ETH_SS_STATS_RMON, stats_put_rmon_stats);
+ if (!ret && test_bit(ETHTOOL_STATS_PHY, req_info->stat_mask))
+ ret = stats_put_stats(skb, data, ETHTOOL_STATS_PHY,
+ ETH_SS_STATS_PHY, stats_put_phydev_stats);
return ret;
}
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
index b3382b3cf325..818cf01f0911 100644
--- a/net/ethtool/strset.c
+++ b/net/ethtool/strset.c
@@ -105,6 +105,11 @@ static const struct strset_info info_template[] = {
.count = __ETHTOOL_A_STATS_RMON_CNT,
.strings = stats_rmon_names,
},
+ [ETH_SS_STATS_PHY] = {
+ .per_dev = false,
+ .count = __ETHTOOL_A_STATS_PHY_CNT,
+ .strings = stats_phy_names,
+ },
};
struct strset_req_info {
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index fcfeb79bb040..7d7551e6f0b0 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -163,6 +163,7 @@ struct hsr_port {
struct net_device *dev;
struct hsr_priv *hsr;
enum hsr_port_type type;
+ struct rcu_head rcu;
};
struct hsr_frame_info;
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index 464f683e016d..2a802a5de2ac 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -204,7 +204,6 @@ int hsr_add_port(struct hsr_priv *hsr, struct net_device *dev,
}
list_add_tail_rcu(&port->port_list, &hsr->ports);
- synchronize_rcu();
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
netdev_update_features(master->dev);
@@ -235,7 +234,5 @@ void hsr_del_port(struct hsr_port *port)
netdev_upper_dev_unlink(port->dev, master->dev);
}
- synchronize_rcu();
-
- kfree(port);
+ kfree_rcu(port, rcu);
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index f3281312eb5e..b0fbf804bbba 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -816,7 +816,8 @@ int esp_input_done2(struct sk_buff *skb, int err)
}
skb_pull_rcsum(skb, hlen);
- if (x->props.mode == XFRM_MODE_TUNNEL)
+ if (x->props.mode == XFRM_MODE_TUNNEL ||
+ x->props.mode == XFRM_MODE_IPTFS)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -ihl);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index e45222d5fc2e..cc2b5194a18d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -898,7 +898,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb,
sock_net_set(ctl_sk, net);
if (sk) {
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_mark : sk->sk_mark;
+ inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4da409bc4577..c3729382be3b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5240,9 +5240,9 @@ int inet6_fill_ifmcaddr(struct sk_buff *skb,
return 0;
}
-static int inet6_fill_ifacaddr(struct sk_buff *skb,
- const struct ifacaddr6 *ifaca,
- struct inet6_fill_args *args)
+int inet6_fill_ifacaddr(struct sk_buff *skb,
+ const struct ifacaddr6 *ifaca,
+ struct inet6_fill_args *args)
{
struct net_device *dev = fib6_info_nh_dev(ifaca->aca_rt);
int ifindex = dev ? dev->ifindex : 1;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 562cace50ca9..21e01695b48c 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -278,6 +278,37 @@ static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i,
return aca;
}
+static void inet6_ifacaddr_notify(struct net_device *dev,
+ const struct ifacaddr6 *ifaca, int event)
+{
+ struct inet6_fill_args fillargs = {
+ .event = event,
+ .netnsid = -1,
+ };
+ struct net *net = dev_net(dev);
+ struct sk_buff *skb;
+ int err = -ENOMEM;
+
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ nla_total_size(sizeof(struct in6_addr)) +
+ nla_total_size(sizeof(struct ifa_cacheinfo)),
+ GFP_KERNEL);
+ if (!skb)
+ goto error;
+
+ err = inet6_fill_ifacaddr(skb, ifaca, &fillargs);
+ if (err < 0) {
+ pr_err("Failed to fill in anycast addresses (err %d)\n", err);
+ nlmsg_free(skb);
+ goto error;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL);
+ return;
+error:
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err);
+}
+
/*
* device anycast group inc (add if not found)
*/
@@ -333,6 +364,8 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr)
addrconf_join_solict(idev->dev, &aca->aca_addr);
+ inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST);
+
aca_put(aca);
return 0;
out:
@@ -375,6 +408,8 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
+ inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST);
+
aca_put(aca);
return 0;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index b2400c226a32..5f3d0cc1555a 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -859,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err)
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
skb_pull_rcsum(skb, hlen);
- if (x->props.mode == XFRM_MODE_TUNNEL)
+ if (x->props.mode == XFRM_MODE_TUNNEL ||
+ x->props.mode == XFRM_MODE_IPTFS)
skb_reset_transport_header(skb);
else
skb_set_transport_header(skb, -hdr_len);
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index c0e2da5072be..9e4631fade90 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -684,6 +684,10 @@ void ieee802154_if_remove(struct ieee802154_sub_if_data *sdata)
ASSERT_RTNL();
mutex_lock(&sdata->local->iflist_mtx);
+ if (list_empty(&sdata->local->interfaces)) {
+ mutex_unlock(&sdata->local->iflist_mtx);
+ return;
+ }
list_del_rcu(&sdata->list);
mutex_unlock(&sdata->local->iflist_mtx);
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c
index 38d8121331d4..b0dd008e2114 100644
--- a/net/mptcp/ctrl.c
+++ b/net/mptcp/ctrl.c
@@ -102,16 +102,15 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
}
#ifdef CONFIG_SYSCTL
-static int mptcp_set_scheduler(const struct net *net, const char *name)
+static int mptcp_set_scheduler(char *scheduler, const char *name)
{
- struct mptcp_pernet *pernet = mptcp_get_pernet(net);
struct mptcp_sched_ops *sched;
int ret = 0;
rcu_read_lock();
sched = mptcp_sched_find(name);
if (sched)
- strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX);
+ strscpy(scheduler, name, MPTCP_SCHED_NAME_MAX);
else
ret = -ENOENT;
rcu_read_unlock();
@@ -122,7 +121,7 @@ static int mptcp_set_scheduler(const struct net *net, const char *name)
static int proc_scheduler(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- const struct net *net = current->nsproxy->net_ns;
+ char (*scheduler)[MPTCP_SCHED_NAME_MAX] = ctl->data;
char val[MPTCP_SCHED_NAME_MAX];
struct ctl_table tbl = {
.data = val,
@@ -130,11 +129,11 @@ static int proc_scheduler(const struct ctl_table *ctl, int write,
};
int ret;
- strscpy(val, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX);
+ strscpy(val, *scheduler, MPTCP_SCHED_NAME_MAX);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0)
- ret = mptcp_set_scheduler(net, val);
+ ret = mptcp_set_scheduler(*scheduler, val);
return ret;
}
@@ -161,7 +160,9 @@ static int proc_blackhole_detect_timeout(const struct ctl_table *table,
int write, void *buffer, size_t *lenp,
loff_t *ppos)
{
- struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
+ struct mptcp_pernet *pernet = container_of(table->data,
+ struct mptcp_pernet,
+ blackhole_timeout);
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
@@ -228,7 +229,7 @@ static struct ctl_table mptcp_sysctl_table[] = {
{
.procname = "available_schedulers",
.maxlen = MPTCP_SCHED_BUF_MAX,
- .mode = 0644,
+ .mode = 0444,
.proc_handler = proc_available_schedulers,
},
{
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1b2e7cbb577f..c44c89ecaca6 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -47,7 +47,7 @@ static void __mptcp_destroy_sock(struct sock *sk);
static void mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
-static struct net_device mptcp_napi_dev;
+static struct net_device *mptcp_napi_dev;
/* Returns end sequence number of the receiver's advertised window */
static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
@@ -4147,11 +4147,13 @@ void __init mptcp_proto_init(void)
if (percpu_counter_init(&mptcp_sockets_allocated, 0, GFP_KERNEL))
panic("Failed to allocate MPTCP pcpu counter\n");
- init_dummy_netdev(&mptcp_napi_dev);
+ mptcp_napi_dev = alloc_netdev_dummy(0);
+ if (!mptcp_napi_dev)
+ panic("Failed to allocate MPTCP dummy netdev\n");
for_each_possible_cpu(cpu) {
delegated = per_cpu_ptr(&mptcp_delegated_actions, cpu);
INIT_LIST_HEAD(&delegated->head);
- netif_napi_add_tx(&mptcp_napi_dev, &delegated->napi,
+ netif_napi_add_tx(mptcp_napi_dev, &delegated->napi,
mptcp_napi_poll);
napi_enable(&delegated->napi);
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9db3e2b0b1c3..456446d7af20 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2517,12 +2517,15 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
struct hlist_nulls_head *hash;
unsigned int nr_slots, i;
- if (*sizep > (UINT_MAX / sizeof(struct hlist_nulls_head)))
+ if (*sizep > (INT_MAX / sizeof(struct hlist_nulls_head)))
return NULL;
BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
+ if (nr_slots > (INT_MAX / sizeof(struct hlist_nulls_head)))
+ return NULL;
+
hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL);
if (hash && nulls)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f7ca7165e66e..83f3face8bb3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -8821,6 +8821,7 @@ static void nft_unregister_flowtable_hook(struct net *net,
}
static void __nft_unregister_flowtable_net_hooks(struct net *net,
+ struct nft_flowtable *flowtable,
struct list_head *hook_list,
bool release_netdev)
{
@@ -8828,6 +8829,8 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net,
list_for_each_entry_safe(hook, next, hook_list, list) {
nf_unregister_net_hook(net, &hook->ops);
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
if (release_netdev) {
list_del(&hook->list);
kfree_rcu(hook, rcu);
@@ -8836,9 +8839,10 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net,
}
static void nft_unregister_flowtable_net_hooks(struct net *net,
+ struct nft_flowtable *flowtable,
struct list_head *hook_list)
{
- __nft_unregister_flowtable_net_hooks(net, hook_list, false);
+ __nft_unregister_flowtable_net_hooks(net, flowtable, hook_list, false);
}
static int nft_register_flowtable_net_hooks(struct net *net,
@@ -9480,8 +9484,6 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
flowtable->data.type->free(&flowtable->data);
list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
- FLOW_BLOCK_UNBIND);
list_del_rcu(&hook->list);
kfree_rcu(hook, rcu);
}
@@ -10869,6 +10871,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
&nft_trans_flowtable_hooks(trans),
trans->msg_type);
nft_unregister_flowtable_net_hooks(net,
+ nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans));
} else {
list_del_rcu(&nft_trans_flowtable(trans)->list);
@@ -10877,6 +10880,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
NULL,
trans->msg_type);
nft_unregister_flowtable_net_hooks(net,
+ nft_trans_flowtable(trans),
&nft_trans_flowtable(trans)->hook_list);
}
break;
@@ -11139,11 +11143,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_NEWFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
nft_unregister_flowtable_net_hooks(net,
+ nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans));
} else {
nft_use_dec_restore(&table->use);
list_del_rcu(&nft_trans_flowtable(trans)->list);
nft_unregister_flowtable_net_hooks(net,
+ nft_trans_flowtable(trans),
&nft_trans_flowtable(trans)->hook_list);
}
break;
@@ -11736,7 +11742,8 @@ static void __nft_release_hook(struct net *net, struct nft_table *table)
list_for_each_entry(chain, &table->chains, list)
__nf_tables_unregister_hook(net, table, chain, true);
list_for_each_entry(flowtable, &table->flowtables, list)
- __nft_unregister_flowtable_net_hooks(net, &flowtable->hook_list,
+ __nft_unregister_flowtable_net_hooks(net, flowtable,
+ &flowtable->hook_list,
true);
}
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 8a07b46cc8fb..3210cfc966ab 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -112,7 +112,8 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode)
return true;
}
- return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL;
+ return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL ||
+ mode == XFRM_MODE_IPTFS;
}
static void nft_xfrm_state_get_key(const struct nft_xfrm *priv,
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 351ac1747224..0581c53e6517 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -61,8 +61,10 @@ static atomic_t rds_tcp_unloading = ATOMIC_INIT(0);
static struct kmem_cache *rds_tcp_conn_slab;
-static int rds_tcp_skbuf_handler(const struct ctl_table *ctl, int write,
- void *buffer, size_t *lenp, loff_t *fpos);
+static int rds_tcp_sndbuf_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *fpos);
+static int rds_tcp_rcvbuf_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *fpos);
static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
@@ -74,7 +76,7 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
/* data is per-net pointer */
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = rds_tcp_skbuf_handler,
+ .proc_handler = rds_tcp_sndbuf_handler,
.extra1 = &rds_tcp_min_sndbuf,
},
#define RDS_TCP_RCVBUF 1
@@ -83,7 +85,7 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
/* data is per-net pointer */
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = rds_tcp_skbuf_handler,
+ .proc_handler = rds_tcp_rcvbuf_handler,
.extra1 = &rds_tcp_min_rcvbuf,
},
};
@@ -682,10 +684,10 @@ static void rds_tcp_sysctl_reset(struct net *net)
spin_unlock_irq(&rds_tcp_conn_lock);
}
-static int rds_tcp_skbuf_handler(const struct ctl_table *ctl, int write,
+static int rds_tcp_skbuf_handler(struct rds_tcp_net *rtn,
+ const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *fpos)
{
- struct net *net = current->nsproxy->net_ns;
int err;
err = proc_dointvec_minmax(ctl, write, buffer, lenp, fpos);
@@ -694,11 +696,34 @@ static int rds_tcp_skbuf_handler(const struct ctl_table *ctl, int write,
*(int *)(ctl->extra1));
return err;
}
- if (write)
+
+ if (write && rtn->rds_tcp_listen_sock && rtn->rds_tcp_listen_sock->sk) {
+ struct net *net = sock_net(rtn->rds_tcp_listen_sock->sk);
+
rds_tcp_sysctl_reset(net);
+ }
+
return 0;
}
+static int rds_tcp_sndbuf_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *fpos)
+{
+ struct rds_tcp_net *rtn = container_of(ctl->data, struct rds_tcp_net,
+ sndbuf_size);
+
+ return rds_tcp_skbuf_handler(rtn, ctl, write, buffer, lenp, fpos);
+}
+
+static int rds_tcp_rcvbuf_handler(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *fpos)
+{
+ struct rds_tcp_net *rtn = container_of(ctl->data, struct rds_tcp_net,
+ rcvbuf_size);
+
+ return rds_tcp_skbuf_handler(rtn, ctl, write, buffer, lenp, fpos);
+}
+
static void rds_tcp_exit(void)
{
rds_tcp_set_unloading();
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 5502998aace7..5c2580a07530 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -356,7 +356,8 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = {
[TCA_FLOW_KEYS] = { .type = NLA_U32 },
[TCA_FLOW_MODE] = { .type = NLA_U32 },
[TCA_FLOW_BASECLASS] = { .type = NLA_U32 },
- [TCA_FLOW_RSHIFT] = { .type = NLA_U32 },
+ [TCA_FLOW_RSHIFT] = NLA_POLICY_MAX(NLA_U32,
+ 31 /* BITS_PER_U32 - 1 */),
[TCA_FLOW_ADDEND] = { .type = NLA_U32 },
[TCA_FLOW_MASK] = { .type = NLA_U32 },
[TCA_FLOW_XOR] = { .type = NLA_U32 },
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index deb0925f536d..48dd8c88903f 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -630,6 +630,63 @@ static bool cake_ddst(int flow_mode)
return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
}
+static void cake_dec_srchost_bulk_flow_count(struct cake_tin_data *q,
+ struct cake_flow *flow,
+ int flow_mode)
+{
+ if (likely(cake_dsrc(flow_mode) &&
+ q->hosts[flow->srchost].srchost_bulk_flow_count))
+ q->hosts[flow->srchost].srchost_bulk_flow_count--;
+}
+
+static void cake_inc_srchost_bulk_flow_count(struct cake_tin_data *q,
+ struct cake_flow *flow,
+ int flow_mode)
+{
+ if (likely(cake_dsrc(flow_mode) &&
+ q->hosts[flow->srchost].srchost_bulk_flow_count < CAKE_QUEUES))
+ q->hosts[flow->srchost].srchost_bulk_flow_count++;
+}
+
+static void cake_dec_dsthost_bulk_flow_count(struct cake_tin_data *q,
+ struct cake_flow *flow,
+ int flow_mode)
+{
+ if (likely(cake_ddst(flow_mode) &&
+ q->hosts[flow->dsthost].dsthost_bulk_flow_count))
+ q->hosts[flow->dsthost].dsthost_bulk_flow_count--;
+}
+
+static void cake_inc_dsthost_bulk_flow_count(struct cake_tin_data *q,
+ struct cake_flow *flow,
+ int flow_mode)
+{
+ if (likely(cake_ddst(flow_mode) &&
+ q->hosts[flow->dsthost].dsthost_bulk_flow_count < CAKE_QUEUES))
+ q->hosts[flow->dsthost].dsthost_bulk_flow_count++;
+}
+
+static u16 cake_get_flow_quantum(struct cake_tin_data *q,
+ struct cake_flow *flow,
+ int flow_mode)
+{
+ u16 host_load = 1;
+
+ if (cake_dsrc(flow_mode))
+ host_load = max(host_load,
+ q->hosts[flow->srchost].srchost_bulk_flow_count);
+
+ if (cake_ddst(flow_mode))
+ host_load = max(host_load,
+ q->hosts[flow->dsthost].dsthost_bulk_flow_count);
+
+ /* The get_random_u16() is a way to apply dithering to avoid
+ * accumulating roundoff errors
+ */
+ return (q->flow_quantum * quantum_div[host_load] +
+ get_random_u16()) >> 16;
+}
+
static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
int flow_mode, u16 flow_override, u16 host_override)
{
@@ -776,10 +833,8 @@ skip_hash:
allocate_dst = cake_ddst(flow_mode);
if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
- if (allocate_src)
- q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
- if (allocate_dst)
- q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+ cake_dec_srchost_bulk_flow_count(q, &q->flows[outer_hash + k], flow_mode);
+ cake_dec_dsthost_bulk_flow_count(q, &q->flows[outer_hash + k], flow_mode);
}
found:
/* reserve queue for future packets in same flow */
@@ -804,9 +859,10 @@ found:
q->hosts[outer_hash + k].srchost_tag = srchost_hash;
found_src:
srchost_idx = outer_hash + k;
- if (q->flows[reduced_hash].set == CAKE_SET_BULK)
- q->hosts[srchost_idx].srchost_bulk_flow_count++;
q->flows[reduced_hash].srchost = srchost_idx;
+
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+ cake_inc_srchost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode);
}
if (allocate_dst) {
@@ -827,9 +883,10 @@ found_src:
q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
found_dst:
dsthost_idx = outer_hash + k;
- if (q->flows[reduced_hash].set == CAKE_SET_BULK)
- q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
q->flows[reduced_hash].dsthost = dsthost_idx;
+
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+ cake_inc_dsthost_bulk_flow_count(q, &q->flows[reduced_hash], flow_mode);
}
}
@@ -1841,10 +1898,6 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* flowchain */
if (!flow->set || flow->set == CAKE_SET_DECAYING) {
- struct cake_host *srchost = &b->hosts[flow->srchost];
- struct cake_host *dsthost = &b->hosts[flow->dsthost];
- u16 host_load = 1;
-
if (!flow->set) {
list_add_tail(&flow->flowchain, &b->new_flows);
} else {
@@ -1854,18 +1907,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
flow->set = CAKE_SET_SPARSE;
b->sparse_flow_count++;
- if (cake_dsrc(q->flow_mode))
- host_load = max(host_load, srchost->srchost_bulk_flow_count);
-
- if (cake_ddst(q->flow_mode))
- host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
-
- flow->deficit = (b->flow_quantum *
- quantum_div[host_load]) >> 16;
+ flow->deficit = cake_get_flow_quantum(b, flow, q->flow_mode);
} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
- struct cake_host *srchost = &b->hosts[flow->srchost];
- struct cake_host *dsthost = &b->hosts[flow->dsthost];
-
/* this flow was empty, accounted as a sparse flow, but actually
* in the bulk rotation.
*/
@@ -1873,12 +1916,8 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
b->sparse_flow_count--;
b->bulk_flow_count++;
- if (cake_dsrc(q->flow_mode))
- srchost->srchost_bulk_flow_count++;
-
- if (cake_ddst(q->flow_mode))
- dsthost->dsthost_bulk_flow_count++;
-
+ cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode);
}
if (q->buffer_used > q->buffer_max_used)
@@ -1935,14 +1974,12 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch)
{
struct cake_sched_data *q = qdisc_priv(sch);
struct cake_tin_data *b = &q->tins[q->cur_tin];
- struct cake_host *srchost, *dsthost;
enum skb_drop_reason reason;
ktime_t now = ktime_get();
struct cake_flow *flow;
struct list_head *head;
bool first_flow = true;
struct sk_buff *skb;
- u16 host_load;
u64 delay;
u32 len;
@@ -2042,11 +2079,6 @@ retry:
q->cur_flow = flow - b->flows;
first_flow = false;
- /* triple isolation (modified DRR++) */
- srchost = &b->hosts[flow->srchost];
- dsthost = &b->hosts[flow->dsthost];
- host_load = 1;
-
/* flow isolation (DRR++) */
if (flow->deficit <= 0) {
/* Keep all flows with deficits out of the sparse and decaying
@@ -2058,11 +2090,8 @@ retry:
b->sparse_flow_count--;
b->bulk_flow_count++;
- if (cake_dsrc(q->flow_mode))
- srchost->srchost_bulk_flow_count++;
-
- if (cake_ddst(q->flow_mode))
- dsthost->dsthost_bulk_flow_count++;
+ cake_inc_srchost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_inc_dsthost_bulk_flow_count(b, flow, q->flow_mode);
flow->set = CAKE_SET_BULK;
} else {
@@ -2074,19 +2103,7 @@ retry:
}
}
- if (cake_dsrc(q->flow_mode))
- host_load = max(host_load, srchost->srchost_bulk_flow_count);
-
- if (cake_ddst(q->flow_mode))
- host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
-
- WARN_ON(host_load > CAKE_QUEUES);
-
- /* The get_random_u16() is a way to apply dithering to avoid
- * accumulating roundoff errors
- */
- flow->deficit += (b->flow_quantum * quantum_div[host_load] +
- get_random_u16()) >> 16;
+ flow->deficit += cake_get_flow_quantum(b, flow, q->flow_mode);
list_move_tail(&flow->flowchain, &b->old_flows);
goto retry;
@@ -2110,11 +2127,8 @@ retry:
if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- if (cake_dsrc(q->flow_mode))
- srchost->srchost_bulk_flow_count--;
-
- if (cake_ddst(q->flow_mode))
- dsthost->dsthost_bulk_flow_count--;
+ cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode);
b->decaying_flow_count++;
} else if (flow->set == CAKE_SET_SPARSE ||
@@ -2132,12 +2146,8 @@ retry:
else if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- if (cake_dsrc(q->flow_mode))
- srchost->srchost_bulk_flow_count--;
-
- if (cake_ddst(q->flow_mode))
- dsthost->dsthost_bulk_flow_count--;
-
+ cake_dec_srchost_bulk_flow_count(b, flow, q->flow_mode);
+ cake_dec_dsthost_bulk_flow_count(b, flow, q->flow_mode);
} else
b->decaying_flow_count--;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8874ae668095..14ab2f4c190a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -551,25 +551,20 @@ static void dev_watchdog(struct timer_list *t)
netdev_put(dev, &dev->watchdog_dev_tracker);
}
-void __netdev_watchdog_up(struct net_device *dev)
-{
- if (dev->netdev_ops->ndo_tx_timeout) {
- if (dev->watchdog_timeo <= 0)
- dev->watchdog_timeo = 5*HZ;
- if (!mod_timer(&dev->watchdog_timer,
- round_jiffies(jiffies + dev->watchdog_timeo)))
- netdev_hold(dev, &dev->watchdog_dev_tracker,
- GFP_ATOMIC);
- }
-}
-EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
-
-static void dev_watchdog_up(struct net_device *dev)
+void netdev_watchdog_up(struct net_device *dev)
{
- __netdev_watchdog_up(dev);
+ if (!dev->netdev_ops->ndo_tx_timeout)
+ return;
+ if (dev->watchdog_timeo <= 0)
+ dev->watchdog_timeo = 5*HZ;
+ if (!mod_timer(&dev->watchdog_timer,
+ round_jiffies(jiffies + dev->watchdog_timeo)))
+ netdev_hold(dev, &dev->watchdog_dev_tracker,
+ GFP_ATOMIC);
}
+EXPORT_SYMBOL_GPL(netdev_watchdog_up);
-static void dev_watchdog_down(struct net_device *dev)
+static void netdev_watchdog_down(struct net_device *dev)
{
netif_tx_lock_bh(dev);
if (del_timer(&dev->watchdog_timer))
@@ -591,7 +586,7 @@ void netif_carrier_on(struct net_device *dev)
atomic_inc(&dev->carrier_up_count);
linkwatch_fire_event(dev);
if (netif_running(dev))
- __netdev_watchdog_up(dev);
+ netdev_watchdog_up(dev);
}
}
EXPORT_SYMBOL(netif_carrier_on);
@@ -1267,7 +1262,7 @@ void dev_activate(struct net_device *dev)
if (need_watchdog) {
netif_trans_update(dev);
- dev_watchdog_up(dev);
+ netdev_watchdog_up(dev);
}
}
EXPORT_SYMBOL(dev_activate);
@@ -1282,15 +1277,17 @@ static void qdisc_deactivate(struct Qdisc *qdisc)
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
- void *_qdisc_default)
+ void *_sync_needed)
{
- struct Qdisc *qdisc_default = _qdisc_default;
+ bool *sync_needed = _sync_needed;
struct Qdisc *qdisc;
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
+ if (qdisc->enqueue)
+ *sync_needed = true;
qdisc_deactivate(qdisc);
- rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
}
}
@@ -1357,24 +1354,22 @@ static bool some_qdisc_is_busy(struct net_device *dev)
*/
void dev_deactivate_many(struct list_head *head)
{
+ bool sync_needed = false;
struct net_device *dev;
list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_deactivate_queue,
- &noop_qdisc);
+ &sync_needed);
if (dev_ingress_queue(dev))
dev_deactivate_queue(dev, dev_ingress_queue(dev),
- &noop_qdisc);
+ &sync_needed);
- dev_watchdog_down(dev);
+ netdev_watchdog_down(dev);
}
- /* Wait for outstanding qdisc-less dev_queue_xmit calls or
- * outstanding qdisc enqueuing calls.
- * This is avoided if all devices are in dismantle phase :
- * Caller will call synchronize_net() for us
- */
- synchronize_net();
+ /* Wait for outstanding qdisc enqueuing calls. */
+ if (sync_needed)
+ synchronize_net();
list_for_each_entry(dev, head, close_list) {
netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 8b9a1b96695e..29727ed1008e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -43,6 +43,7 @@
#include <net/addrconf.h>
#include <net/inet_common.h>
#include <net/inet_ecn.h>
+#include <net/inet_sock.h>
#include <net/udp_tunnel.h>
#include <net/inet_dscp.h>
@@ -427,16 +428,19 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct dst_entry *dst = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
- u8 tos = READ_ONCE(inet_sk(sk)->tos);
+ dscp_t dscp;
if (t->dscp & SCTP_DSCP_SET_MASK)
- tos = t->dscp & SCTP_DSCP_VAL_MASK;
+ dscp = inet_dsfield_to_dscp(t->dscp);
+ else
+ dscp = inet_sk_dscp(inet_sk(sk));
+
memset(&_fl, 0x0, sizeof(_fl));
fl4->daddr = daddr->v4.sin_addr.s_addr;
fl4->fl4_dport = daddr->v4.sin_port;
fl4->flowi4_proto = IPPROTO_SCTP;
if (asoc) {
- fl4->flowi4_tos = tos & INET_DSCP_MASK;
+ fl4->flowi4_tos = inet_dscp_to_dsfield(dscp);
fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk);
fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if;
fl4->fl4_sport = htons(asoc->base.bind_addr.port);
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index e5a5af343c4c..8e1e97be4df7 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -387,7 +387,8 @@ static struct ctl_table sctp_net_table[] = {
static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct net *net = container_of(ctl->data, struct net,
+ sctp.sctp_hmac_alg);
struct ctl_table tbl;
bool changed = false;
char *none = "none";
@@ -432,7 +433,7 @@ static int proc_sctp_do_hmac_alg(const struct ctl_table *ctl, int write,
static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct net *net = container_of(ctl->data, struct net, sctp.rto_min);
unsigned int min = *(unsigned int *) ctl->extra1;
unsigned int max = *(unsigned int *) ctl->extra2;
struct ctl_table tbl;
@@ -460,7 +461,7 @@ static int proc_sctp_do_rto_min(const struct ctl_table *ctl, int write,
static int proc_sctp_do_rto_max(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct net *net = container_of(ctl->data, struct net, sctp.rto_max);
unsigned int min = *(unsigned int *) ctl->extra1;
unsigned int max = *(unsigned int *) ctl->extra2;
struct ctl_table tbl;
@@ -498,7 +499,7 @@ static int proc_sctp_do_alpha_beta(const struct ctl_table *ctl, int write,
static int proc_sctp_do_auth(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct net *net = container_of(ctl->data, struct net, sctp.auth_enable);
struct ctl_table tbl;
int new_value, ret;
@@ -527,7 +528,7 @@ static int proc_sctp_do_auth(const struct ctl_table *ctl, int write,
static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct net *net = container_of(ctl->data, struct net, sctp.udp_port);
unsigned int min = *(unsigned int *)ctl->extra1;
unsigned int max = *(unsigned int *)ctl->extra2;
struct ctl_table tbl;
@@ -568,7 +569,8 @@ static int proc_sctp_do_udp_port(const struct ctl_table *ctl, int write,
static int proc_sctp_do_probe_interval(const struct ctl_table *ctl, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct net *net = container_of(ctl->data, struct net,
+ sctp.probe_interval);
struct ctl_table tbl;
int ret, new_value;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index c370efcfe3e8..ca6984541edb 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -2741,7 +2741,7 @@ int smc_accept(struct socket *sock, struct socket *new_sock,
release_sock(clcsk);
} else if (!atomic_read(&smc_sk(nsk)->conn.bytes_to_rcv)) {
lock_sock(nsk);
- smc_rx_wait(smc_sk(nsk), &timeo, smc_rx_data_available);
+ smc_rx_wait(smc_sk(nsk), &timeo, 0, smc_rx_data_available);
release_sock(nsk);
}
}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index ccf57b7fe602..ac07b963aede 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2155,7 +2155,7 @@ static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) {
size = min_t(int, PAGE_SIZE - offset, buf_size);
sg_set_page(sg, vmalloc_to_page(buf), size, offset);
- buf += size / sizeof(*buf);
+ buf += size;
buf_size -= size;
offset = 0;
}
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index f0cbe77a80b4..e7f1134453ef 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -197,7 +197,7 @@ static int smc_rx_splice(struct pipe_inode_info *pipe, char *src, size_t len,
partial[i].offset = offset;
partial[i].len = size;
partial[i].private = (unsigned long)priv[i];
- buf += size / sizeof(*buf);
+ buf += size;
left -= size;
offset = 0;
}
@@ -238,22 +238,23 @@ out:
return -ENOMEM;
}
-static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn)
+static int smc_rx_data_available_and_no_splice_pend(struct smc_connection *conn, size_t peeked)
{
- return atomic_read(&conn->bytes_to_rcv) &&
+ return smc_rx_data_available(conn, peeked) &&
!atomic_read(&conn->splice_pending);
}
/* blocks rcvbuf consumer until >=len bytes available or timeout or interrupted
* @smc smc socket
* @timeo pointer to max seconds to wait, pointer to value 0 for no timeout
+ * @peeked number of bytes already peeked
* @fcrit add'l criterion to evaluate as function pointer
* Returns:
* 1 if at least 1 byte available in rcvbuf or if socket error/shutdown.
* 0 otherwise (nothing in rcvbuf nor timeout, e.g. interrupted).
*/
-int smc_rx_wait(struct smc_sock *smc, long *timeo,
- int (*fcrit)(struct smc_connection *conn))
+int smc_rx_wait(struct smc_sock *smc, long *timeo, size_t peeked,
+ int (*fcrit)(struct smc_connection *conn, size_t baseline))
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
struct smc_connection *conn = &smc->conn;
@@ -262,7 +263,7 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
struct sock *sk = &smc->sk;
int rc;
- if (fcrit(conn))
+ if (fcrit(conn, peeked))
return 1;
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
add_wait_queue(sk_sleep(sk), &wait);
@@ -271,7 +272,7 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
cflags->peer_conn_abort ||
READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN ||
conn->killed ||
- fcrit(conn),
+ fcrit(conn, peeked),
&wait);
remove_wait_queue(sk_sleep(sk), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
@@ -322,11 +323,11 @@ static int smc_rx_recv_urg(struct smc_sock *smc, struct msghdr *msg, int len,
return -EAGAIN;
}
-static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
+static bool smc_rx_recvmsg_data_available(struct smc_sock *smc, size_t peeked)
{
struct smc_connection *conn = &smc->conn;
- if (smc_rx_data_available(conn))
+ if (smc_rx_data_available(conn, peeked))
return true;
else if (conn->urg_state == SMC_URG_VALID)
/* we received a single urgent Byte - skip */
@@ -344,10 +345,10 @@ static bool smc_rx_recvmsg_data_available(struct smc_sock *smc)
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
struct pipe_inode_info *pipe, size_t len, int flags)
{
- size_t copylen, read_done = 0, read_remaining = len;
+ size_t copylen, read_done = 0, read_remaining = len, peeked_bytes = 0;
size_t chunk_len, chunk_off, chunk_len_sum;
struct smc_connection *conn = &smc->conn;
- int (*func)(struct smc_connection *conn);
+ int (*func)(struct smc_connection *conn, size_t baseline);
union smc_host_cursor cons;
int readable, chunk;
char *rcvbuf_base;
@@ -384,14 +385,14 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
if (conn->killed)
break;
- if (smc_rx_recvmsg_data_available(smc))
+ if (smc_rx_recvmsg_data_available(smc, peeked_bytes))
goto copy;
if (sk->sk_shutdown & RCV_SHUTDOWN) {
/* smc_cdc_msg_recv_action() could have run after
* above smc_rx_recvmsg_data_available()
*/
- if (smc_rx_recvmsg_data_available(smc))
+ if (smc_rx_recvmsg_data_available(smc, peeked_bytes))
goto copy;
break;
}
@@ -425,26 +426,28 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
}
}
- if (!smc_rx_data_available(conn)) {
- smc_rx_wait(smc, &timeo, smc_rx_data_available);
+ if (!smc_rx_data_available(conn, peeked_bytes)) {
+ smc_rx_wait(smc, &timeo, peeked_bytes, smc_rx_data_available);
continue;
}
copy:
/* initialize variables for 1st iteration of subsequent loop */
/* could be just 1 byte, even after waiting on data above */
- readable = atomic_read(&conn->bytes_to_rcv);
+ readable = smc_rx_data_available(conn, peeked_bytes);
splbytes = atomic_read(&conn->splice_pending);
if (!readable || (msg && splbytes)) {
if (splbytes)
func = smc_rx_data_available_and_no_splice_pend;
else
func = smc_rx_data_available;
- smc_rx_wait(smc, &timeo, func);
+ smc_rx_wait(smc, &timeo, peeked_bytes, func);
continue;
}
smc_curs_copy(&cons, &conn->local_tx_ctrl.cons, conn);
+ if ((flags & MSG_PEEK) && peeked_bytes)
+ smc_curs_add(conn->rmb_desc->len, &cons, peeked_bytes);
/* subsequent splice() calls pick up where previous left */
if (splbytes)
smc_curs_add(conn->rmb_desc->len, &cons, splbytes);
@@ -480,6 +483,8 @@ copy:
}
read_remaining -= chunk_len;
read_done += chunk_len;
+ if (flags & MSG_PEEK)
+ peeked_bytes += chunk_len;
if (chunk_len_sum == copylen)
break; /* either on 1st or 2nd iteration */
diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h
index db823c97d824..994f5e42d1ba 100644
--- a/net/smc/smc_rx.h
+++ b/net/smc/smc_rx.h
@@ -21,11 +21,11 @@ void smc_rx_init(struct smc_sock *smc);
int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
struct pipe_inode_info *pipe, size_t len, int flags);
-int smc_rx_wait(struct smc_sock *smc, long *timeo,
- int (*fcrit)(struct smc_connection *conn));
-static inline int smc_rx_data_available(struct smc_connection *conn)
+int smc_rx_wait(struct smc_sock *smc, long *timeo, size_t peeked,
+ int (*fcrit)(struct smc_connection *conn, size_t baseline));
+static inline int smc_rx_data_available(struct smc_connection *conn, size_t peeked)
{
- return atomic_read(&conn->bytes_to_rcv);
+ return atomic_read(&conn->bytes_to_rcv) - peeked;
}
#endif /* SMC_RX_H */
diff --git a/net/socket.c b/net/socket.c
index 16402b8be5a7..4afe31656a2b 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -110,6 +110,8 @@
#include <linux/ptp_clock_kernel.h>
#include <trace/events/sock.h>
+#include "core/dev.h"
+
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 9ee5a83c5b40..99ca4465f702 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -737,6 +737,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval,
else
ctx->rx_conf = conf;
update_sk_prot(sk, ctx);
+
+ if (update)
+ return 0;
+
if (tx) {
ctx->sk_write_space = sk->sk_write_space;
sk->sk_write_space = tls_write_space;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 47550d485819..914d4e1516a3 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -458,7 +458,7 @@ int tls_tx_records(struct sock *sk, int flags)
tx_err:
if (rc < 0 && rc != -EAGAIN)
- tls_err_abort(sk, -EBADMSG);
+ tls_err_abort(sk, rc);
return rc;
}
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index d7b16f2c23e9..f0157702718f 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -135,6 +135,22 @@ config NET_KEY_MIGRATE
If unsure, say N.
+config XFRM_IPTFS
+ tristate "IPsec IP-TFS/AGGFRAG (RFC 9347) encapsulation support"
+ depends on XFRM
+ help
+ Information on the IP-TFS/AGGFRAG encapsulation can be found
+ in RFC 9347. This feature supports demand driven (i.e.,
+ non-constant send rate) IP-TFS to take advantage of the
+ AGGFRAG ESP payload encapsulation. This payload type
+ supports aggregation and fragmentation of the inner IP
+ packet stream which in turn yields higher small-packet
+ bandwidth as well as reducing MTU/PMTU issues. Congestion
+ control is unimplementated as the send rate is demand driven
+ rather than constant.
+
+ If unsure, say N.
+
config XFRM_ESPINTCP
bool
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 512e0b2f8514..5a1787587cb3 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -21,5 +21,6 @@ obj-$(CONFIG_XFRM_USER) += xfrm_user.o
obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
+obj-$(CONFIG_XFRM_IPTFS) += xfrm_iptfs.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
diff --git a/net/xfrm/trace_iptfs.h b/net/xfrm/trace_iptfs.h
new file mode 100644
index 000000000000..74391ba24445
--- /dev/null
+++ b/net/xfrm/trace_iptfs.h
@@ -0,0 +1,218 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* xfrm_trace_iptfs.h
+ *
+ * August 12 2023, Christian Hopps <chopps@labn.net>
+ *
+ * Copyright (c) 2023, LabN Consulting, L.L.C.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM iptfs
+
+#if !defined(_TRACE_IPTFS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IPTFS_H
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/tracepoint.h>
+#include <net/ip.h>
+
+struct xfrm_iptfs_data;
+
+TRACE_EVENT(iptfs_egress_recv,
+ TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, u16 blkoff),
+ TP_ARGS(skb, xtfs, blkoff),
+ TP_STRUCT__entry(__field(struct sk_buff *, skb)
+ __field(void *, head)
+ __field(void *, head_pg_addr)
+ __field(void *, pg0addr)
+ __field(u32, skb_len)
+ __field(u32, data_len)
+ __field(u32, headroom)
+ __field(u32, tailroom)
+ __field(u32, tail)
+ __field(u32, end)
+ __field(u32, pg0off)
+ __field(u8, head_frag)
+ __field(u8, frag_list)
+ __field(u8, nr_frags)
+ __field(u16, blkoff)),
+ TP_fast_assign(__entry->skb = skb;
+ __entry->head = skb->head;
+ __entry->skb_len = skb->len;
+ __entry->data_len = skb->data_len;
+ __entry->headroom = skb_headroom(skb);
+ __entry->tailroom = skb_tailroom(skb);
+ __entry->tail = (u32)skb->tail;
+ __entry->end = (u32)skb->end;
+ __entry->head_frag = skb->head_frag;
+ __entry->frag_list = (bool)skb_shinfo(skb)->frag_list;
+ __entry->nr_frags = skb_shinfo(skb)->nr_frags;
+ __entry->blkoff = blkoff;
+ __entry->head_pg_addr = page_address(virt_to_head_page(skb->head));
+ __entry->pg0addr = (__entry->nr_frags
+ ? page_address(netmem_to_page(skb_shinfo(skb)->frags[0].netmem))
+ : NULL);
+ __entry->pg0off = (__entry->nr_frags
+ ? skb_shinfo(skb)->frags[0].offset
+ : 0);
+ ),
+ TP_printk("EGRESS: skb=%p len=%u data_len=%u headroom=%u head_frag=%u frag_list=%u nr_frags=%u blkoff=%u\n\t\ttailroom=%u tail=%u end=%u head=%p hdpgaddr=%p pg0->addr=%p pg0->data=%p pg0->off=%u",
+ __entry->skb, __entry->skb_len, __entry->data_len, __entry->headroom,
+ __entry->head_frag, __entry->frag_list, __entry->nr_frags, __entry->blkoff,
+ __entry->tailroom, __entry->tail, __entry->end, __entry->head,
+ __entry->head_pg_addr, __entry->pg0addr, __entry->pg0addr + __entry->pg0off,
+ __entry->pg0off)
+ )
+
+DECLARE_EVENT_CLASS(iptfs_ingress_preq_event,
+ TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs,
+ u32 pmtu, u8 was_gso),
+ TP_ARGS(skb, xtfs, pmtu, was_gso),
+ TP_STRUCT__entry(__field(struct sk_buff *, skb)
+ __field(u32, skb_len)
+ __field(u32, data_len)
+ __field(u32, pmtu)
+ __field(u32, queue_size)
+ __field(u32, proto_seq)
+ __field(u8, proto)
+ __field(u8, was_gso)
+ ),
+ TP_fast_assign(__entry->skb = skb;
+ __entry->skb_len = skb->len;
+ __entry->data_len = skb->data_len;
+ __entry->queue_size =
+ xtfs->cfg.max_queue_size - xtfs->queue_size;
+ __entry->proto = __trace_ip_proto(ip_hdr(skb));
+ __entry->proto_seq = __trace_ip_proto_seq(ip_hdr(skb));
+ __entry->pmtu = pmtu;
+ __entry->was_gso = was_gso;
+ ),
+ TP_printk("INGRPREQ: skb=%p len=%u data_len=%u qsize=%u proto=%u proto_seq=%u pmtu=%u was_gso=%u",
+ __entry->skb, __entry->skb_len, __entry->data_len,
+ __entry->queue_size, __entry->proto, __entry->proto_seq,
+ __entry->pmtu, __entry->was_gso));
+
+DEFINE_EVENT(iptfs_ingress_preq_event, iptfs_enqueue,
+ TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, u32 pmtu, u8 was_gso),
+ TP_ARGS(skb, xtfs, pmtu, was_gso));
+
+DEFINE_EVENT(iptfs_ingress_preq_event, iptfs_no_queue_space,
+ TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, u32 pmtu, u8 was_gso),
+ TP_ARGS(skb, xtfs, pmtu, was_gso));
+
+DEFINE_EVENT(iptfs_ingress_preq_event, iptfs_too_big,
+ TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, u32 pmtu, u8 was_gso),
+ TP_ARGS(skb, xtfs, pmtu, was_gso));
+
+DECLARE_EVENT_CLASS(iptfs_ingress_postq_event,
+ TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff, struct iphdr *iph),
+ TP_ARGS(skb, mtu, blkoff, iph),
+ TP_STRUCT__entry(__field(struct sk_buff *, skb)
+ __field(u32, skb_len)
+ __field(u32, data_len)
+ __field(u32, mtu)
+ __field(u32, proto_seq)
+ __field(u16, blkoff)
+ __field(u8, proto)),
+ TP_fast_assign(__entry->skb = skb;
+ __entry->skb_len = skb->len;
+ __entry->data_len = skb->data_len;
+ __entry->mtu = mtu;
+ __entry->blkoff = blkoff;
+ __entry->proto = iph ? __trace_ip_proto(iph) : 0;
+ __entry->proto_seq = iph ? __trace_ip_proto_seq(iph) : 0;
+ ),
+ TP_printk("INGRPSTQ: skb=%p len=%u data_len=%u mtu=%u blkoff=%u proto=%u proto_seq=%u",
+ __entry->skb, __entry->skb_len, __entry->data_len, __entry->mtu,
+ __entry->blkoff, __entry->proto, __entry->proto_seq));
+
+DEFINE_EVENT(iptfs_ingress_postq_event, iptfs_first_dequeue,
+ TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff,
+ struct iphdr *iph),
+ TP_ARGS(skb, mtu, blkoff, iph));
+
+DEFINE_EVENT(iptfs_ingress_postq_event, iptfs_first_fragmenting,
+ TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff,
+ struct iphdr *iph),
+ TP_ARGS(skb, mtu, blkoff, iph));
+
+DEFINE_EVENT(iptfs_ingress_postq_event, iptfs_first_final_fragment,
+ TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff,
+ struct iphdr *iph),
+ TP_ARGS(skb, mtu, blkoff, iph));
+
+DEFINE_EVENT(iptfs_ingress_postq_event, iptfs_first_toobig,
+ TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff,
+ struct iphdr *iph),
+ TP_ARGS(skb, mtu, blkoff, iph));
+
+TRACE_EVENT(iptfs_ingress_nth_peek,
+ TP_PROTO(struct sk_buff *skb, u32 remaining),
+ TP_ARGS(skb, remaining),
+ TP_STRUCT__entry(__field(struct sk_buff *, skb)
+ __field(u32, skb_len)
+ __field(u32, remaining)),
+ TP_fast_assign(__entry->skb = skb;
+ __entry->skb_len = skb->len;
+ __entry->remaining = remaining;
+ ),
+ TP_printk("INGRPSTQ: NTHPEEK: skb=%p len=%u remaining=%u",
+ __entry->skb, __entry->skb_len, __entry->remaining));
+
+TRACE_EVENT(iptfs_ingress_nth_add, TP_PROTO(struct sk_buff *skb, u8 share_ok),
+ TP_ARGS(skb, share_ok),
+ TP_STRUCT__entry(__field(struct sk_buff *, skb)
+ __field(u32, skb_len)
+ __field(u32, data_len)
+ __field(u8, share_ok)
+ __field(u8, head_frag)
+ __field(u8, pp_recycle)
+ __field(u8, cloned)
+ __field(u8, shared)
+ __field(u8, nr_frags)
+ __field(u8, frag_list)
+ ),
+ TP_fast_assign(__entry->skb = skb;
+ __entry->skb_len = skb->len;
+ __entry->data_len = skb->data_len;
+ __entry->share_ok = share_ok;
+ __entry->head_frag = skb->head_frag;
+ __entry->pp_recycle = skb->pp_recycle;
+ __entry->cloned = skb_cloned(skb);
+ __entry->shared = skb_shared(skb);
+ __entry->nr_frags = skb_shinfo(skb)->nr_frags;
+ __entry->frag_list = (bool)skb_shinfo(skb)->frag_list;
+ ),
+ TP_printk("INGRPSTQ: NTHADD: skb=%p len=%u data_len=%u share_ok=%u head_frag=%u pp_recycle=%u cloned=%u shared=%u nr_frags=%u frag_list=%u",
+ __entry->skb, __entry->skb_len, __entry->data_len, __entry->share_ok,
+ __entry->head_frag, __entry->pp_recycle, __entry->cloned, __entry->shared,
+ __entry->nr_frags, __entry->frag_list));
+
+DECLARE_EVENT_CLASS(iptfs_timer_event,
+ TP_PROTO(struct xfrm_iptfs_data *xtfs, u64 time_val),
+ TP_ARGS(xtfs, time_val),
+ TP_STRUCT__entry(__field(u64, time_val)
+ __field(u64, set_time)),
+ TP_fast_assign(__entry->time_val = time_val;
+ __entry->set_time = xtfs->iptfs_settime;
+ ),
+ TP_printk("TIMER: set_time=%llu time_val=%llu",
+ __entry->set_time, __entry->time_val));
+
+DEFINE_EVENT(iptfs_timer_event, iptfs_timer_start,
+ TP_PROTO(struct xfrm_iptfs_data *xtfs, u64 time_val),
+ TP_ARGS(xtfs, time_val));
+
+DEFINE_EVENT(iptfs_timer_event, iptfs_timer_expire,
+ TP_PROTO(struct xfrm_iptfs_data *xtfs, u64 time_val),
+ TP_ARGS(xtfs, time_val));
+
+#endif /* _TRACE_IPTFS_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../net/xfrm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace_iptfs
+#include <trace/define_trace.h>
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 5b9ee63e30b6..b8d2e6930041 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -284,9 +284,15 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src)
case XFRMA_SA_DIR:
case XFRMA_NAT_KEEPALIVE_INTERVAL:
case XFRMA_SA_PCPU:
+ case XFRMA_IPTFS_DROP_TIME:
+ case XFRMA_IPTFS_REORDER_WINDOW:
+ case XFRMA_IPTFS_DONT_FRAG:
+ case XFRMA_IPTFS_INIT_DELAY:
+ case XFRMA_IPTFS_MAX_QSIZE:
+ case XFRMA_IPTFS_PKT_SIZE:
return xfrm_nla_cpy(dst, src, nla_len(src));
default:
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE);
pr_warn_once("unsupported nla_type %d\n", src->nla_type);
return -EOPNOTSUPP;
}
@@ -441,7 +447,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
int err;
if (type > XFRMA_MAX) {
- BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU);
+ BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE);
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index b33c4591e09a..d1fa94e52cea 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -42,7 +42,8 @@ static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb,
skb->transport_header = skb->network_header + hsize;
skb_reset_mac_len(skb);
- pskb_pull(skb, skb->mac_len + x->props.header_len);
+ pskb_pull(skb,
+ skb->mac_len + x->props.header_len - x->props.enc_hdr_len);
}
static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb,
@@ -68,6 +69,7 @@ static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb,
static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb)
{
switch (x->outer_mode.encap) {
+ case XFRM_MODE_IPTFS:
case XFRM_MODE_TUNNEL:
if (x->outer_mode.family == AF_INET)
return __xfrm_mode_tunnel_prep(x, skb,
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 841a60a6fbfe..7e6a71b9d6a3 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -48,7 +48,7 @@ static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
static struct xfrm_input_afinfo const __rcu *xfrm_input_afinfo[2][AF_INET6 + 1];
static struct gro_cells gro_cells;
-static struct net_device xfrm_napi_dev;
+static struct net_device *xfrm_napi_dev;
static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
@@ -446,6 +446,9 @@ static int xfrm_inner_mode_input(struct xfrm_state *x,
WARN_ON_ONCE(1);
break;
default:
+ if (x->mode_cbs && x->mode_cbs->input)
+ return x->mode_cbs->input(x, skb);
+
WARN_ON_ONCE(1);
break;
}
@@ -453,6 +456,10 @@ static int xfrm_inner_mode_input(struct xfrm_state *x,
return -EOPNOTSUPP;
}
+/* NOTE: encap_type - In addition to the normal (non-negative) values for
+ * encap_type, a negative value of -1 or -2 can be used to resume/restart this
+ * function after a previous invocation early terminated for async operation.
+ */
int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
{
const struct xfrm_state_afinfo *afinfo;
@@ -489,6 +496,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
family = x->props.family;
+ /* An encap_type of -2 indicates reconstructed inner packet */
+ if (encap_type == -2)
+ goto resume_decapped;
+
/* An encap_type of -1 indicates async resumption. */
if (encap_type == -1) {
async = 1;
@@ -679,11 +690,14 @@ resume:
XFRM_MODE_SKB_CB(skb)->protocol = nexthdr;
- if (xfrm_inner_mode_input(x, skb)) {
+ err = xfrm_inner_mode_input(x, skb);
+ if (err == -EINPROGRESS)
+ return 0;
+ else if (err) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
goto drop;
}
-
+resume_decapped:
if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) {
decaps = 1;
break;
@@ -811,8 +825,11 @@ void __init xfrm_input_init(void)
int err;
int i;
- init_dummy_netdev(&xfrm_napi_dev);
- err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
+ xfrm_napi_dev = alloc_netdev_dummy(0);
+ if (!xfrm_napi_dev)
+ panic("Failed to allocate XFRM dummy netdev\n");
+
+ err = gro_cells_init(&gro_cells, xfrm_napi_dev);
if (err)
gro_cells.cells = NULL;
diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c
new file mode 100644
index 000000000000..755f1eea8bfa
--- /dev/null
+++ b/net/xfrm/xfrm_iptfs.c
@@ -0,0 +1,2764 @@
+// SPDX-License-Identifier: GPL-2.0
+/* xfrm_iptfs: IPTFS encapsulation support
+ *
+ * April 21 2022, Christian Hopps <chopps@labn.net>
+ *
+ * Copyright (c) 2022, LabN Consulting, L.L.C.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/icmpv6.h>
+#include <linux/skbuff_ref.h>
+#include <net/gro.h>
+#include <net/icmp.h>
+#include <net/ip6_route.h>
+#include <net/inet_ecn.h>
+#include <net/xfrm.h>
+
+#include <crypto/aead.h>
+
+#include "xfrm_inout.h"
+#include "trace_iptfs.h"
+
+/* IPTFS encap (header) values. */
+#define IPTFS_SUBTYPE_BASIC 0
+#define IPTFS_SUBTYPE_CC 1
+
+/* ----------------------------------------------- */
+/* IP-TFS default SA values (tunnel egress/dir-in) */
+/* ----------------------------------------------- */
+
+/**
+ * define IPTFS_DEFAULT_DROP_TIME_USECS - default drop time
+ *
+ * The default IPTFS drop time in microseconds. The drop time is the amount of
+ * time before a missing out-of-order IPTFS tunnel packet is considered lost.
+ * See also the reorder window.
+ *
+ * Default 1s.
+ */
+#define IPTFS_DEFAULT_DROP_TIME_USECS 1000000
+
+/**
+ * define IPTFS_DEFAULT_REORDER_WINDOW - default reorder window size
+ *
+ * The default IPTFS reorder window size. The reorder window size dictates the
+ * maximum number of IPTFS tunnel packets in a sequence that may arrive out of
+ * order.
+ *
+ * Default 3. (tcp folks suggested)
+ */
+#define IPTFS_DEFAULT_REORDER_WINDOW 3
+
+/* ------------------------------------------------ */
+/* IPTFS default SA values (tunnel ingress/dir-out) */
+/* ------------------------------------------------ */
+
+/**
+ * define IPTFS_DEFAULT_INIT_DELAY_USECS - default initial output delay
+ *
+ * The initial output delay is the amount of time prior to servicing the output
+ * queue after queueing the first packet on said queue. This applies anytime the
+ * output queue was previously empty.
+ *
+ * Default 0.
+ */
+#define IPTFS_DEFAULT_INIT_DELAY_USECS 0
+
+/**
+ * define IPTFS_DEFAULT_MAX_QUEUE_SIZE - default max output queue size.
+ *
+ * The default IPTFS max output queue size in octets. The output queue is where
+ * received packets destined for output over an IPTFS tunnel are stored prior to
+ * being output in aggregated/fragmented form over the IPTFS tunnel.
+ *
+ * Default 1M.
+ */
+#define IPTFS_DEFAULT_MAX_QUEUE_SIZE (1024 * 10240)
+
+/* Assumed: skb->head is cache aligned.
+ *
+ * L2 Header resv: Arrange for cacheline to start at skb->data - 16 to keep the
+ * to-be-pushed L2 header in the same cacheline as resulting `skb->data` (i.e.,
+ * the L3 header). If cacheline size is > 64 then skb->data + pushed L2 will all
+ * be in a single cacheline if we simply reserve 64 bytes.
+ *
+ * L3 Header resv: For L3+L2 headers (i.e., skb->data points at the IPTFS payload)
+ * we want `skb->data` to be cacheline aligned and all pushed L2L3 headers will
+ * be in their own cacheline[s]. 128 works for cachelins up to 128 bytes, for
+ * any larger cacheline sizes the pushed headers will simply share the cacheline
+ * with the start of the IPTFS payload (skb->data).
+ */
+#define XFRM_IPTFS_MIN_L3HEADROOM 128
+#define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16)
+
+/* Min to try to share outer iptfs skb data vs copying into new skb */
+#define IPTFS_PKT_SHARE_MIN 129
+
+#define NSECS_IN_USEC 1000
+
+#define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT
+
+/**
+ * struct xfrm_iptfs_config - configuration for the IPTFS tunnel.
+ * @pkt_size: size of the outer IP packet. 0 to use interface and MTU discovery,
+ * otherwise the user specified value.
+ * @max_queue_size: The maximum number of octets allowed to be queued to be sent
+ * over the IPTFS SA. The queue size is measured as the size of all the
+ * packets enqueued.
+ * @reorder_win_size: the number slots in the reorder window, thus the number of
+ * packets that may arrive out of order.
+ * @dont_frag: true to inhibit fragmenting across IPTFS outer packets.
+ */
+struct xfrm_iptfs_config {
+ u32 pkt_size; /* outer_packet_size or 0 */
+ u32 max_queue_size; /* octets */
+ u16 reorder_win_size;
+ u8 dont_frag : 1;
+};
+
+struct skb_wseq {
+ struct sk_buff *skb;
+ u64 drop_time;
+};
+
+/**
+ * struct xfrm_iptfs_data - mode specific xfrm state.
+ * @cfg: IPTFS tunnel config.
+ * @x: owning SA (xfrm_state).
+ * @queue: queued user packets to send.
+ * @queue_size: number of octets on queue (sum of packet sizes).
+ * @ecn_queue_size: octets above with ECN mark.
+ * @init_delay_ns: nanoseconds to wait to send initial IPTFS packet.
+ * @iptfs_timer: output timer.
+ * @iptfs_settime: time the output timer was set.
+ * @payload_mtu: max payload size.
+ * @w_seq_set: true after first seq received.
+ * @w_wantseq: waiting for this seq number as next to process (in order).
+ * @w_saved: the saved buf array (reorder window).
+ * @w_savedlen: the saved len (not size).
+ * @drop_lock: lock to protect reorder queue.
+ * @drop_timer: timer for considering next packet lost.
+ * @drop_time_ns: timer intervan in nanoseconds.
+ * @ra_newskb: new pkt being reassembled.
+ * @ra_wantseq: expected next sequence for reassembly.
+ * @ra_runt: last pkt bytes from very end of last skb.
+ * @ra_runtlen: size of ra_runt.
+ */
+struct xfrm_iptfs_data {
+ struct xfrm_iptfs_config cfg;
+
+ /* Ingress User Input */
+ struct xfrm_state *x; /* owning state */
+ struct sk_buff_head queue; /* output queue */
+
+ u32 queue_size; /* octets */
+ u32 ecn_queue_size; /* octets above which ECN mark */
+ u64 init_delay_ns; /* nanoseconds */
+ struct hrtimer iptfs_timer; /* output timer */
+ time64_t iptfs_settime; /* time timer was set */
+ u32 payload_mtu; /* max payload size */
+
+ /* Tunnel input reordering */
+ bool w_seq_set; /* true after first seq received */
+ u64 w_wantseq; /* expected next sequence */
+ struct skb_wseq *w_saved; /* the saved buf array */
+ u32 w_savedlen; /* the saved len (not size) */
+ spinlock_t drop_lock;
+ struct hrtimer drop_timer;
+ u64 drop_time_ns;
+
+ /* Tunnel input reassembly */
+ struct sk_buff *ra_newskb; /* new pkt being reassembled */
+ u64 ra_wantseq; /* expected next sequence */
+ u8 ra_runt[6]; /* last pkt bytes from last skb */
+ u8 ra_runtlen; /* count of ra_runt */
+};
+
+static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu);
+static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me);
+static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me);
+
+/* ================= */
+/* Utility Functions */
+/* ================= */
+
+#ifdef TRACEPOINTS_ENABLED
+static u32 __trace_ip_proto(struct iphdr *iph)
+{
+ if (iph->version == 4)
+ return iph->protocol;
+ return ((struct ipv6hdr *)iph)->nexthdr;
+}
+
+static u32 __trace_ip_proto_seq(struct iphdr *iph)
+{
+ void *nexthdr;
+ u32 protocol = 0;
+
+ if (iph->version == 4) {
+ nexthdr = (void *)(iph + 1);
+ protocol = iph->protocol;
+ } else if (iph->version == 6) {
+ nexthdr = (void *)(((struct ipv6hdr *)(iph)) + 1);
+ protocol = ((struct ipv6hdr *)(iph))->nexthdr;
+ }
+ switch (protocol) {
+ case IPPROTO_ICMP:
+ return ntohs(((struct icmphdr *)nexthdr)->un.echo.sequence);
+ case IPPROTO_ICMPV6:
+ return ntohs(((struct icmp6hdr *)nexthdr)->icmp6_sequence);
+ case IPPROTO_TCP:
+ return ntohl(((struct tcphdr *)nexthdr)->seq);
+ case IPPROTO_UDP:
+ return ntohs(((struct udphdr *)nexthdr)->source);
+ default:
+ return 0;
+ }
+}
+#endif /*TRACEPOINTS_ENABLED*/
+
+static u64 __esp_seq(struct sk_buff *skb)
+{
+ u64 seq = ntohl(XFRM_SKB_CB(skb)->seq.input.low);
+
+ return seq | (u64)ntohl(XFRM_SKB_CB(skb)->seq.input.hi) << 32;
+}
+
+/* ======================= */
+/* IPTFS SK_BUFF Functions */
+/* ======================= */
+
+/**
+ * iptfs_alloc_skb() - Allocate a new `skb`.
+ * @tpl: the skb to copy required meta-data from.
+ * @len: the linear length of the head data, zero is fine.
+ * @l3resv: true if skb reserve needs to support pushing L3 headers
+ *
+ * A new `skb` is allocated and required meta-data is copied from `tpl`, the
+ * head data is sized to `len` + reserved space set according to the @l3resv
+ * boolean.
+ *
+ * When @l3resv is false, resv is XFRM_IPTFS_MIN_L2HEADROOM which arranges for
+ * `skb->data - 16` which is a good guess for good cache alignment (placing the
+ * to be pushed L2 header at the start of a cacheline.
+ *
+ * Otherwise, @l3resv is true and resv is set to the correct reserved space for
+ * dst->dev plus the calculated L3 overhead for the xfrm dst or
+ * XFRM_IPTFS_MIN_L3HEADROOM whichever is larger. This is then cache aligned so
+ * that all the headers will commonly fall in a cacheline when possible.
+ *
+ * l3resv=true is used on tunnel ingress (tx), because we need to reserve for
+ * the new IPTFS packet (i.e., L2+L3 headers). On tunnel egress (rx) the data
+ * being copied into the skb includes the user L3 headers already so we only
+ * need to reserve for L2.
+ *
+ * Return: the new skb or NULL.
+ */
+static struct sk_buff *iptfs_alloc_skb(struct sk_buff *tpl, u32 len, bool l3resv)
+{
+ struct sk_buff *skb;
+ u32 resv;
+
+ if (!l3resv) {
+ resv = XFRM_IPTFS_MIN_L2HEADROOM;
+ } else {
+ struct dst_entry *dst = skb_dst(tpl);
+
+ resv = LL_RESERVED_SPACE(dst->dev) + dst->header_len;
+ resv = max(resv, XFRM_IPTFS_MIN_L3HEADROOM);
+ resv = L1_CACHE_ALIGN(resv);
+ }
+
+ skb = alloc_skb(len + resv, GFP_ATOMIC | __GFP_NOWARN);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, resv);
+
+ if (!l3resv) {
+ /* xfrm_input resume needs dev and xfrm ext from tunnel pkt */
+ skb->dev = tpl->dev;
+ __skb_ext_copy(skb, tpl);
+ }
+
+ /* dropped by xfrm_input, used by xfrm_output */
+ skb_dst_copy(skb, tpl);
+
+ return skb;
+}
+
+/**
+ * iptfs_skb_head_to_frag() - initialize a skb_frag_t based on skb head data
+ * @skb: skb with the head data
+ * @frag: frag to initialize
+ */
+static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag)
+{
+ struct page *page = virt_to_head_page(skb->data);
+ unsigned char *addr = (unsigned char *)page_address(page);
+
+ skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb));
+}
+
+/**
+ * struct iptfs_skb_frag_walk - use to track a walk through fragments
+ * @fragi: current fragment index
+ * @past: length of data in fragments before @fragi
+ * @total: length of data in all fragments
+ * @nr_frags: number of fragments present in array
+ * @initial_offset: the value passed in to skb_prepare_frag_walk()
+ * @frags: the page fragments inc. room for head page
+ * @pp_recycle: copy of skb->pp_recycle
+ */
+struct iptfs_skb_frag_walk {
+ u32 fragi;
+ u32 past;
+ u32 total;
+ u32 nr_frags;
+ u32 initial_offset;
+ skb_frag_t frags[MAX_SKB_FRAGS + 1];
+ bool pp_recycle;
+};
+
+/**
+ * iptfs_skb_prepare_frag_walk() - initialize a frag walk over an skb.
+ * @skb: the skb to walk.
+ * @initial_offset: start the walk @initial_offset into the skb.
+ * @walk: the walk to initialize
+ *
+ * Future calls to skb_add_frags() will expect the @offset value to be at
+ * least @initial_offset large.
+ */
+static void iptfs_skb_prepare_frag_walk(struct sk_buff *skb, u32 initial_offset,
+ struct iptfs_skb_frag_walk *walk)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ skb_frag_t *frag, *from;
+ u32 i;
+
+ walk->initial_offset = initial_offset;
+ walk->fragi = 0;
+ walk->past = 0;
+ walk->total = 0;
+ walk->nr_frags = 0;
+ walk->pp_recycle = skb->pp_recycle;
+
+ if (skb->head_frag) {
+ if (initial_offset >= skb_headlen(skb)) {
+ initial_offset -= skb_headlen(skb);
+ } else {
+ frag = &walk->frags[walk->nr_frags++];
+ iptfs_skb_head_to_frag(skb, frag);
+ frag->offset += initial_offset;
+ frag->len -= initial_offset;
+ walk->total += frag->len;
+ initial_offset = 0;
+ }
+ } else {
+ initial_offset -= skb_headlen(skb);
+ }
+
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ from = &shinfo->frags[i];
+ if (initial_offset >= from->len) {
+ initial_offset -= from->len;
+ continue;
+ }
+ frag = &walk->frags[walk->nr_frags++];
+ *frag = *from;
+ if (initial_offset) {
+ frag->offset += initial_offset;
+ frag->len -= initial_offset;
+ initial_offset = 0;
+ }
+ walk->total += frag->len;
+ }
+}
+
+static u32 iptfs_skb_reset_frag_walk(struct iptfs_skb_frag_walk *walk,
+ u32 offset)
+{
+ /* Adjust offset to refer to internal walk values */
+ offset -= walk->initial_offset;
+
+ /* Get to the correct fragment for offset */
+ while (offset < walk->past) {
+ walk->past -= walk->frags[--walk->fragi].len;
+ if (offset >= walk->past)
+ break;
+ }
+ while (offset >= walk->past + walk->frags[walk->fragi].len)
+ walk->past += walk->frags[walk->fragi++].len;
+
+ /* offset now relative to this current frag */
+ offset -= walk->past;
+ return offset;
+}
+
+/**
+ * iptfs_skb_can_add_frags() - check if ok to add frags from walk to skb
+ * @skb: skb to check for adding frags to
+ * @walk: the walk that will be used as source for frags.
+ * @offset: offset from beginning of original skb to start from.
+ * @len: amount of data to add frag references to in @skb.
+ *
+ * Return: true if ok to add frags.
+ */
+static bool iptfs_skb_can_add_frags(const struct sk_buff *skb,
+ struct iptfs_skb_frag_walk *walk,
+ u32 offset, u32 len)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ u32 fragi, nr_frags, fraglen;
+
+ if (skb_has_frag_list(skb) || skb->pp_recycle != walk->pp_recycle)
+ return false;
+
+ /* Make offset relative to current frag after setting that */
+ offset = iptfs_skb_reset_frag_walk(walk, offset);
+
+ /* Verify we have array space for the fragments we need to add */
+ fragi = walk->fragi;
+ nr_frags = shinfo->nr_frags;
+ while (len && fragi < walk->nr_frags) {
+ skb_frag_t *frag = &walk->frags[fragi];
+
+ fraglen = frag->len;
+ if (offset) {
+ fraglen -= offset;
+ offset = 0;
+ }
+ if (++nr_frags > MAX_SKB_FRAGS)
+ return false;
+ if (len <= fraglen)
+ return true;
+ len -= fraglen;
+ fragi++;
+ }
+ /* We may not copy all @len but what we have will fit. */
+ return true;
+}
+
+/**
+ * iptfs_skb_add_frags() - add a range of fragment references into an skb
+ * @skb: skb to add references into
+ * @walk: the walk to add referenced fragments from.
+ * @offset: offset from beginning of original skb to start from.
+ * @len: amount of data to add frag references to in @skb.
+ *
+ * iptfs_skb_can_add_frags() should be called before this function to verify
+ * that the destination @skb is compatible with the walk and has space in the
+ * array for the to be added frag references.
+ *
+ * Return: The number of bytes not added to @skb b/c we reached the end of the
+ * walk before adding all of @len.
+ */
+static int iptfs_skb_add_frags(struct sk_buff *skb,
+ struct iptfs_skb_frag_walk *walk, u32 offset,
+ u32 len)
+{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
+ u32 fraglen;
+
+ if (!walk->nr_frags || offset >= walk->total + walk->initial_offset)
+ return len;
+
+ /* make offset relative to current frag after setting that */
+ offset = iptfs_skb_reset_frag_walk(walk, offset);
+
+ while (len && walk->fragi < walk->nr_frags) {
+ skb_frag_t *frag = &walk->frags[walk->fragi];
+ skb_frag_t *tofrag = &shinfo->frags[shinfo->nr_frags];
+
+ *tofrag = *frag;
+ if (offset) {
+ tofrag->offset += offset;
+ tofrag->len -= offset;
+ offset = 0;
+ }
+ __skb_frag_ref(tofrag);
+ shinfo->nr_frags++;
+
+ /* see if we are done */
+ fraglen = tofrag->len;
+ if (len < fraglen) {
+ tofrag->len = len;
+ skb->len += len;
+ skb->data_len += len;
+ return 0;
+ }
+ /* advance to next source fragment */
+ len -= fraglen; /* careful, use dst bv_len */
+ skb->len += fraglen; /* careful, " " " */
+ skb->data_len += fraglen; /* careful, " " " */
+ walk->past += frag->len; /* careful, use src bv_len */
+ walk->fragi++;
+ }
+ return len;
+}
+
+/* ================================== */
+/* IPTFS Trace Event Definitions */
+/* ================================== */
+
+#define CREATE_TRACE_POINTS
+#include "trace_iptfs.h"
+
+/* ================================== */
+/* IPTFS Receiving (egress) Functions */
+/* ================================== */
+
+/**
+ * iptfs_pskb_add_frags() - Create and add frags into a new sk_buff.
+ * @tpl: template to create new skb from.
+ * @walk: The source for fragments to add.
+ * @off: The offset into @walk to add frags from, also used with @st and
+ * @copy_len.
+ * @len: The length of data to add covering frags from @walk into @skb.
+ * This must be <= @skblen.
+ * @st: The sequence state to copy from into the new head skb.
+ * @copy_len: Copy @copy_len bytes from @st at offset @off into the new skb
+ * linear space.
+ *
+ * Create a new sk_buff `skb` using the template @tpl. Copy @copy_len bytes from
+ * @st into the new skb linear space, and then add shared fragments from the
+ * frag walk for the remaining @len of data (i.e., @len - @copy_len bytes).
+ *
+ * Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
+ */
+static struct sk_buff *
+iptfs_pskb_add_frags(struct sk_buff *tpl, struct iptfs_skb_frag_walk *walk,
+ u32 off, u32 len, struct skb_seq_state *st, u32 copy_len)
+{
+ struct sk_buff *skb;
+
+ skb = iptfs_alloc_skb(tpl, copy_len, false);
+ if (!skb)
+ return NULL;
+
+ /* this should not normally be happening */
+ if (!iptfs_skb_can_add_frags(skb, walk, off + copy_len,
+ len - copy_len)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ if (copy_len &&
+ skb_copy_seq_read(st, off, skb_put(skb, copy_len), copy_len)) {
+ XFRM_INC_STATS(dev_net(st->root_skb->dev),
+ LINUX_MIB_XFRMINERROR);
+ kfree_skb(skb);
+ return NULL;
+ }
+
+ iptfs_skb_add_frags(skb, walk, off + copy_len, len - copy_len);
+ return skb;
+}
+
+/**
+ * iptfs_pskb_extract_seq() - Create and load data into a new sk_buff.
+ * @skblen: the total data size for `skb`.
+ * @st: The source for the rest of the data to copy into `skb`.
+ * @off: The offset into @st to copy data from.
+ * @len: The length of data to copy from @st into `skb`. This must be <=
+ * @skblen.
+ *
+ * Create a new sk_buff `skb` with @skblen of packet data space. If non-zero,
+ * copy @rlen bytes of @runt into `skb`. Then using seq functions copy @len
+ * bytes from @st into `skb` starting from @off.
+ *
+ * It is an error for @len to be greater than the amount of data left in @st.
+ *
+ * Return: The newly allocated sk_buff `skb` or NULL if an error occurs.
+ */
+static struct sk_buff *
+iptfs_pskb_extract_seq(u32 skblen, struct skb_seq_state *st, u32 off, int len)
+{
+ struct sk_buff *skb = iptfs_alloc_skb(st->root_skb, skblen, false);
+
+ if (!skb)
+ return NULL;
+ if (skb_copy_seq_read(st, off, skb_put(skb, len), len)) {
+ XFRM_INC_STATS(dev_net(st->root_skb->dev), LINUX_MIB_XFRMINERROR);
+ kfree_skb(skb);
+ return NULL;
+ }
+ return skb;
+}
+
+/**
+ * iptfs_input_save_runt() - save data in xtfs runt space.
+ * @xtfs: xtfs state
+ * @seq: the current sequence
+ * @buf: packet data
+ * @len: length of packet data
+ *
+ * Save the small (`len`) start of a fragmented packet in `buf` in the xtfs data
+ * runt space.
+ */
+static void iptfs_input_save_runt(struct xfrm_iptfs_data *xtfs, u64 seq,
+ u8 *buf, int len)
+{
+ memcpy(xtfs->ra_runt, buf, len);
+
+ xtfs->ra_runtlen = len;
+ xtfs->ra_wantseq = seq + 1;
+}
+
+/**
+ * __iptfs_iphlen() - return the v4/v6 header length using packet data.
+ * @data: pointer at octet with version nibble
+ *
+ * The version data has been checked to be valid (i.e., either 4 or 6).
+ *
+ * Return: the IP header size based on the IP version.
+ */
+static u32 __iptfs_iphlen(u8 *data)
+{
+ struct iphdr *iph = (struct iphdr *)data;
+
+ if (iph->version == 0x4)
+ return sizeof(*iph);
+ return sizeof(struct ipv6hdr);
+}
+
+/**
+ * __iptfs_iplen() - return the v4/v6 length using packet data.
+ * @data: pointer to ip (v4/v6) packet header
+ *
+ * Grab the IPv4 or IPv6 length value in the start of the inner packet header
+ * pointed to by `data`. Assumes data len is enough for the length field only.
+ *
+ * The version data has been checked to be valid (i.e., either 4 or 6).
+ *
+ * Return: the length value.
+ */
+static u32 __iptfs_iplen(u8 *data)
+{
+ struct iphdr *iph = (struct iphdr *)data;
+
+ if (iph->version == 0x4)
+ return ntohs(iph->tot_len);
+ return ntohs(((struct ipv6hdr *)iph)->payload_len) +
+ sizeof(struct ipv6hdr);
+}
+
+/**
+ * iptfs_complete_inner_skb() - finish preparing the inner packet for gro recv.
+ * @x: xfrm state
+ * @skb: the inner packet
+ *
+ * Finish the standard xfrm processing on the inner packet prior to sending back
+ * through gro_cells_receive. We do this separately b/c we are building a list
+ * of packets in the hopes that one day a list will be taken by
+ * xfrm_input.
+ */
+static void iptfs_complete_inner_skb(struct xfrm_state *x, struct sk_buff *skb)
+{
+ skb_reset_network_header(skb);
+
+ /* The packet is going back through gro_cells_receive no need to
+ * set this.
+ */
+ skb_reset_transport_header(skb);
+
+ /* Packet already has checksum value set. */
+ skb->ip_summed = CHECKSUM_NONE;
+
+ /* Our skb will contain the header data copied when this outer packet
+ * which contained the start of this inner packet. This is true
+ * when we allocate a new skb as well as when we reuse the existing skb.
+ */
+ if (ip_hdr(skb)->version == 0x4) {
+ struct iphdr *iph = ip_hdr(skb);
+
+ if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+ ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, iph);
+ if (!(x->props.flags & XFRM_STATE_NOECN))
+ if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+ IP_ECN_set_ce(iph);
+
+ skb->protocol = htons(ETH_P_IP);
+ } else {
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
+ if (x->props.flags & XFRM_STATE_DECAP_DSCP)
+ ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, iph);
+ if (!(x->props.flags & XFRM_STATE_NOECN))
+ if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
+ IP6_ECN_set_ce(skb, iph);
+
+ skb->protocol = htons(ETH_P_IPV6);
+ }
+}
+
+static void __iptfs_reassem_done(struct xfrm_iptfs_data *xtfs, bool free)
+{
+ assert_spin_locked(&xtfs->drop_lock);
+
+ /* We don't care if it works locking takes care of things */
+ hrtimer_try_to_cancel(&xtfs->drop_timer);
+ if (free)
+ kfree_skb(xtfs->ra_newskb);
+ xtfs->ra_newskb = NULL;
+}
+
+/**
+ * iptfs_reassem_abort() - In-progress packet is aborted free the state.
+ * @xtfs: xtfs state
+ */
+static void iptfs_reassem_abort(struct xfrm_iptfs_data *xtfs)
+{
+ __iptfs_reassem_done(xtfs, true);
+}
+
+/**
+ * iptfs_reassem_done() - In-progress packet is complete, clear the state.
+ * @xtfs: xtfs state
+ */
+static void iptfs_reassem_done(struct xfrm_iptfs_data *xtfs)
+{
+ __iptfs_reassem_done(xtfs, false);
+}
+
+/**
+ * iptfs_reassem_cont() - Continue the reassembly of an inner packets.
+ * @xtfs: xtfs state
+ * @seq: sequence of current packet
+ * @st: seq read stat for current packet
+ * @skb: current packet
+ * @data: offset into sequential packet data
+ * @blkoff: packet blkoff value
+ * @list: list of skbs to enqueue completed packet on
+ *
+ * Process an IPTFS payload that has a non-zero `blkoff` or when we are
+ * expecting the continuation b/c we have a runt or in-progress packet.
+ *
+ * Return: the new data offset to continue processing from.
+ */
+static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq,
+ struct skb_seq_state *st, struct sk_buff *skb,
+ u32 data, u32 blkoff, struct list_head *list)
+{
+ struct iptfs_skb_frag_walk _fragwalk;
+ struct iptfs_skb_frag_walk *fragwalk = NULL;
+ struct sk_buff *newskb = xtfs->ra_newskb;
+ u32 remaining = skb->len - data;
+ u32 runtlen = xtfs->ra_runtlen;
+ u32 copylen, fraglen, ipremain, iphlen, iphremain, rrem;
+
+ /* Handle packet fragment we aren't expecting */
+ if (!runtlen && !xtfs->ra_newskb)
+ return data + min(blkoff, remaining);
+
+ /* Important to remember that input to this function is an ordered
+ * packet stream (unless the user disabled the reorder window). Thus if
+ * we are waiting for, and expecting the next packet so we can continue
+ * assembly, a newer sequence number indicates older ones are not coming
+ * (or if they do should be ignored). Technically we can receive older
+ * ones when the reorder window is disabled; however, the user should
+ * have disabled fragmentation in this case, and regardless we don't
+ * deal with it.
+ *
+ * blkoff could be zero if the stream is messed up (or it's an all pad
+ * insertion) be careful to handle that case in each of the below
+ */
+
+ /* Too old case: This can happen when the reorder window is disabled so
+ * ordering isn't actually guaranteed.
+ */
+ if (seq < xtfs->ra_wantseq)
+ return data + remaining;
+
+ /* Too new case: We missed what we wanted cleanup. */
+ if (seq > xtfs->ra_wantseq) {
+ XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINIPTFSERROR);
+ goto abandon;
+ }
+
+ if (blkoff == 0) {
+ if ((*skb->data & 0xF0) != 0) {
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINIPTFSERROR);
+ goto abandon;
+ }
+ /* Handle all pad case, advance expected sequence number.
+ * (RFC 9347 S2.2.3)
+ */
+ xtfs->ra_wantseq++;
+ /* will end parsing */
+ return data + remaining;
+ }
+
+ if (runtlen) {
+ /* Regardless of what happens we're done with the runt */
+ xtfs->ra_runtlen = 0;
+
+ /* The start of this inner packet was at the very end of the last
+ * iptfs payload which didn't include enough for the ip header
+ * length field. We must have *at least* that now.
+ */
+ rrem = sizeof(xtfs->ra_runt) - runtlen;
+ if (remaining < rrem || blkoff < rrem) {
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINIPTFSERROR);
+ goto abandon;
+ }
+
+ /* fill in the runt data */
+ if (skb_copy_seq_read(st, data, &xtfs->ra_runt[runtlen],
+ rrem)) {
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINBUFFERERROR);
+ goto abandon;
+ }
+
+ /* We have enough data to get the ip length value now,
+ * allocate an in progress skb
+ */
+ ipremain = __iptfs_iplen(xtfs->ra_runt);
+ if (ipremain < sizeof(xtfs->ra_runt)) {
+ /* length has to be at least runtsize large */
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINIPTFSERROR);
+ goto abandon;
+ }
+
+ /* For the runt case we don't attempt sharing currently. NOTE:
+ * Currently, this IPTFS implementation will not create runts.
+ */
+
+ newskb = iptfs_alloc_skb(skb, ipremain, false);
+ if (!newskb) {
+ XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINERROR);
+ goto abandon;
+ }
+ xtfs->ra_newskb = newskb;
+
+ /* Copy the runt data into the buffer, but leave data
+ * pointers the same as normal non-runt case. The extra `rrem`
+ * recopied bytes are basically cacheline free. Allows using
+ * same logic below to complete.
+ */
+ memcpy(skb_put(newskb, runtlen), xtfs->ra_runt,
+ sizeof(xtfs->ra_runt));
+ }
+
+ /* Continue reassembling the packet */
+ ipremain = __iptfs_iplen(newskb->data);
+ iphlen = __iptfs_iphlen(newskb->data);
+
+ ipremain -= newskb->len;
+ if (blkoff < ipremain) {
+ /* Corrupt data, we don't have enough to complete the packet */
+ XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINIPTFSERROR);
+ goto abandon;
+ }
+
+ /* We want the IP header in linear space */
+ if (newskb->len < iphlen) {
+ iphremain = iphlen - newskb->len;
+ if (blkoff < iphremain) {
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINIPTFSERROR);
+ goto abandon;
+ }
+ fraglen = min(blkoff, remaining);
+ copylen = min(fraglen, iphremain);
+ if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
+ copylen)) {
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINBUFFERERROR);
+ goto abandon;
+ }
+ /* this is a silly condition that might occur anyway */
+ if (copylen < iphremain) {
+ xtfs->ra_wantseq++;
+ return data + fraglen;
+ }
+ /* update data and things derived from it */
+ data += copylen;
+ blkoff -= copylen;
+ remaining -= copylen;
+ ipremain -= copylen;
+ }
+
+ fraglen = min(blkoff, remaining);
+ copylen = min(fraglen, ipremain);
+
+ /* If we may have the opportunity to share prepare a fragwalk. */
+ if (!skb_has_frag_list(skb) && !skb_has_frag_list(newskb) &&
+ (skb->head_frag || skb->len == skb->data_len) &&
+ skb->pp_recycle == newskb->pp_recycle) {
+ fragwalk = &_fragwalk;
+ iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
+ }
+
+ /* Try share then copy. */
+ if (fragwalk &&
+ iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) {
+ iptfs_skb_add_frags(newskb, fragwalk, data, copylen);
+ } else {
+ /* copy fragment data into newskb */
+ if (skb_copy_seq_read(st, data, skb_put(newskb, copylen),
+ copylen)) {
+ XFRM_INC_STATS(xs_net(xtfs->x),
+ LINUX_MIB_XFRMINBUFFERERROR);
+ goto abandon;
+ }
+ }
+
+ if (copylen < ipremain) {
+ xtfs->ra_wantseq++;
+ } else {
+ /* We are done with packet reassembly! */
+ iptfs_reassem_done(xtfs);
+ iptfs_complete_inner_skb(xtfs->x, newskb);
+ list_add_tail(&newskb->list, list);
+ }
+
+ /* will continue on to new data block or end */
+ return data + fraglen;
+
+abandon:
+ if (xtfs->ra_newskb) {
+ iptfs_reassem_abort(xtfs);
+ } else {
+ xtfs->ra_runtlen = 0;
+ xtfs->ra_wantseq = 0;
+ }
+ /* skip past fragment, maybe to end */
+ return data + min(blkoff, remaining);
+}
+
+static bool __input_process_payload(struct xfrm_state *x, u32 data,
+ struct skb_seq_state *skbseq,
+ struct list_head *sublist)
+{
+ u8 hbytes[sizeof(struct ipv6hdr)];
+ struct iptfs_skb_frag_walk _fragwalk;
+ struct iptfs_skb_frag_walk *fragwalk = NULL;
+ struct sk_buff *defer, *first_skb, *next, *skb;
+ const unsigned char *old_mac;
+ struct xfrm_iptfs_data *xtfs;
+ struct iphdr *iph;
+ struct net *net;
+ u32 first_iplen, iphlen, iplen, remaining, tail;
+ u32 capturelen;
+ u64 seq;
+
+ xtfs = x->mode_data;
+ net = xs_net(x);
+ skb = skbseq->root_skb;
+ first_skb = NULL;
+ defer = NULL;
+
+ seq = __esp_seq(skb);
+
+ /* Save the old mac header if set */
+ old_mac = skb_mac_header_was_set(skb) ? skb_mac_header(skb) : NULL;
+
+ /* New packets */
+
+ tail = skb->len;
+ while (data < tail) {
+ __be16 protocol = 0;
+
+ /* Gather information on the next data block.
+ * `data` points to the start of the data block.
+ */
+ remaining = tail - data;
+
+ /* try and copy enough bytes to read length from ipv4/ipv6 */
+ iphlen = min_t(u32, remaining, 6);
+ if (skb_copy_seq_read(skbseq, data, hbytes, iphlen)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto done;
+ }
+
+ iph = (struct iphdr *)hbytes;
+ if (iph->version == 0x4) {
+ /* must have at least tot_len field present */
+ if (remaining < 4) {
+ /* save the bytes we have, advance data and exit */
+ iptfs_input_save_runt(xtfs, seq, hbytes,
+ remaining);
+ data += remaining;
+ break;
+ }
+
+ iplen = be16_to_cpu(iph->tot_len);
+ iphlen = iph->ihl << 2;
+ protocol = cpu_to_be16(ETH_P_IP);
+ XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos;
+ } else if (iph->version == 0x6) {
+ /* must have at least payload_len field present */
+ if (remaining < 6) {
+ /* save the bytes we have, advance data and exit */
+ iptfs_input_save_runt(xtfs, seq, hbytes,
+ remaining);
+ data += remaining;
+ break;
+ }
+
+ iplen = be16_to_cpu(((struct ipv6hdr *)hbytes)->payload_len);
+ iplen += sizeof(struct ipv6hdr);
+ iphlen = sizeof(struct ipv6hdr);
+ protocol = cpu_to_be16(ETH_P_IPV6);
+ XFRM_MODE_SKB_CB(skbseq->root_skb)->tos =
+ ipv6_get_dsfield((struct ipv6hdr *)iph);
+ } else if (iph->version == 0x0) {
+ /* pad */
+ data = tail;
+ break;
+ } else {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto done;
+ }
+
+ if (unlikely(skbseq->stepped_offset)) {
+ /* We need to reset our seq read, it can't backup at
+ * this point.
+ */
+ struct sk_buff *save = skbseq->root_skb;
+
+ skb_abort_seq_read(skbseq);
+ skb_prepare_seq_read(save, data, tail, skbseq);
+ }
+
+ if (first_skb) {
+ skb = NULL;
+ } else {
+ first_skb = skb;
+ first_iplen = iplen;
+ fragwalk = NULL;
+
+ /* We are going to skip over `data` bytes to reach the
+ * start of the IP header of `iphlen` len for `iplen`
+ * inner packet.
+ */
+
+ if (skb_has_frag_list(skb)) {
+ defer = skb;
+ skb = NULL;
+ } else if (data + iphlen <= skb_headlen(skb) &&
+ /* make sure our header is 32-bit aligned? */
+ /* ((uintptr_t)(skb->data + data) & 0x3) == 0 && */
+ skb_tailroom(skb) + tail - data >= iplen) {
+ /* Reuse the received skb.
+ *
+ * We have enough headlen to pull past any
+ * initial fragment data, leaving at least the
+ * IP header in the linear buffer space.
+ *
+ * For linear buffer space we only require that
+ * linear buffer space is large enough to
+ * eventually hold the entire reassembled
+ * packet (by including tailroom in the check).
+ *
+ * For non-linear tailroom is 0 and so we only
+ * re-use if the entire packet is present
+ * already.
+ *
+ * NOTE: there are many more options for
+ * sharing, KISS for now. Also, this can produce
+ * skb's with the IP header unaligned to 32
+ * bits. If that ends up being a problem then a
+ * check should be added to the conditional
+ * above that the header lies on a 32-bit
+ * boundary as well.
+ */
+ skb_pull(skb, data);
+
+ /* our range just changed */
+ data = 0;
+ tail = skb->len;
+ remaining = skb->len;
+
+ skb->protocol = protocol;
+ skb_mac_header_rebuild(skb);
+ if (skb->mac_len)
+ eth_hdr(skb)->h_proto = skb->protocol;
+
+ /* all pointers could be changed now reset walk */
+ skb_abort_seq_read(skbseq);
+ skb_prepare_seq_read(skb, data, tail, skbseq);
+ } else if (skb->head_frag &&
+ /* We have the IP header right now */
+ remaining >= iphlen) {
+ fragwalk = &_fragwalk;
+ iptfs_skb_prepare_frag_walk(skb, data, fragwalk);
+ defer = skb;
+ skb = NULL;
+ } else {
+ /* We couldn't reuse the input skb so allocate a
+ * new one.
+ */
+ defer = skb;
+ skb = NULL;
+ }
+
+ /* Don't trim `first_skb` until the end as we are
+ * walking that data now.
+ */
+ }
+
+ capturelen = min(iplen, remaining);
+ if (!skb) {
+ if (!fragwalk ||
+ /* Large enough to be worth sharing */
+ iplen < IPTFS_PKT_SHARE_MIN ||
+ /* Have IP header + some data to share. */
+ capturelen <= iphlen ||
+ /* Try creating skb and adding frags */
+ !(skb = iptfs_pskb_add_frags(first_skb, fragwalk,
+ data, capturelen,
+ skbseq, iphlen))) {
+ skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen);
+ }
+ if (!skb) {
+ /* skip to next packet or done */
+ data += capturelen;
+ continue;
+ }
+
+ skb->protocol = protocol;
+ if (old_mac) {
+ /* rebuild the mac header */
+ skb_set_mac_header(skb, -first_skb->mac_len);
+ memcpy(skb_mac_header(skb), old_mac, first_skb->mac_len);
+ eth_hdr(skb)->h_proto = skb->protocol;
+ }
+ }
+
+ data += capturelen;
+
+ if (skb->len < iplen) {
+ /* Start reassembly */
+ spin_lock(&xtfs->drop_lock);
+
+ xtfs->ra_newskb = skb;
+ xtfs->ra_wantseq = seq + 1;
+ if (!hrtimer_is_queued(&xtfs->drop_timer)) {
+ /* softirq blocked lest the timer fire and interrupt us */
+ hrtimer_start(&xtfs->drop_timer,
+ xtfs->drop_time_ns,
+ IPTFS_HRTIMER_MODE);
+ }
+
+ spin_unlock(&xtfs->drop_lock);
+
+ break;
+ }
+
+ iptfs_complete_inner_skb(x, skb);
+ list_add_tail(&skb->list, sublist);
+ }
+
+ if (data != tail)
+ /* this should not happen from the above code */
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINIPTFSERROR);
+
+ if (first_skb && first_iplen && !defer && first_skb != xtfs->ra_newskb) {
+ /* first_skb is queued b/c !defer and not partial */
+ if (pskb_trim(first_skb, first_iplen)) {
+ /* error trimming */
+ list_del(&first_skb->list);
+ defer = first_skb;
+ }
+ first_skb->ip_summed = CHECKSUM_NONE;
+ }
+
+ /* Send the packets! */
+ list_for_each_entry_safe(skb, next, sublist, list) {
+ skb_list_del_init(skb);
+ if (xfrm_input(skb, 0, 0, -2))
+ kfree_skb(skb);
+ }
+done:
+ skb = skbseq->root_skb;
+ skb_abort_seq_read(skbseq);
+
+ if (defer) {
+ consume_skb(defer);
+ } else if (!first_skb) {
+ /* skb is the original passed in skb, but we didn't get far
+ * enough to process it as the first_skb, if we had it would
+ * either be save in ra_newskb, trimmed and sent on as an skb or
+ * placed in defer to be freed.
+ */
+ kfree_skb(skb);
+ }
+ return true;
+}
+
+/**
+ * iptfs_input_ordered() - handle next in order IPTFS payload.
+ * @x: xfrm state
+ * @skb: current packet
+ *
+ * Process the IPTFS payload in `skb` and consume it afterwards.
+ */
+static void iptfs_input_ordered(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct ip_iptfs_cc_hdr iptcch;
+ struct skb_seq_state skbseq;
+ struct list_head sublist; /* rename this it's just a list */
+ struct xfrm_iptfs_data *xtfs;
+ struct ip_iptfs_hdr *ipth;
+ struct net *net;
+ u32 blkoff, data, remaining;
+ bool consumed = false;
+ u64 seq;
+
+ xtfs = x->mode_data;
+ net = xs_net(x);
+
+ seq = __esp_seq(skb);
+
+ /* Large enough to hold both types of header */
+ ipth = (struct ip_iptfs_hdr *)&iptcch;
+
+ skb_prepare_seq_read(skb, 0, skb->len, &skbseq);
+
+ /* Get the IPTFS header and validate it */
+
+ if (skb_copy_seq_read(&skbseq, 0, ipth, sizeof(*ipth))) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto done;
+ }
+ data = sizeof(*ipth);
+
+ trace_iptfs_egress_recv(skb, xtfs, be16_to_cpu(ipth->block_offset));
+
+ /* Set data past the basic header */
+ if (ipth->subtype == IPTFS_SUBTYPE_CC) {
+ /* Copy the rest of the CC header */
+ remaining = sizeof(iptcch) - sizeof(*ipth);
+ if (skb_copy_seq_read(&skbseq, data, ipth + 1, remaining)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
+ goto done;
+ }
+ data += remaining;
+ } else if (ipth->subtype != IPTFS_SUBTYPE_BASIC) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+ goto done;
+ }
+
+ if (ipth->flags != 0) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
+ goto done;
+ }
+
+ INIT_LIST_HEAD(&sublist);
+
+ /* Handle fragment at start of payload, and/or waiting reassembly. */
+
+ blkoff = ntohs(ipth->block_offset);
+ /* check before locking i.e., maybe */
+ if (blkoff || xtfs->ra_runtlen || xtfs->ra_newskb) {
+ spin_lock(&xtfs->drop_lock);
+
+ /* check again after lock */
+ if (blkoff || xtfs->ra_runtlen || xtfs->ra_newskb) {
+ data = iptfs_reassem_cont(xtfs, seq, &skbseq, skb, data,
+ blkoff, &sublist);
+ }
+
+ spin_unlock(&xtfs->drop_lock);
+ }
+
+ /* New packets */
+ consumed = __input_process_payload(x, data, &skbseq, &sublist);
+done:
+ if (!consumed) {
+ skb = skbseq.root_skb;
+ skb_abort_seq_read(&skbseq);
+ kfree_skb(skb);
+ }
+}
+
+/* ------------------------------- */
+/* Input (Egress) Re-ordering Code */
+/* ------------------------------- */
+
+static void __vec_shift(struct xfrm_iptfs_data *xtfs, u32 shift)
+{
+ u32 savedlen = xtfs->w_savedlen;
+
+ if (shift > savedlen)
+ shift = savedlen;
+ if (shift != savedlen)
+ memcpy(xtfs->w_saved, xtfs->w_saved + shift,
+ (savedlen - shift) * sizeof(*xtfs->w_saved));
+ memset(xtfs->w_saved + savedlen - shift, 0,
+ shift * sizeof(*xtfs->w_saved));
+ xtfs->w_savedlen -= shift;
+}
+
+static void __reorder_past(struct xfrm_iptfs_data *xtfs, struct sk_buff *inskb,
+ struct list_head *freelist)
+{
+ list_add_tail(&inskb->list, freelist);
+}
+
+static u32 __reorder_drop(struct xfrm_iptfs_data *xtfs, struct list_head *list)
+
+{
+ struct skb_wseq *s, *se;
+ const u32 savedlen = xtfs->w_savedlen;
+ time64_t now = ktime_get_raw_fast_ns();
+ u32 count = 0;
+ u32 scount = 0;
+
+ if (xtfs->w_saved[0].drop_time > now)
+ goto set_timer;
+
+ ++xtfs->w_wantseq;
+
+ /* Keep flushing packets until we reach a drop time greater than now. */
+ s = xtfs->w_saved;
+ se = s + savedlen;
+ do {
+ /* Walking past empty slots until we reach a packet */
+ for (; s < se && !s->skb; s++) {
+ if (s->drop_time > now)
+ goto outerdone;
+ }
+ /* Sending packets until we hit another empty slot. */
+ for (; s < se && s->skb; scount++, s++)
+ list_add_tail(&s->skb->list, list);
+ } while (s < se);
+outerdone:
+
+ count = s - xtfs->w_saved;
+ if (count) {
+ xtfs->w_wantseq += count;
+
+ /* Shift handled slots plus final empty slot into slot 0. */
+ __vec_shift(xtfs, count);
+ }
+
+ if (xtfs->w_savedlen) {
+set_timer:
+ /* Drifting is OK */
+ hrtimer_start(&xtfs->drop_timer,
+ xtfs->w_saved[0].drop_time - now,
+ IPTFS_HRTIMER_MODE);
+ }
+ return scount;
+}
+
+static void __reorder_this(struct xfrm_iptfs_data *xtfs, struct sk_buff *inskb,
+ struct list_head *list)
+{
+ struct skb_wseq *s, *se;
+ const u32 savedlen = xtfs->w_savedlen;
+ u32 count = 0;
+
+ /* Got what we wanted. */
+ list_add_tail(&inskb->list, list);
+ ++xtfs->w_wantseq;
+ if (!savedlen)
+ return;
+
+ /* Flush remaining consecutive packets. */
+
+ /* Keep sending until we hit another missed pkt. */
+ for (s = xtfs->w_saved, se = s + savedlen; s < se && s->skb; s++)
+ list_add_tail(&s->skb->list, list);
+ count = s - xtfs->w_saved;
+ if (count)
+ xtfs->w_wantseq += count;
+
+ /* Shift handled slots plus final empty slot into slot 0. */
+ __vec_shift(xtfs, count + 1);
+}
+
+/* Set the slot's drop time and all the empty slots below it until reaching a
+ * filled slot which will already be set.
+ */
+static void iptfs_set_window_drop_times(struct xfrm_iptfs_data *xtfs, int index)
+{
+ const u32 savedlen = xtfs->w_savedlen;
+ struct skb_wseq *s = xtfs->w_saved;
+ time64_t drop_time;
+
+ assert_spin_locked(&xtfs->drop_lock);
+
+ if (savedlen > index + 1) {
+ /* we are below another, our drop time and the timer are already set */
+ return;
+ }
+ /* we are the most future so get a new drop time. */
+ drop_time = ktime_get_raw_fast_ns();
+ drop_time += xtfs->drop_time_ns;
+
+ /* Walk back through the array setting drop times as we go */
+ s[index].drop_time = drop_time;
+ while (index-- > 0 && !s[index].skb)
+ s[index].drop_time = drop_time;
+
+ /* If we walked all the way back, schedule the drop timer if needed */
+ if (index == -1 && !hrtimer_is_queued(&xtfs->drop_timer))
+ hrtimer_start(&xtfs->drop_timer, xtfs->drop_time_ns,
+ IPTFS_HRTIMER_MODE);
+}
+
+static void __reorder_future_fits(struct xfrm_iptfs_data *xtfs,
+ struct sk_buff *inskb,
+ struct list_head *freelist)
+{
+ const u64 inseq = __esp_seq(inskb);
+ const u64 wantseq = xtfs->w_wantseq;
+ const u64 distance = inseq - wantseq;
+ const u32 savedlen = xtfs->w_savedlen;
+ const u32 index = distance - 1;
+
+ /* Handle future sequence number received which fits in the window.
+ *
+ * We know we don't have the seq we want so we won't be able to flush
+ * anything.
+ */
+
+ /* slot count is 4, saved size is 3 savedlen is 2
+ *
+ * "window boundary" is based on the fixed window size
+ * distance is also slot number
+ * index is an array index (i.e., - 1 of slot)
+ * : : - implicit NULL after array len
+ *
+ * +--------- used length (savedlen == 2)
+ * | +----- array size (nslots - 1 == 3)
+ * | | + window boundary (nslots == 4)
+ * V V | V
+ * |
+ * 0 1 2 3 | slot number
+ * --- 0 1 2 | array index
+ * [-] [b] : :| array
+ *
+ * "2" "3" "4" *5*| seq numbers
+ *
+ * We receive seq number 5
+ * distance == 3 [inseq(5) - w_wantseq(2)]
+ * index == 2 [distance(6) - 1]
+ */
+
+ if (xtfs->w_saved[index].skb) {
+ /* a dup of a future */
+ list_add_tail(&inskb->list, freelist);
+ return;
+ }
+
+ xtfs->w_saved[index].skb = inskb;
+ xtfs->w_savedlen = max(savedlen, index + 1);
+ iptfs_set_window_drop_times(xtfs, index);
+}
+
+static void __reorder_future_shifts(struct xfrm_iptfs_data *xtfs,
+ struct sk_buff *inskb,
+ struct list_head *list)
+{
+ const u32 nslots = xtfs->cfg.reorder_win_size + 1;
+ const u64 inseq = __esp_seq(inskb);
+ u32 savedlen = xtfs->w_savedlen;
+ u64 wantseq = xtfs->w_wantseq;
+ struct skb_wseq *wnext;
+ struct sk_buff *slot0;
+ u32 beyond, shifting, slot;
+ u64 distance;
+
+ /* Handle future sequence number received.
+ *
+ * IMPORTANT: we are at least advancing w_wantseq (i.e., wantseq) by 1
+ * b/c we are beyond the window boundary.
+ *
+ * We know we don't have the wantseq so that counts as a drop.
+ */
+
+ /* example: slot count is 4, array size is 3 savedlen is 2, slot 0 is
+ * the missing sequence number.
+ *
+ * the final slot at savedlen (index savedlen - 1) is always occupied.
+ *
+ * beyond is "beyond array size" not savedlen.
+ *
+ * +--------- array length (savedlen == 2)
+ * | +----- array size (nslots - 1 == 3)
+ * | | +- window boundary (nslots == 4)
+ * V V |
+ * |
+ * 0 1 2 3 | slot number
+ * --- 0 1 2 | array index
+ * [b] [c] : :| array
+ * |
+ * "2" "3" "4" "5"|*6* seq numbers
+ *
+ * We receive seq number 6
+ * distance == 4 [inseq(6) - w_wantseq(2)]
+ * newslot == distance
+ * index == 3 [distance(4) - 1]
+ * beyond == 1 [newslot(4) - lastslot((nslots(4) - 1))]
+ * shifting == 1 [min(savedlen(2), beyond(1)]
+ * slot0_skb == [b], and should match w_wantseq
+ *
+ * +--- window boundary (nslots == 4)
+ * 0 1 2 3 | 4 slot number
+ * --- 0 1 2 | 3 array index
+ * [b] : : : :| array
+ * "2" "3" "4" "5" *6* seq numbers
+ *
+ * We receive seq number 6
+ * distance == 4 [inseq(6) - w_wantseq(2)]
+ * newslot == distance
+ * index == 3 [distance(4) - 1]
+ * beyond == 1 [newslot(4) - lastslot((nslots(4) - 1))]
+ * shifting == 1 [min(savedlen(1), beyond(1)]
+ * slot0_skb == [b] and should match w_wantseq
+ *
+ * +-- window boundary (nslots == 4)
+ * 0 1 2 3 | 4 5 6 slot number
+ * --- 0 1 2 | 3 4 5 array index
+ * [-] [c] : :| array
+ * "2" "3" "4" "5" "6" "7" *8* seq numbers
+ *
+ * savedlen = 2, beyond = 3
+ * iter 1: slot0 == NULL, missed++, lastdrop = 2 (2+1-1), slot0 = [-]
+ * iter 2: slot0 == NULL, missed++, lastdrop = 3 (2+2-1), slot0 = [c]
+ * 2 < 3, extra = 1 (3-2), missed += extra, lastdrop = 4 (2+2+1-1)
+ *
+ * We receive seq number 8
+ * distance == 6 [inseq(8) - w_wantseq(2)]
+ * newslot == distance
+ * index == 5 [distance(6) - 1]
+ * beyond == 3 [newslot(6) - lastslot((nslots(4) - 1))]
+ * shifting == 2 [min(savedlen(2), beyond(3)]
+ *
+ * slot0_skb == NULL changed from [b] when "savedlen < beyond" is true.
+ */
+
+ /* Now send any packets that are being shifted out of saved, and account
+ * for missing packets that are exiting the window as we shift it.
+ */
+
+ distance = inseq - wantseq;
+ beyond = distance - (nslots - 1);
+
+ /* If savedlen > beyond we are shifting some, else all. */
+ shifting = min(savedlen, beyond);
+
+ /* slot0 is the buf that just shifted out and into slot0 */
+ slot0 = NULL;
+ wnext = xtfs->w_saved;
+ for (slot = 1; slot <= shifting; slot++, wnext++) {
+ /* handle what was in slot0 before we occupy it */
+ if (slot0)
+ list_add_tail(&slot0->list, list);
+ slot0 = wnext->skb;
+ wnext->skb = NULL;
+ }
+
+ /* slot0 is now either NULL (in which case it's what we now are waiting
+ * for, or a buf in which case we need to handle it like we received it;
+ * however, we may be advancing past that buffer as well..
+ */
+
+ /* Handle case where we need to shift more than we had saved, slot0 will
+ * be NULL iff savedlen is 0, otherwise slot0 will always be
+ * non-NULL b/c we shifted the final element, which is always set if
+ * there is any saved, into slot0.
+ */
+ if (savedlen < beyond) {
+ if (savedlen != 0)
+ list_add_tail(&slot0->list, list);
+ slot0 = NULL;
+ /* slot0 has had an empty slot pushed into it */
+ }
+
+ /* Remove the entries */
+ __vec_shift(xtfs, beyond);
+
+ /* Advance want seq */
+ xtfs->w_wantseq += beyond;
+
+ /* Process drops here when implementing congestion control */
+
+ /* We've shifted. plug the packet in at the end. */
+ xtfs->w_savedlen = nslots - 1;
+ xtfs->w_saved[xtfs->w_savedlen - 1].skb = inskb;
+ iptfs_set_window_drop_times(xtfs, xtfs->w_savedlen - 1);
+
+ /* if we don't have a slot0 then we must wait for it */
+ if (!slot0)
+ return;
+
+ /* If slot0, seq must match new want seq */
+
+ /* slot0 is valid, treat like we received expected. */
+ __reorder_this(xtfs, slot0, list);
+}
+
+/* Receive a new packet into the reorder window. Return a list of ordered
+ * packets from the window.
+ */
+static void iptfs_input_reorder(struct xfrm_iptfs_data *xtfs,
+ struct sk_buff *inskb, struct list_head *list,
+ struct list_head *freelist)
+{
+ const u32 nslots = xtfs->cfg.reorder_win_size + 1;
+ u64 inseq = __esp_seq(inskb);
+ u64 wantseq;
+
+ assert_spin_locked(&xtfs->drop_lock);
+
+ if (unlikely(!xtfs->w_seq_set)) {
+ xtfs->w_seq_set = true;
+ xtfs->w_wantseq = inseq;
+ }
+ wantseq = xtfs->w_wantseq;
+
+ if (likely(inseq == wantseq))
+ __reorder_this(xtfs, inskb, list);
+ else if (inseq < wantseq)
+ __reorder_past(xtfs, inskb, freelist);
+ else if ((inseq - wantseq) < nslots)
+ __reorder_future_fits(xtfs, inskb, freelist);
+ else
+ __reorder_future_shifts(xtfs, inskb, list);
+}
+
+/**
+ * iptfs_drop_timer() - Handle drop timer expiry.
+ * @me: the timer
+ *
+ * This is similar to our input function.
+ *
+ * The drop timer is set when we start an in progress reassembly, and also when
+ * we save a future packet in the window saved array.
+ *
+ * NOTE packets in the save window are always newer WRT drop times as
+ * they get further in the future. i.e. for:
+ *
+ * if slots (S0, S1, ... Sn) and `Dn` is the drop time for slot `Sn`,
+ * then D(n-1) <= D(n).
+ *
+ * So, regardless of why the timer is firing we can always discard any inprogress
+ * fragment; either it's the reassembly timer, or slot 0 is going to be
+ * dropped as S0 must have the most recent drop time, and slot 0 holds the
+ * continuation fragment of the in progress packet.
+ *
+ * Returns HRTIMER_NORESTART.
+ */
+static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me)
+{
+ struct sk_buff *skb, *next;
+ struct list_head list;
+ struct xfrm_iptfs_data *xtfs;
+ struct xfrm_state *x;
+ u32 count;
+
+ xtfs = container_of(me, typeof(*xtfs), drop_timer);
+ x = xtfs->x;
+
+ INIT_LIST_HEAD(&list);
+
+ spin_lock(&xtfs->drop_lock);
+
+ /* Drop any in progress packet */
+ skb = xtfs->ra_newskb;
+ xtfs->ra_newskb = NULL;
+
+ /* Now drop as many packets as we should from the reordering window
+ * saved array
+ */
+ count = xtfs->w_savedlen ? __reorder_drop(xtfs, &list) : 0;
+
+ spin_unlock(&xtfs->drop_lock);
+
+ if (skb)
+ kfree_skb_reason(skb, SKB_DROP_REASON_FRAG_REASM_TIMEOUT);
+
+ if (count) {
+ list_for_each_entry_safe(skb, next, &list, list) {
+ skb_list_del_init(skb);
+ iptfs_input_ordered(x, skb);
+ }
+ }
+
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * iptfs_input() - handle receipt of iptfs payload
+ * @x: xfrm state
+ * @skb: the packet
+ *
+ * We have an IPTFS payload order it if needed, then process newly in order
+ * packets.
+ *
+ * Return: -EINPROGRESS to inform xfrm_input to stop processing the skb.
+ */
+static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct list_head freelist, list;
+ struct xfrm_iptfs_data *xtfs = x->mode_data;
+ struct sk_buff *next;
+
+ /* Fast path for no reorder window. */
+ if (xtfs->cfg.reorder_win_size == 0) {
+ iptfs_input_ordered(x, skb);
+ goto done;
+ }
+
+ /* Fetch list of in-order packets from the reordering window as well as
+ * a list of buffers we need to now free.
+ */
+ INIT_LIST_HEAD(&list);
+ INIT_LIST_HEAD(&freelist);
+
+ spin_lock(&xtfs->drop_lock);
+ iptfs_input_reorder(xtfs, skb, &list, &freelist);
+ spin_unlock(&xtfs->drop_lock);
+
+ list_for_each_entry_safe(skb, next, &list, list) {
+ skb_list_del_init(skb);
+ iptfs_input_ordered(x, skb);
+ }
+
+ list_for_each_entry_safe(skb, next, &freelist, list) {
+ skb_list_del_init(skb);
+ kfree_skb(skb);
+ }
+done:
+ /* We always have dealt with the input SKB, either we are re-using it,
+ * or we have freed it. Return EINPROGRESS so that xfrm_input stops
+ * processing it.
+ */
+ return -EINPROGRESS;
+}
+
+/* ================================= */
+/* IPTFS Sending (ingress) Functions */
+/* ================================= */
+
+/* ------------------------- */
+/* Enqueue to send functions */
+/* ------------------------- */
+
+/**
+ * iptfs_enqueue() - enqueue packet if ok to send.
+ * @xtfs: xtfs state
+ * @skb: the packet
+ *
+ * Return: true if packet enqueued.
+ */
+static bool iptfs_enqueue(struct xfrm_iptfs_data *xtfs, struct sk_buff *skb)
+{
+ u64 newsz = xtfs->queue_size + skb->len;
+ struct iphdr *iph;
+
+ assert_spin_locked(&xtfs->x->lock);
+
+ if (newsz > xtfs->cfg.max_queue_size)
+ return false;
+
+ /* Set ECN CE if we are above our ECN queue threshold */
+ if (newsz > xtfs->ecn_queue_size) {
+ iph = ip_hdr(skb);
+ if (iph->version == 4)
+ IP_ECN_set_ce(iph);
+ else if (iph->version == 6)
+ IP6_ECN_set_ce(skb, ipv6_hdr(skb));
+ }
+
+ __skb_queue_tail(&xtfs->queue, skb);
+ xtfs->queue_size += skb->len;
+ return true;
+}
+
+static int iptfs_get_cur_pmtu(struct xfrm_state *x, struct xfrm_iptfs_data *xtfs,
+ struct sk_buff *skb)
+{
+ struct xfrm_dst *xdst = (struct xfrm_dst *)skb_dst(skb);
+ u32 payload_mtu = xtfs->payload_mtu;
+ u32 pmtu = __iptfs_get_inner_mtu(x, xdst->child_mtu_cached);
+
+ if (payload_mtu && payload_mtu < pmtu)
+ pmtu = payload_mtu;
+
+ return pmtu;
+}
+
+static int iptfs_is_too_big(struct sock *sk, struct sk_buff *skb, u32 pmtu)
+{
+ if (skb->len <= pmtu)
+ return 0;
+
+ /* We only send ICMP too big if the user has configured us as
+ * dont-fragment.
+ */
+ if (skb->dev)
+ XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMOUTERROR);
+
+ if (sk)
+ xfrm_local_error(skb, pmtu);
+ else if (ip_hdr(skb)->version == 4)
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(pmtu));
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, pmtu);
+
+ return 1;
+}
+
+/* IPv4/IPv6 packet ingress to IPTFS tunnel, arrange to send in IPTFS payload
+ * (i.e., aggregating or fragmenting as appropriate).
+ * This is set in dst->output for an SA.
+ */
+static int iptfs_output_collect(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
+ struct xfrm_iptfs_data *xtfs = x->mode_data;
+ struct sk_buff *segs, *nskb;
+ u32 pmtu = 0;
+ bool ok = true;
+ bool was_gso;
+
+ /* We have hooked into dst_entry->output which means we have skipped the
+ * protocol specific netfilter (see xfrm4_output, xfrm6_output).
+ * when our timer runs we will end up calling xfrm_output directly on
+ * the encapsulated traffic.
+ *
+ * For both cases this is the NF_INET_POST_ROUTING hook which allows
+ * changing the skb->dst entry which then may not be xfrm based anymore
+ * in which case a REROUTED flag is set. and dst_output is called.
+ *
+ * For IPv6 we are also skipping fragmentation handling for local
+ * sockets, which may or may not be good depending on our tunnel DF
+ * setting. Normally with fragmentation supported we want to skip this
+ * fragmentation.
+ */
+
+ if (xtfs->cfg.dont_frag)
+ pmtu = iptfs_get_cur_pmtu(x, xtfs, skb);
+
+ /* Break apart GSO skbs. If the queue is nearing full then we want the
+ * accounting and queuing to be based on the individual packets not on the
+ * aggregate GSO buffer.
+ */
+ was_gso = skb_is_gso(skb);
+ if (!was_gso) {
+ segs = skb;
+ } else {
+ segs = skb_gso_segment(skb, 0);
+ if (IS_ERR_OR_NULL(segs)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR);
+ kfree_skb(skb);
+ if (IS_ERR(segs))
+ return PTR_ERR(segs);
+ return -EINVAL;
+ }
+ consume_skb(skb);
+ skb = NULL;
+ }
+
+ /* We can be running on multiple cores and from the network softirq or
+ * from user context depending on where the packet is coming from.
+ */
+ spin_lock_bh(&x->lock);
+
+ skb_list_walk_safe(segs, skb, nskb) {
+ skb_mark_not_on_list(skb);
+
+ /* Once we drop due to no queue space we continue to drop the
+ * rest of the packets from that GRO.
+ */
+ if (!ok) {
+nospace:
+ trace_iptfs_no_queue_space(skb, xtfs, pmtu, was_gso);
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOQSPACE);
+ kfree_skb_reason(skb, SKB_DROP_REASON_FULL_RING);
+ continue;
+ }
+
+ /* If the user indicated no iptfs fragmenting check before
+ * enqueue.
+ */
+ if (xtfs->cfg.dont_frag && iptfs_is_too_big(sk, skb, pmtu)) {
+ trace_iptfs_too_big(skb, xtfs, pmtu, was_gso);
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
+ continue;
+ }
+
+ /* Enqueue to send in tunnel */
+ ok = iptfs_enqueue(xtfs, skb);
+ if (!ok)
+ goto nospace;
+
+ trace_iptfs_enqueue(skb, xtfs, pmtu, was_gso);
+ }
+
+ /* Start a delay timer if we don't have one yet */
+ if (!hrtimer_is_queued(&xtfs->iptfs_timer)) {
+ hrtimer_start(&xtfs->iptfs_timer, xtfs->init_delay_ns, IPTFS_HRTIMER_MODE);
+ xtfs->iptfs_settime = ktime_get_raw_fast_ns();
+ trace_iptfs_timer_start(xtfs, xtfs->init_delay_ns);
+ }
+
+ spin_unlock_bh(&x->lock);
+ return 0;
+}
+
+/* -------------------------- */
+/* Dequeue and send functions */
+/* -------------------------- */
+
+static void iptfs_output_prepare_skb(struct sk_buff *skb, u32 blkoff)
+{
+ struct ip_iptfs_hdr *h;
+ size_t hsz = sizeof(*h);
+
+ /* now reset values to be pointing at the rest of the packets */
+ h = skb_push(skb, hsz);
+ memset(h, 0, hsz);
+ if (blkoff)
+ h->block_offset = htons(blkoff);
+
+ /* network_header current points at the inner IP packet
+ * move it to the iptfs header
+ */
+ skb->transport_header = skb->network_header;
+ skb->network_header -= hsz;
+
+ IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+}
+
+/**
+ * iptfs_copy_create_frag() - create an inner fragment skb.
+ * @st: The source packet data.
+ * @offset: offset in @st of the new fragment data.
+ * @copy_len: the amount of data to copy from @st.
+ *
+ * Create a new skb holding a single IPTFS inner packet fragment. @copy_len must
+ * not be greater than the max fragment size.
+ *
+ * Return: the new fragment skb or an ERR_PTR().
+ */
+static struct sk_buff *iptfs_copy_create_frag(struct skb_seq_state *st, u32 offset, u32 copy_len)
+{
+ struct sk_buff *src = st->root_skb;
+ struct sk_buff *skb;
+ int err;
+
+ skb = iptfs_alloc_skb(src, copy_len, true);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ /* Now copy `copy_len` data from src */
+ err = skb_copy_seq_read(st, offset, skb_put(skb, copy_len), copy_len);
+ if (err) {
+ kfree_skb(skb);
+ return ERR_PTR(err);
+ }
+
+ return skb;
+}
+
+/**
+ * iptfs_copy_create_frags() - create and send N-1 fragments of a larger skb.
+ * @skbp: the source packet skb (IN), skb holding the last fragment in
+ * the fragment stream (OUT).
+ * @xtfs: IPTFS SA state.
+ * @mtu: the max IPTFS fragment size.
+ *
+ * This function is responsible for fragmenting a larger inner packet into a
+ * sequence of IPTFS payload packets. The last fragment is returned rather than
+ * being sent so that the caller can append more inner packets (aggregation) if
+ * there is room.
+ *
+ * Return: 0 on success or a negative error code on failure
+ */
+static int iptfs_copy_create_frags(struct sk_buff **skbp, struct xfrm_iptfs_data *xtfs, u32 mtu)
+{
+ struct skb_seq_state skbseq;
+ struct list_head sublist;
+ struct sk_buff *skb = *skbp;
+ struct sk_buff *nskb = *skbp;
+ u32 copy_len, offset;
+ u32 to_copy = skb->len - mtu;
+ u32 blkoff = 0;
+ int err = 0;
+
+ INIT_LIST_HEAD(&sublist);
+
+ skb_prepare_seq_read(skb, 0, skb->len, &skbseq);
+
+ /* A trimmed `skb` will be sent as the first fragment, later. */
+ offset = mtu;
+ to_copy = skb->len - offset;
+ while (to_copy) {
+ /* Send all but last fragment to allow agg. append */
+ trace_iptfs_first_fragmenting(nskb, mtu, to_copy, NULL);
+ list_add_tail(&nskb->list, &sublist);
+
+ /* FUTURE: if the packet has an odd/non-aligning length we could
+ * send less data in the penultimate fragment so that the last
+ * fragment then ends on an aligned boundary.
+ */
+ copy_len = min(to_copy, mtu);
+ nskb = iptfs_copy_create_frag(&skbseq, offset, copy_len);
+ if (IS_ERR(nskb)) {
+ XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMOUTERROR);
+ skb_abort_seq_read(&skbseq);
+ err = PTR_ERR(nskb);
+ nskb = NULL;
+ break;
+ }
+ iptfs_output_prepare_skb(nskb, to_copy);
+ offset += copy_len;
+ to_copy -= copy_len;
+ blkoff = to_copy;
+ }
+ skb_abort_seq_read(&skbseq);
+
+ /* return last fragment that will be unsent (or NULL) */
+ *skbp = nskb;
+ if (nskb)
+ trace_iptfs_first_final_fragment(nskb, mtu, blkoff, NULL);
+
+ /* trim the original skb to MTU */
+ if (!err)
+ err = pskb_trim(skb, mtu);
+
+ if (err) {
+ /* Free all frags. Don't bother sending a partial packet we will
+ * never complete.
+ */
+ kfree_skb(nskb);
+ list_for_each_entry_safe(skb, nskb, &sublist, list) {
+ skb_list_del_init(skb);
+ kfree_skb(skb);
+ }
+ return err;
+ }
+
+ /* prepare the initial fragment with an iptfs header */
+ iptfs_output_prepare_skb(skb, 0);
+
+ /* Send all but last fragment, if we fail to send a fragment then free
+ * the rest -- no point in sending a packet that can't be reassembled.
+ */
+ list_for_each_entry_safe(skb, nskb, &sublist, list) {
+ skb_list_del_init(skb);
+ if (!err)
+ err = xfrm_output(NULL, skb);
+ else
+ kfree_skb(skb);
+ }
+ if (err)
+ kfree_skb(*skbp);
+ return err;
+}
+
+/**
+ * iptfs_first_skb() - handle the first dequeued inner packet for output
+ * @skbp: the source packet skb (IN), skb holding the last fragment in
+ * the fragment stream (OUT).
+ * @xtfs: IPTFS SA state.
+ * @mtu: the max IPTFS fragment size.
+ *
+ * This function is responsible for fragmenting a larger inner packet into a
+ * sequence of IPTFS payload packets.
+ *
+ * The last fragment is returned rather than being sent so that the caller can
+ * append more inner packets (aggregation) if there is room.
+ *
+ * Return: 0 on success or a negative error code on failure
+ */
+static int iptfs_first_skb(struct sk_buff **skbp, struct xfrm_iptfs_data *xtfs, u32 mtu)
+{
+ struct sk_buff *skb = *skbp;
+ int err;
+
+ /* Classic ESP skips the don't fragment ICMP error if DF is clear on
+ * the inner packet or ignore_df is set. Otherwise it will send an ICMP
+ * or local error if the inner packet won't fit it's MTU.
+ *
+ * With IPTFS we do not care about the inner packet DF bit. If the
+ * tunnel is configured to "don't fragment" we error back if things
+ * don't fit in our max packet size. Otherwise we iptfs-fragment as
+ * normal.
+ */
+
+ /* The opportunity for HW offload has ended */
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ err = skb_checksum_help(skb);
+ if (err)
+ return err;
+ }
+
+ /* We've split gso up before queuing */
+
+ trace_iptfs_first_dequeue(skb, mtu, 0, ip_hdr(skb));
+
+ /* Consider the buffer Tx'd and no longer owned */
+ skb_orphan(skb);
+
+ /* Simple case -- it fits. `mtu` accounted for all the overhead
+ * including the basic IPTFS header.
+ */
+ if (skb->len <= mtu) {
+ iptfs_output_prepare_skb(skb, 0);
+ return 0;
+ }
+
+ return iptfs_copy_create_frags(skbp, xtfs, mtu);
+}
+
+static struct sk_buff **iptfs_rehome_fraglist(struct sk_buff **nextp, struct sk_buff *child)
+{
+ u32 fllen = 0;
+
+ /* It might be possible to account for a frag list in addition to page
+ * fragment if it's a valid state to be in. The page fragments size
+ * should be kept as data_len so only the frag_list size is removed,
+ * this must be done above as well.
+ */
+ *nextp = skb_shinfo(child)->frag_list;
+ while (*nextp) {
+ fllen += (*nextp)->len;
+ nextp = &(*nextp)->next;
+ }
+ skb_frag_list_init(child);
+ child->len -= fllen;
+ child->data_len -= fllen;
+
+ return nextp;
+}
+
+static void iptfs_consume_frags(struct sk_buff *to, struct sk_buff *from)
+{
+ struct skb_shared_info *fromi = skb_shinfo(from);
+ struct skb_shared_info *toi = skb_shinfo(to);
+ unsigned int new_truesize;
+
+ /* If we have data in a head page, grab it */
+ if (!skb_headlen(from)) {
+ new_truesize = SKB_TRUESIZE(skb_end_offset(from));
+ } else {
+ iptfs_skb_head_to_frag(from, &toi->frags[toi->nr_frags]);
+ skb_frag_ref(to, toi->nr_frags++);
+ new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff));
+ }
+
+ /* Move any other page fragments rather than copy */
+ memcpy(&toi->frags[toi->nr_frags], fromi->frags,
+ sizeof(fromi->frags[0]) * fromi->nr_frags);
+ toi->nr_frags += fromi->nr_frags;
+ fromi->nr_frags = 0;
+ from->data_len = 0;
+ from->len = 0;
+ to->truesize += from->truesize - new_truesize;
+ from->truesize = new_truesize;
+
+ /* We are done with this SKB */
+ consume_skb(from);
+}
+
+static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list)
+{
+ struct xfrm_iptfs_data *xtfs = x->mode_data;
+ struct sk_buff *skb, *skb2, **nextp;
+ struct skb_shared_info *shi, *shi2;
+
+ /* If we are fragmenting due to a large inner packet we will output all
+ * the outer IPTFS packets required to contain the fragments of the
+ * single large inner packet. These outer packets need to be sent
+ * consecutively (ESP seq-wise). Since this output function is always
+ * running from a timer we do not need a lock to provide this guarantee.
+ * We will output our packets consecutively before the timer is allowed
+ * to run again on some other CPU.
+ */
+
+ while ((skb = __skb_dequeue(list))) {
+ u32 mtu = iptfs_get_cur_pmtu(x, xtfs, skb);
+ bool share_ok = true;
+ int remaining;
+
+ /* protocol comes to us cleared sometimes */
+ skb->protocol = x->outer_mode.family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+
+ if (skb->len > mtu && xtfs->cfg.dont_frag) {
+ /* We handle this case before enqueueing so we are only
+ * here b/c MTU changed after we enqueued before we
+ * dequeued, just drop these.
+ */
+ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTERROR);
+
+ trace_iptfs_first_toobig(skb, mtu, 0, ip_hdr(skb));
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
+ continue;
+ }
+
+ /* Convert first inner packet into an outer IPTFS packet,
+ * dealing with any fragmentation into multiple outer packets
+ * if necessary.
+ */
+ if (iptfs_first_skb(&skb, xtfs, mtu))
+ continue;
+
+ /* If fragmentation was required the returned skb is the last
+ * IPTFS fragment in the chain, and it's IPTFS header blkoff has
+ * been set just past the end of the fragment data.
+ *
+ * In either case the space remaining to send more inner packet
+ * data is `mtu` - (skb->len - sizeof iptfs header). This is b/c
+ * the `mtu` value has the basic IPTFS header len accounted for,
+ * and we added that header to the skb so it is a part of
+ * skb->len, thus we subtract it from the skb length.
+ */
+ remaining = mtu - (skb->len - sizeof(struct ip_iptfs_hdr));
+
+ /* Re-home (un-nest) nested fragment lists. We need to do this
+ * b/c we will simply be appending any following aggregated
+ * inner packets using the frag list.
+ */
+ shi = skb_shinfo(skb);
+ nextp = &shi->frag_list;
+ while (*nextp) {
+ if (skb_has_frag_list(*nextp))
+ nextp = iptfs_rehome_fraglist(&(*nextp)->next, *nextp);
+ else
+ nextp = &(*nextp)->next;
+ }
+
+ if (shi->frag_list || skb_cloned(skb) || skb_shared(skb))
+ share_ok = false;
+
+ /* See if we have enough space to simply append.
+ *
+ * NOTE: Maybe do not append if we will be mis-aligned,
+ * SW-based endpoints will probably have to copy in this
+ * case.
+ */
+ while ((skb2 = skb_peek(list))) {
+ trace_iptfs_ingress_nth_peek(skb2, remaining);
+ if (skb2->len > remaining)
+ break;
+
+ __skb_unlink(skb2, list);
+
+ /* Consider the buffer Tx'd and no longer owned */
+ skb_orphan(skb);
+
+ /* If we don't have a cksum in the packet we need to add
+ * one before encapsulation.
+ */
+ if (skb2->ip_summed == CHECKSUM_PARTIAL) {
+ if (skb_checksum_help(skb2)) {
+ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTERROR);
+ kfree_skb(skb2);
+ continue;
+ }
+ }
+
+ /* skb->pp_recycle is passed to __skb_flag_unref for all
+ * frag pages so we can only share pages with skb's who
+ * match ourselves.
+ */
+ shi2 = skb_shinfo(skb2);
+ if (share_ok &&
+ (shi2->frag_list ||
+ (!skb2->head_frag && skb_headlen(skb)) ||
+ skb->pp_recycle != skb2->pp_recycle ||
+ skb_zcopy(skb2) ||
+ (shi->nr_frags + shi2->nr_frags + 1 > MAX_SKB_FRAGS)))
+ share_ok = false;
+
+ /* Do accounting */
+ skb->data_len += skb2->len;
+ skb->len += skb2->len;
+ remaining -= skb2->len;
+
+ trace_iptfs_ingress_nth_add(skb2, share_ok);
+
+ if (share_ok) {
+ iptfs_consume_frags(skb, skb2);
+ } else {
+ /* Append to the frag_list */
+ *nextp = skb2;
+ nextp = &skb2->next;
+ if (skb_has_frag_list(skb2))
+ nextp = iptfs_rehome_fraglist(nextp,
+ skb2);
+ skb->truesize += skb2->truesize;
+ }
+ }
+
+ xfrm_output(NULL, skb);
+ }
+}
+
+static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me)
+{
+ struct sk_buff_head list;
+ struct xfrm_iptfs_data *xtfs;
+ struct xfrm_state *x;
+ time64_t settime;
+
+ xtfs = container_of(me, typeof(*xtfs), iptfs_timer);
+ x = xtfs->x;
+
+ /* Process all the queued packets
+ *
+ * softirq execution order: timer > tasklet > hrtimer
+ *
+ * Network rx will have run before us giving one last chance to queue
+ * ingress packets for us to process and transmit.
+ */
+
+ spin_lock(&x->lock);
+ __skb_queue_head_init(&list);
+ skb_queue_splice_init(&xtfs->queue, &list);
+ xtfs->queue_size = 0;
+ settime = xtfs->iptfs_settime;
+ spin_unlock(&x->lock);
+
+ /* After the above unlock, packets can begin queuing again, and the
+ * timer can be set again, from another CPU either in softirq or user
+ * context (not from this one since we are running at softirq level
+ * already).
+ */
+
+ trace_iptfs_timer_expire(xtfs, (unsigned long long)(ktime_get_raw_fast_ns() - settime));
+
+ iptfs_output_queued(x, &list);
+
+ return HRTIMER_NORESTART;
+}
+
+/**
+ * iptfs_encap_add_ipv4() - add outer encaps
+ * @x: xfrm state
+ * @skb: the packet
+ *
+ * This was originally taken from xfrm4_tunnel_encap_add. The reason for the
+ * copy is that IP-TFS/AGGFRAG can have different functionality for how to set
+ * the TOS/DSCP bits. Sets the protocol to a different value and doesn't do
+ * anything with inner headers as they aren't pointing into a normal IP
+ * singleton inner packet.
+ *
+ * Return: 0 on success or a negative error code on failure
+ */
+static int iptfs_encap_add_ipv4(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct iphdr *top_iph;
+
+ skb_reset_inner_network_header(skb);
+ skb_reset_inner_transport_header(skb);
+
+ skb_set_network_header(skb, -(x->props.header_len - x->props.enc_hdr_len));
+ skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
+
+ top_iph = ip_hdr(skb);
+ top_iph->ihl = 5;
+ top_iph->version = 4;
+ top_iph->protocol = IPPROTO_AGGFRAG;
+
+ /* As we have 0, fractional, 1 or N inner packets there's no obviously
+ * correct DSCP mapping to inherit. ECN should be cleared per RFC9347
+ * 3.1.
+ */
+ top_iph->tos = 0;
+
+ top_iph->frag_off = htons(IP_DF);
+ top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst));
+ top_iph->saddr = x->props.saddr.a4;
+ top_iph->daddr = x->id.daddr.a4;
+ ip_select_ident(dev_net(dst->dev), skb, NULL);
+
+ return 0;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/**
+ * iptfs_encap_add_ipv6() - add outer encaps
+ * @x: xfrm state
+ * @skb: the packet
+ *
+ * This was originally taken from xfrm6_tunnel_encap_add. The reason for the
+ * copy is that IP-TFS/AGGFRAG can have different functionality for how to set
+ * the flow label and TOS/DSCP bits. It also sets the protocol to a different
+ * value and doesn't do anything with inner headers as they aren't pointing into
+ * a normal IP singleton inner packet.
+ *
+ * Return: 0 on success or a negative error code on failure
+ */
+static int iptfs_encap_add_ipv6(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ struct ipv6hdr *top_iph;
+ int dsfield;
+
+ skb_reset_inner_network_header(skb);
+ skb_reset_inner_transport_header(skb);
+
+ skb_set_network_header(skb, -x->props.header_len + x->props.enc_hdr_len);
+ skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr);
+ skb->transport_header = skb->network_header + sizeof(*top_iph);
+
+ top_iph = ipv6_hdr(skb);
+ top_iph->version = 6;
+ top_iph->priority = 0;
+ memset(top_iph->flow_lbl, 0, sizeof(top_iph->flow_lbl));
+ top_iph->nexthdr = IPPROTO_AGGFRAG;
+
+ /* As we have 0, fractional, 1 or N inner packets there's no obviously
+ * correct DSCP mapping to inherit. ECN should be cleared per RFC9347
+ * 3.1.
+ */
+ dsfield = 0;
+ ipv6_change_dsfield(top_iph, 0, dsfield);
+
+ top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
+ top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+ top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
+
+ return 0;
+}
+#endif
+
+/**
+ * iptfs_prepare_output() - prepare the skb for output
+ * @x: xfrm state
+ * @skb: the packet
+ *
+ * Return: Error value, if 0 then skb values should be as follows:
+ * - transport_header should point at ESP header
+ * - network_header should point at Outer IP header
+ * - mac_header should point at protocol/nexthdr of the outer IP
+ */
+static int iptfs_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+ if (x->outer_mode.family == AF_INET)
+ return iptfs_encap_add_ipv4(x, skb);
+ if (x->outer_mode.family == AF_INET6) {
+#if IS_ENABLED(CONFIG_IPV6)
+ return iptfs_encap_add_ipv6(x, skb);
+#else
+ return -EAFNOSUPPORT;
+#endif
+ }
+ return -EOPNOTSUPP;
+}
+
+/* ========================== */
+/* State Management Functions */
+/* ========================== */
+
+/**
+ * __iptfs_get_inner_mtu() - return inner MTU with no fragmentation.
+ * @x: xfrm state.
+ * @outer_mtu: the outer mtu
+ *
+ * Return: Correct MTU taking in to account the encap overhead.
+ */
+static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu)
+{
+ struct crypto_aead *aead;
+ u32 blksize;
+
+ aead = x->data;
+ blksize = ALIGN(crypto_aead_blocksize(aead), 4);
+ return ((outer_mtu - x->props.header_len - crypto_aead_authsize(aead)) &
+ ~(blksize - 1)) - 2;
+}
+
+/**
+ * iptfs_get_inner_mtu() - return the inner MTU for an IPTFS xfrm.
+ * @x: xfrm state.
+ * @outer_mtu: Outer MTU for the encapsulated packet.
+ *
+ * Return: Correct MTU taking in to account the encap overhead.
+ */
+static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu)
+{
+ struct xfrm_iptfs_data *xtfs = x->mode_data;
+
+ /* If not dont-frag we have no MTU */
+ if (!xtfs->cfg.dont_frag)
+ return x->outer_mode.family == AF_INET ? IP_MAX_MTU : IP6_MAX_MTU;
+ return __iptfs_get_inner_mtu(x, outer_mtu);
+}
+
+/**
+ * iptfs_user_init() - initialize the SA with IPTFS options from netlink.
+ * @net: the net data
+ * @x: xfrm state
+ * @attrs: netlink attributes
+ * @extack: extack return data
+ *
+ * Return: 0 on success or a negative error code on failure
+ */
+static int iptfs_user_init(struct net *net, struct xfrm_state *x,
+ struct nlattr **attrs,
+ struct netlink_ext_ack *extack)
+{
+ struct xfrm_iptfs_data *xtfs = x->mode_data;
+ struct xfrm_iptfs_config *xc;
+ u64 q;
+
+ xc = &xtfs->cfg;
+ xc->max_queue_size = IPTFS_DEFAULT_MAX_QUEUE_SIZE;
+ xc->reorder_win_size = IPTFS_DEFAULT_REORDER_WINDOW;
+ xtfs->drop_time_ns = IPTFS_DEFAULT_DROP_TIME_USECS * NSECS_IN_USEC;
+ xtfs->init_delay_ns = IPTFS_DEFAULT_INIT_DELAY_USECS * NSECS_IN_USEC;
+
+ if (attrs[XFRMA_IPTFS_DONT_FRAG])
+ xc->dont_frag = true;
+ if (attrs[XFRMA_IPTFS_REORDER_WINDOW])
+ xc->reorder_win_size =
+ nla_get_u16(attrs[XFRMA_IPTFS_REORDER_WINDOW]);
+ /* saved array is for saving 1..N seq nums from wantseq */
+ if (xc->reorder_win_size) {
+ xtfs->w_saved = kcalloc(xc->reorder_win_size,
+ sizeof(*xtfs->w_saved), GFP_KERNEL);
+ if (!xtfs->w_saved) {
+ NL_SET_ERR_MSG(extack, "Cannot alloc reorder window");
+ return -ENOMEM;
+ }
+ }
+ if (attrs[XFRMA_IPTFS_PKT_SIZE]) {
+ xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]);
+ if (!xc->pkt_size) {
+ xtfs->payload_mtu = 0;
+ } else if (xc->pkt_size > x->props.header_len) {
+ xtfs->payload_mtu = xc->pkt_size - x->props.header_len;
+ } else {
+ NL_SET_ERR_MSG(extack,
+ "Packet size must be 0 or greater than IPTFS/ESP header length");
+ return -EINVAL;
+ }
+ }
+ if (attrs[XFRMA_IPTFS_MAX_QSIZE])
+ xc->max_queue_size = nla_get_u32(attrs[XFRMA_IPTFS_MAX_QSIZE]);
+ if (attrs[XFRMA_IPTFS_DROP_TIME])
+ xtfs->drop_time_ns =
+ (u64)nla_get_u32(attrs[XFRMA_IPTFS_DROP_TIME]) *
+ NSECS_IN_USEC;
+ if (attrs[XFRMA_IPTFS_INIT_DELAY])
+ xtfs->init_delay_ns =
+ (u64)nla_get_u32(attrs[XFRMA_IPTFS_INIT_DELAY]) * NSECS_IN_USEC;
+
+ q = (u64)xc->max_queue_size * 95;
+ do_div(q, 100);
+ xtfs->ecn_queue_size = (u32)q;
+
+ return 0;
+}
+
+static unsigned int iptfs_sa_len(const struct xfrm_state *x)
+{
+ struct xfrm_iptfs_data *xtfs = x->mode_data;
+ struct xfrm_iptfs_config *xc = &xtfs->cfg;
+ unsigned int l = 0;
+
+ if (x->dir == XFRM_SA_DIR_IN) {
+ l += nla_total_size(sizeof(u32)); /* drop time usec */
+ l += nla_total_size(sizeof(xc->reorder_win_size));
+ } else {
+ if (xc->dont_frag)
+ l += nla_total_size(0); /* dont-frag flag */
+ l += nla_total_size(sizeof(u32)); /* init delay usec */
+ l += nla_total_size(sizeof(xc->max_queue_size));
+ l += nla_total_size(sizeof(xc->pkt_size));
+ }
+
+ return l;
+}
+
+static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb)
+{
+ struct xfrm_iptfs_data *xtfs = x->mode_data;
+ struct xfrm_iptfs_config *xc = &xtfs->cfg;
+ int ret = 0;
+ u64 q;
+
+ if (x->dir == XFRM_SA_DIR_IN) {
+ q = xtfs->drop_time_ns;
+ do_div(q, NSECS_IN_USEC);
+ ret = nla_put_u32(skb, XFRMA_IPTFS_DROP_TIME, q);
+ if (ret)
+ return ret;
+
+ ret = nla_put_u16(skb, XFRMA_IPTFS_REORDER_WINDOW,
+ xc->reorder_win_size);
+ } else {
+ if (xc->dont_frag) {
+ ret = nla_put_flag(skb, XFRMA_IPTFS_DONT_FRAG);
+ if (ret)
+ return ret;
+ }
+
+ q = xtfs->init_delay_ns;
+ do_div(q, NSECS_IN_USEC);
+ ret = nla_put_u32(skb, XFRMA_IPTFS_INIT_DELAY, q);
+ if (ret)
+ return ret;
+
+ ret = nla_put_u32(skb, XFRMA_IPTFS_MAX_QSIZE, xc->max_queue_size);
+ if (ret)
+ return ret;
+
+ ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size);
+ }
+
+ return ret;
+}
+
+static void __iptfs_init_state(struct xfrm_state *x,
+ struct xfrm_iptfs_data *xtfs)
+{
+ __skb_queue_head_init(&xtfs->queue);
+ hrtimer_init(&xtfs->iptfs_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE);
+ xtfs->iptfs_timer.function = iptfs_delay_timer;
+
+ spin_lock_init(&xtfs->drop_lock);
+ hrtimer_init(&xtfs->drop_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE);
+ xtfs->drop_timer.function = iptfs_drop_timer;
+
+ /* Modify type (esp) adjustment values */
+
+ if (x->props.family == AF_INET)
+ x->props.header_len += sizeof(struct iphdr) + sizeof(struct ip_iptfs_hdr);
+ else if (x->props.family == AF_INET6)
+ x->props.header_len += sizeof(struct ipv6hdr) + sizeof(struct ip_iptfs_hdr);
+ x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr);
+
+ /* Always keep a module reference when x->mode_data is set */
+ __module_get(x->mode_cbs->owner);
+
+ x->mode_data = xtfs;
+ xtfs->x = x;
+}
+
+static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig)
+{
+ struct xfrm_iptfs_data *xtfs;
+
+ xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL);
+ if (!xtfs)
+ return -ENOMEM;
+
+ x->mode_data = xtfs;
+ xtfs->x = x;
+
+ xtfs->ra_newskb = NULL;
+ if (xtfs->cfg.reorder_win_size) {
+ xtfs->w_saved = kcalloc(xtfs->cfg.reorder_win_size,
+ sizeof(*xtfs->w_saved), GFP_KERNEL);
+ if (!xtfs->w_saved) {
+ kfree_sensitive(xtfs);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static int iptfs_init_state(struct xfrm_state *x)
+{
+ struct xfrm_iptfs_data *xtfs;
+
+ if (x->mode_data) {
+ /* We have arrived here from xfrm_state_clone() */
+ xtfs = x->mode_data;
+ } else {
+ xtfs = kzalloc(sizeof(*xtfs), GFP_KERNEL);
+ if (!xtfs)
+ return -ENOMEM;
+ }
+
+ __iptfs_init_state(x, xtfs);
+
+ return 0;
+}
+
+static void iptfs_destroy_state(struct xfrm_state *x)
+{
+ struct xfrm_iptfs_data *xtfs = x->mode_data;
+ struct sk_buff_head list;
+ struct skb_wseq *s, *se;
+ struct sk_buff *skb;
+
+ if (!xtfs)
+ return;
+
+ spin_lock_bh(&xtfs->x->lock);
+ hrtimer_cancel(&xtfs->iptfs_timer);
+ __skb_queue_head_init(&list);
+ skb_queue_splice_init(&xtfs->queue, &list);
+ spin_unlock_bh(&xtfs->x->lock);
+
+ while ((skb = __skb_dequeue(&list)))
+ kfree_skb(skb);
+
+ spin_lock_bh(&xtfs->drop_lock);
+ hrtimer_cancel(&xtfs->drop_timer);
+ spin_unlock_bh(&xtfs->drop_lock);
+
+ if (xtfs->ra_newskb)
+ kfree_skb(xtfs->ra_newskb);
+
+ for (s = xtfs->w_saved, se = s + xtfs->w_savedlen; s < se; s++) {
+ if (s->skb)
+ kfree_skb(s->skb);
+ }
+
+ kfree_sensitive(xtfs->w_saved);
+ kfree_sensitive(xtfs);
+
+ module_put(x->mode_cbs->owner);
+}
+
+static const struct xfrm_mode_cbs iptfs_mode_cbs = {
+ .owner = THIS_MODULE,
+ .init_state = iptfs_init_state,
+ .clone_state = iptfs_clone_state,
+ .destroy_state = iptfs_destroy_state,
+ .user_init = iptfs_user_init,
+ .copy_to_user = iptfs_copy_to_user,
+ .sa_len = iptfs_sa_len,
+ .get_inner_mtu = iptfs_get_inner_mtu,
+ .input = iptfs_input,
+ .output = iptfs_output_collect,
+ .prepare_output = iptfs_prepare_output,
+};
+
+static int __init xfrm_iptfs_init(void)
+{
+ int err;
+
+ pr_info("xfrm_iptfs: IPsec IP-TFS tunnel mode module\n");
+
+ err = xfrm_register_mode_cbs(XFRM_MODE_IPTFS, &iptfs_mode_cbs);
+ if (err < 0)
+ pr_info("%s: can't register IP-TFS\n", __func__);
+
+ return err;
+}
+
+static void __exit xfrm_iptfs_fini(void)
+{
+ xfrm_unregister_mode_cbs(XFRM_MODE_IPTFS);
+}
+
+module_init(xfrm_iptfs_init);
+module_exit(xfrm_iptfs_fini);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP-TFS support for xfrm ipsec tunnels");
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index e5722c95b8bb..b5025cf6136e 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -472,6 +472,8 @@ static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb)
WARN_ON_ONCE(1);
break;
default:
+ if (x->mode_cbs && x->mode_cbs->prepare_output)
+ return x->mode_cbs->prepare_output(x, skb);
WARN_ON_ONCE(1);
break;
}
@@ -675,6 +677,10 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x)
return;
}
+ if (x->outer_mode.encap == XFRM_MODE_IPTFS) {
+ xo->inner_ipproto = IPPROTO_AGGFRAG;
+ return;
+ }
/* non-Tunnel Mode */
if (!skb->encapsulation)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4408c11c0835..9e510021ee91 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2497,6 +2497,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
if (tmpl->mode == XFRM_MODE_TUNNEL ||
+ tmpl->mode == XFRM_MODE_IPTFS ||
tmpl->mode == XFRM_MODE_BEET) {
remote = &tmpl->id.daddr;
local = &tmpl->saddr;
@@ -2748,13 +2749,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst1->input = dst_discard;
- rcu_read_lock();
- afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
- if (likely(afinfo))
- dst1->output = afinfo->output;
- else
- dst1->output = dst_discard_out;
- rcu_read_unlock();
+ if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) {
+ dst1->output = xfrm[i]->mode_cbs->output;
+ } else {
+ rcu_read_lock();
+ afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
+ if (likely(afinfo))
+ dst1->output = afinfo->output;
+ else
+ dst1->output = dst_discard_out;
+ rcu_read_unlock();
+ }
xdst_prev = xdst;
@@ -3290,7 +3295,8 @@ no_transform:
ok:
xfrm_pols_put(pols, drop_pols);
if (dst && dst->xfrm &&
- dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
+ (dst->xfrm->props.mode == XFRM_MODE_TUNNEL ||
+ dst->xfrm->props.mode == XFRM_MODE_IPTFS))
dst->flags |= DST_XFRM_TUNNEL;
return dst;
@@ -4519,6 +4525,7 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm
switch (t->mode) {
case XFRM_MODE_TUNNEL:
case XFRM_MODE_BEET:
+ case XFRM_MODE_IPTFS:
if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
m->old_family) &&
xfrm_addr_equal(&t->saddr, &m->old_saddr,
@@ -4561,7 +4568,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
continue;
n++;
if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
- pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
+ pol->xfrm_vec[i].mode != XFRM_MODE_BEET &&
+ pol->xfrm_vec[i].mode != XFRM_MODE_IPTFS)
continue;
/* update endpoints */
memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c
index eeb984be03a7..8e07dd614b0b 100644
--- a/net/xfrm/xfrm_proc.c
+++ b/net/xfrm/xfrm_proc.c
@@ -43,6 +43,8 @@ static const struct snmp_mib xfrm_mib_list[] = {
SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR),
SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR),
SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR),
+ SNMP_MIB_ITEM("XfrmInIptfsError", LINUX_MIB_XFRMINIPTFSERROR),
+ SNMP_MIB_ITEM("XfrmOutNoQueueSpace", LINUX_MIB_XFRMOUTNOQSPACE),
SNMP_MIB_SENTINEL
};
diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c
index bc56c6305725..e500aebbad22 100644
--- a/net/xfrm/xfrm_replay.c
+++ b/net/xfrm/xfrm_replay.c
@@ -729,6 +729,7 @@ static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff
}
replay_esn->oseq = oseq;
+ xfrm_dev_state_advance_esn(x);
if (xfrm_aevent_is_on(net))
xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 67ca7ac955a3..34067cb8a479 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -467,6 +467,11 @@ static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
.flags = XFRM_MODE_FLAG_TUNNEL,
.family = AF_INET,
},
+ [XFRM_MODE_IPTFS] = {
+ .encap = XFRM_MODE_IPTFS,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET,
+ },
};
static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
@@ -488,6 +493,11 @@ static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
.flags = XFRM_MODE_FLAG_TUNNEL,
.family = AF_INET6,
},
+ [XFRM_MODE_IPTFS] = {
+ .encap = XFRM_MODE_IPTFS,
+ .flags = XFRM_MODE_FLAG_TUNNEL,
+ .family = AF_INET6,
+ },
};
static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
@@ -515,6 +525,60 @@ static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
return NULL;
}
+static const struct xfrm_mode_cbs __rcu *xfrm_mode_cbs_map[XFRM_MODE_MAX];
+static DEFINE_SPINLOCK(xfrm_mode_cbs_map_lock);
+
+int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs)
+{
+ if (mode >= XFRM_MODE_MAX)
+ return -EINVAL;
+
+ spin_lock_bh(&xfrm_mode_cbs_map_lock);
+ rcu_assign_pointer(xfrm_mode_cbs_map[mode], mode_cbs);
+ spin_unlock_bh(&xfrm_mode_cbs_map_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(xfrm_register_mode_cbs);
+
+void xfrm_unregister_mode_cbs(u8 mode)
+{
+ if (mode >= XFRM_MODE_MAX)
+ return;
+
+ spin_lock_bh(&xfrm_mode_cbs_map_lock);
+ RCU_INIT_POINTER(xfrm_mode_cbs_map[mode], NULL);
+ spin_unlock_bh(&xfrm_mode_cbs_map_lock);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(xfrm_unregister_mode_cbs);
+
+static const struct xfrm_mode_cbs *xfrm_get_mode_cbs(u8 mode)
+{
+ const struct xfrm_mode_cbs *cbs;
+ bool try_load = true;
+
+ if (mode >= XFRM_MODE_MAX)
+ return NULL;
+
+retry:
+ rcu_read_lock();
+
+ cbs = rcu_dereference(xfrm_mode_cbs_map[mode]);
+ if (cbs && !try_module_get(cbs->owner))
+ cbs = NULL;
+
+ rcu_read_unlock();
+
+ if (mode == XFRM_MODE_IPTFS && !cbs && try_load) {
+ request_module("xfrm-iptfs");
+ try_load = false;
+ goto retry;
+ }
+
+ return cbs;
+}
+
void xfrm_state_free(struct xfrm_state *x)
{
kmem_cache_free(xfrm_state_cache, x);
@@ -523,6 +587,8 @@ EXPORT_SYMBOL(xfrm_state_free);
static void ___xfrm_state_destroy(struct xfrm_state *x)
{
+ if (x->mode_cbs && x->mode_cbs->destroy_state)
+ x->mode_cbs->destroy_state(x);
hrtimer_cancel(&x->mtimer);
del_timer_sync(&x->rtimer);
kfree(x->aead);
@@ -682,6 +748,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
x->replay_maxdiff = 0;
x->pcpu_num = UINT_MAX;
spin_lock_init(&x->lock);
+ x->mode_data = NULL;
}
return x;
}
@@ -1945,6 +2012,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
x->new_mapping_sport = 0;
x->dir = orig->dir;
+ x->mode_cbs = orig->mode_cbs;
+ if (x->mode_cbs && x->mode_cbs->clone_state) {
+ if (x->mode_cbs->clone_state(x, orig))
+ goto error;
+ }
+
return x;
error:
@@ -2271,6 +2344,7 @@ static int __xfrm6_state_sort_cmp(const void *p)
#endif
case XFRM_MODE_TUNNEL:
case XFRM_MODE_BEET:
+ case XFRM_MODE_IPTFS:
return 4;
}
return 5;
@@ -2297,6 +2371,7 @@ static int __xfrm6_tmpl_sort_cmp(const void *p)
#endif
case XFRM_MODE_TUNNEL:
case XFRM_MODE_BEET:
+ case XFRM_MODE_IPTFS:
return 3;
}
return 4;
@@ -2986,6 +3061,9 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu)
case XFRM_MODE_TUNNEL:
break;
default:
+ if (x->mode_cbs && x->mode_cbs->get_inner_mtu)
+ return x->mode_cbs->get_inner_mtu(x, mtu);
+
WARN_ON_ONCE(1);
break;
}
@@ -3086,6 +3164,12 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
}
}
+ x->mode_cbs = xfrm_get_mode_cbs(x->props.mode);
+ if (x->mode_cbs) {
+ if (x->mode_cbs->init_state)
+ err = x->mode_cbs->init_state(x);
+ module_put(x->mode_cbs->owner);
+ }
error:
return err;
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index b2876e09328b..08c6d6f0179f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -301,6 +301,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
NL_SET_ERR_MSG(extack, "TFC padding can only be used in tunnel mode");
goto out;
}
+ if ((attrs[XFRMA_IPTFS_DROP_TIME] ||
+ attrs[XFRMA_IPTFS_REORDER_WINDOW] ||
+ attrs[XFRMA_IPTFS_DONT_FRAG] ||
+ attrs[XFRMA_IPTFS_INIT_DELAY] ||
+ attrs[XFRMA_IPTFS_MAX_QSIZE] ||
+ attrs[XFRMA_IPTFS_PKT_SIZE]) &&
+ p->mode != XFRM_MODE_IPTFS) {
+ NL_SET_ERR_MSG(extack, "IP-TFS options can only be used in IP-TFS mode");
+ goto out;
+ }
break;
case IPPROTO_COMP:
@@ -373,6 +383,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
case XFRM_MODE_ROUTEOPTIMIZATION:
case XFRM_MODE_BEET:
break;
+ case XFRM_MODE_IPTFS:
+ if (p->id.proto != IPPROTO_ESP) {
+ NL_SET_ERR_MSG(extack, "IP-TFS mode only supported with ESP");
+ goto out;
+ }
+ if (sa_dir == 0) {
+ NL_SET_ERR_MSG(extack, "IP-TFS mode requires in or out direction attribute");
+ goto out;
+ }
+ break;
default:
NL_SET_ERR_MSG(extack, "Unsupported mode");
@@ -421,6 +441,18 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
goto out;
}
+ if (attrs[XFRMA_IPTFS_DROP_TIME]) {
+ NL_SET_ERR_MSG(extack, "IP-TFS drop time should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_IPTFS_REORDER_WINDOW]) {
+ NL_SET_ERR_MSG(extack, "IP-TFS reorder window should not be set for output SA");
+ err = -EINVAL;
+ goto out;
+ }
+
if (attrs[XFRMA_REPLAY_VAL]) {
struct xfrm_replay_state *replay;
@@ -458,6 +490,30 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
}
}
+
+ if (attrs[XFRMA_IPTFS_DONT_FRAG]) {
+ NL_SET_ERR_MSG(extack, "IP-TFS don't fragment should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_IPTFS_INIT_DELAY]) {
+ NL_SET_ERR_MSG(extack, "IP-TFS initial delay should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_IPTFS_MAX_QSIZE]) {
+ NL_SET_ERR_MSG(extack, "IP-TFS max queue size should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
+
+ if (attrs[XFRMA_IPTFS_PKT_SIZE]) {
+ NL_SET_ERR_MSG(extack, "IP-TFS packet size should not be set for input SA");
+ err = -EINVAL;
+ goto out;
+ }
}
if (!sa_dir && attrs[XFRMA_SA_PCPU]) {
@@ -886,6 +942,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
goto error;
}
+ if (x->mode_cbs && x->mode_cbs->user_init) {
+ err = x->mode_cbs->user_init(net, x, attrs, extack);
+ if (err)
+ goto error;
+ }
+
return x;
error:
@@ -1301,6 +1363,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
if (ret)
goto out;
}
+ if (x->mode_cbs && x->mode_cbs->copy_to_user)
+ ret = x->mode_cbs->copy_to_user(x, skb);
+ if (ret)
+ goto out;
if (x->mapping_maxage) {
ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage);
if (ret)
@@ -1958,6 +2024,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family,
return -EINVAL;
}
break;
+ case XFRM_MODE_IPTFS:
+ break;
default:
if (ut[i].family != prev_family) {
NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change");
@@ -3220,6 +3288,12 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT),
[XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 },
[XFRMA_SA_PCPU] = { .type = NLA_U32 },
+ [XFRMA_IPTFS_DROP_TIME] = { .type = NLA_U32 },
+ [XFRMA_IPTFS_REORDER_WINDOW] = { .type = NLA_U16 },
+ [XFRMA_IPTFS_DONT_FRAG] = { .type = NLA_FLAG },
+ [XFRMA_IPTFS_INIT_DELAY] = { .type = NLA_U32 },
+ [XFRMA_IPTFS_MAX_QSIZE] = { .type = NLA_U32 },
+ [XFRMA_IPTFS_PKT_SIZE] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
@@ -3554,6 +3628,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
if (x->nat_keepalive_interval)
l += nla_total_size(sizeof(x->nat_keepalive_interval));
+ if (x->mode_cbs && x->mode_cbs->sa_len)
+ l += x->mode_cbs->sa_len(x);
+
return l;
}
diff --git a/rust/kernel/workqueue.rs b/rust/kernel/workqueue.rs
index 4d1d2062f6eb..fd3e97192ed8 100644
--- a/rust/kernel/workqueue.rs
+++ b/rust/kernel/workqueue.rs
@@ -519,7 +519,15 @@ impl_has_work! {
impl{T} HasWork<Self> for ClosureWork<T> { self.work }
}
-// SAFETY: TODO.
+// SAFETY: The `__enqueue` implementation in RawWorkItem uses a `work_struct` initialized with the
+// `run` method of this trait as the function pointer because:
+// - `__enqueue` gets the `work_struct` from the `Work` field, using `T::raw_get_work`.
+// - The only safe way to create a `Work` object is through `Work::new`.
+// - `Work::new` makes sure that `T::Pointer::run` is passed to `init_work_with_key`.
+// - Finally `Work` and `RawWorkItem` guarantee that the correct `Work` field
+// will be used because of the ID const generic bound. This makes sure that `T::raw_get_work`
+// uses the correct offset for the `Work` field, and `Work::new` picks the correct
+// implementation of `WorkItemPointer` for `Arc<T>`.
unsafe impl<T, const ID: u64> WorkItemPointer<ID> for Arc<T>
where
T: WorkItem<ID, Pointer = Self>,
@@ -537,7 +545,13 @@ where
}
}
-// SAFETY: TODO.
+// SAFETY: The `work_struct` raw pointer is guaranteed to be valid for the duration of the call to
+// the closure because we get it from an `Arc`, which means that the ref count will be at least 1,
+// and we don't drop the `Arc` ourselves. If `queue_work_on` returns true, it is further guaranteed
+// to be valid until a call to the function pointer in `work_struct` because we leak the memory it
+// points to, and only reclaim it if the closure returns false, or in `WorkItemPointer::run`, which
+// is what the function pointer in the `work_struct` must be pointing to, according to the safety
+// requirements of `WorkItemPointer`.
unsafe impl<T, const ID: u64> RawWorkItem<ID> for Arc<T>
where
T: WorkItem<ID, Pointer = Self>,
diff --git a/scripts/mksysmap b/scripts/mksysmap
index c12723a04655..3accbdb269ac 100755
--- a/scripts/mksysmap
+++ b/scripts/mksysmap
@@ -26,7 +26,7 @@
# (do not forget a space before each pattern)
# local symbols for ARM, MIPS, etc.
-/ \\$/d
+/ \$/d
# local labels, .LBB, .Ltmpxxx, .L__unnamed_xx, .LASANPC, etc.
/ \.L/d
@@ -39,7 +39,7 @@
/ __pi_\.L/d
# arm64 local symbols in non-VHE KVM namespace
-/ __kvm_nvhe_\\$/d
+/ __kvm_nvhe_\$/d
/ __kvm_nvhe_\.L/d
# lld arm/aarch64/mips thunks
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 5b5745f00eb3..19ec72a69e90 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -132,7 +132,8 @@ struct devtable {
* based at address m.
*/
#define DEF_FIELD(m, devid, f) \
- typeof(((struct devid *)0)->f) f = TO_NATIVE(*(typeof(f) *)((m) + OFF_##devid##_##f))
+ typeof(((struct devid *)0)->f) f = \
+ get_unaligned_native((typeof(f) *)((m) + OFF_##devid##_##f))
/* Define a variable f that holds the address of field f of struct devid
* based at address m. Due to the way typeof works, for a field of type
@@ -600,7 +601,7 @@ static void do_pnp_card_entry(struct module *mod, void *symval)
static void do_pcmcia_entry(struct module *mod, void *symval)
{
char alias[256] = {};
- unsigned int i;
+
DEF_FIELD(symval, pcmcia_device_id, match_flags);
DEF_FIELD(symval, pcmcia_device_id, manf_id);
DEF_FIELD(symval, pcmcia_device_id, card_id);
@@ -609,10 +610,6 @@ static void do_pcmcia_entry(struct module *mod, void *symval)
DEF_FIELD(symval, pcmcia_device_id, device_no);
DEF_FIELD_ADDR(symval, pcmcia_device_id, prod_id_hash);
- for (i=0; i<4; i++) {
- (*prod_id_hash)[i] = TO_NATIVE((*prod_id_hash)[i]);
- }
-
ADD(alias, "m", match_flags & PCMCIA_DEV_ID_MATCH_MANF_ID,
manf_id);
ADD(alias, "c", match_flags & PCMCIA_DEV_ID_MATCH_CARD_ID,
@@ -623,10 +620,14 @@ static void do_pcmcia_entry(struct module *mod, void *symval)
function);
ADD(alias, "pfn", match_flags & PCMCIA_DEV_ID_MATCH_DEVICE_NO,
device_no);
- ADD(alias, "pa", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID1, (*prod_id_hash)[0]);
- ADD(alias, "pb", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID2, (*prod_id_hash)[1]);
- ADD(alias, "pc", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3, (*prod_id_hash)[2]);
- ADD(alias, "pd", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4, (*prod_id_hash)[3]);
+ ADD(alias, "pa", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID1,
+ get_unaligned_native(*prod_id_hash + 0));
+ ADD(alias, "pb", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID2,
+ get_unaligned_native(*prod_id_hash + 1));
+ ADD(alias, "pc", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3,
+ get_unaligned_native(*prod_id_hash + 2));
+ ADD(alias, "pd", match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4,
+ get_unaligned_native(*prod_id_hash + 3));
module_alias_printf(mod, true, "pcmcia:%s", alias);
}
@@ -654,10 +655,9 @@ static void do_input(char *alias,
{
unsigned int i;
- for (i = min / BITS_PER_LONG; i < max / BITS_PER_LONG + 1; i++)
- arr[i] = TO_NATIVE(arr[i]);
- for (i = min; i < max; i++)
- if (arr[i / BITS_PER_LONG] & (1ULL << (i%BITS_PER_LONG)))
+ for (i = min; i <= max; i++)
+ if (get_unaligned_native(arr + i / BITS_PER_LONG) &
+ (1ULL << (i % BITS_PER_LONG)))
sprintf(alias + strlen(alias), "%X,*", i);
}
@@ -812,15 +812,13 @@ static void do_virtio_entry(struct module *mod, void *symval)
* Each byte of the guid will be represented by two hex characters
* in the name.
*/
-
static void do_vmbus_entry(struct module *mod, void *symval)
{
- int i;
DEF_FIELD_ADDR(symval, hv_vmbus_device_id, guid);
- char guid_name[(sizeof(*guid) + 1) * 2];
+ char guid_name[sizeof(*guid) * 2 + 1];
- for (i = 0; i < (sizeof(*guid) * 2); i += 2)
- sprintf(&guid_name[i], "%02x", TO_NATIVE((guid->b)[i/2]));
+ for (int i = 0; i < sizeof(*guid); i++)
+ sprintf(&guid_name[i * 2], "%02x", guid->b[i]);
module_alias_printf(mod, false, "vmbus:%s", guid_name);
}
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 94ee49207a45..7ea59dc4926b 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1138,9 +1138,9 @@ static Elf_Addr addend_386_rel(uint32_t *location, unsigned int r_type)
{
switch (r_type) {
case R_386_32:
- return TO_NATIVE(*location);
+ return get_unaligned_native(location);
case R_386_PC32:
- return TO_NATIVE(*location) + 4;
+ return get_unaligned_native(location) + 4;
}
return (Elf_Addr)(-1);
@@ -1161,24 +1161,24 @@ static Elf_Addr addend_arm_rel(void *loc, Elf_Sym *sym, unsigned int r_type)
switch (r_type) {
case R_ARM_ABS32:
case R_ARM_REL32:
- inst = TO_NATIVE(*(uint32_t *)loc);
+ inst = get_unaligned_native((uint32_t *)loc);
return inst + sym->st_value;
case R_ARM_MOVW_ABS_NC:
case R_ARM_MOVT_ABS:
- inst = TO_NATIVE(*(uint32_t *)loc);
+ inst = get_unaligned_native((uint32_t *)loc);
offset = sign_extend32(((inst & 0xf0000) >> 4) | (inst & 0xfff),
15);
return offset + sym->st_value;
case R_ARM_PC24:
case R_ARM_CALL:
case R_ARM_JUMP24:
- inst = TO_NATIVE(*(uint32_t *)loc);
+ inst = get_unaligned_native((uint32_t *)loc);
offset = sign_extend32((inst & 0x00ffffff) << 2, 25);
return offset + sym->st_value + 8;
case R_ARM_THM_MOVW_ABS_NC:
case R_ARM_THM_MOVT_ABS:
- upper = TO_NATIVE(*(uint16_t *)loc);
- lower = TO_NATIVE(*((uint16_t *)loc + 1));
+ upper = get_unaligned_native((uint16_t *)loc);
+ lower = get_unaligned_native((uint16_t *)loc + 1);
offset = sign_extend32(((upper & 0x000f) << 12) |
((upper & 0x0400) << 1) |
((lower & 0x7000) >> 4) |
@@ -1195,8 +1195,8 @@ static Elf_Addr addend_arm_rel(void *loc, Elf_Sym *sym, unsigned int r_type)
* imm11 = lower[10:0]
* imm32 = SignExtend(S:J2:J1:imm6:imm11:'0')
*/
- upper = TO_NATIVE(*(uint16_t *)loc);
- lower = TO_NATIVE(*((uint16_t *)loc + 1));
+ upper = get_unaligned_native((uint16_t *)loc);
+ lower = get_unaligned_native((uint16_t *)loc + 1);
sign = (upper >> 10) & 1;
j1 = (lower >> 13) & 1;
@@ -1219,8 +1219,8 @@ static Elf_Addr addend_arm_rel(void *loc, Elf_Sym *sym, unsigned int r_type)
* I2 = NOT(J2 XOR S)
* imm32 = SignExtend(S:I1:I2:imm10:imm11:'0')
*/
- upper = TO_NATIVE(*(uint16_t *)loc);
- lower = TO_NATIVE(*((uint16_t *)loc + 1));
+ upper = get_unaligned_native((uint16_t *)loc);
+ lower = get_unaligned_native((uint16_t *)loc + 1);
sign = (upper >> 10) & 1;
j1 = (lower >> 13) & 1;
@@ -1241,7 +1241,7 @@ static Elf_Addr addend_mips_rel(uint32_t *location, unsigned int r_type)
{
uint32_t inst;
- inst = TO_NATIVE(*location);
+ inst = get_unaligned_native(location);
switch (r_type) {
case R_MIPS_LO16:
return inst & 0xffff;
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 8b72c227ebf4..ffd0a52a606e 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -65,6 +65,20 @@
#define TO_NATIVE(x) \
(target_is_big_endian == host_is_big_endian ? x : bswap(x))
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __attribute__((__packed__)) *__pptr = \
+ (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+#define get_unaligned_native(ptr) \
+({ \
+ typeof(*(ptr)) _val = get_unaligned(ptr); \
+ TO_NATIVE(_val); \
+})
+
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define strstarts(str, prefix) (strncmp(str, prefix, strlen(prefix)) == 0)
diff --git a/scripts/package/PKGBUILD b/scripts/package/PKGBUILD
index f83493838cf9..dca706617adc 100644
--- a/scripts/package/PKGBUILD
+++ b/scripts/package/PKGBUILD
@@ -103,7 +103,7 @@ _package-headers() {
_package-api-headers() {
pkgdesc="Kernel headers sanitized for use in userspace"
- provides=(linux-api-headers)
+ provides=(linux-api-headers="${pkgver}")
conflicts=(linux-api-headers)
_prologue
diff --git a/scripts/sorttable.h b/scripts/sorttable.h
index 7bd0184380d3..a7c5445baf00 100644
--- a/scripts/sorttable.h
+++ b/scripts/sorttable.h
@@ -110,7 +110,7 @@ static inline unsigned long orc_ip(const int *ip)
static int orc_sort_cmp(const void *_a, const void *_b)
{
- struct orc_entry *orc_a;
+ struct orc_entry *orc_a, *orc_b;
const int *a = g_orc_ip_table + *(int *)_a;
const int *b = g_orc_ip_table + *(int *)_b;
unsigned long a_val = orc_ip(a);
@@ -128,6 +128,9 @@ static int orc_sort_cmp(const void *_a, const void *_b)
* whitelisted .o files which didn't get objtool generation.
*/
orc_a = g_orc_table + (a - g_orc_ip_table);
+ orc_b = g_orc_table + (b - g_orc_ip_table);
+ if (orc_a->type == ORC_TYPE_UNDEFINED && orc_b->type == ORC_TYPE_UNDEFINED)
+ return 0;
return orc_a->type == ORC_TYPE_UNDEFINED ? -1 : 1;
}
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index cc0b0af20296..1f2680bcc43a 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -174,13 +174,15 @@ int avc_get_hash_stats(char *page)
* using a linked list for extended_perms_decision lookup because the list is
* always small. i.e. less than 5, typically 1
*/
-static struct extended_perms_decision *avc_xperms_decision_lookup(u8 driver,
- struct avc_xperms_node *xp_node)
+static struct extended_perms_decision *
+avc_xperms_decision_lookup(u8 driver, u8 base_perm,
+ struct avc_xperms_node *xp_node)
{
struct avc_xperms_decision_node *xpd_node;
list_for_each_entry(xpd_node, &xp_node->xpd_head, xpd_list) {
- if (xpd_node->xpd.driver == driver)
+ if (xpd_node->xpd.driver == driver &&
+ xpd_node->xpd.base_perm == base_perm)
return &xpd_node->xpd;
}
return NULL;
@@ -205,11 +207,12 @@ avc_xperms_has_perm(struct extended_perms_decision *xpd,
}
static void avc_xperms_allow_perm(struct avc_xperms_node *xp_node,
- u8 driver, u8 perm)
+ u8 driver, u8 base_perm, u8 perm)
{
struct extended_perms_decision *xpd;
security_xperm_set(xp_node->xp.drivers.p, driver);
- xpd = avc_xperms_decision_lookup(driver, xp_node);
+ xp_node->xp.base_perms |= base_perm;
+ xpd = avc_xperms_decision_lookup(driver, base_perm, xp_node);
if (xpd && xpd->allowed)
security_xperm_set(xpd->allowed->p, perm);
}
@@ -245,6 +248,7 @@ static void avc_xperms_free(struct avc_xperms_node *xp_node)
static void avc_copy_xperms_decision(struct extended_perms_decision *dest,
struct extended_perms_decision *src)
{
+ dest->base_perm = src->base_perm;
dest->driver = src->driver;
dest->used = src->used;
if (dest->used & XPERMS_ALLOWED)
@@ -272,6 +276,7 @@ static inline void avc_quick_copy_xperms_decision(u8 perm,
*/
u8 i = perm >> 5;
+ dest->base_perm = src->base_perm;
dest->used = src->used;
if (dest->used & XPERMS_ALLOWED)
dest->allowed->p[i] = src->allowed->p[i];
@@ -357,6 +362,7 @@ static int avc_xperms_populate(struct avc_node *node,
memcpy(dest->xp.drivers.p, src->xp.drivers.p, sizeof(dest->xp.drivers.p));
dest->xp.len = src->xp.len;
+ dest->xp.base_perms = src->xp.base_perms;
/* for each source xpd allocate a destination xpd and copy */
list_for_each_entry(src_xpd, &src->xpd_head, xpd_list) {
@@ -807,6 +813,7 @@ out:
* @event : Updating event
* @perms : Permission mask bits
* @driver: xperm driver information
+ * @base_perm: the base permission associated with the extended permission
* @xperm: xperm permissions
* @ssid: AVC entry source sid
* @tsid: AVC entry target sid
@@ -820,10 +827,9 @@ out:
* otherwise, this function updates the AVC entry. The original AVC-entry object
* will release later by RCU.
*/
-static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid,
- u32 tsid, u16 tclass, u32 seqno,
- struct extended_perms_decision *xpd,
- u32 flags)
+static int avc_update_node(u32 event, u32 perms, u8 driver, u8 base_perm,
+ u8 xperm, u32 ssid, u32 tsid, u16 tclass, u32 seqno,
+ struct extended_perms_decision *xpd, u32 flags)
{
u32 hvalue;
int rc = 0;
@@ -880,7 +886,7 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid,
case AVC_CALLBACK_GRANT:
node->ae.avd.allowed |= perms;
if (node->ae.xp_node && (flags & AVC_EXTENDED_PERMS))
- avc_xperms_allow_perm(node->ae.xp_node, driver, xperm);
+ avc_xperms_allow_perm(node->ae.xp_node, driver, base_perm, xperm);
break;
case AVC_CALLBACK_TRY_REVOKE:
case AVC_CALLBACK_REVOKE:
@@ -987,10 +993,9 @@ static noinline void avc_compute_av(u32 ssid, u32 tsid, u16 tclass,
avc_insert(ssid, tsid, tclass, avd, xp_node);
}
-static noinline int avc_denied(u32 ssid, u32 tsid,
- u16 tclass, u32 requested,
- u8 driver, u8 xperm, unsigned int flags,
- struct av_decision *avd)
+static noinline int avc_denied(u32 ssid, u32 tsid, u16 tclass, u32 requested,
+ u8 driver, u8 base_perm, u8 xperm,
+ unsigned int flags, struct av_decision *avd)
{
if (flags & AVC_STRICT)
return -EACCES;
@@ -999,7 +1004,7 @@ static noinline int avc_denied(u32 ssid, u32 tsid,
!(avd->flags & AVD_FLAGS_PERMISSIVE))
return -EACCES;
- avc_update_node(AVC_CALLBACK_GRANT, requested, driver,
+ avc_update_node(AVC_CALLBACK_GRANT, requested, driver, base_perm,
xperm, ssid, tsid, tclass, avd->seqno, NULL, flags);
return 0;
}
@@ -1012,7 +1017,8 @@ static noinline int avc_denied(u32 ssid, u32 tsid,
* driver field is used to specify which set contains the permission.
*/
int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested,
- u8 driver, u8 xperm, struct common_audit_data *ad)
+ u8 driver, u8 base_perm, u8 xperm,
+ struct common_audit_data *ad)
{
struct avc_node *node;
struct av_decision avd;
@@ -1047,22 +1053,23 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested,
local_xpd.auditallow = &auditallow;
local_xpd.dontaudit = &dontaudit;
- xpd = avc_xperms_decision_lookup(driver, xp_node);
+ xpd = avc_xperms_decision_lookup(driver, base_perm, xp_node);
if (unlikely(!xpd)) {
/*
* Compute the extended_perms_decision only if the driver
- * is flagged
+ * is flagged and the base permission is known.
*/
- if (!security_xperm_test(xp_node->xp.drivers.p, driver)) {
+ if (!security_xperm_test(xp_node->xp.drivers.p, driver) ||
+ !(xp_node->xp.base_perms & base_perm)) {
avd.allowed &= ~requested;
goto decision;
}
rcu_read_unlock();
- security_compute_xperms_decision(ssid, tsid, tclass,
- driver, &local_xpd);
+ security_compute_xperms_decision(ssid, tsid, tclass, driver,
+ base_perm, &local_xpd);
rcu_read_lock();
- avc_update_node(AVC_CALLBACK_ADD_XPERMS, requested,
- driver, xperm, ssid, tsid, tclass, avd.seqno,
+ avc_update_node(AVC_CALLBACK_ADD_XPERMS, requested, driver,
+ base_perm, xperm, ssid, tsid, tclass, avd.seqno,
&local_xpd, 0);
} else {
avc_quick_copy_xperms_decision(xperm, &local_xpd, xpd);
@@ -1075,8 +1082,8 @@ int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested,
decision:
denied = requested & ~(avd.allowed);
if (unlikely(denied))
- rc = avc_denied(ssid, tsid, tclass, requested,
- driver, xperm, AVC_EXTENDED_PERMS, &avd);
+ rc = avc_denied(ssid, tsid, tclass, requested, driver,
+ base_perm, xperm, AVC_EXTENDED_PERMS, &avd);
rcu_read_unlock();
@@ -1110,7 +1117,7 @@ static noinline int avc_perm_nonode(u32 ssid, u32 tsid, u16 tclass,
avc_compute_av(ssid, tsid, tclass, avd, &xp_node);
denied = requested & ~(avd->allowed);
if (unlikely(denied))
- return avc_denied(ssid, tsid, tclass, requested, 0, 0,
+ return avc_denied(ssid, tsid, tclass, requested, 0, 0, 0,
flags, avd);
return 0;
}
@@ -1158,7 +1165,7 @@ inline int avc_has_perm_noaudit(u32 ssid, u32 tsid,
rcu_read_unlock();
if (unlikely(denied))
- return avc_denied(ssid, tsid, tclass, requested, 0, 0,
+ return avc_denied(ssid, tsid, tclass, requested, 0, 0, 0,
flags, avd);
return 0;
}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 366c87a40bd1..171dd7fceac5 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3688,8 +3688,8 @@ static int ioctl_has_perm(const struct cred *cred, struct file *file,
return 0;
isec = inode_security(inode);
- rc = avc_has_extended_perms(ssid, isec->sid, isec->sclass,
- requested, driver, xperm, &ad);
+ rc = avc_has_extended_perms(ssid, isec->sid, isec->sclass, requested,
+ driver, AVC_EXT_IOCTL, xperm, &ad);
out:
return rc;
}
@@ -5952,7 +5952,7 @@ static int nlmsg_sock_has_extended_perms(struct sock *sk, u32 perms, u16 nlmsg_t
xperm = nlmsg_type & 0xff;
return avc_has_extended_perms(current_sid(), sksec->sid, sksec->sclass,
- perms, driver, xperm, &ad);
+ perms, driver, AVC_EXT_NLMSG, xperm, &ad);
}
static int selinux_netlink_send(struct sock *sk, struct sk_buff *skb)
diff --git a/security/selinux/include/avc.h b/security/selinux/include/avc.h
index 96a614d47df8..281f40103663 100644
--- a/security/selinux/include/avc.h
+++ b/security/selinux/include/avc.h
@@ -136,8 +136,11 @@ int avc_has_perm_noaudit(u32 ssid, u32 tsid, u16 tclass, u32 requested,
int avc_has_perm(u32 ssid, u32 tsid, u16 tclass, u32 requested,
struct common_audit_data *auditdata);
+#define AVC_EXT_IOCTL (1 << 0) /* Cache entry for an ioctl extended permission */
+#define AVC_EXT_NLMSG (1 << 1) /* Cache entry for an nlmsg extended permission */
int avc_has_extended_perms(u32 ssid, u32 tsid, u16 tclass, u32 requested,
- u8 driver, u8 perm, struct common_audit_data *ad);
+ u8 driver, u8 base_perm, u8 perm,
+ struct common_audit_data *ad);
u32 avc_policy_seqno(void);
diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h
index c7f2731abd03..700bd6c8bb38 100644
--- a/security/selinux/include/security.h
+++ b/security/selinux/include/security.h
@@ -239,6 +239,7 @@ struct extended_perms_data {
struct extended_perms_decision {
u8 used;
u8 driver;
+ u8 base_perm;
struct extended_perms_data *allowed;
struct extended_perms_data *auditallow;
struct extended_perms_data *dontaudit;
@@ -246,6 +247,7 @@ struct extended_perms_decision {
struct extended_perms {
u16 len; /* length associated decision chain */
+ u8 base_perms; /* which base permissions are covered */
struct extended_perms_data drivers; /* flag drivers that are used */
};
@@ -257,6 +259,7 @@ void security_compute_av(u32 ssid, u32 tsid, u16 tclass,
struct extended_perms *xperms);
void security_compute_xperms_decision(u32 ssid, u32 tsid, u16 tclass, u8 driver,
+ u8 base_perm,
struct extended_perms_decision *xpermd);
void security_compute_av_user(u32 ssid, u32 tsid, u16 tclass,
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 3d5c563cfc4c..d9f58b5d0f49 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -582,7 +582,7 @@ static void type_attribute_bounds_av(struct policydb *policydb,
}
/*
- * Flag which drivers have permissions.
+ * Flag which drivers have permissions and which base permissions are covered.
*/
void services_compute_xperms_drivers(
struct extended_perms *xperms,
@@ -592,12 +592,19 @@ void services_compute_xperms_drivers(
switch (node->datum.u.xperms->specified) {
case AVTAB_XPERMS_IOCTLDRIVER:
+ xperms->base_perms |= AVC_EXT_IOCTL;
/* if one or more driver has all permissions allowed */
for (i = 0; i < ARRAY_SIZE(xperms->drivers.p); i++)
xperms->drivers.p[i] |= node->datum.u.xperms->perms.p[i];
break;
case AVTAB_XPERMS_IOCTLFUNCTION:
+ xperms->base_perms |= AVC_EXT_IOCTL;
+ /* if allowing permissions within a driver */
+ security_xperm_set(xperms->drivers.p,
+ node->datum.u.xperms->driver);
+ break;
case AVTAB_XPERMS_NLMSG:
+ xperms->base_perms |= AVC_EXT_NLMSG;
/* if allowing permissions within a driver */
security_xperm_set(xperms->drivers.p,
node->datum.u.xperms->driver);
@@ -631,8 +638,7 @@ static void context_struct_compute_av(struct policydb *policydb,
avd->auditallow = 0;
avd->auditdeny = 0xffffffff;
if (xperms) {
- memset(&xperms->drivers, 0, sizeof(xperms->drivers));
- xperms->len = 0;
+ memset(xperms, 0, sizeof(*xperms));
}
if (unlikely(!tclass || tclass > policydb->p_classes.nprim)) {
@@ -969,13 +975,19 @@ void services_compute_xperms_decision(struct extended_perms_decision *xpermd,
{
switch (node->datum.u.xperms->specified) {
case AVTAB_XPERMS_IOCTLFUNCTION:
- case AVTAB_XPERMS_NLMSG:
- if (xpermd->driver != node->datum.u.xperms->driver)
+ if (xpermd->base_perm != AVC_EXT_IOCTL ||
+ xpermd->driver != node->datum.u.xperms->driver)
return;
break;
case AVTAB_XPERMS_IOCTLDRIVER:
- if (!security_xperm_test(node->datum.u.xperms->perms.p,
- xpermd->driver))
+ if (xpermd->base_perm != AVC_EXT_IOCTL ||
+ !security_xperm_test(node->datum.u.xperms->perms.p,
+ xpermd->driver))
+ return;
+ break;
+ case AVTAB_XPERMS_NLMSG:
+ if (xpermd->base_perm != AVC_EXT_NLMSG ||
+ xpermd->driver != node->datum.u.xperms->driver)
return;
break;
default:
@@ -1010,6 +1022,7 @@ void security_compute_xperms_decision(u32 ssid,
u32 tsid,
u16 orig_tclass,
u8 driver,
+ u8 base_perm,
struct extended_perms_decision *xpermd)
{
struct selinux_policy *policy;
@@ -1023,6 +1036,7 @@ void security_compute_xperms_decision(u32 ssid,
struct ebitmap_node *snode, *tnode;
unsigned int i, j;
+ xpermd->base_perm = base_perm;
xpermd->driver = driver;
xpermd->used = 0;
memset(xpermd->allowed->p, 0, sizeof(xpermd->allowed->p));
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 8516c1ccd57a..7e46ca4cd31b 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -1315,6 +1315,8 @@ enum {
IFLA_NETKIT_MODE,
IFLA_NETKIT_SCRUB,
IFLA_NETKIT_PEER_SCRUB,
+ IFLA_NETKIT_HEADROOM,
+ IFLA_NETKIT_TAILROOM,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index d1cdf2a8f826..211df5a93ad9 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -1,5 +1,17 @@
# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.arch
+
+INSTALL ?= install
+prefix ?= /usr
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+libdir ?= $(prefix)/$(libdir_relative)
+includedir ?= $(prefix)/include
+
SUBDIRS = lib generated samples
all: $(SUBDIRS) libynl.a
@@ -21,5 +33,20 @@ clean distclean:
fi \
done
rm -f libynl.a
+ rm -rf pyynl/__pycache__
+ rm -rf pyynl/lib/__pycache__
+ rm -rf pyynl.egg-info
+ rm -rf build
+
+install: libynl.a lib/*.h
+ @echo -e "\tINSTALL libynl.a"
+ @$(INSTALL) -d $(DESTDIR)$(libdir)
+ @$(INSTALL) -m 0644 libynl.a $(DESTDIR)$(libdir)/libynl.a
+ @echo -e "\tINSTALL libynl headers"
+ @$(INSTALL) -d $(DESTDIR)$(includedir)/ynl
+ @$(INSTALL) -m 0644 lib/*.h $(DESTDIR)$(includedir)/ynl/
+ @echo -e "\tINSTALL pyynl"
+ @pip install --prefix=$(DESTDIR)$(prefix) .
+ @make -C generated install
-.PHONY: all clean distclean $(SUBDIRS)
+.PHONY: all clean distclean install $(SUBDIRS)
diff --git a/tools/net/ynl/generated/.gitignore b/tools/net/ynl/generated/.gitignore
index ade488626d26..859a6fb446e1 100644
--- a/tools/net/ynl/generated/.gitignore
+++ b/tools/net/ynl/generated/.gitignore
@@ -1,2 +1,3 @@
*-user.c
*-user.h
+*.rst
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 7db5240de58a..21f9e299dc75 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -7,32 +7,44 @@ ifeq ("$(DEBUG)","1")
CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
endif
+INSTALL ?= install
+prefix ?= /usr
+datarootdir ?= $(prefix)/share
+docdir ?= $(datarootdir)/doc
+includedir ?= $(prefix)/include
+
include ../Makefile.deps
YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \
--exclude-op stats-get
-TOOL:=../ynl-gen-c.py
+TOOL:=../pyynl/ynl_gen_c.py
+TOOL_RST:=../pyynl/ynl_gen_rst.py
+SPECS_DIR:=../../../../Documentation/netlink/specs
GENS_PATHS=$(shell grep -nrI --files-without-match \
'protocol: netlink' \
- ../../../../Documentation/netlink/specs/)
-GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS})
+ $(SPECS_DIR))
+GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS})
SRCS=$(patsubst %,%-user.c,${GENS})
HDRS=$(patsubst %,%-user.h,${GENS})
OBJS=$(patsubst %,%-user.o,${GENS})
-all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI)
+SPECS_PATHS=$(wildcard $(SPECS_DIR)/*.yaml)
+SPECS=$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS})
+RSTS=$(patsubst %,%.rst,${SPECS})
+
+all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) $(RSTS)
protos.a: $(OBJS)
@echo -e "\tAR $@"
@ar rcs $@ $(OBJS)
-%-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+%-user.h: $(SPECS_DIR)/%.yaml $(TOOL)
@echo -e "\tGEN $@"
@$(TOOL) --mode user --header --spec $< -o $@ $(YNL_GEN_ARG_$*)
-%-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+%-user.c: $(SPECS_DIR)/%.yaml $(TOOL)
@echo -e "\tGEN $@"
@$(TOOL) --mode user --source --spec $< -o $@ $(YNL_GEN_ARG_$*)
@@ -40,14 +52,37 @@ protos.a: $(OBJS)
@echo -e "\tCC $@"
@$(COMPILE.c) $(CFLAGS_$*) -o $@ $<
+%.rst: $(SPECS_DIR)/%.yaml $(TOOL_RST)
+ @echo -e "\tGEN_RST $@"
+ @$(TOOL_RST) -o $@ -i $<
+
clean:
rm -f *.o
distclean: clean
- rm -f *.c *.h *.a
+ rm -f *.c *.h *.a *.rst
regen:
@../ynl-regen.sh
-.PHONY: all clean distclean regen
+install-headers: $(HDRS)
+ @echo -e "\tINSTALL generated headers"
+ @$(INSTALL) -d $(DESTDIR)$(includedir)/ynl
+ @$(INSTALL) -m 0644 *.h $(DESTDIR)$(includedir)/ynl/
+
+install-rsts: $(RSTS)
+ @echo -e "\tINSTALL generated docs"
+ @$(INSTALL) -d $(DESTDIR)$(docdir)/ynl
+ @$(INSTALL) -m 0644 $(RSTS) $(DESTDIR)$(docdir)/ynl/
+
+install-specs:
+ @echo -e "\tINSTALL specs"
+ @$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl
+ @$(INSTALL) -m 0644 ../../../../Documentation/netlink/*.yaml $(DESTDIR)$(datarootdir)/ynl/
+ @$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl/specs
+ @$(INSTALL) -m 0644 $(SPECS_DIR)/*.yaml $(DESTDIR)$(datarootdir)/ynl/specs/
+
+install: install-headers install-rsts install-specs
+
+.PHONY: all clean distclean regen install install-headers install-rsts install-specs
.DEFAULT_GOAL: all
diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore
index 296c4035dbf2..a4383358ec72 100644
--- a/tools/net/ynl/lib/.gitignore
+++ b/tools/net/ynl/lib/.gitignore
@@ -1,2 +1 @@
-__pycache__/
*.d
diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile
index 94c49cca3dca..4b2b98704ff9 100644
--- a/tools/net/ynl/lib/Makefile
+++ b/tools/net/ynl/lib/Makefile
@@ -19,7 +19,6 @@ ynl.a: $(OBJS)
clean:
rm -f *.o *.d *~
- rm -rf __pycache__
distclean: clean
rm -f *.a
diff --git a/tools/net/ynl/pyproject.toml b/tools/net/ynl/pyproject.toml
new file mode 100644
index 000000000000..a81d8779b0e0
--- /dev/null
+++ b/tools/net/ynl/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pyynl"
+authors = [
+ {name = "Donald Hunter", email = "donald.hunter@gmail.com"},
+ {name = "Jakub Kicinski", email = "kuba@kernel.org"},
+]
+description = "yaml netlink (ynl)"
+version = "0.0.1"
+requires-python = ">=3.9"
+dependencies = [
+ "pyyaml==6.*",
+ "jsonschema==4.*"
+]
+
+[tool.setuptools.packages.find]
+include = ["pyynl", "pyynl.lib"]
+
+[project.scripts]
+ynl = "pyynl.cli:main"
+ynl-ethtool = "pyynl.ethtool:main"
diff --git a/tools/net/ynl/pyynl/.gitignore b/tools/net/ynl/pyynl/.gitignore
new file mode 100644
index 000000000000..b801cd2d016e
--- /dev/null
+++ b/tools/net/ynl/pyynl/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+lib/__pycache__/
diff --git a/tools/net/ynl/pyynl/__init__.py b/tools/net/ynl/pyynl/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/net/ynl/pyynl/__init__.py
diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 41d9fa5c818d..794e3c7dcc65 100755
--- a/tools/net/ynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -3,6 +3,7 @@
import argparse
import json
+import os
import pathlib
import pprint
import sys
@@ -10,6 +11,24 @@ import sys
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import YnlFamily, Netlink, NlError
+sys_schema_dir='/usr/share/ynl'
+relative_schema_dir='../../../../Documentation/netlink'
+
+def schema_dir():
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ schema_dir = os.path.abspath(f"{script_dir}/{relative_schema_dir}")
+ if not os.path.isdir(schema_dir):
+ schema_dir = sys_schema_dir
+ if not os.path.isdir(schema_dir):
+ raise Exception(f"Schema directory {schema_dir} does not exist")
+ return schema_dir
+
+def spec_dir():
+ spec_dir = schema_dir() + '/specs'
+ if not os.path.isdir(spec_dir):
+ raise Exception(f"Spec directory {spec_dir} does not exist")
+ return spec_dir
+
class YnlEncoder(json.JSONEncoder):
def default(self, obj):
@@ -32,7 +51,14 @@ def main():
parser = argparse.ArgumentParser(description=description,
epilog=epilog)
- parser.add_argument('--spec', dest='spec', type=str, required=True)
+ spec_group = parser.add_mutually_exclusive_group(required=True)
+ spec_group.add_argument('--family', dest='family', type=str,
+ help='name of the netlink FAMILY')
+ spec_group.add_argument('--list-families', action='store_true',
+ help='list all netlink families supported by YNL (has spec)')
+ spec_group.add_argument('--spec', dest='spec', type=str,
+ help='choose the family by SPEC file path')
+
parser.add_argument('--schema', dest='schema', type=str)
parser.add_argument('--no-schema', action='store_true')
parser.add_argument('--json', dest='json_text', type=str)
@@ -70,6 +96,12 @@ def main():
else:
pprint.PrettyPrinter().pprint(msg)
+ if args.list_families:
+ for filename in sorted(os.listdir(spec_dir())):
+ if filename.endswith('.yaml'):
+ print(filename.removesuffix('.yaml'))
+ return
+
if args.no_schema:
args.schema = ''
@@ -77,7 +109,16 @@ def main():
if args.json_text:
attrs = json.loads(args.json_text)
- ynl = YnlFamily(args.spec, args.schema, args.process_unknown,
+ if args.family:
+ spec = f"{spec_dir()}/{args.family}.yaml"
+ if args.schema is None and spec.startswith(sys_schema_dir):
+ args.schema = '' # disable schema validation when installed
+ else:
+ spec = args.spec
+ if not os.path.isfile(spec):
+ raise Exception(f"Spec file {spec} does not exist")
+
+ ynl = YnlFamily(spec, args.schema, args.process_unknown,
recv_size=args.dbg_small_recv)
if args.dbg_small_recv:
ynl.set_recv_dbg(True)
diff --git a/tools/net/ynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index ebb0a11f67bf..af7fddd7b085 100755
--- a/tools/net/ynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -11,6 +11,7 @@ import os
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import YnlFamily
+from cli import schema_dir, spec_dir
def args_to_req(ynl, op_name, args, req):
"""
@@ -156,10 +157,8 @@ def main():
args = parser.parse_args()
script_abs_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
- spec = os.path.join(script_abs_dir,
- '../../../Documentation/netlink/specs/ethtool.yaml')
- schema = os.path.join(script_abs_dir,
- '../../../Documentation/netlink/genetlink-legacy.yaml')
+ spec = os.path.join(spec_dir(), 'ethtool.yaml')
+ schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml')
ynl = YnlFamily(spec, schema)
diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 9137b83e580a..9137b83e580a 100644
--- a/tools/net/ynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py
index a745739655ad..314ec8007496 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/pyynl/lib/nlspec.py
@@ -219,7 +219,10 @@ class SpecAttrSet(SpecElement):
else:
real_set = family.attr_sets[self.subset_of]
for elem in self.yaml['attributes']:
- attr = real_set[elem['name']]
+ real_attr = real_set[elem['name']]
+ combined_elem = real_attr.yaml | elem
+ attr = self.new_attr(combined_elem, real_attr.value)
+
self.attrs[attr.name] = attr
self.attrs_by_val[attr.value] = attr
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index eea29359a899..08f8bf89cfc2 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -733,41 +733,45 @@ class YnlFamily(SpecFamily):
self._rsp_add(rsp, attr_name, None, self._decode_unknown(attr))
continue
- if attr_spec["type"] == 'nest':
- subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
- decoded = subdict
- elif attr_spec["type"] == 'string':
- decoded = attr.as_strz()
- elif attr_spec["type"] == 'binary':
- decoded = self._decode_binary(attr, attr_spec)
- elif attr_spec["type"] == 'flag':
- decoded = True
- elif attr_spec.is_auto_scalar:
- decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order)
- elif attr_spec["type"] in NlAttr.type_formats:
- decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
- if 'enum' in attr_spec:
- decoded = self._decode_enum(decoded, attr_spec)
- elif attr_spec.display_hint:
- decoded = self._formatted_string(decoded, attr_spec.display_hint)
- elif attr_spec["type"] == 'indexed-array':
- decoded = self._decode_array_attr(attr, attr_spec)
- elif attr_spec["type"] == 'bitfield32':
- value, selector = struct.unpack("II", attr.raw)
- if 'enum' in attr_spec:
- value = self._decode_enum(value, attr_spec)
- selector = self._decode_enum(selector, attr_spec)
- decoded = {"value": value, "selector": selector}
- elif attr_spec["type"] == 'sub-message':
- decoded = self._decode_sub_msg(attr, attr_spec, search_attrs)
- elif attr_spec["type"] == 'nest-type-value':
- decoded = self._decode_nest_type_value(attr, attr_spec)
- else:
- if not self.process_unknown:
- raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
- decoded = self._decode_unknown(attr)
-
- self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded)
+ try:
+ if attr_spec["type"] == 'nest':
+ subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
+ decoded = subdict
+ elif attr_spec["type"] == 'string':
+ decoded = attr.as_strz()
+ elif attr_spec["type"] == 'binary':
+ decoded = self._decode_binary(attr, attr_spec)
+ elif attr_spec["type"] == 'flag':
+ decoded = True
+ elif attr_spec.is_auto_scalar:
+ decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order)
+ elif attr_spec["type"] in NlAttr.type_formats:
+ decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
+ if 'enum' in attr_spec:
+ decoded = self._decode_enum(decoded, attr_spec)
+ elif attr_spec.display_hint:
+ decoded = self._formatted_string(decoded, attr_spec.display_hint)
+ elif attr_spec["type"] == 'indexed-array':
+ decoded = self._decode_array_attr(attr, attr_spec)
+ elif attr_spec["type"] == 'bitfield32':
+ value, selector = struct.unpack("II", attr.raw)
+ if 'enum' in attr_spec:
+ value = self._decode_enum(value, attr_spec)
+ selector = self._decode_enum(selector, attr_spec)
+ decoded = {"value": value, "selector": selector}
+ elif attr_spec["type"] == 'sub-message':
+ decoded = self._decode_sub_msg(attr, attr_spec, search_attrs)
+ elif attr_spec["type"] == 'nest-type-value':
+ decoded = self._decode_nest_type_value(attr, attr_spec)
+ else:
+ if not self.process_unknown:
+ raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
+ decoded = self._decode_unknown(attr)
+
+ self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded)
+ except:
+ print(f"Error decoding '{attr_spec.name}' from '{space}'")
+ raise
return rsp
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index ec2288948795..d3a7dfbcf929 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -79,6 +79,20 @@ class Type(SpecAttr):
self.enum_name = None
delattr(self, "enum_name")
+ def _get_real_attr(self):
+ # if the attr is for a subset return the "real" attr (just one down, does not recurse)
+ return self.family.attr_sets[self.attr_set.subset_of][self.name]
+
+ def set_request(self):
+ self.request = True
+ if self.attr_set.subset_of:
+ self._get_real_attr().set_request()
+
+ def set_reply(self):
+ self.reply = True
+ if self.attr_set.subset_of:
+ self._get_real_attr().set_reply()
+
def get_limit(self, limit, default=None):
value = self.checks.get(limit, default)
if value is None:
@@ -106,6 +120,10 @@ class Type(SpecAttr):
enum_name = f"{self.attr_set.name_prefix}{self.name}"
self.enum_name = c_upper(enum_name)
+ if self.attr_set.subset_of:
+ if self.checks != self._get_real_attr().checks:
+ raise Exception("Overriding checks not supported by codegen, yet")
+
def is_multi_val(self):
return None
@@ -1119,17 +1137,17 @@ class Family(SpecFamily):
for _, struct in self.pure_nested_structs.items():
if struct.request:
for _, arg in struct.member_list():
- arg.request = True
+ arg.set_request()
if struct.reply:
for _, arg in struct.member_list():
- arg.reply = True
+ arg.set_reply()
for root_set, rs_members in self.root_sets.items():
for attr, spec in self.attr_sets[root_set].items():
if attr in rs_members['request']:
- spec.request = True
+ spec.set_request()
if attr in rs_members['reply']:
- spec.reply = True
+ spec.set_reply()
def _load_global_policy(self):
global_set = set()
@@ -1765,7 +1783,14 @@ def parse_rsp_nested(ri, struct):
f'{struct.ptr_name}dst = yarg->data;']
init_lines = []
- _multi_parse(ri, struct, init_lines, local_vars)
+ if struct.member_list():
+ _multi_parse(ri, struct, init_lines, local_vars)
+ else:
+ # Empty nest
+ ri.cw.block_start()
+ ri.cw.p('return 0;')
+ ri.cw.block_end()
+ ri.cw.nl()
def parse_rsp_msg(ri, deref=False):
@@ -2592,7 +2617,8 @@ def render_uapi(family, cw):
val = attr.value
val += 1
cw.p(attr.enum_name + suffix)
- cw.nl()
+ if attr_set.items():
+ cw.nl()
cw.p(attr_set.cnt_name + ('' if max_by_define else ','))
if not max_by_define:
cw.p(f"{attr_set.max_name} = {max_value}")
diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 6c56d0d726b4..6c56d0d726b4 100755
--- a/tools/net/ynl/ynl-gen-rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh
index a37304dcc88e..81b4ecd89100 100755
--- a/tools/net/ynl/ynl-regen.sh
+++ b/tools/net/ynl/ynl-regen.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
-TOOL=$(dirname $(realpath $0))/ynl-gen-c.py
+TOOL=$(dirname $(realpath $0))/pyynl/ynl_gen_c.py
force=
search=
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 2f36b7b6418d..625f5b046776 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -40,9 +40,9 @@ void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_fl
void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak;
u32 scx_bpf_dispatch_nr_slots(void) __ksym;
void scx_bpf_dispatch_cancel(void) __ksym;
-bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym;
-void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym;
-void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym;
+bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym __weak;
+void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak;
+void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak;
bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak;
u32 scx_bpf_reenqueue_local(void) __ksym;
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index 21deea320bd7..e938156ed0a0 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -97,7 +97,7 @@ restart:
SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
CPU_ZERO(cpuset);
CPU_SET(skel->rodata->central_cpu, cpuset);
- SCX_BUG_ON(sched_setaffinity(0, sizeof(cpuset), cpuset),
+ SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
"Failed to affinitize to central CPU %d (max %d)",
skel->rodata->central_cpu, skel->rodata->nr_cpu_ids - 1);
CPU_FREE(cpuset);
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7eeb3cbe18c7..0a016cd71cba 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -129,7 +129,6 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
TEST_PROGS := test_kmod.sh \
test_xdp_redirect.sh \
test_xdp_redirect_multi.sh \
- test_xdp_meta.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 151a4210028f..2461d183dee5 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -14,10 +14,16 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"
+#define NETKIT_HEADROOM 32
+#define NETKIT_TAILROOM 8
+
#define MARK 42
#define PRIO 0xeb9f
#define ICMP_ECHO 8
+#define FLAG_ADJUST_ROOM (1 << 0)
+#define FLAG_SAME_NETNS (1 << 1)
+
struct icmphdr {
__u8 type;
__u8 code;
@@ -35,7 +41,7 @@ struct iplink_req {
};
static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
- bool same_netns, int scrub, int peer_scrub)
+ int scrub, int peer_scrub, __u32 flags)
{
struct rtnl_handle rth = { .fd = -1 };
struct iplink_req req = {};
@@ -63,6 +69,10 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ if (flags & FLAG_ADJUST_ROOM) {
+ addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM);
+ addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM);
+ }
addattr_nest_end(&req.n, data);
addattr_nest_end(&req.n, linkinfo);
@@ -87,7 +97,7 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
" addr ee:ff:bb:cc:aa:dd"),
"set hwaddress");
}
- if (same_netns) {
+ if (flags & FLAG_SAME_NETNS) {
ASSERT_OK(system("ip link set dev " netkit_peer " up"),
"up peer");
ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"),
@@ -184,8 +194,8 @@ void serial_test_tc_netkit_basic(void)
int err, ifindex;
err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -299,8 +309,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -428,8 +438,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -543,8 +553,8 @@ void serial_test_tc_netkit_device(void)
int err, ifindex, ifindex2;
err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
- &ifindex, true, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
if (err)
return;
@@ -655,8 +665,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -733,8 +743,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode)
struct bpf_link *link;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, true, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
if (err)
return;
@@ -799,7 +809,7 @@ void serial_test_tc_netkit_pkt_type(void)
serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
}
-static void serial_test_tc_netkit_scrub_type(int scrub)
+static void serial_test_tc_netkit_scrub_type(int scrub, bool room)
{
LIBBPF_OPTS(bpf_netkit_opts, optl);
struct test_tc_link *skel;
@@ -807,7 +817,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub)
int err, ifindex;
err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, scrub, scrub);
+ &ifindex, scrub, scrub,
+ room ? FLAG_ADJUST_ROOM : 0);
if (err)
return;
@@ -842,6 +853,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub)
ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8");
ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark");
ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio");
+ ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom");
+ ASSERT_EQ(skel->bss->tailroom, room ? NETKIT_TAILROOM : 0, "tailroom");
cleanup:
test_tc_link__destroy(skel);
@@ -852,6 +865,6 @@ cleanup:
void serial_test_tc_netkit_scrub(void)
{
- serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT);
- serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE);
+ serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false);
+ serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index e6a783c7f5db..937da9b7532a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -2,6 +2,14 @@
#include <test_progs.h>
#include <network_helpers.h>
#include "test_xdp_context_test_run.skel.h"
+#include "test_xdp_meta.skel.h"
+
+#define TX_ADDR "10.0.0.1"
+#define RX_ADDR "10.0.0.2"
+#define RX_NAME "veth0"
+#define TX_NAME "veth1"
+#define TX_NETNS "xdp_context_tx"
+#define RX_NETNS "xdp_context_rx"
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
__u32 data_meta, __u32 data, __u32 data_end,
@@ -103,3 +111,82 @@ void test_xdp_context_test_run(void)
test_xdp_context_test_run__destroy(skel);
}
+
+void test_xdp_context_functional(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *rx_ns = NULL, *tx_ns = NULL;
+ struct bpf_program *tc_prog, *xdp_prog;
+ struct test_xdp_meta *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int rx_ifindex;
+ int ret;
+
+ tx_ns = netns_new(TX_NETNS, false);
+ if (!ASSERT_OK_PTR(tx_ns, "create tx_ns"))
+ return;
+
+ rx_ns = netns_new(RX_NETNS, false);
+ if (!ASSERT_OK_PTR(rx_ns, "create rx_ns"))
+ goto close;
+
+ SYS(close, "ip link add " RX_NAME " netns " RX_NETNS
+ " type veth peer name " TX_NAME " netns " TX_NETNS);
+
+ nstoken = open_netns(RX_NETNS);
+ if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
+ goto close;
+
+ SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME);
+ SYS(close, "ip link set dev " RX_NAME " up");
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ goto close;
+
+ rx_ifindex = if_nametoindex(RX_NAME);
+ if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx"))
+ goto close;
+
+ tc_hook.ifindex = rx_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls");
+ if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp");
+ if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog"))
+ goto close;
+
+ ret = bpf_xdp_attach(rx_ifindex,
+ bpf_program__fd(xdp_prog),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(TX_NETNS);
+ if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
+ goto close;
+
+ SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME);
+ SYS(close, "ip link set dev " TX_NAME " up");
+ ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping");
+
+close:
+ close_netns(nstoken);
+ test_xdp_meta__destroy(skel);
+ netns_free(rx_ns);
+ netns_free(tx_ns);
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 10d825928499..630f12e51b07 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -8,6 +8,7 @@
#include <linux/if_packet.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
char LICENSE[] SEC("license") = "GPL";
@@ -27,6 +28,7 @@ bool seen_host;
bool seen_mcast;
int mark, prio;
+unsigned short headroom, tailroom;
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
@@ -104,11 +106,24 @@ out:
return TCX_PASS;
}
+struct sk_buff {
+ struct net_device *dev;
+};
+
+struct net_device {
+ unsigned short needed_headroom;
+ unsigned short needed_tailroom;
+};
+
SEC("tc/egress")
int tc8(struct __sk_buff *skb)
{
+ struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev);
+
seen_tc8 = true;
mark = skb->mark;
prio = skb->priority;
+ headroom = BPF_CORE_READ(dev, needed_headroom);
+ tailroom = BPF_CORE_READ(dev, needed_tailroom);
return TCX_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index a7c4a7d49fe6..fe2d71ae0e71 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -8,7 +8,7 @@
#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
-SEC("t")
+SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
__u8 *data, *data_meta, *data_end;
@@ -28,7 +28,7 @@ int ing_cls(struct __sk_buff *ctx)
return diff ? TC_ACT_SHOT : TC_ACT_OK;
}
-SEC("x")
+SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
__u8 *data, *data_meta, *data_end;
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
deleted file mode 100755
index 2740322c1878..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-BPF_FILE="test_xdp_meta.bpf.o"
-# Kselftest framework requirement - SKIP code is 4.
-readonly KSFT_SKIP=4
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_xdp_meta [PASS]";
- else
- echo "selftests: test_xdp_meta [FAILED]";
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-}
-
-ip link set dev lo xdp off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdp support"
- exit $KSFT_SKIP
-fi
-set -e
-
-ip netns add ${NS1}
-ip netns add ${NS2}
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-
-ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1
-ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2
-
-ip netns exec ${NS1} tc qdisc add dev veth1 clsact
-ip netns exec ${NS2} tc qdisc add dev veth2 clsact
-
-ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t
-ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t
-
-ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x
-ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x
-
-ip netns exec ${NS1} ip link set dev veth1 up
-ip netns exec ${NS2} ip link set dev veth2 up
-
-ip netns exec ${NS1} ping -c 1 10.1.1.22
-ip netns exec ${NS2} ping -c 1 10.1.1.11
-
-exit 0
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 6f9956eed797..e38675d9b118 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
- .flags = XSK_UMEM__DEFAULT_FLAGS,
+ .flags = XDP_UMEM_TX_METADATA_LEN,
.tx_metadata_len = sizeof(struct xsk_tx_metadata),
};
__u32 idx = 0;
@@ -551,6 +551,7 @@ static void hwtstamp_enable(const char *ifname)
{
struct hwtstamp_config cfg = {
.rx_filter = HWTSTAMP_FILTER_ALL,
+ .tx_type = HWTSTAMP_TX_ON,
};
hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 0fec8f9801ad..469179c18935 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,11 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
TEST_INCLUDES := $(wildcard lib/py/*.py) \
+ $(wildcard lib/sh/*.sh) \
../../net/net_helper.sh \
../../net/lib.sh \
TEST_PROGS := \
netcons_basic.sh \
+ netcons_overflow.sh \
ping.py \
queues.py \
stats.py \
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 03a089165d3f..2b10854e4b1e 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -10,7 +10,7 @@ TEST_PROGS := \
mode-2-recovery-updelay.sh \
bond_options.sh \
bond-eth-type-change.sh \
- bond_macvlan.sh
+ bond_macvlan_ipvlan.sh
TEST_FILES := \
lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
deleted file mode 100755
index b609fb6231f4..000000000000
--- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test macvlan over balance-alb
-
-lib_dir=$(dirname "$0")
-source ${lib_dir}/bond_topo_2d1c.sh
-
-m1_ns="m1-$(mktemp -u XXXXXX)"
-m2_ns="m1-$(mktemp -u XXXXXX)"
-m1_ip4="192.0.2.11"
-m1_ip6="2001:db8::11"
-m2_ip4="192.0.2.12"
-m2_ip6="2001:db8::12"
-
-cleanup()
-{
- ip -n ${m1_ns} link del macv0
- ip netns del ${m1_ns}
- ip -n ${m2_ns} link del macv0
- ip netns del ${m2_ns}
-
- client_destroy
- server_destroy
- gateway_destroy
-}
-
-check_connection()
-{
- local ns=${1}
- local target=${2}
- local message=${3:-"macvlan_over_bond"}
- RET=0
-
-
- ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
- check_err $? "ping failed"
- log_test "$mode: $message"
-}
-
-macvlan_over_bond()
-{
- local param="$1"
- RET=0
-
- # setup new bond mode
- bond_reset "${param}"
-
- ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
- ip -n ${s_ns} link set macv0 netns ${m1_ns}
- ip -n ${m1_ns} link set dev macv0 up
- ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0
- ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0
-
- ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
- ip -n ${s_ns} link set macv0 netns ${m2_ns}
- ip -n ${m2_ns} link set dev macv0 up
- ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0
- ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0
-
- sleep 2
-
- check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
- check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
- check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1"
- check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1"
- check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2"
- check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2"
- check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2"
- check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2"
-
-
- sleep 5
-
- check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
- check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
- check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client"
- check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client"
- check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client"
- check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client"
- check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2"
- check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2"
-
- ip -n ${c_ns} neigh flush dev eth0
-}
-
-trap cleanup EXIT
-
-setup_prepare
-ip netns add ${m1_ns}
-ip netns add ${m2_ns}
-
-modes="active-backup balance-tlb balance-alb"
-
-for mode in $modes; do
- macvlan_over_bond "mode $mode"
-done
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
new file mode 100755
index 000000000000..c4711272fe45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan/ipvlan over bond
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+xvlan1_ns="xvlan1-$(mktemp -u XXXXXX)"
+xvlan2_ns="xvlan2-$(mktemp -u XXXXXX)"
+xvlan1_ip4="192.0.2.11"
+xvlan1_ip6="2001:db8::11"
+xvlan2_ip4="192.0.2.12"
+xvlan2_ip6="2001:db8::12"
+
+cleanup()
+{
+ client_destroy
+ server_destroy
+ gateway_destroy
+
+ ip netns del ${xvlan1_ns}
+ ip netns del ${xvlan2_ns}
+}
+
+check_connection()
+{
+ local ns=${1}
+ local target=${2}
+ local message=${3}
+ RET=0
+
+ ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+ check_err $? "ping failed"
+ log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}"
+}
+
+xvlan_over_bond()
+{
+ local param="$1"
+ local xvlan_type="$2"
+ local xvlan_mode="$3"
+ RET=0
+
+ # setup new bond mode
+ bond_reset "${param}"
+
+ ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+ ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan1_ns}
+ ip -n ${xvlan1_ns} link set dev ${xvlan_type}0 up
+ ip -n ${xvlan1_ns} addr add ${xvlan1_ip4}/24 dev ${xvlan_type}0
+ ip -n ${xvlan1_ns} addr add ${xvlan1_ip6}/24 dev ${xvlan_type}0
+
+ ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+ ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan2_ns}
+ ip -n ${xvlan2_ns} link set dev ${xvlan_type}0 up
+ ip -n ${xvlan2_ns} addr add ${xvlan2_ip4}/24 dev ${xvlan_type}0
+ ip -n ${xvlan2_ns} addr add ${xvlan2_ip6}/24 dev ${xvlan_type}0
+
+ sleep 2
+
+ check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+ check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+ check_connection "${c_ns}" "${xvlan1_ip4}" "IPv4: client->${xvlan_type}_1"
+ check_connection "${c_ns}" "${xvlan1_ip6}" "IPv6: client->${xvlan_type}_1"
+ check_connection "${c_ns}" "${xvlan2_ip4}" "IPv4: client->${xvlan_type}_2"
+ check_connection "${c_ns}" "${xvlan2_ip6}" "IPv6: client->${xvlan_type}_2"
+ check_connection "${xvlan1_ns}" "${xvlan2_ip4}" "IPv4: ${xvlan_type}_1->${xvlan_type}_2"
+ check_connection "${xvlan1_ns}" "${xvlan2_ip6}" "IPv6: ${xvlan_type}_1->${xvlan_type}_2"
+
+ check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+ check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+ check_connection "${xvlan1_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_1->client"
+ check_connection "${xvlan1_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_1->client"
+ check_connection "${xvlan2_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_2->client"
+ check_connection "${xvlan2_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_2->client"
+ check_connection "${xvlan2_ns}" "${xvlan1_ip4}" "IPv4: ${xvlan_type}_2->${xvlan_type}_1"
+ check_connection "${xvlan2_ns}" "${xvlan1_ip6}" "IPv6: ${xvlan_type}_2->${xvlan_type}_1"
+
+ ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${xvlan1_ns}
+ip netns add ${xvlan2_ns}
+
+bond_modes="active-backup balance-tlb balance-alb"
+
+for bond_mode in ${bond_modes}; do
+ xvlan_over_bond "mode ${bond_mode}" macvlan bridge
+ xvlan_over_bond "mode ${bond_mode}" ipvlan l2
+done
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
index 899d7fb6ea8e..dad4e5fda4db 100644
--- a/tools/testing/selftests/drivers/net/bonding/config
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -3,6 +3,7 @@ CONFIG_BRIDGE=y
CONFIG_DUMMY=y
CONFIG_IPV6=y
CONFIG_MACVLAN=y
+CONFIG_IPVLAN=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_CLS_FLOWER=y
CONFIG_NET_SCH_INGRESS=y
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index fea343f209ea..987e452d3a45 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -48,6 +48,7 @@ class NetDrvEnv:
else:
self._ns = NetdevSimDev(**kwargs)
self.dev = self._ns.nsims[0].dev
+ self.ifname = self.dev['ifname']
self.ifindex = self.dev['ifindex']
def __enter__(self):
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
new file mode 100644
index 000000000000..3acaba41ac7b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -0,0 +1,225 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This file contains functions and helpers to support the netconsole
+# selftests
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+SRCIF="" # to be populated later
+SRCIP=192.0.2.1
+DSTIF="" # to be populated later
+DSTIP=192.0.2.2
+
+PORT="6666"
+MSG="netconsole selftest"
+USERDATA_KEY="key"
+USERDATA_VALUE="value"
+TARGET=$(mktemp -u netcons_XXXXX)
+DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
+NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
+NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+# NAMESPACE will be populated by setup_ns with a random value
+NAMESPACE=""
+
+# IDs for netdevsim
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
+
+# Used to create and delete namespaces
+source "${LIBDIR}"/../../../../net/lib.sh
+source "${LIBDIR}"/../../../../net/net_helper.sh
+
+# Create netdevsim interfaces
+create_ifaces() {
+
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
+ udevadm settle 2> /dev/null || true
+
+ local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
+ local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
+
+ # These are global variables
+ SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
+ -path "$NSIM1"/net -exec basename {} \;)
+ DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
+ -path "$NSIM2"/net -exec basename {} \;)
+}
+
+link_ifaces() {
+ local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+ local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
+ local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
+
+ exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
+ exec {INITNS_FD}</proc/self/ns/net
+
+ # Bind the dst interface to namespace
+ ip link set "${DSTIF}" netns "${NAMESPACE}"
+
+ # Linking one device to the other one (on the other namespace}
+ if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK
+ then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup
+ exit "${ksft_skip}"
+ fi
+}
+
+function configure_ip() {
+ # Configure the IPs for both interfaces
+ ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
+ ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
+
+ ip addr add "${SRCIP}"/24 dev "${SRCIF}"
+ ip link set "${SRCIF}" up
+}
+
+function set_network() {
+ # setup_ns function is coming from lib.sh
+ setup_ns NAMESPACE
+
+ # Create both interfaces, and assign the destination to a different
+ # namespace
+ create_ifaces
+
+ # Link both interfaces back to back
+ link_ifaces
+
+ configure_ip
+}
+
+function create_dynamic_target() {
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+
+ # Create a dynamic target
+ mkdir "${NETCONS_PATH}"
+
+ echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
+ echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
+ echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
+ echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+function cleanup() {
+ local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+
+ # delete netconsole dynamic reconfiguration
+ echo 0 > "${NETCONS_PATH}"/enabled
+ # Remove all the keys that got created during the selftest
+ find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}"
+
+ # Delete netdevsim devices
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
+
+ # this is coming from lib.sh
+ cleanup_all_ns
+
+ # Restoring printk configurations
+ echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
+}
+
+function set_user_data() {
+ if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
+ then
+ echo "Userdata path not available in ${NETCONS_PATH}/userdata"
+ exit "${ksft_skip}"
+ fi
+
+ KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}"
+ mkdir -p "${KEY_PATH}"
+ VALUE_PATH="${KEY_PATH}""/value"
+ echo "${USERDATA_VALUE}" > "${VALUE_PATH}"
+}
+
+function listen_port_and_save_to() {
+ local OUTPUT=${1}
+ # Just wait for 2 seconds
+ timeout 2 ip netns exec "${NAMESPACE}" \
+ socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+}
+
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # TMPFILENAME will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # key=value
+
+ # Check if the file exists
+ if [ ! -f "$TMPFILENAME" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${TMPFILENAME}"; then
+ echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+ echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # Delete the file once it is validated, otherwise keep it
+ # for debugging purposes
+ rm "${TMPFILENAME}"
+ exit "${ksft_pass}"
+}
+
+function check_for_dependencies() {
+ if [ "$(id -u)" -ne 0 ]; then
+ echo "This test must be run as root" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which socat > /dev/null ; then
+ echo "SKIP: socat(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which ip > /dev/null ; then
+ echo "SKIP: ip(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which udevadm > /dev/null ; then
+ echo "SKIP: udevadm(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
+ echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -d "${NETCONS_CONFIGFS}" ]; then
+ echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip link show "${DSTIF}" 2> /dev/null; then
+ echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
+ echo "SKIP: IPs already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index b175f4d966e5..fe765da498e8 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -18,224 +18,8 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-# Simple script to test dynamic targets in netconsole
-SRCIF="" # to be populated later
-SRCIP=192.0.2.1
-DSTIF="" # to be populated later
-DSTIP=192.0.2.2
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
-PORT="6666"
-MSG="netconsole selftest"
-USERDATA_KEY="key"
-USERDATA_VALUE="value"
-TARGET=$(mktemp -u netcons_XXXXX)
-DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
-NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
-NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
-KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}"
-# NAMESPACE will be populated by setup_ns with a random value
-NAMESPACE=""
-
-# IDs for netdevsim
-NSIM_DEV_1_ID=$((256 + RANDOM % 256))
-NSIM_DEV_2_ID=$((512 + RANDOM % 256))
-NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
-
-# Used to create and delete namespaces
-source "${SCRIPTDIR}"/../../net/lib.sh
-source "${SCRIPTDIR}"/../../net/net_helper.sh
-
-# Create netdevsim interfaces
-create_ifaces() {
-
- echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
- echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
- udevadm settle 2> /dev/null || true
-
- local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
- local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
-
- # These are global variables
- SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
- -path "$NSIM1"/net -exec basename {} \;)
- DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
- -path "$NSIM2"/net -exec basename {} \;)
-}
-
-link_ifaces() {
- local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
- local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
- local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
-
- exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
- exec {INITNS_FD}</proc/self/ns/net
-
- # Bind the dst interface to namespace
- ip link set "${DSTIF}" netns "${NAMESPACE}"
-
- # Linking one device to the other one (on the other namespace}
- if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK
- then
- echo "linking netdevsim1 with netdevsim2 should succeed"
- cleanup
- exit "${ksft_skip}"
- fi
-}
-
-function configure_ip() {
- # Configure the IPs for both interfaces
- ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
- ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
-
- ip addr add "${SRCIP}"/24 dev "${SRCIF}"
- ip link set "${SRCIF}" up
-}
-
-function set_network() {
- # setup_ns function is coming from lib.sh
- setup_ns NAMESPACE
-
- # Create both interfaces, and assign the destination to a different
- # namespace
- create_ifaces
-
- # Link both interfaces back to back
- link_ifaces
-
- configure_ip
-}
-
-function create_dynamic_target() {
- DSTMAC=$(ip netns exec "${NAMESPACE}" \
- ip link show "${DSTIF}" | awk '/ether/ {print $2}')
-
- # Create a dynamic target
- mkdir "${NETCONS_PATH}"
-
- echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
- echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
- echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
- echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
-
- echo 1 > "${NETCONS_PATH}"/enabled
-}
-
-function cleanup() {
- local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
-
- # delete netconsole dynamic reconfiguration
- echo 0 > "${NETCONS_PATH}"/enabled
- # Remove key
- rmdir "${KEY_PATH}"
- # Remove the configfs entry
- rmdir "${NETCONS_PATH}"
-
- # Delete netdevsim devices
- echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
- echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
-
- # this is coming from lib.sh
- cleanup_all_ns
-
- # Restoring printk configurations
- echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
-}
-
-function set_user_data() {
- if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
- then
- echo "Userdata path not available in ${NETCONS_PATH}/userdata"
- exit "${ksft_skip}"
- fi
-
- mkdir -p "${KEY_PATH}"
- VALUE_PATH="${KEY_PATH}""/value"
- echo "${USERDATA_VALUE}" > "${VALUE_PATH}"
-}
-
-function listen_port_and_save_to() {
- local OUTPUT=${1}
- # Just wait for 2 seconds
- timeout 2 ip netns exec "${NAMESPACE}" \
- socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
-}
-
-function validate_result() {
- local TMPFILENAME="$1"
-
- # TMPFILENAME will contain something like:
- # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
- # key=value
-
- # Check if the file exists
- if [ ! -f "$TMPFILENAME" ]; then
- echo "FAIL: File was not generated." >&2
- exit "${ksft_fail}"
- fi
-
- if ! grep -q "${MSG}" "${TMPFILENAME}"; then
- echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
- cat "${TMPFILENAME}" >&2
- exit "${ksft_fail}"
- fi
-
- if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
- echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
- cat "${TMPFILENAME}" >&2
- exit "${ksft_fail}"
- fi
-
- # Delete the file once it is validated, otherwise keep it
- # for debugging purposes
- rm "${TMPFILENAME}"
- exit "${ksft_pass}"
-}
-
-function check_for_dependencies() {
- if [ "$(id -u)" -ne 0 ]; then
- echo "This test must be run as root" >&2
- exit "${ksft_skip}"
- fi
-
- if ! which socat > /dev/null ; then
- echo "SKIP: socat(1) is not available" >&2
- exit "${ksft_skip}"
- fi
-
- if ! which ip > /dev/null ; then
- echo "SKIP: ip(1) is not available" >&2
- exit "${ksft_skip}"
- fi
-
- if ! which udevadm > /dev/null ; then
- echo "SKIP: udevadm(1) is not available" >&2
- exit "${ksft_skip}"
- fi
-
- if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
- echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
- exit "${ksft_skip}"
- fi
-
- if [ ! -d "${NETCONS_CONFIGFS}" ]; then
- echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
- exit "${ksft_skip}"
- fi
-
- if ip link show "${DSTIF}" 2> /dev/null; then
- echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
- exit "${ksft_skip}"
- fi
-
- if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
- echo "SKIP: IPs already in use. Skipping it" >&2
- exit "${ksft_skip}"
- fi
-}
-
-# ========== #
-# Start here #
-# ========== #
modprobe netdevsim 2> /dev/null || true
modprobe netconsole 2> /dev/null || true
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh
new file mode 100755
index 000000000000..29bad56448a2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test verifies that users can successfully create up to
+# MAX_USERDATA_ITEMS userdata entries without encountering any failures.
+#
+# Additionally, it tests for expected failure when attempting to exceed this
+# maximum limit.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
+MAX_USERDATA_ITEMS=16
+
+# Function to create userdata entries
+function create_userdata_max_entries() {
+ # All these keys should be created without any error
+ for i in $(seq $MAX_USERDATA_ITEMS)
+ do
+ # USERDATA_KEY is used by set_user_data
+ USERDATA_KEY="key"${i}
+ set_user_data
+ done
+}
+
+# Function to verify the entry limit
+function verify_entry_limit() {
+ # Allowing the test to fail without exiting, since the next command
+ # will fail
+ set +e
+ mkdir "${NETCONS_PATH}/userdata/key_that_will_fail" 2> /dev/null
+ ret="$?"
+ set -e
+ if [ "$ret" -eq 0 ];
+ then
+ echo "Adding more than ${MAX_USERDATA_ITEMS} entries in userdata should fail, but it didn't" >&2
+ ls "${NETCONS_PATH}/userdata/" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# populate the maximum number of supported keys in userdata
+create_userdata_max_entries
+# Verify an additional entry is not allowed
+verify_entry_limit
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 031ac9def6c0..efcc1e10575b 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -2,12 +2,15 @@
# SPDX-License-Identifier: GPL-2.0
import errno
+import subprocess
+import time
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
+from lib.py import KsftSkipEx, KsftXfailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
-from lib.py import ip, defer
+from lib.py import cmd, ip, defer
ethnl = EthtoolFamily()
netfam = NetdevFamily()
@@ -174,10 +177,95 @@ def check_down(cfg) -> None:
netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
+def __run_inf_loop(body):
+ body = body.strip()
+ if body[-1] != ';':
+ body += ';'
+
+ return subprocess.Popen(f"while true; do {body} done", shell=True,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+
+def __stats_increase_sanely(old, new) -> None:
+ for k in old.keys():
+ ksft_ge(new[k], old[k])
+ ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error")
+
+
+def procfs_hammer(cfg) -> None:
+ """
+ Reading stats via procfs only holds the RCU lock, which is not an exclusive
+ lock, make sure drivers can handle parallel reads of stats.
+ """
+ one = __run_inf_loop("cat /proc/net/dev")
+ defer(one.kill)
+ two = __run_inf_loop("cat /proc/net/dev")
+ defer(two.kill)
+
+ time.sleep(1)
+ # Make sure the processes are running
+ ksft_is(one.poll(), None)
+ ksft_is(two.poll(), None)
+
+ rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ time.sleep(2)
+ rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ __stats_increase_sanely(rtstat1, rtstat2)
+ # defers will kill the loops
+
+
+@ksft_disruptive
+def procfs_downup_hammer(cfg) -> None:
+ """
+ Reading stats via procfs only holds the RCU lock, drivers often try
+ to sleep when reading the stats, or don't protect against races.
+ """
+ # Max out the queues, we'll flip between max and 1
+ channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+ cur_queue_cnt = channels[f'{rx_type}-count']
+ max_queue_cnt = channels[f'{rx_type}-max']
+
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+ defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+ # Real test stats
+ stats = __run_inf_loop("cat /proc/net/dev")
+ defer(stats.kill)
+
+ ipset = f"ip link set dev {cfg.ifname}"
+ defer(ip, f"link set dev {cfg.ifname} up")
+ # The "echo -n 1" lets us count iterations below
+ updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
+ f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
+ f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
+ "echo -n 1"
+ updown = __run_inf_loop(updown)
+ kill_updown = defer(updown.kill)
+
+ time.sleep(1)
+ # Make sure the processes are running
+ ksft_is(stats.poll(), None)
+ ksft_is(updown.poll(), None)
+
+ rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ # We're looking for crashes, give it extra time
+ time.sleep(9)
+ rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ __stats_increase_sanely(rtstat1, rtstat2)
+
+ kill_updown.exec()
+ stdout, _ = updown.communicate(timeout=5)
+ ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
+
+
def main() -> None:
with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
- check_down],
+ check_down, procfs_hammer, procfs_downup_hammer],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh
index 79a125eb24c2..05a461890671 100644
--- a/tools/testing/selftests/kselftest/ktap_helpers.sh
+++ b/tools/testing/selftests/kselftest/ktap_helpers.sh
@@ -7,6 +7,7 @@
KTAP_TESTNO=1
KTAP_CNT_PASS=0
KTAP_CNT_FAIL=0
+KTAP_CNT_XFAIL=0
KTAP_CNT_SKIP=0
KSFT_PASS=0
@@ -69,6 +70,16 @@ ktap_test_skip() {
KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1))
}
+ktap_test_xfail() {
+ description="$1"
+
+ result="ok"
+ directive="XFAIL"
+ __ktap_test "$result" "$description" "$directive"
+
+ KTAP_CNT_XFAIL=$((KTAP_CNT_XFAIL+1))
+}
+
ktap_test_fail() {
description="$1"
@@ -99,7 +110,7 @@ ktap_exit_fail_msg() {
ktap_finished() {
ktap_print_totals
- if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP)) -eq "$KSFT_NUM_TESTS" ]; then
+ if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP + KTAP_CNT_XFAIL)) -eq "$KSFT_NUM_TESTS" ]; then
exit "$KSFT_PASS"
else
exit "$KSFT_FAIL"
@@ -107,5 +118,5 @@ ktap_finished() {
}
ktap_print_totals() {
- echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0"
+ echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:$KTAP_CNT_XFAIL xpass:0 skip:$KTAP_CNT_SKIP error:0"
}
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 0a0b55516028..c0c53451a16d 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -282,6 +282,24 @@ static void *mfd_assert_mmap_shared(int fd)
return p;
}
+static void *mfd_assert_mmap_read_shared(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
static void *mfd_assert_mmap_private(int fd)
{
void *p;
@@ -980,6 +998,30 @@ static void test_seal_future_write(void)
close(fd);
}
+static void test_seal_write_map_read_shared(void)
+{
+ int fd;
+ void *p;
+
+ printf("%s SEAL-WRITE-MAP-READ\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_write_map_read",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+ p = mfd_assert_mmap_read_shared(fd);
+
+ mfd_assert_read(fd);
+ mfd_assert_read_shared(fd);
+ mfd_fail_write(fd);
+
+ munmap(p, mfd_def_size);
+ close(fd);
+}
+
/*
* Test SEAL_SHRINK
* Test whether SEAL_SHRINK actually prevents shrinking
@@ -1593,6 +1635,7 @@ int main(int argc, char **argv)
test_seal_write();
test_seal_future_write();
+ test_seal_write_map_read_shared();
test_seal_shrink();
test_seal_grow();
test_seal_resize();
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 477ae76de93d..3efe005436cd 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""):
_fail("Check failed", a, "not in", b, comment)
+def ksft_is(a, b, comment=""):
+ if a is not b:
+ _fail("Check failed", a, "is not", b, comment)
+
+
def ksft_ge(a, b, comment=""):
if a < b:
_fail("Check failed", a, "<", b, comment)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 076a7e8dc3eb..ad1e36baee2a 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -13,14 +13,14 @@ try:
SPEC_PATH = KSFT_DIR / "net/lib/specs"
sys.path.append(tools_full_path.as_posix())
- from net.lib.ynl.lib import YnlFamily, NlError
+ from net.lib.ynl.pyynl.lib import YnlFamily, NlError
else:
# Running in tree
tools_full_path = KSRC / "tools"
SPEC_PATH = KSRC / "Documentation/netlink/specs"
sys.path.append(tools_full_path.as_posix())
- from net.ynl.lib import YnlFamily, NlError
+ from net.ynl.pyynl.lib import YnlFamily, NlError
except ModuleNotFoundError as e:
ksft_pr("Failed importing `ynl` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93d9d914529b..93e8cb671c3d 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -18,6 +18,23 @@ def lo_check(nf) -> None:
ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+def napi_list_check(nf) -> None:
+ with NetdevSimDev(queue_count=100) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100)
+
+ for q in [50, 0, 99]:
+ for i in range(4):
+ nsim.dfs_write("queue_reset", f"{q} {i}")
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100,
+ comment=f"queue count after reset queue {q} mode {i}")
+
+
def page_pool_check(nf) -> None:
with NetdevSimDev() as nsimdev:
nsim = nsimdev.nsims[0]
@@ -89,7 +106,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
- ksft_run([empty_check, lo_check, page_pool_check],
+ ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index 4071c133f29e..ff989c325eef 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -23,7 +23,7 @@ if [ $# -ne 1 ]; then
ktap_exit_fail_msg "usage: $0 <script>"
exit "$KSFT_FAIL"
fi
-script="$1"
+script="$(basename $1)"
if [ -z "$(which packetdrill)" ]; then
ktap_skip_all "packetdrill not found in PATH"
@@ -31,16 +31,29 @@ if [ -z "$(which packetdrill)" ]; then
fi
declare -a optargs
+failfunc=ktap_test_fail
+
if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
optargs+=('--tolerance_usecs=14000')
+
+ # xfail tests that are known flaky with dbg config, not fixable.
+ # still run them for coverage (and expect 100% pass without dbg).
+ declare -ar xfail_list=(
+ "tcp_fast_recovery_prr-ss.*.pkt"
+ "tcp_timestamping.*.pkt"
+ "tcp_user_timeout_user-timeout-probe.pkt"
+ "tcp_zerocopy_epoll_.*.pkt"
+ )
+ readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$"
+ [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail
fi
ktap_print_header
ktap_set_plan 2
-unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
- && ktap_test_pass "ipv4" || ktap_test_fail "ipv4"
-unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
- && ktap_test_pass "ipv6" || ktap_test_fail "ipv6"
+unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \
+ && ktap_test_pass "ipv4" || $failfunc "ipv4"
+unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \
+ && ktap_test_pass "ipv6" || $failfunc "ipv6"
ktap_finished
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index d43afe243779..12e7cae251be 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -31,7 +31,8 @@ $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
EXTRA_CLEAN += \
- $(top_srcdir)/tools/net/ynl/lib/__pycache__ \
+ $(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \
+ $(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \
$(top_srcdir)/tools/net/ynl/lib/*.[ado] \
$(OUTPUT)/.libynl-*.sig \
$(OUTPUT)/libynl.a
diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
index 37d9bf6fb745..6f4c3f5a1c5d 100644
--- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
+++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c
@@ -20,7 +20,7 @@ s32 BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_select_cpu, struct task_struct *p,
* If we dispatch to a bogus DSQ that will fall back to the
* builtin global DSQ, we fail gracefully.
*/
- scx_bpf_dispatch_vtime(p, 0xcafef00d, SCX_SLICE_DFL,
+ scx_bpf_dsq_insert_vtime(p, 0xcafef00d, SCX_SLICE_DFL,
p->scx.dsq_vtime, 0);
return cpu;
}
diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
index dffc97d9cdf1..e4a55027778f 100644
--- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
+++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c
@@ -17,8 +17,8 @@ s32 BPF_STRUCT_OPS(ddsp_vtimelocal_fail_select_cpu, struct task_struct *p,
if (cpu >= 0) {
/* Shouldn't be allowed to vtime dispatch to a builtin DSQ. */
- scx_bpf_dispatch_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL,
- p->scx.dsq_vtime, 0);
+ scx_bpf_dsq_insert_vtime(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL,
+ p->scx.dsq_vtime, 0);
return cpu;
}
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
index 6a7db1502c29..fbda6bf54671 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c
@@ -43,9 +43,12 @@ void BPF_STRUCT_OPS(dsp_local_on_dispatch, s32 cpu, struct task_struct *prev)
if (!p)
return;
- target = bpf_get_prandom_u32() % nr_cpus;
+ if (p->nr_cpus_allowed == nr_cpus)
+ target = bpf_get_prandom_u32() % nr_cpus;
+ else
+ target = scx_bpf_task_cpu(p);
- scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0);
+ scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | target, SCX_SLICE_DFL, 0);
bpf_task_release(p);
}
diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.c b/tools/testing/selftests/sched_ext/dsp_local_on.c
index 472851b56854..0ff27e57fe43 100644
--- a/tools/testing/selftests/sched_ext/dsp_local_on.c
+++ b/tools/testing/selftests/sched_ext/dsp_local_on.c
@@ -34,9 +34,10 @@ static enum scx_test_status run(void *ctx)
/* Just sleeping is fine, plenty of scheduling events happening */
sleep(1);
- SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_ERROR));
bpf_link__destroy(link);
+ SCX_EQ(skel->data->uei.kind, EXIT_KIND(SCX_EXIT_UNREG));
+
return SCX_TEST_PASS;
}
@@ -50,7 +51,7 @@ static void cleanup(void *ctx)
struct scx_test dsp_local_on = {
.name = "dsp_local_on",
.description = "Verify we can directly dispatch tasks to a local DSQs "
- "from osp.dispatch()",
+ "from ops.dispatch()",
.setup = setup,
.run = run,
.cleanup = cleanup,
diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
index 1efb50d61040..a7cf868d5e31 100644
--- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
+++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c
@@ -31,7 +31,7 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p,
/* Can only call from ops.select_cpu() */
scx_bpf_select_cpu_dfl(p, 0, 0, &found);
- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}
SEC(".struct_ops.link")
diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c
index d75d4faf07f6..4bc36182d3ff 100644
--- a/tools/testing/selftests/sched_ext/exit.bpf.c
+++ b/tools/testing/selftests/sched_ext/exit.bpf.c
@@ -33,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags)
if (exit_point == EXIT_ENQUEUE)
EXIT_CLEANLY();
- scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
+ scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
}
void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
@@ -41,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p)
if (exit_point == EXIT_DISPATCH)
EXIT_CLEANLY();
- scx_bpf_consume(DSQ_ID);
+ scx_bpf_dsq_move_to_local(DSQ_ID);
}
void BPF_STRUCT_OPS(exit_enable, struct task_struct *p)
diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c
index 4d4cd8d966db..430f5e13bf55 100644
--- a/tools/testing/selftests/sched_ext/maximal.bpf.c
+++ b/tools/testing/selftests/sched_ext/maximal.bpf.c
@@ -12,6 +12,8 @@
char _license[] SEC("license") = "GPL";
+#define DSQ_ID 0
+
s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu,
u64 wake_flags)
{
@@ -20,7 +22,7 @@ s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu,
void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags)
{
- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+ scx_bpf_dsq_insert(p, DSQ_ID, SCX_SLICE_DFL, enq_flags);
}
void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags)
@@ -28,7 +30,7 @@ void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags)
void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev)
{
- scx_bpf_consume(SCX_DSQ_GLOBAL);
+ scx_bpf_dsq_move_to_local(DSQ_ID);
}
void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags)
@@ -123,7 +125,7 @@ void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init)
{
- return 0;
+ return scx_bpf_create_dsq(DSQ_ID, -1);
}
void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info)
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
index f171ac470970..13d0f5be788d 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c
@@ -30,7 +30,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p,
}
scx_bpf_put_idle_cpumask(idle_mask);
- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}
SEC(".struct_ops.link")
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
index 9efdbb7da928..815f1d5d61ac 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c
@@ -67,7 +67,7 @@ void BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_enqueue, struct task_struct *p,
saw_local = true;
}
- scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, enq_flags);
+ scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, enq_flags);
}
s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task,
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
index 59bfc4f36167..4bb99699e920 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c
@@ -29,7 +29,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_select_cpu, struct task_struct *p,
cpu = prev_cpu;
dispatch:
- scx_bpf_dispatch(p, dsq_id, SCX_SLICE_DFL, 0);
+ scx_bpf_dsq_insert(p, dsq_id, SCX_SLICE_DFL, 0);
return cpu;
}
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
index 3bbd5fcdfb18..2a75de11b2cf 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c
@@ -18,7 +18,7 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_select_cpu, struct task_struct *p
s32 prev_cpu, u64 wake_flags)
{
/* Dispatching to a random DSQ should fail. */
- scx_bpf_dispatch(p, 0xcafef00d, SCX_SLICE_DFL, 0);
+ scx_bpf_dsq_insert(p, 0xcafef00d, SCX_SLICE_DFL, 0);
return prev_cpu;
}
diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
index 0fda57fe0ecf..99d075695c97 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c
@@ -18,8 +18,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_select_cpu, struct task_struct *p
s32 prev_cpu, u64 wake_flags)
{
/* Dispatching twice in a row is disallowed. */
- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
- scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
+ scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
return prev_cpu;
}
diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
index e6c67bcf5e6e..bfcb96cd4954 100644
--- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
+++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c
@@ -2,8 +2,8 @@
/*
* A scheduler that validates that enqueue flags are properly stored and
* applied at dispatch time when a task is directly dispatched from
- * ops.select_cpu(). We validate this by using scx_bpf_dispatch_vtime(), and
- * making the test a very basic vtime scheduler.
+ * ops.select_cpu(). We validate this by using scx_bpf_dsq_insert_vtime(),
+ * and making the test a very basic vtime scheduler.
*
* Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
* Copyright (c) 2024 David Vernet <dvernet@meta.com>
@@ -47,13 +47,13 @@ s32 BPF_STRUCT_OPS(select_cpu_vtime_select_cpu, struct task_struct *p,
cpu = prev_cpu;
scx_bpf_test_and_clear_cpu_idle(cpu);
ddsp:
- scx_bpf_dispatch_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0);
+ scx_bpf_dsq_insert_vtime(p, VTIME_DSQ, SCX_SLICE_DFL, task_vtime(p), 0);
return cpu;
}
void BPF_STRUCT_OPS(select_cpu_vtime_dispatch, s32 cpu, struct task_struct *p)
{
- if (scx_bpf_consume(VTIME_DSQ))
+ if (scx_bpf_dsq_move_to_local(VTIME_DSQ))
consumed = true;
}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
index 996448afe31b..91d120548bf5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
@@ -78,10 +78,10 @@
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
- "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff",
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0x1f",
"expExitCode": "0",
"verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
- "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 31 baseclass",
"matchCount": "1",
"teardown": [
"$TC qdisc del dev $DEV1 ingress"