aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-bus-thunderbolt48
-rw-r--r--Documentation/admin-guide/thunderbolt.rst24
-rw-r--r--Documentation/devicetree/bindings/net/bluetooth.txt5
-rw-r--r--Documentation/devicetree/bindings/net/renesas,ravb.txt4
-rw-r--r--Documentation/devicetree/bindings/net/sh_eth.txt14
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt8
-rw-r--r--Documentation/driver-api/80211/cfg80211.rst3
-rw-r--r--Documentation/networking/netvsc.txt8
-rw-r--r--Documentation/networking/regulatory.txt30
-rw-r--r--Documentation/networking/rxrpc.txt53
-rw-r--r--Documentation/networking/vrf.txt13
-rw-r--r--MAINTAINERS34
-rw-r--r--arch/blackfin/mach-bf518/boards/ezbrd.c47
-rw-r--r--arch/blackfin/mach-bf518/boards/tcm-bf518.c1
-rw-r--r--arch/um/drivers/net_kern.c9
-rw-r--r--drivers/atm/fore200e.c8
-rw-r--r--drivers/atm/idt77105.c2
-rw-r--r--drivers/atm/idt77252.c21
-rw-r--r--drivers/atm/idt77252.h3
-rw-r--r--drivers/atm/suni.c10
-rw-r--r--drivers/bcma/Kconfig18
-rw-r--r--drivers/bcma/driver_mips.c7
-rw-r--r--drivers/bluetooth/Kconfig3
-rw-r--r--drivers/bluetooth/bcm203x.c2
-rw-r--r--drivers/bluetooth/bluecard_cs.c7
-rw-r--r--drivers/bluetooth/btbcm.c3
-rw-r--r--drivers/bluetooth/btmrvl_sdio.c6
-rw-r--r--drivers/bluetooth/btqcomsmd.c45
-rw-r--r--drivers/bluetooth/btusb.c18
-rw-r--r--drivers/bluetooth/hci_bcm.c263
-rw-r--r--drivers/bluetooth/hci_bcsp.c10
-rw-r--r--drivers/bluetooth/hci_h5.c10
-rw-r--r--drivers/bluetooth/hci_ldisc.c7
-rw-r--r--drivers/bluetooth/hci_ll.c2
-rw-r--r--drivers/bluetooth/hci_qca.c17
-rw-r--r--drivers/connector/cn_queue.c4
-rw-r--r--drivers/connector/connector.c2
-rw-r--r--drivers/infiniband/hw/qedr/Kconfig1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c15
-rw-r--r--drivers/isdn/gigaset/bas-gigaset.c46
-rw-r--r--drivers/isdn/gigaset/common.c7
-rw-r--r--drivers/isdn/hardware/mISDN/mISDNipac.c7
-rw-r--r--drivers/isdn/hardware/mISDN/w6692.c7
-rw-r--r--drivers/isdn/hisax/amd7930_fn.c6
-rw-r--r--drivers/isdn/hisax/arcofi.c6
-rw-r--r--drivers/isdn/hisax/diva.c9
-rw-r--r--drivers/isdn/hisax/elsa.c10
-rw-r--r--drivers/isdn/hisax/fsm.c7
-rw-r--r--drivers/isdn/hisax/hfc4s8s_l1.c6
-rw-r--r--drivers/isdn/hisax/hfc_2bds0.c4
-rw-r--r--drivers/isdn/hisax/hfc_pci.c9
-rw-r--r--drivers/isdn/hisax/hfc_sx.c9
-rw-r--r--drivers/isdn/hisax/hfc_usb.c10
-rw-r--r--drivers/isdn/hisax/hfcscard.c5
-rw-r--r--drivers/isdn/hisax/icc.c6
-rw-r--r--drivers/isdn/hisax/ipacx.c8
-rw-r--r--drivers/isdn/hisax/isac.c6
-rw-r--r--drivers/isdn/hisax/isar.c9
-rw-r--r--drivers/isdn/hisax/isdnl3.c6
-rw-r--r--drivers/isdn/hisax/saphir.c7
-rw-r--r--drivers/isdn/hisax/teleint.c5
-rw-r--r--drivers/isdn/hisax/w6692.c7
-rw-r--r--drivers/isdn/isdnloop/isdnloop.c33
-rw-r--r--drivers/isdn/mISDN/dsp.h2
-rw-r--r--drivers/isdn/mISDN/dsp_core.c6
-rw-r--r--drivers/isdn/mISDN/dsp_tones.c6
-rw-r--r--drivers/isdn/mISDN/fsm.c7
-rw-r--r--drivers/isdn/mISDN/l1oip_core.c15
-rw-r--r--drivers/isdn/mISDN/timerdev.c6
-rw-r--r--drivers/net/Kconfig12
-rw-r--r--drivers/net/Makefile3
-rw-r--r--drivers/net/appletalk/cops.c4
-rw-r--r--drivers/net/appletalk/ltpc.c18
-rw-r--r--drivers/net/arcnet/arcnet.c4
-rw-r--r--drivers/net/bonding/bond_main.c33
-rw-r--r--drivers/net/bonding/bond_options.c2
-rw-r--r--drivers/net/caif/caif_hsi.c15
-rw-r--r--drivers/net/can/grcan.c10
-rw-r--r--drivers/net/can/sja1000/peak_pcmcia.c4
-rw-r--r--drivers/net/can/usb/peak_usb/pcan_usb.c5
-rw-r--r--drivers/net/dsa/b53/b53_common.c159
-rw-r--r--drivers/net/dsa/b53/b53_priv.h145
-rw-r--r--drivers/net/dsa/b53/b53_regs.h48
-rw-r--r--drivers/net/dsa/bcm_sf2.c188
-rw-r--r--drivers/net/dsa/bcm_sf2.h2
-rw-r--r--drivers/net/dsa/bcm_sf2_cfp.c6
-rw-r--r--drivers/net/dsa/bcm_sf2_regs.h34
-rw-r--r--drivers/net/dsa/dsa_loop.c8
-rw-r--r--drivers/net/dsa/lan9303-core.c497
-rw-r--r--drivers/net/dsa/lan9303.h13
-rw-r--r--drivers/net/dsa/lan9303_mdio.c5
-rw-r--r--drivers/net/dsa/mt7530.c11
-rw-r--r--drivers/net/dsa/mv88e6060.c45
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c43
-rw-r--r--drivers/net/dsa/qca8k.c6
-rw-r--r--drivers/net/dummy.c4
-rw-r--r--drivers/net/eql.c4
-rw-r--r--drivers/net/ethernet/3com/3c574_cs.c12
-rw-r--r--drivers/net/ethernet/3com/3c589_cs.c10
-rw-r--r--drivers/net/ethernet/3com/3c59x.c20
-rw-r--r--drivers/net/ethernet/adi/bfin_mac.c5
-rw-r--r--drivers/net/ethernet/agere/et131x.c5
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_admin_defs.h6
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.c16
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_ethtool.c5
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c197
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.h12
-rw-r--r--drivers/net/ethernet/amd/a2065.c7
-rw-r--r--drivers/net/ethernet/amd/am79c961a.c4
-rw-r--r--drivers/net/ethernet/amd/amd8111e.c8
-rw-r--r--drivers/net/ethernet/amd/declance.c6
-rw-r--r--drivers/net/ethernet/amd/pcnet32.c5
-rw-r--r--drivers/net/ethernet/amd/sunlance.c5
-rw-r--r--drivers/net/ethernet/apple/bmac.c12
-rw-r--r--drivers/net/ethernet/apple/mace.c12
-rw-r--r--drivers/net/ethernet/broadcom/Kconfig1
-rw-r--r--drivers/net/ethernet/broadcom/b44.c4
-rw-r--r--drivers/net/ethernet/broadcom/bcm63xx_enet.c108
-rw-r--r--drivers/net/ethernet/broadcom/bcm63xx_enet.h1
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c119
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.h11
-rw-r--r--drivers/net/ethernet/broadcom/bgmac-bcma.c8
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c4
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c7
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/Makefile3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c44
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c65
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h39
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c8
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c94
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h37
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c1
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c4
-rw-r--r--drivers/net/ethernet/brocade/bna/bnad.c43
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c90
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_main.c5
-rw-r--r--drivers/net/ethernet/cavium/liquidio/liquidio_image.h1
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.c11
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_device.h10
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb/sge.c30
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/Makefile5
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c13
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c54
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h181
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h69
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c867
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h103
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h81
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h63
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c296
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h44
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c15
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c13
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c70
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c233
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c108
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c827
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h120
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h3
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/l2t.c4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/smt.c247
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/smt.h76
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c471
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_msg.h44
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_regs.h2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h47
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h75
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c6
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_clsf.h5
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c5
-rw-r--r--drivers/net/ethernet/dec/tulip/de2104x.c21
-rw-r--r--drivers/net/ethernet/dec/tulip/de4x5.c13
-rw-r--r--drivers/net/ethernet/dec/tulip/dmfe.c12
-rw-r--r--drivers/net/ethernet/dec/tulip/interrupt.c6
-rw-r--r--drivers/net/ethernet/dec/tulip/pnic.c6
-rw-r--r--drivers/net/ethernet/dec/tulip/pnic2.c6
-rw-r--r--drivers/net/ethernet/dec/tulip/timer.c12
-rw-r--r--drivers/net/ethernet/dec/tulip/tulip.h12
-rw-r--r--drivers/net/ethernet/dec/tulip/tulip_core.c14
-rw-r--r--drivers/net/ethernet/dec/tulip/uli526x.c12
-rw-r--r--drivers/net/ethernet/dec/tulip/winbond-840.c12
-rw-r--r--drivers/net/ethernet/dlink/dl2k.c10
-rw-r--r--drivers/net/ethernet/dlink/sundance.c4
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c26
-rw-r--r--drivers/net/ethernet/fealnx.c8
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c139
-rw-r--r--drivers/net/ethernet/freescale/fman/Makefile12
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.c135
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.h6
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c2
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet.h1
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c5
-rw-r--r--drivers/net/ethernet/hisilicon/Kconfig15
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h22
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c25
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h137
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c347
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h21
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c821
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h21
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c257
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h15
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c100
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c242
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h18
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c412
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_rx.c18
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_tx.c14
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c83
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.h5
-rw-r--r--drivers/net/ethernet/intel/e100.c11
-rw-r--r--drivers/net/ethernet/intel/e1000e/defines.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/e1000.h4
-rw-r--r--drivers/net/ethernet/intel/e1000e/mac.c11
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c92
-rw-r--r--drivers/net/ethernet/intel/e1000e/param.c16
-rw-r--r--drivers/net/ethernet/intel/e1000e/phy.c7
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k.h60
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_common.c6
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_iov.c176
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_main.c6
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_mbx.c4
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c207
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pci.c482
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.c12
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_pf.h3
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h154
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq.c12
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h72
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c256
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c11
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_diag.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c903
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c1771
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c20
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h9
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_register.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c62
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h13
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_type.h26
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c259
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h6
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h72
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_common.c69
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_prototype.h9
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c25
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h11
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_type.h23
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf.h34
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c38
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c193
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c45
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c20
-rw-r--r--drivers/net/ethernet/intel/igbvf/netdev.c7
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_main.c10
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe.h9
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_common.c8
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c54
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c11
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c314
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c19
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c14
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c8
-rw-r--r--drivers/net/ethernet/korina.c230
-rw-r--r--drivers/net/ethernet/marvell/mvpp2.c1
-rw-r--r--drivers/net/ethernet/marvell/pxa168_eth.c5
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c8
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/catas.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cq.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c11
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_resources.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_tx.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/qp.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/srq.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Kconfig2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_clock.c619
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c144
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c71
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c38
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tx.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c994
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c260
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h36
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c350
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c525
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c106
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h407
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/resources.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c241
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c106
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c129
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h44
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c397
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c1011
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h134
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c838
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h43
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c768
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c514
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h6
-rw-r--r--drivers/net/ethernet/micrel/ksz884x.c18
-rw-r--r--drivers/net/ethernet/natsemi/natsemi.c4
-rw-r--r--drivers/net/ethernet/natsemi/ns83820.c4
-rw-r--r--drivers/net/ethernet/neterion/s2io.c13
-rw-r--r--drivers/net/ethernet/neterion/s2io.h2
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-main.c12
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-main.h8
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-traffic.c19
-rw-r--r--drivers/net/ethernet/netronome/nfp/Makefile4
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c788
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c62
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.h52
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/verifier.c21
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c413
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.c16
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h144
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.c13
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h35
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c114
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/metadata.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c148
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c811
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.c1
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_app.h8
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.c257
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_asm.h300
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c39
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h2
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c104
-rw-r--r--drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h3
-rw-r--r--drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c42
-rw-r--r--drivers/net/ethernet/packetengines/hamachi.c4
-rw-r--r--drivers/net/ethernet/packetengines/yellowfin.c4
-rw-r--r--drivers/net/ethernet/qlogic/Kconfig3
-rw-r--r--drivers/net/ethernet/qlogic/qed/Makefile3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.c6
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dcbx.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c749
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.h44
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c103
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.h10
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ooo.c16
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ooo.h2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c24
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sp_commands.c1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_fp.c1
-rw-r--r--drivers/net/ethernet/qlogic/qla3xxx.c4
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-mac.c16
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-sgmii.c15
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c27
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c168
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h19
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c137
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h3
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c4
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h8
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c36
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h7
-rw-r--r--drivers/net/ethernet/realtek/atp.c4
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c90
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c32
-rw-r--r--drivers/net/ethernet/seeq/ether3.c11
-rw-r--r--drivers/net/ethernet/seeq/ether3.h1
-rw-r--r--drivers/net/ethernet/sgi/ioc3-eth.c8
-rw-r--r--drivers/net/ethernet/sis/sis900.c4
-rw-r--r--drivers/net/ethernet/smsc/epic100.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c27
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c43
-rw-r--r--drivers/net/ethernet/sun/cassini.c8
-rw-r--r--drivers/net/ethernet/sun/ldmvsw.c5
-rw-r--r--drivers/net/ethernet/sun/niu.c12
-rw-r--r--drivers/net/ethernet/sun/sunbmac.c10
-rw-r--r--drivers/net/ethernet/sun/sungem.c8
-rw-r--r--drivers/net/ethernet/sun/sunhme.c10
-rw-r--r--drivers/net/ethernet/sun/sunvnet.c3
-rw-r--r--drivers/net/ethernet/sun/sunvnet_common.c4
-rw-r--r--drivers/net/ethernet/sun/sunvnet_common.h2
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.c4
-rw-r--r--drivers/net/ethernet/ti/netcp_ethss.c5
-rw-r--r--drivers/net/ethernet/ti/tlan.c32
-rw-r--r--drivers/net/ethernet/toshiba/spider_net.c12
-rw-r--r--drivers/net/geneve.c19
-rw-r--r--drivers/net/hamradio/6pack.c34
-rw-r--r--drivers/net/hamradio/baycom_epp.c50
-rw-r--r--drivers/net/hamradio/baycom_par.c48
-rw-r--r--drivers/net/hamradio/mkiss.c3
-rw-r--r--drivers/net/hamradio/scc.c69
-rw-r--r--drivers/net/hippi/rrunner.c4
-rw-r--r--drivers/net/hyperv/hyperv_net.h17
-rw-r--r--drivers/net/hyperv/netvsc.c19
-rw-r--r--drivers/net/hyperv/netvsc_drv.c114
-rw-r--r--drivers/net/hyperv/rndis_filter.c6
-rw-r--r--drivers/net/ieee802154/atusb.c317
-rw-r--r--drivers/net/ieee802154/atusb.h8
-rw-r--r--drivers/net/ieee802154/ca8210.c2
-rw-r--r--drivers/net/ifb.c3
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c23
-rw-r--r--drivers/net/macsec.c27
-rw-r--r--drivers/net/macvlan.c31
-rw-r--r--drivers/net/macvtap.c2
-rw-r--r--drivers/net/phy/Kconfig10
-rw-r--r--drivers/net/phy/Makefile2
-rw-r--r--drivers/net/phy/at803x.c2
-rw-r--r--drivers/net/phy/broadcom.c6
-rw-r--r--drivers/net/phy/dp83822.c344
-rw-r--r--drivers/net/phy/dp83848.c3
-rw-r--r--drivers/net/phy/realtek.c54
-rw-r--r--drivers/net/phy/uPD60620.c109
-rw-r--r--drivers/net/ppp/ppp_async.c10
-rw-r--r--drivers/net/ppp/ppp_generic.c21
-rw-r--r--drivers/net/ppp/ppp_synctty.c11
-rw-r--r--drivers/net/slip/slip.c8
-rw-r--r--drivers/net/team/team.c5
-rw-r--r--drivers/net/team/team_mode_loadbalance.c8
-rw-r--r--drivers/net/thunderbolt.c1362
-rw-r--r--drivers/net/tun.c306
-rw-r--r--drivers/net/usb/asix_common.c2
-rw-r--r--drivers/net/usb/catc.c8
-rw-r--r--drivers/net/usb/lan78xx.c10
-rw-r--r--drivers/net/usb/qmi_wwan.c2
-rw-r--r--drivers/net/usb/sierra_net.c12
-rw-r--r--drivers/net/usb/usbnet.c12
-rw-r--r--drivers/net/virtio_net.0
-rw-r--r--drivers/net/virtio_net.c86
-rw-r--r--drivers/net/vrf.c24
-rw-r--r--drivers/net/wan/dscc4.c4
-rw-r--r--drivers/net/wan/hdlc_cisco.c15
-rw-r--r--drivers/net/wan/hdlc_fr.c13
-rw-r--r--drivers/net/wan/hdlc_ppp.c4
-rw-r--r--drivers/net/wan/lmc/lmc_main.c4
-rw-r--r--drivers/net/wan/sdla.c4
-rw-r--r--drivers/net/wireless/ath/ath6kl/txrx.c4
-rw-r--r--drivers/net/wireless/atmel/atmel.c5
-rw-r--r--drivers/net/wireless/broadcom/b43/phy_g.c2
-rw-r--r--drivers/net/wireless/broadcom/b43/phy_ht.c6
-rw-r--r--drivers/net/wireless/broadcom/b43legacy/radio.c2
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c3
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c6
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c2
-rw-r--r--drivers/net/wireless/intel/iwlegacy/4965-mac.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/Makefile1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/cfg/a000.c103
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.c210
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/acpi.h138
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/binding.h41
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/commands.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/debug.h9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h67
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/power.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/api/sta.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/dbg.c15
-rw-r--r--drivers/net/wireless/intel/iwlwifi/fw/file.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-config.h3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-debug.h1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-drv.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c91
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h17
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-trans.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/iwl-trans.h16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/d3.c16
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c76
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c32
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/fw.c207
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c52
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/mvm.h62
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/nvm.c3
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/ops.c22
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/quota.c59
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rx.c7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/sta.c1
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/utils.c96
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c100
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/internal.h4
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans.c89
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c8
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/tx.c23
-rw-r--r--drivers/net/wireless/intersil/p54/main.c7
-rw-r--r--drivers/net/wireless/mac80211_hwsim.c192
-rw-r--r--drivers/net/wireless/marvell/mwifiex/11n.c9
-rw-r--r--drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c6
-rw-r--r--drivers/net/wireless/marvell/mwifiex/cfg80211.c76
-rw-r--r--drivers/net/wireless/marvell/mwifiex/cmdevt.c10
-rw-r--r--drivers/net/wireless/marvell/mwifiex/fw.h1
-rw-r--r--drivers/net/wireless/marvell/mwifiex/init.c4
-rw-r--r--drivers/net/wireless/marvell/mwifiex/main.h1
-rw-r--r--drivers/net/wireless/marvell/mwifiex/scan.c5
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c6
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sta_event.c6
-rw-r--r--drivers/net/wireless/marvell/mwifiex/tdls.c7
-rw-r--r--drivers/net/wireless/marvell/mwifiex/wmm.c3
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/cfg80211.c173
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/commands.c215
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/commands.h6
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/core.h30
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/event.c48
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/qlink.h137
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/qlink_util.c113
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/qlink_util.h7
-rw-r--r--drivers/net/wireless/ray_cs.c53
-rw-r--r--drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/base.c104
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/base.h4
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c13
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/core.c8
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/pci.c49
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/pci.h10
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c11
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c40
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c9
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c49
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h140
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c8
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c11
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c13
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c13
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h2
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c6
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c5
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h3
-rw-r--r--drivers/net/wireless/realtek/rtlwifi/wifi.h60
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_core.c33
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_debugfs.c19
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_hal.c91
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_mac80211.c446
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_main.c34
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_mgmt.c46
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_ps.c15
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_sdio.c128
-rw-r--r--drivers/net/wireless/rsi/rsi_91x_usb.c4
-rw-r--r--drivers/net/wireless/rsi/rsi_common.h4
-rw-r--r--drivers/net/wireless/rsi/rsi_hal.h6
-rw-r--r--drivers/net/wireless/rsi/rsi_main.h24
-rw-r--r--drivers/net/wireless/rsi/rsi_mgmt.h21
-rw-r--r--drivers/net/wireless/rsi/rsi_ps.h7
-rw-r--r--drivers/net/wireless/rsi/rsi_sdio.h2
-rw-r--r--drivers/net/wireless/st/cw1200/pm.c5
-rw-r--r--drivers/net/xen-netback/common.h2
-rw-r--r--drivers/net/xen-netback/interface.c3
-rw-r--r--drivers/net/xen-netback/netback.c6
-rw-r--r--drivers/pcmcia/electra_cf.c12
-rw-r--r--drivers/s390/net/fsm.c8
-rw-r--r--drivers/s390/net/qeth_core.h38
-rw-r--r--drivers/s390/net/qeth_core_main.c210
-rw-r--r--drivers/s390/net/qeth_core_mpc.c4
-rw-r--r--drivers/s390/net/qeth_core_mpc.h73
-rw-r--r--drivers/s390/net/qeth_core_sys.c4
-rw-r--r--drivers/s390/net/qeth_l2.h6
-rw-r--r--drivers/s390/net/qeth_l2_main.c557
-rw-r--r--drivers/s390/net/qeth_l2_sys.c215
-rw-r--r--drivers/s390/net/qeth_l3_main.c37
-rw-r--r--drivers/s390/net/qeth_l3_sys.c45
-rw-r--r--drivers/scsi/qedi/Kconfig1
-rw-r--r--drivers/thunderbolt/Makefile2
-rw-r--r--drivers/thunderbolt/ctl.c46
-rw-r--r--drivers/thunderbolt/ctl.h3
-rw-r--r--drivers/thunderbolt/domain.c197
-rw-r--r--drivers/thunderbolt/icm.c218
-rw-r--r--drivers/thunderbolt/nhi.c412
-rw-r--r--drivers/thunderbolt/nhi.h141
-rw-r--r--drivers/thunderbolt/nhi_regs.h11
-rw-r--r--drivers/thunderbolt/property.c670
-rw-r--r--drivers/thunderbolt/switch.c7
-rw-r--r--drivers/thunderbolt/tb.h88
-rw-r--r--drivers/thunderbolt/tb_msgs.h168
-rw-r--r--drivers/thunderbolt/xdomain.c1579
-rw-r--r--drivers/vhost/net.c12
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/rxrpc.c42
-rw-r--r--include/linux/avf/virtchnl.h20
-rw-r--r--include/linux/bpf-cgroup.h54
-rw-r--r--include/linux/bpf.h104
-rw-r--r--include/linux/bpf_types.h29
-rw-r--r--include/linux/bpf_verifier.h18
-rw-r--r--include/linux/brcmphy.h1
-rw-r--r--include/linux/byteorder/generic.h16
-rw-r--r--include/linux/connector.h4
-rw-r--r--include/linux/dynamic_queue_limits.h2
-rw-r--r--include/linux/ethtool.h10
-rw-r--r--include/linux/filter.h32
-rw-r--r--include/linux/ieee80211.h1
-rw-r--r--include/linux/if_arp.h2
-rw-r--r--include/linux/if_bridge.h6
-rw-r--r--include/linux/if_macvlan.h3
-rw-r--r--include/linux/if_phonet.h2
-rw-r--r--include/linux/inetdevice.h2
-rw-r--r--include/linux/lsm_hooks.h54
-rw-r--r--include/linux/mlx4/device.h8
-rw-r--r--include/linux/mlx5/cq.h4
-rw-r--r--include/linux/mlx5/driver.h24
-rw-r--r--include/linux/mod_devicetable.h26
-rw-r--r--include/linux/mroute.h46
-rw-r--r--include/linux/netdevice.h33
-rw-r--r--include/linux/once.h6
-rw-r--r--include/linux/perf_event.h7
-rw-r--r--include/linux/qed/qed_ll2_if.h8
-rw-r--r--include/linux/rtnetlink.h4
-rw-r--r--include/linux/security.h45
-rw-r--r--include/linux/skbuff.h133
-rw-r--r--include/linux/tcp.h1
-rw-r--r--include/linux/thunderbolt.h598
-rw-r--r--include/net/act_api.h37
-rw-r--r--include/net/addrconf.h5
-rw-r--r--include/net/af_rxrpc.h7
-rw-r--r--include/net/af_vsock.h20
-rw-r--r--include/net/bonding.h3
-rw-r--r--include/net/cfg80211.h40
-rw-r--r--include/net/dn.h7
-rw-r--r--include/net/dn_nsp.h1
-rw-r--r--include/net/dsa.h85
-rw-r--r--include/net/dst.h17
-rw-r--r--include/net/dst_metadata.h6
-rw-r--r--include/net/fib_notifier.h2
-rw-r--r--include/net/fq.h7
-rw-r--r--include/net/fq_impl.h72
-rw-r--r--include/net/inet_connection_sock.h6
-rw-r--r--include/net/inet_ecn.h5
-rw-r--r--include/net/inet_frag.h2
-rw-r--r--include/net/ip6_fib.h79
-rw-r--r--include/net/ip6_route.h5
-rw-r--r--include/net/ip_fib.h3
-rw-r--r--include/net/ip_tunnels.h3
-rw-r--r--include/net/ipv6.h4
-rw-r--r--include/net/mac80211.h8
-rw-r--r--include/net/neighbour.h4
-rw-r--r--include/net/netns/ipv4.h12
-rw-r--r--include/net/netns/ipv6.h5
-rw-r--r--include/net/phonet/phonet.h6
-rw-r--r--include/net/pkt_cls.h192
-rw-r--r--include/net/pkt_sched.h14
-rw-r--r--include/net/request_sock.h2
-rw-r--r--include/net/rtnetlink.h5
-rw-r--r--include/net/sch_generic.h7
-rw-r--r--include/net/sctp/stream_sched.h72
-rw-r--r--include/net/sctp/structs.h63
-rw-r--r--include/net/sock.h7
-rw-r--r--include/net/switchdev.h1
-rw-r--r--include/net/tc_act/tc_gact.h5
-rw-r--r--include/net/tc_act/tc_ife.h12
-rw-r--r--include/net/tc_act/tc_mirred.h1
-rw-r--r--include/net/tcp.h145
-rw-r--r--include/trace/events/bpf.h5
-rw-r--r--include/trace/events/fib6.h6
-rw-r--r--include/trace/events/tcp.h70
-rw-r--r--include/trace/events/xdp.h80
-rw-r--r--include/uapi/linux/bpf.h136
-rw-r--r--include/uapi/linux/if_link.h3
-rw-r--r--include/uapi/linux/if_tun.h2
-rw-r--r--include/uapi/linux/if_tunnel.h1
-rw-r--r--include/uapi/linux/in6.h1
-rw-r--r--include/uapi/linux/nl80211.h82
-rw-r--r--include/uapi/linux/openvswitch.h3
-rw-r--r--include/uapi/linux/pkt_sched.h33
-rw-r--r--include/uapi/linux/qrtr.h35
-rw-r--r--include/uapi/linux/sctp.h16
-rw-r--r--include/uapi/linux/tc_act/tc_mirred.h6
-rw-r--r--include/uapi/linux/tcp.h1
-rw-r--r--include/uapi/linux/tipc.h15
-rw-r--r--include/uapi/linux/vm_sockets_diag.h33
-rw-r--r--kernel/bpf/Makefile2
-rw-r--r--kernel/bpf/arraymap.c8
-rw-r--r--kernel/bpf/cgroup.c511
-rw-r--r--kernel/bpf/core.c89
-rw-r--r--kernel/bpf/cpumap.c702
-rw-r--r--kernel/bpf/devmap.c5
-rw-r--r--kernel/bpf/disasm.c214
-rw-r--r--kernel/bpf/disasm.h32
-rw-r--r--kernel/bpf/hashtab.c5
-rw-r--r--kernel/bpf/inode.c15
-rw-r--r--kernel/bpf/lpm_trie.c98
-rw-r--r--kernel/bpf/sockmap.c9
-rw-r--r--kernel/bpf/stackmap.c5
-rw-r--r--kernel/bpf/syscall.c268
-rw-r--r--kernel/bpf/verifier.c890
-rw-r--r--kernel/cgroup/cgroup.c38
-rw-r--r--kernel/events/core.c15
-rw-r--r--kernel/trace/bpf_trace.c88
-rw-r--r--lib/dynamic_queue_limits.c3
-rw-r--r--lib/kobject_uevent.c94
-rw-r--r--lib/once.c8
-rw-r--r--lib/test_rhashtable.c293
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/8021q/vlan.h2
-rw-r--r--net/8021q/vlan_netlink.c2
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/lec.c19
-rw-r--r--net/atm/mpc.c1
-rw-r--r--net/batman-adv/bat_iv_ogm.c24
-rw-r--r--net/batman-adv/bat_v.c2
-rw-r--r--net/batman-adv/bat_v_elp.c6
-rw-r--r--net/batman-adv/bat_v_ogm.c12
-rw-r--r--net/batman-adv/distributed-arp-table.c4
-rw-r--r--net/batman-adv/gateway_client.c8
-rw-r--r--net/batman-adv/gateway_common.c18
-rw-r--r--net/batman-adv/hard-interface.c14
-rw-r--r--net/batman-adv/icmp_socket.c4
-rw-r--r--net/batman-adv/main.c12
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c2
-rw-r--r--net/batman-adv/originator.c26
-rw-r--r--net/batman-adv/routing.c6
-rw-r--r--net/batman-adv/send.c6
-rw-r--r--net/batman-adv/soft-interface.c9
-rw-r--r--net/batman-adv/sysfs.c4
-rw-r--r--net/batman-adv/tp_meter.c2
-rw-r--r--net/bluetooth/ecdh_helper.c228
-rw-r--r--net/bluetooth/ecdh_helper.h9
-rw-r--r--net/bluetooth/selftest.c48
-rw-r--r--net/bluetooth/smp.c127
-rw-r--r--net/bpf/test_run.c3
-rw-r--r--net/bridge/Makefile2
-rw-r--r--net/bridge/br_arp_nd_proxy.c469
-rw-r--r--net/bridge/br_device.c25
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/bridge/br_if.c20
-rw-r--r--net/bridge/br_input.c75
-rw-r--r--net/bridge/br_ioctl.c5
-rw-r--r--net/bridge/br_multicast.c50
-rw-r--r--net/bridge/br_netlink.c22
-rw-r--r--net/bridge/br_private.h21
-rw-r--r--net/bridge/br_sysfs_if.c20
-rw-r--r--net/can/af_can.c4
-rw-r--r--net/can/af_can.h2
-rw-r--r--net/can/proc.c8
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c218
-rw-r--r--net/core/dst.c16
-rw-r--r--net/core/ethtool.c16
-rw-r--r--net/core/filter.c379
-rw-r--r--net/core/flow_dissector.c100
-rw-r--r--net/core/lwt_bpf.c2
-rw-r--r--net/core/neighbour.c18
-rw-r--r--net/core/net-sysfs.c17
-rw-r--r--net/core/net-traces.c1
-rw-r--r--net/core/rtnetlink.c350
-rw-r--r--net/core/skbuff.c57
-rw-r--r--net/core/sock.c2
-rw-r--r--net/dccp/input.c1
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/dccp/timer.c18
-rw-r--r--net/decnet/af_decnet.c7
-rw-r--r--net/decnet/dn_dev.c12
-rw-r--r--net/decnet/dn_nsp_out.c11
-rw-r--r--net/decnet/dn_route.c8
-rw-r--r--net/dsa/Makefile2
-rw-r--r--net/dsa/dsa.c63
-rw-r--r--net/dsa/dsa2.c53
-rw-r--r--net/dsa/dsa_priv.h59
-rw-r--r--net/dsa/legacy.c47
-rw-r--r--net/dsa/master.c117
-rw-r--r--net/dsa/port.c42
-rw-r--r--net/dsa/slave.c559
-rw-r--r--net/dsa/tag_brcm.c20
-rw-r--r--net/dsa/tag_dsa.c28
-rw-r--r--net/dsa/tag_edsa.c28
-rw-r--r--net/dsa/tag_ksz.c13
-rw-r--r--net/dsa/tag_lan9303.c24
-rw-r--r--net/dsa/tag_mtk.c20
-rw-r--r--net/dsa/tag_qca.c22
-rw-r--r--net/dsa/tag_trailer.c13
-rw-r--r--net/ieee802154/6lowpan/reassembly.c5
-rw-r--r--net/ieee802154/netlink.c6
-rw-r--r--net/ipv4/af_inet.c12
-rw-r--r--net/ipv4/arp.c1
-rw-r--r--net/ipv4/devinet.c43
-rw-r--r--net/ipv4/fib_frontend.c22
-rw-r--r--net/ipv4/fib_semantics.c15
-rw-r--r--net/ipv4/fib_trie.c39
-rw-r--r--net/ipv4/icmp.c15
-rw-r--r--net/ipv4/inet_connection_sock.c57
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/inet_timewait_sock.c7
-rw-r--r--net/ipv4/inetpeer.c11
-rw-r--r--net/ipv4/ip_fragment.c5
-rw-r--r--net/ipv4/ip_gre.c30
-rw-r--r--net/ipv4/ip_tunnel.c12
-rw-r--r--net/ipv4/ip_vti.c7
-rw-r--r--net/ipv4/ipip.c7
-rw-r--r--net/ipv4/ipmr.c262
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c3
-rw-r--r--net/ipv4/route.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c61
-rw-r--r--net/ipv4/tcp.c116
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_fastopen.c146
-rw-r--r--net/ipv4/tcp_input.c271
-rw-r--r--net/ipv4/tcp_ipv4.c16
-rw-r--r--net/ipv4/tcp_metrics.c14
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv4/tcp_nv.c40
-rw-r--r--net/ipv4/tcp_output.c182
-rw-r--r--net/ipv4/tcp_recovery.c52
-rw-r--r--net/ipv4/tcp_timer.c42
-rw-r--r--net/ipv4/tcp_vegas.c2
-rw-r--r--net/ipv4/udp.c3
-rw-r--r--net/ipv6/addrconf.c272
-rw-r--r--net/ipv6/addrconf_core.c9
-rw-r--r--net/ipv6/addrlabel.c146
-rw-r--r--net/ipv6/ah6.c1
-rw-r--r--net/ipv6/exthdrs.c1
-rw-r--r--net/ipv6/exthdrs_core.c5
-rw-r--r--net/ipv6/icmp.c50
-rw-r--r--net/ipv6/ip6_fib.c652
-rw-r--r--net/ipv6/ip6_gre.c8
-rw-r--r--net/ipv6/ip6_tunnel.c21
-rw-r--r--net/ipv6/ip6_vti.c23
-rw-r--r--net/ipv6/ip6mr.c1
-rw-r--r--net/ipv6/ipv6_sockglue.c12
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c5
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c3
-rw-r--r--net/ipv6/ping.c5
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/reassembly.c5
-rw-r--r--net/ipv6/route.c931
-rw-r--r--net/ipv6/sit.c9
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/ipx/af_ipx.c1
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/l2tp/l2tp_netlink.c2
-rw-r--r--net/lapb/lapb_iface.c4
-rw-r--r--net/lapb/lapb_timer.c18
-rw-r--r--net/mac80211/Makefile3
-rw-r--r--net/mac80211/aead_api.c (renamed from net/mac80211/aes_ccm.c)40
-rw-r--r--net/mac80211/aead_api.h27
-rw-r--r--net/mac80211/aes_ccm.h42
-rw-r--r--net/mac80211/aes_gcm.c109
-rw-r--r--net/mac80211/aes_gcm.h38
-rw-r--r--net/mac80211/agg-rx.c4
-rw-r--r--net/mac80211/ht.c12
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/iface.c29
-rw-r--r--net/mac80211/mesh.c3
-rw-r--r--net/mac80211/mesh.h1
-rw-r--r--net/mac80211/mesh_hwmp.c8
-rw-r--r--net/mac80211/mesh_plink.c13
-rw-r--r--net/mac80211/mlme.c33
-rw-r--r--net/mac80211/scan.c37
-rw-r--r--net/mac80211/sta_info.c61
-rw-r--r--net/mac80211/sta_info.h4
-rw-r--r--net/mac80211/tx.c34
-rw-r--r--net/mac80211/util.c25
-rw-r--r--net/mac80211/vht.c10
-rw-r--r--net/mac80211/wpa.c4
-rw-r--r--net/mac802154/llsec.c14
-rw-r--r--net/mpls/Kconfig1
-rw-r--r--net/mpls/af_mpls.c36
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h10
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c2
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h12
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c12
-rw-r--r--net/netfilter/xt_bpf.c2
-rw-r--r--net/netlink/af_netlink.c12
-rw-r--r--net/netrom/af_netrom.c6
-rw-r--r--net/netrom/nr_in.c2
-rw-r--r--net/netrom/nr_timer.c48
-rw-r--r--net/nfc/core.c5
-rw-r--r--net/nfc/hci/core.c5
-rw-r--r--net/nfc/hci/llc_shdlc.c15
-rw-r--r--net/nfc/llcp_core.c10
-rw-r--r--net/openvswitch/actions.c4
-rw-r--r--net/openvswitch/conntrack.c12
-rw-r--r--net/openvswitch/conntrack.h7
-rw-r--r--net/openvswitch/flow_netlink.c56
-rw-r--r--net/openvswitch/vport-netdev.c3
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/phonet/af_phonet.c17
-rw-r--r--net/phonet/datagram.c2
-rw-r--r--net/phonet/pep.c2
-rw-r--r--net/qrtr/qrtr.c377
-rw-r--r--net/rds/ib.c11
-rw-r--r--net/rds/ib.h2
-rw-r--r--net/rds/ib_rdma.c4
-rw-r--r--net/rose/af_rose.c17
-rw-r--r--net/rose/rose_in.c1
-rw-r--r--net/rose/rose_link.c16
-rw-r--r--net/rose/rose_loopback.c9
-rw-r--r--net/rose/rose_route.c10
-rw-r--r--net/rose/rose_timer.c39
-rw-r--r--net/rxrpc/af_rxrpc.c24
-rw-r--r--net/rxrpc/peer_object.c13
-rw-r--r--net/rxrpc/recvmsg.c5
-rw-r--r--net/rxrpc/sendmsg.c107
-rw-r--r--net/sched/act_api.c221
-rw-r--r--net/sched/act_bpf.c4
-rw-r--r--net/sched/act_ife.c153
-rw-r--r--net/sched/act_meta_mark.c2
-rw-r--r--net/sched/act_meta_skbprio.c2
-rw-r--r--net/sched/act_meta_skbtcindex.c2
-rw-r--r--net/sched/act_mirred.c13
-rw-r--r--net/sched/cls_api.c268
-rw-r--r--net/sched/cls_basic.c38
-rw-r--r--net/sched/cls_bpf.c89
-rw-r--r--net/sched/cls_flow.c14
-rw-r--r--net/sched/cls_flower.c75
-rw-r--r--net/sched/cls_fw.c5
-rw-r--r--net/sched/cls_matchall.c58
-rw-r--r--net/sched/cls_tcindex.c5
-rw-r--r--net/sched/cls_u32.c187
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c9
-rw-r--r--net/sched/sch_atm.c4
-rw-r--r--net/sched/sch_cbq.c3
-rw-r--r--net/sched/sch_drr.c3
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_fq_codel.c3
-rw-r--r--net/sched/sch_generic.c16
-rw-r--r--net/sched/sch_hfsc.c5
-rw-r--r--net/sched/sch_htb.c10
-rw-r--r--net/sched/sch_ingress.c36
-rw-r--r--net/sched/sch_mqprio.c263
-rw-r--r--net/sched/sch_multiq.c3
-rw-r--r--net/sched/sch_netem.c28
-rw-r--r--net/sched/sch_pie.c10
-rw-r--r--net/sched/sch_prio.c3
-rw-r--r--net/sched/sch_qfq.c3
-rw-r--r--net/sched/sch_red.c10
-rw-r--r--net/sched/sch_sfb.c3
-rw-r--r--net/sched/sch_sfq.c13
-rw-r--r--net/sctp/Makefile3
-rw-r--r--net/sctp/chunk.c6
-rw-r--r--net/sctp/outqueue.c63
-rw-r--r--net/sctp/sm_sideeffect.c3
-rw-r--r--net/sctp/socket.c179
-rw-r--r--net/sctp/stream.c196
-rw-r--r--net/sctp/stream_sched.c275
-rw-r--r--net/sctp/stream_sched_prio.c347
-rw-r--r--net/sctp/stream_sched_rr.c201
-rw-r--r--net/smc/smc_cdc.c7
-rw-r--r--net/smc/smc_cdc.h3
-rw-r--r--net/smc/smc_core.c12
-rw-r--r--net/smc/smc_ib.c26
-rw-r--r--net/smc/smc_tx.c6
-rw-r--r--net/sunrpc/sched.c8
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svc_xprt.c9
-rw-r--r--net/sunrpc/xprt.c9
-rw-r--r--net/tipc/Makefile2
-rw-r--r--net/tipc/bcast.c18
-rw-r--r--net/tipc/core.h5
-rw-r--r--net/tipc/group.c871
-rw-r--r--net/tipc/group.h73
-rw-r--r--net/tipc/link.c8
-rw-r--r--net/tipc/msg.c7
-rw-r--r--net/tipc/msg.h118
-rw-r--r--net/tipc/name_table.c174
-rw-r--r--net/tipc/name_table.h28
-rw-r--r--net/tipc/node.c42
-rw-r--r--net/tipc/node.h5
-rw-r--r--net/tipc/server.c121
-rw-r--r--net/tipc/server.h5
-rw-r--r--net/tipc/socket.c833
-rw-r--r--net/unix/af_unix.c1
-rw-r--r--net/vmw_vsock/Kconfig10
-rw-r--r--net/vmw_vsock/Makefile3
-rw-r--r--net/vmw_vsock/af_vsock.c66
-rw-r--r--net/vmw_vsock/diag.c186
-rw-r--r--net/vmw_vsock/hyperv_transport.c13
-rw-r--r--net/vmw_vsock/virtio_transport.c2
-rw-r--r--net/vmw_vsock/virtio_transport_common.c22
-rw-r--r--net/vmw_vsock/vmci_transport.c35
-rw-r--r--net/vmw_vsock/vmci_transport_notify.c2
-rw-r--r--net/vmw_vsock/vmci_transport_notify_qstate.c2
-rw-r--r--net/wireless/.gitignore3
-rw-r--r--net/wireless/Kconfig58
-rw-r--r--net/wireless/Makefile24
-rw-r--r--net/wireless/certs/sforshee.x509bin0 -> 680 bytes
-rw-r--r--net/wireless/chan.c4
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/core.h5
-rw-r--r--net/wireless/db.txt17
-rw-r--r--net/wireless/genregdb.awk158
-rw-r--r--net/wireless/nl80211.c199
-rw-r--r--net/wireless/nl80211.h2
-rw-r--r--net/wireless/reg.c452
-rw-r--r--net/wireless/reg.h14
-rw-r--r--net/wireless/regdb.h23
-rw-r--r--net/wireless/sme.c45
-rw-r--r--net/wireless/util.c202
-rw-r--r--net/x25/af_x25.c9
-rw-r--r--net/x25/x25_facilities.c2
-rw-r--r--net/x25/x25_in.c1
-rw-r--r--net/x25/x25_timer.c18
-rw-r--r--net/xfrm/xfrm_policy.c17
-rw-r--r--samples/bpf/Makefile17
-rw-r--r--samples/bpf/README.rst10
-rw-r--r--samples/bpf/bpf_load.c2
-rw-r--r--samples/bpf/cgroup_helpers.c4
-rw-r--r--samples/bpf/map_perf_test_kern.c2
-rw-r--r--samples/bpf/map_perf_test_user.c3
-rw-r--r--samples/bpf/tcp_basertt_kern.c78
-rw-r--r--samples/bpf/tcp_bbf.readme26
-rw-r--r--samples/bpf/test_cgrp2_attach2.c224
-rw-r--r--samples/bpf/trace_event_kern.c10
-rw-r--r--samples/bpf/trace_event_user.c13
-rw-r--r--samples/bpf/tracex6_kern.c26
-rw-r--r--samples/bpf/tracex6_user.c13
-rw-r--r--samples/bpf/xdp_monitor_kern.c60
-rw-r--r--samples/bpf/xdp_monitor_user.c123
-rw-r--r--samples/bpf/xdp_redirect_cpu_kern.c609
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c697
-rw-r--r--scripts/mod/devicetable-offsets.c7
-rw-r--r--scripts/mod/file2alias.c25
-rw-r--r--security/security.c32
-rw-r--r--security/selinux/hooks.c160
-rw-r--r--security/selinux/include/classmap.h2
-rw-r--r--security/selinux/include/objsec.h4
-rw-r--r--tools/Makefile14
-rw-r--r--tools/bpf/Makefile (renamed from tools/net/Makefile)18
-rw-r--r--tools/bpf/bpf_asm.c (renamed from tools/net/bpf_asm.c)0
-rw-r--r--tools/bpf/bpf_dbg.c (renamed from tools/net/bpf_dbg.c)0
-rw-r--r--tools/bpf/bpf_exp.l (renamed from tools/net/bpf_exp.l)0
-rw-r--r--tools/bpf/bpf_exp.y (renamed from tools/net/bpf_exp.y)0
-rw-r--r--tools/bpf/bpf_jit_disasm.c (renamed from tools/net/bpf_jit_disasm.c)0
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile34
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst110
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst82
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst36
-rw-r--r--tools/bpf/bpftool/Makefile89
-rw-r--r--tools/bpf/bpftool/common.c216
-rw-r--r--tools/bpf/bpftool/jit_disasm.c87
-rw-r--r--tools/bpf/bpftool/main.c224
-rw-r--r--tools/bpf/bpftool/main.h94
-rw-r--r--tools/bpf/bpftool/map.c744
-rw-r--r--tools/bpf/bpftool/prog.c496
-rw-r--r--tools/include/uapi/linux/bpf.h81
-rw-r--r--tools/lib/bpf/bpf.c89
-rw-r--r--tools/lib/bpf/bpf.h27
-rw-r--r--tools/lib/bpf/libbpf.c179
-rw-r--r--tools/lib/bpf/libbpf.h1
-rw-r--r--tools/testing/selftests/bpf/Makefile23
-rw-r--r--tools/testing/selftests/bpf/bpf_helpers.h67
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c201
-rw-r--r--tools/testing/selftests/bpf/test_maps.c48
-rw-r--r--tools/testing/selftests/bpf/test_progs.c195
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c321
-rw-r--r--tools/testing/selftests/bpf/test_verifier_log.c171
-rw-r--r--tools/testing/selftests/bpf/test_xdp_meta.c53
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh51
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh271
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json469
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json52
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json223
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json527
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/simple.json130
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json320
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json372
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tests.json1165
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py8
-rw-r--r--tools/testing/vsock/.gitignore2
-rw-r--r--tools/testing/vsock/Makefile9
-rw-r--r--tools/testing/vsock/README36
-rw-r--r--tools/testing/vsock/control.c219
-rw-r--r--tools/testing/vsock/control.h13
-rw-r--r--tools/testing/vsock/timeout.c64
-rw-r--r--tools/testing/vsock/timeout.h14
-rw-r--r--tools/testing/vsock/vsock_diag_test.c681
1143 files changed, 58382 insertions, 16495 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-thunderbolt b/Documentation/ABI/testing/sysfs-bus-thunderbolt
index 392bef5bd399..93798c02e28b 100644
--- a/Documentation/ABI/testing/sysfs-bus-thunderbolt
+++ b/Documentation/ABI/testing/sysfs-bus-thunderbolt
@@ -110,3 +110,51 @@ Description: When new NVM image is written to the non-active NVM
is directly the status value from the DMA configuration
based mailbox before the device is power cycled. Writing
0 here clears the status.
+
+What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/key
+Date: Jan 2018
+KernelVersion: 4.15
+Contact: thunderbolt-software@lists.01.org
+Description: This contains name of the property directory the XDomain
+ service exposes. This entry describes the protocol in
+ question. Following directories are already reserved by
+ the Apple XDomain specification:
+
+ network: IP/ethernet over Thunderbolt
+ targetdm: Target disk mode protocol over Thunderbolt
+ extdisp: External display mode protocol over Thunderbolt
+
+What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/modalias
+Date: Jan 2018
+KernelVersion: 4.15
+Contact: thunderbolt-software@lists.01.org
+Description: Stores the same MODALIAS value emitted by uevent for
+ the XDomain service. Format: tbtsvc:kSpNvNrN
+
+What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/prtcid
+Date: Jan 2018
+KernelVersion: 4.15
+Contact: thunderbolt-software@lists.01.org
+Description: This contains XDomain protocol identifier the XDomain
+ service supports.
+
+What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/prtcvers
+Date: Jan 2018
+KernelVersion: 4.15
+Contact: thunderbolt-software@lists.01.org
+Description: This contains XDomain protocol version the XDomain
+ service supports.
+
+What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/prtcrevs
+Date: Jan 2018
+KernelVersion: 4.15
+Contact: thunderbolt-software@lists.01.org
+Description: This contains XDomain software version the XDomain
+ service supports.
+
+What: /sys/bus/thunderbolt/devices/<xdomain>.<service>/prtcstns
+Date: Jan 2018
+KernelVersion: 4.15
+Contact: thunderbolt-software@lists.01.org
+Description: This contains XDomain service specific settings as
+ bitmask. Format: %x
diff --git a/Documentation/admin-guide/thunderbolt.rst b/Documentation/admin-guide/thunderbolt.rst
index 6a4cd1f159ca..5c62d11d77e8 100644
--- a/Documentation/admin-guide/thunderbolt.rst
+++ b/Documentation/admin-guide/thunderbolt.rst
@@ -197,3 +197,27 @@ information is missing.
To recover from this mode, one needs to flash a valid NVM image to the
host host controller in the same way it is done in the previous chapter.
+
+Networking over Thunderbolt cable
+---------------------------------
+Thunderbolt technology allows software communication across two hosts
+connected by a Thunderbolt cable.
+
+It is possible to tunnel any kind of traffic over Thunderbolt link but
+currently we only support Apple ThunderboltIP protocol.
+
+If the other host is running Windows or macOS only thing you need to
+do is to connect Thunderbolt cable between the two hosts, the
+``thunderbolt-net`` is loaded automatically. If the other host is also
+Linux you should load ``thunderbolt-net`` manually on one host (it does
+not matter which one)::
+
+ # modprobe thunderbolt-net
+
+This triggers module load on the other host automatically. If the driver
+is built-in to the kernel image, there is no need to do anything.
+
+The driver will create one virtual ethernet interface per Thunderbolt
+port which are named like ``thunderbolt0`` and so on. From this point
+you can either use standard userspace tools like ``ifconfig`` to
+configure the interface or let your GUI to handle it automatically.
diff --git a/Documentation/devicetree/bindings/net/bluetooth.txt b/Documentation/devicetree/bindings/net/bluetooth.txt
new file mode 100644
index 000000000000..94797df751b8
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/bluetooth.txt
@@ -0,0 +1,5 @@
+The following properties are common to the Bluetooth controllers:
+
+- local-bd-address: array of 6 bytes, specifies the BD address that was
+ uniquely assigned to the Bluetooth device, formatted with least significant
+ byte first (little-endian).
diff --git a/Documentation/devicetree/bindings/net/renesas,ravb.txt b/Documentation/devicetree/bindings/net/renesas,ravb.txt
index 16723535e1aa..c902261893b9 100644
--- a/Documentation/devicetree/bindings/net/renesas,ravb.txt
+++ b/Documentation/devicetree/bindings/net/renesas,ravb.txt
@@ -17,6 +17,8 @@ Required properties:
- "renesas,etheravb-r8a7795" for the R8A7795 SoC.
- "renesas,etheravb-r8a7796" for the R8A7796 SoC.
+ - "renesas,etheravb-r8a77970" for the R8A77970 SoC.
+ - "renesas,etheravb-r8a77995" for the R8A77995 SoC.
- "renesas,etheravb-rcar-gen3" as a fallback for the above
R-Car Gen3 devices.
@@ -40,7 +42,7 @@ Optional properties:
- interrupt-parent: the phandle for the interrupt controller that services
interrupts for this device.
- interrupt-names: A list of interrupt names.
- For the R8A779[56] SoCs this property is mandatory;
+ For the R-Car Gen 3 SoCs this property is mandatory;
it should include one entry per channel, named "ch%u",
where %u is the channel number ranging from 0 to 24.
For other SoCs this property is optional; if present
diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt
index 0115c85a2425..5172799a7f1a 100644
--- a/Documentation/devicetree/bindings/net/sh_eth.txt
+++ b/Documentation/devicetree/bindings/net/sh_eth.txt
@@ -4,7 +4,8 @@ This file provides information on what the device node for the SH EtherMAC
interface contains.
Required properties:
-- compatible: "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC.
+- compatible: Must contain one or more of the following:
+ "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC.
"renesas,ether-r8a7743" if the device is a part of R8A7743 SoC.
"renesas,ether-r8a7745" if the device is a part of R8A7745 SoC.
"renesas,ether-r8a7778" if the device is a part of R8A7778 SoC.
@@ -14,6 +15,14 @@ Required properties:
"renesas,ether-r8a7793" if the device is a part of R8A7793 SoC.
"renesas,ether-r8a7794" if the device is a part of R8A7794 SoC.
"renesas,ether-r7s72100" if the device is a part of R7S72100 SoC.
+ "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device.
+ "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1
+ device.
+
+ When compatible with the generic version, nodes must list
+ the SoC-specific version corresponding to the platform
+ first followed by the generic version.
+
- reg: offset and length of (1) the E-DMAC/feLic register block (required),
(2) the TSU register block (optional).
- interrupts: interrupt specifier for the sole interrupt.
@@ -36,7 +45,8 @@ Optional properties:
Example (Lager board):
ethernet@ee700000 {
- compatible = "renesas,ether-r8a7790";
+ compatible = "renesas,ether-r8a7790",
+ "renesas,rcar-gen2-ether";
reg = <0 0xee700000 0 0x400>;
interrupt-parent = <&gic>;
interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt
index 4ea39e9186a7..042a2e4159bd 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,wcnss.txt
@@ -37,6 +37,11 @@ The following properties are defined to the bluetooth node:
Definition: must be:
"qcom,wcnss-bt"
+- local-bd-address:
+ Usage: optional
+ Value type: <u8 array>
+ Definition: see Documentation/devicetree/bindings/net/bluetooth.txt
+
== WiFi
The following properties are defined to the WiFi node:
@@ -91,6 +96,9 @@ smd {
bt {
compatible = "qcom,wcnss-bt";
+
+ /* BD address 00:11:22:33:44:55 */
+ local-bd-address = [ 55 44 33 22 11 00 ];
};
wlan {
diff --git a/Documentation/driver-api/80211/cfg80211.rst b/Documentation/driver-api/80211/cfg80211.rst
index 8ffac57e1f5b..eeab91b59457 100644
--- a/Documentation/driver-api/80211/cfg80211.rst
+++ b/Documentation/driver-api/80211/cfg80211.rst
@@ -300,9 +300,6 @@ Data path helpers
:functions: ieee80211_data_to_8023
.. kernel-doc:: include/net/cfg80211.h
- :functions: ieee80211_data_from_8023
-
-.. kernel-doc:: include/net/cfg80211.h
:functions: ieee80211_amsdu_to_8023s
.. kernel-doc:: include/net/cfg80211.h
diff --git a/Documentation/networking/netvsc.txt b/Documentation/networking/netvsc.txt
index 93560fb1170a..92f5b31392fa 100644
--- a/Documentation/networking/netvsc.txt
+++ b/Documentation/networking/netvsc.txt
@@ -19,12 +19,12 @@ Features
Receive Side Scaling
--------------------
- Hyper-V supports receive side scaling. For TCP, packets are
- distributed among available queues based on IP address and port
+ Hyper-V supports receive side scaling. For TCP & UDP, packets can
+ be distributed among available queues based on IP address and port
number.
- For UDP, we can switch UDP hash level between L3 and L4 by ethtool
- command. UDP over IPv4 and v6 can be set differently. The default
+ For TCP & UDP, we can switch hash level between L3 and L4 by ethtool
+ command. TCP/UDP over IPv4 and v6 can be set differently. The default
hash level is L4. We currently only allow switching TX hash level
from within the guests.
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
index 7818b5fe448b..381e5b23d61d 100644
--- a/Documentation/networking/regulatory.txt
+++ b/Documentation/networking/regulatory.txt
@@ -19,6 +19,14 @@ core regulatory domain all wireless devices should adhere to.
How to get regulatory domains to the kernel
-------------------------------------------
+When the regulatory domain is first set up, the kernel will request a
+database file (regulatory.db) containing all the regulatory rules. It
+will then use that database when it needs to look up the rules for a
+given country.
+
+How to get regulatory domains to the kernel (old CRDA solution)
+---------------------------------------------------------------
+
Userspace gets a regulatory domain in the kernel by having
a userspace agent build it and send it via nl80211. Only
expected regulatory domains will be respected by the kernel.
@@ -192,23 +200,5 @@ Then in some part of your code after your wiphy has been registered:
Statically compiled regulatory database
---------------------------------------
-In most situations the userland solution using CRDA as described
-above is the preferred solution. However in some cases a set of
-rules built into the kernel itself may be desirable. To account
-for this situation, a configuration option has been provided
-(i.e. CONFIG_CFG80211_INTERNAL_REGDB). With this option enabled,
-the wireless database information contained in net/wireless/db.txt is
-used to generate a data structure encoded in net/wireless/regdb.c.
-That option also enables code in net/wireless/reg.c which queries
-the data in regdb.c as an alternative to using CRDA.
-
-The file net/wireless/db.txt should be kept up-to-date with the db.txt
-file available in the git repository here:
-
- git://git.kernel.org/pub/scm/linux/kernel/git/sforshee/wireless-regdb.git
-
-Again, most users in most situations should be using the CRDA package
-provided with their distribution, and in most other situations users
-should be building and using CRDA on their own rather than using
-this option. If you are not absolutely sure that you should be using
-CONFIG_CFG80211_INTERNAL_REGDB then _DO_NOT_USE_IT_.
+When a database should be fixed into the kernel, it can be provided as a
+firmware file at build time that is then linked into the kernel.
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index 810620153a44..b5407163d53b 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -280,6 +280,18 @@ Interaction with the user of the RxRPC socket:
nominated by a socket option.
+Notes on sendmsg:
+
+ (*) MSG_WAITALL can be set to tell sendmsg to ignore signals if the peer is
+ making progress at accepting packets within a reasonable time such that we
+ manage to queue up all the data for transmission. This requires the
+ client to accept at least one packet per 2*RTT time period.
+
+ If this isn't set, sendmsg() will return immediately, either returning
+ EINTR/ERESTARTSYS if nothing was consumed or returning the amount of data
+ consumed.
+
+
Notes on recvmsg:
(*) If there's a sequence of data messages belonging to a particular call on
@@ -782,7 +794,9 @@ The kernel interface functions are as follows:
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
- gfp_t gfp);
+ gfp_t gfp,
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade);
This allocates the infrastructure to make a new RxRPC call and assigns
call and connection numbers. The call will be made on the UDP port that
@@ -803,6 +817,13 @@ The kernel interface functions are as follows:
allows the kernel to encrypt directly to the packet buffers, thereby
saving a copy. The value may not be less than -1.
+ notify_rx is a pointer to a function to be called when events such as
+ incoming data packets or remote aborts happen.
+
+ upgrade should be set to true if a client operation should request that
+ the server upgrade the service to a better one. The resultant service ID
+ is returned by rxrpc_kernel_recv_data().
+
If this function is successful, an opaque reference to the RxRPC call is
returned. The caller now holds a reference on this and it must be
properly ended.
@@ -850,7 +871,8 @@ The kernel interface functions are as follows:
size_t size,
size_t *_offset,
bool want_more,
- u32 *_abort)
+ u32 *_abort,
+ u16 *_service)
This is used to receive data from either the reply part of a client call
or the request part of a service call. buf and size specify how much
@@ -873,6 +895,9 @@ The kernel interface functions are as follows:
If a remote ABORT is detected, the abort code received will be stored in
*_abort and ECONNABORTED will be returned.
+ The service ID that the call ended up with is returned into *_service.
+ This can be used to see if a call got a service upgrade.
+
(*) Abort a call.
void rxrpc_kernel_abort_call(struct socket *sock,
@@ -1020,6 +1045,30 @@ The kernel interface functions are as follows:
It returns 0 if the call was requeued and an error otherwise.
+ (*) Get call RTT.
+
+ u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call);
+
+ Get the RTT time to the peer in use by a call. The value returned is in
+ nanoseconds.
+
+ (*) Check call still alive.
+
+ u32 rxrpc_kernel_check_life(struct socket *sock,
+ struct rxrpc_call *call);
+
+ This returns a number that is updated when ACKs are received from the peer
+ (notably including PING RESPONSE ACKs which we can elicit by sending PING
+ ACKs to see if the call still exists on the server). The caller should
+ compare the numbers of two calls to see if the call is still alive after
+ waiting for a suitable interval.
+
+ This allows the caller to work out if the server is still contactable and
+ if the call is still alive on the server whilst waiting for the server to
+ process a client operation.
+
+ This function may transmit a PING ACK.
+
=======================
CONFIGURABLE PARAMETERS
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
index 3918dae964d4..8ff7b4c8f91b 100644
--- a/Documentation/networking/vrf.txt
+++ b/Documentation/networking/vrf.txt
@@ -71,7 +71,12 @@ Setup
ip ru add iif vrf-blue table 10
3. Set the default route for the table (and hence default route for the VRF).
- ip route add table 10 unreachable default
+ ip route add table 10 unreachable default metric 4278198272
+
+ This high metric value ensures that the default unreachable route can
+ be overridden by a routing protocol suite. FRRouting interprets
+ kernel metrics as a combined admin distance (upper byte) and priority
+ (lower 3 bytes). Thus the above metric translates to [255/8192].
4. Enslave L3 interfaces to a VRF device.
ip link set dev eth1 master vrf-blue
@@ -256,7 +261,7 @@ older form without it.
For example:
$ ip route show vrf red
- prohibit default
+ unreachable default metric 4278198272
broadcast 10.2.1.0 dev eth1 proto kernel scope link src 10.2.1.2
10.2.1.0/24 dev eth1 proto kernel scope link src 10.2.1.2
local 10.2.1.2 dev eth1 proto kernel scope host src 10.2.1.2
@@ -282,7 +287,7 @@ older form without it.
ff00::/8 dev red metric 256 pref medium
ff00::/8 dev eth1 metric 256 pref medium
ff00::/8 dev eth2 metric 256 pref medium
-
+ unreachable default dev lo metric 4278198272 error -101 pref medium
8. Route Lookup for a VRF
@@ -331,7 +336,7 @@ function vrf_create
ip link add ${VRF} type vrf table ${TBID}
if [ "${VRF}" != "mgmt" ]; then
- ip route add table ${TBID} unreachable default
+ ip route add table ${TBID} unreachable default metric 4278198272
fi
ip link set dev ${VRF} up
}
diff --git a/MAINTAINERS b/MAINTAINERS
index a74227ad082e..e3a7ca9d2783 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -700,9 +700,9 @@ F: include/linux/altera_uart.h
F: include/linux/altera_jtaguart.h
AMAZON ETHERNET DRIVERS
-M: Netanel Belgazal <netanel@annapurnalabs.com>
-R: Saeed Bishara <saeed@annapurnalabs.com>
-R: Zorik Machulsky <zorik@annapurnalabs.com>
+M: Netanel Belgazal <netanel@amazon.com>
+R: Saeed Bishara <saeedb@amazon.com>
+R: Zorik Machulsky <zorik@amazon.com>
L: netdev@vger.kernel.org
S: Supported
F: Documentation/networking/ena.txt
@@ -2725,7 +2725,7 @@ F: net/core/filter.c
F: net/sched/act_bpf.c
F: net/sched/cls_bpf.c
F: samples/bpf/
-F: tools/net/bpf*
+F: tools/bpf/
F: tools/testing/selftests/bpf/
BROADCOM B44 10/100 ETHERNET DRIVER
@@ -3329,17 +3329,22 @@ S: Maintained
F: drivers/auxdisplay/cfag12864bfb.c
F: include/linux/cfag12864b.h
-CFG80211 and NL80211
+802.11 (including CFG80211/NL80211)
M: Johannes Berg <johannes@sipsolutions.net>
L: linux-wireless@vger.kernel.org
W: http://wireless.kernel.org/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
S: Maintained
+F: net/wireless/
F: include/uapi/linux/nl80211.h
+F: include/linux/ieee80211.h
+F: include/net/wext.h
F: include/net/cfg80211.h
-F: net/wireless/*
-X: net/wireless/wext*
+F: include/net/iw_handler.h
+F: include/net/ieee80211_radiotap.h
+F: Documentation/driver-api/80211/cfg80211.rst
+F: Documentation/networking/regulatory.txt
CHAR and MISC DRIVERS
M: Arnd Bergmann <arnd@arndb.de>
@@ -8208,6 +8213,7 @@ F: Documentation/networking/mac80211-injection.txt
F: include/net/mac80211.h
F: net/mac80211/
F: drivers/net/wireless/mac80211_hwsim.[ch]
+F: Documentation/networking/mac80211_hwsim/README
MAILBOX API
M: Jassi Brar <jassisinghbrar@gmail.com>
@@ -9429,7 +9435,6 @@ F: include/uapi/linux/in.h
F: include/uapi/linux/net.h
F: include/uapi/linux/netdevice.h
F: include/uapi/linux/net_namespace.h
-F: tools/net/
F: tools/testing/selftests/net/
F: lib/random32.c
@@ -11493,6 +11498,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
S: Maintained
F: Documentation/rfkill.txt
+F: Documentation/ABI/stable/sysfs-class-rfkill
F: net/rfkill/
RHASHTABLE
@@ -13291,6 +13297,15 @@ M: Mika Westerberg <mika.westerberg@linux.intel.com>
M: Yehezkel Bernat <yehezkel.bernat@intel.com>
S: Maintained
F: drivers/thunderbolt/
+F: include/linux/thunderbolt.h
+
+THUNDERBOLT NETWORK DRIVER
+M: Michael Jamet <michael.jamet@intel.com>
+M: Mika Westerberg <mika.westerberg@linux.intel.com>
+M: Yehezkel Bernat <yehezkel.bernat@intel.com>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/thunderbolt.c
THUNDERX GPIO DRIVER
M: David Daney <david.daney@cavium.com>
@@ -14279,12 +14294,15 @@ S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: include/uapi/linux/vsockmon.h
+F: include/uapi/linux/vm_sockets_diag.h
+F: net/vmw_vsock/diag.c
F: net/vmw_vsock/af_vsock_tap.c
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
F: drivers/net/vsockmon.c
F: drivers/vhost/vsock.c
F: drivers/vhost/vsock.h
+F: tools/testing/vsock/
VIRTIO CONSOLE DRIVER
M: Amit Shah <amit@kernel.org>
diff --git a/arch/blackfin/mach-bf518/boards/ezbrd.c b/arch/blackfin/mach-bf518/boards/ezbrd.c
index d022112927c2..c51d1b810ac3 100644
--- a/arch/blackfin/mach-bf518/boards/ezbrd.c
+++ b/arch/blackfin/mach-bf518/boards/ezbrd.c
@@ -25,7 +25,6 @@
#include <asm/dpmc.h>
#include <asm/bfin_sdh.h>
#include <linux/spi/ad7877.h>
-#include <net/dsa.h>
/*
* Name the Board for the /proc/cpuinfo
@@ -105,11 +104,7 @@ static const unsigned short bfin_mac_peripherals[] = {
static struct bfin_phydev_platform_data bfin_phydev_data[] = {
{
-#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M)
- .addr = 3,
-#else
.addr = 1,
-#endif
.irq = IRQ_MAC_PHYINT,
},
};
@@ -119,9 +114,6 @@ static struct bfin_mii_bus_platform_data bfin_mii_bus_data = {
.phydev_data = bfin_phydev_data,
.phy_mode = PHY_INTERFACE_MODE_MII,
.mac_peripherals = bfin_mac_peripherals,
-#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M)
- .phy_mask = 0xfff7, /* Only probe the port phy connect to the on chip MAC */
-#endif
.vlan1_mask = 1,
.vlan2_mask = 2,
};
@@ -140,29 +132,6 @@ static struct platform_device bfin_mac_device = {
}
};
-#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M)
-static struct dsa_chip_data ksz8893m_switch_chip_data = {
- .mii_bus = &bfin_mii_bus.dev,
- .port_names = {
- NULL,
- "eth%d",
- "eth%d",
- "cpu",
- },
-};
-static struct dsa_platform_data ksz8893m_switch_data = {
- .nr_chips = 1,
- .netdev = &bfin_mac_device.dev,
- .chip = &ksz8893m_switch_chip_data,
-};
-
-static struct platform_device ksz8893m_switch_device = {
- .name = "dsa",
- .id = 0,
- .num_resources = 0,
- .dev.platform_data = &ksz8893m_switch_data,
-};
-#endif
#endif
#if IS_ENABLED(CONFIG_MTD_M25P80)
@@ -228,19 +197,6 @@ static struct spi_board_info bfin_spi_board_info[] __initdata = {
},
#endif
-#if IS_ENABLED(CONFIG_BFIN_MAC)
-#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M)
- {
- .modalias = "ksz8893m",
- .max_speed_hz = 5000000,
- .bus_num = 0,
- .chip_select = 1,
- .platform_data = NULL,
- .mode = SPI_MODE_3,
- },
-#endif
-#endif
-
#if IS_ENABLED(CONFIG_MMC_SPI)
{
.modalias = "mmc_spi",
@@ -714,9 +670,6 @@ static struct platform_device *stamp_devices[] __initdata = {
#if IS_ENABLED(CONFIG_BFIN_MAC)
&bfin_mii_bus,
&bfin_mac_device,
-#if IS_ENABLED(CONFIG_NET_DSA_KSZ8893M)
- &ksz8893m_switch_device,
-#endif
#endif
#if IS_ENABLED(CONFIG_SPI_BFIN5XX)
diff --git a/arch/blackfin/mach-bf518/boards/tcm-bf518.c b/arch/blackfin/mach-bf518/boards/tcm-bf518.c
index 240d5cb1f02c..37d868085f6a 100644
--- a/arch/blackfin/mach-bf518/boards/tcm-bf518.c
+++ b/arch/blackfin/mach-bf518/boards/tcm-bf518.c
@@ -25,7 +25,6 @@
#include <asm/dpmc.h>
#include <asm/bfin_sdh.h>
#include <linux/spi/ad7877.h>
-#include <net/dsa.h>
/*
* Name the Board for the /proc/cpuinfo
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 1669240c7a25..b305f8247909 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -168,7 +168,6 @@ static int uml_net_open(struct net_device *dev)
goto out_close;
}
- lp->tl.data = (unsigned long) &lp->user;
netif_start_queue(dev);
/* clear buffer - it can happen that the host side of the interface
@@ -278,10 +277,11 @@ static const struct ethtool_ops uml_net_ethtool_ops = {
.get_ts_info = ethtool_op_get_ts_info,
};
-static void uml_net_user_timer_expire(unsigned long _conn)
+static void uml_net_user_timer_expire(struct timer_list *t)
{
#ifdef undef
- struct connection *conn = (struct connection *)_conn;
+ struct uml_net_private *lp = from_timer(lp, t, tl);
+ struct connection *conn = &lp->user;
dprintk(KERN_INFO "uml_net_user_timer_expire [%p]\n", conn);
do_connect(conn);
@@ -458,9 +458,8 @@ static void eth_configure(int n, void *init, char *mac,
.add_address = transport->user->add_address,
.delete_address = transport->user->delete_address });
- init_timer(&lp->tl);
+ timer_setup(&lp->tl, uml_net_user_timer_expire, 0);
spin_lock_init(&lp->lock);
- lp->tl.function = uml_net_user_timer_expire;
memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac));
if ((transport->user->init != NULL) &&
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index f8b7e86907cc..126855e6cb7d 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -358,26 +358,33 @@ fore200e_shutdown(struct fore200e* fore200e)
case FORE200E_STATE_COMPLETE:
kfree(fore200e->stats);
+ /* fall through */
case FORE200E_STATE_IRQ:
free_irq(fore200e->irq, fore200e->atm_dev);
+ /* fall through */
case FORE200E_STATE_ALLOC_BUF:
fore200e_free_rx_buf(fore200e);
+ /* fall through */
case FORE200E_STATE_INIT_BSQ:
fore200e_uninit_bs_queue(fore200e);
+ /* fall through */
case FORE200E_STATE_INIT_RXQ:
fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_rxq.status);
fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_rxq.rpd);
+ /* fall through */
case FORE200E_STATE_INIT_TXQ:
fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_txq.status);
fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_txq.tpd);
+ /* fall through */
case FORE200E_STATE_INIT_CMDQ:
fore200e->bus->dma_chunk_free(fore200e, &fore200e->host_cmdq.status);
+ /* fall through */
case FORE200E_STATE_INITIALIZE:
/* nothing to do for that state */
@@ -390,6 +397,7 @@ fore200e_shutdown(struct fore200e* fore200e)
case FORE200E_STATE_MAP:
fore200e->bus->unmap(fore200e);
+ /* fall through */
case FORE200E_STATE_CONFIGURE:
/* nothing to do for that state */
diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c
index 082aa02abc57..d781b3f87693 100644
--- a/drivers/atm/idt77105.c
+++ b/drivers/atm/idt77105.c
@@ -306,11 +306,9 @@ static int idt77105_start(struct atm_dev *dev)
if (start_timer) {
start_timer = 0;
- setup_timer(&stats_timer, idt77105_stats_timer_func, 0UL);
stats_timer.expires = jiffies+IDT77105_STATS_TIMER_PERIOD;
add_timer(&stats_timer);
- setup_timer(&restart_timer, idt77105_restart_timer_func, 0UL);
restart_timer.expires = jiffies+IDT77105_RESTART_TIMER_PERIOD;
add_timer(&restart_timer);
}
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 47f3c4ae0594..0e3b9c44c808 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -2073,21 +2073,19 @@ idt77252_rate_logindex(struct idt77252_dev *card, int pcr)
}
static void
-idt77252_est_timer(unsigned long data)
+idt77252_est_timer(struct timer_list *t)
{
- struct vc_map *vc = (struct vc_map *)data;
+ struct rate_estimator *est = from_timer(est, t, timer);
+ struct vc_map *vc = est->vc;
struct idt77252_dev *card = vc->card;
- struct rate_estimator *est;
unsigned long flags;
u32 rate, cps;
u64 ncells;
u8 lacr;
spin_lock_irqsave(&vc->lock, flags);
- est = vc->estimator;
- if (!est)
+ if (!vc->estimator)
goto out;
-
ncells = est->cells;
rate = ((u32)(ncells - est->last_cells)) << (7 - est->interval);
@@ -2126,10 +2124,11 @@ idt77252_init_est(struct vc_map *vc, int pcr)
est->maxcps = pcr < 0 ? -pcr : pcr;
est->cps = est->maxcps;
est->avcps = est->cps << 5;
+ est->vc = vc;
est->interval = 2; /* XXX: make this configurable */
est->ewma_log = 2; /* XXX: make this configurable */
- setup_timer(&est->timer, idt77252_est_timer, (unsigned long)vc);
+ timer_setup(&est->timer, idt77252_est_timer, 0);
mod_timer(&est->timer, jiffies + ((HZ / 4) << est->interval));
return est;
@@ -2209,16 +2208,20 @@ static int
idt77252_init_ubr(struct idt77252_dev *card, struct vc_map *vc,
struct atm_vcc *vcc, struct atm_qos *qos)
{
+ struct rate_estimator *est = NULL;
unsigned long flags;
int tcr;
spin_lock_irqsave(&vc->lock, flags);
if (vc->estimator) {
- del_timer(&vc->estimator->timer);
- kfree(vc->estimator);
+ est = vc->estimator;
vc->estimator = NULL;
}
spin_unlock_irqrestore(&vc->lock, flags);
+ if (est) {
+ del_timer_sync(&est->timer);
+ kfree(est);
+ }
tcr = atm_pcr_goal(&qos->txtp);
if (tcr == 0)
diff --git a/drivers/atm/idt77252.h b/drivers/atm/idt77252.h
index 3a82cc23a053..9339197d701c 100644
--- a/drivers/atm/idt77252.h
+++ b/drivers/atm/idt77252.h
@@ -184,6 +184,8 @@ struct aal1 {
unsigned char sequence;
};
+struct vc_map;
+
struct rate_estimator {
struct timer_list timer;
unsigned int interval;
@@ -193,6 +195,7 @@ struct rate_estimator {
long avcps;
u32 cps;
u32 maxcps;
+ struct vc_map *vc;
};
struct vc_map {
diff --git a/drivers/atm/suni.c b/drivers/atm/suni.c
index b0363149b2fd..b8825f2d79e0 100644
--- a/drivers/atm/suni.c
+++ b/drivers/atm/suni.c
@@ -53,7 +53,7 @@ static DEFINE_SPINLOCK(sunis_lock);
if (atomic_read(&stats->s) < 0) atomic_set(&stats->s,INT_MAX);
-static void suni_hz(unsigned long from_timer)
+static void suni_hz(struct timer_list *timer)
{
struct suni_priv *walk;
struct atm_dev *dev;
@@ -85,7 +85,7 @@ static void suni_hz(unsigned long from_timer)
((GET(TACP_TCC) & 0xff) << 8) |
((GET(TACP_TCCM) & 7) << 16));
}
- if (from_timer) mod_timer(&poll_timer,jiffies+HZ);
+ if (timer) mod_timer(&poll_timer,jiffies+HZ);
}
@@ -322,13 +322,11 @@ static int suni_start(struct atm_dev *dev)
printk(KERN_WARNING "%s(itf %d): no signal\n",dev->type,
dev->number);
PRIV(dev)->loop_mode = ATM_LM_NONE;
- suni_hz(0); /* clear SUNI counters */
+ suni_hz(NULL); /* clear SUNI counters */
(void) fetch_stats(dev,NULL,1); /* clear kernel counters */
if (first) {
- init_timer(&poll_timer);
+ timer_setup(&poll_timer, suni_hz, 0);
poll_timer.expires = jiffies+HZ;
- poll_timer.function = suni_hz;
- poll_timer.data = 1;
#if 0
printk(KERN_DEBUG "[u] p=0x%lx,n=0x%lx\n",(unsigned long) poll_timer.list.prev,
(unsigned long) poll_timer.list.next);
diff --git a/drivers/bcma/Kconfig b/drivers/bcma/Kconfig
index 54f81c554815..56ee55b66dfe 100644
--- a/drivers/bcma/Kconfig
+++ b/drivers/bcma/Kconfig
@@ -10,14 +10,15 @@ menuconfig BCMA
Bus driver for Broadcom specific Advanced Microcontroller Bus
Architecture.
+if BCMA
+
# Support for Block-I/O. SELECT this from the driver that needs it.
config BCMA_BLOCKIO
bool
- depends on BCMA
config BCMA_HOST_PCI_POSSIBLE
bool
- depends on BCMA && PCI = y
+ depends on PCI = y
default y
config BCMA_HOST_PCI
@@ -28,7 +29,6 @@ config BCMA_HOST_PCI
config BCMA_HOST_SOC
bool "Support for BCMA in a SoC"
- depends on BCMA
help
Host interface for a Broadcom AIX bus directly mapped into
the memory. This only works with the Broadcom SoCs from the
@@ -38,7 +38,7 @@ config BCMA_HOST_SOC
config BCMA_DRIVER_PCI
bool "BCMA Broadcom PCI core driver"
- depends on BCMA && PCI
+ depends on PCI
default y
help
BCMA bus may have many versions of PCIe core. This driver
@@ -54,13 +54,13 @@ config BCMA_DRIVER_PCI
config BCMA_DRIVER_PCI_HOSTMODE
bool "Driver for PCI core working in hostmode"
- depends on BCMA && MIPS && BCMA_DRIVER_PCI
+ depends on MIPS && BCMA_DRIVER_PCI
help
PCI core hostmode operation (external PCI bus).
config BCMA_DRIVER_MIPS
bool "BCMA Broadcom MIPS core driver"
- depends on BCMA && MIPS
+ depends on MIPS
help
Driver for the Broadcom MIPS core attached to Broadcom specific
Advanced Microcontroller Bus.
@@ -91,7 +91,6 @@ config BCMA_NFLASH
config BCMA_DRIVER_GMAC_CMN
bool "BCMA Broadcom GBIT MAC COMMON core driver"
- depends on BCMA
help
Driver for the Broadcom GBIT MAC COMMON core attached to Broadcom
specific Advanced Microcontroller Bus.
@@ -100,7 +99,7 @@ config BCMA_DRIVER_GMAC_CMN
config BCMA_DRIVER_GPIO
bool "BCMA GPIO driver"
- depends on BCMA && GPIOLIB
+ depends on GPIOLIB
select GPIOLIB_IRQCHIP if BCMA_HOST_SOC
help
Driver to provide access to the GPIO pins of the bcma bus.
@@ -109,8 +108,9 @@ config BCMA_DRIVER_GPIO
config BCMA_DEBUG
bool "BCMA debugging"
- depends on BCMA
help
This turns on additional debugging messages.
If unsure, say N
+
+endif # BCMA
diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c
index 89af807cf29c..5904ef1aa624 100644
--- a/drivers/bcma/driver_mips.c
+++ b/drivers/bcma/driver_mips.c
@@ -184,10 +184,11 @@ static void bcma_core_mips_print_irq(struct bcma_device *dev, unsigned int irq)
{
int i;
static const char *irq_name[] = {"2(S)", "3", "4", "5", "6", "D", "I"};
- printk(KERN_DEBUG KBUILD_MODNAME ": core 0x%04x, irq :", dev->id.id);
+
+ bcma_debug(dev->bus, "core 0x%04x, irq :", dev->id.id);
for (i = 0; i <= 6; i++)
- printk(" %s%s", irq_name[i], i == irq ? "*" : " ");
- printk("\n");
+ pr_cont(" %s%s", irq_name[i], i == irq ? "*" : " ");
+ pr_cont("\n");
}
static void bcma_core_mips_dump_irq(struct bcma_bus *bus)
diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig
index fae5a74dc737..6475f8c0d3b2 100644
--- a/drivers/bluetooth/Kconfig
+++ b/drivers/bluetooth/Kconfig
@@ -65,6 +65,7 @@ config BT_HCIBTSDIO
config BT_HCIUART
tristate "HCI UART driver"
+ depends on SERIAL_DEV_BUS || !SERIAL_DEV_BUS
depends on TTY
help
Bluetooth HCI UART driver.
@@ -79,7 +80,6 @@ config BT_HCIUART
config BT_HCIUART_SERDEV
bool
depends on SERIAL_DEV_BUS && BT_HCIUART
- depends on SERIAL_DEV_BUS=y || SERIAL_DEV_BUS=BT_HCIUART
default y
config BT_HCIUART_H4
@@ -169,6 +169,7 @@ config BT_HCIUART_BCM
bool "Broadcom protocol support"
depends on BT_HCIUART
depends on BT_HCIUART_SERDEV
+ depends on (!ACPI || SERIAL_DEV_CTRL_TTYPORT)
select BT_HCIUART_H4
select BT_BCM
help
diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c
index 5ce6d4176dc3..8e9547f195ef 100644
--- a/drivers/bluetooth/bcm203x.c
+++ b/drivers/bluetooth/bcm203x.c
@@ -121,7 +121,7 @@ static void bcm203x_complete(struct urb *urb)
}
data->state = BCM203X_LOAD_FIRMWARE;
-
+ /* fall through */
case BCM203X_LOAD_FIRMWARE:
if (data->fw_sent == data->fw_size) {
usb_fill_int_urb(urb, udev, usb_rcvintpipe(udev, BCM203X_IN_EP),
diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c
index b07ca9565291..d513ef4743dc 100644
--- a/drivers/bluetooth/bluecard_cs.c
+++ b/drivers/bluetooth/bluecard_cs.c
@@ -156,9 +156,9 @@ static void bluecard_detach(struct pcmcia_device *p_dev);
/* ======================== LED handling routines ======================== */
-static void bluecard_activity_led_timeout(u_long arg)
+static void bluecard_activity_led_timeout(struct timer_list *t)
{
- struct bluecard_info *info = (struct bluecard_info *)arg;
+ struct bluecard_info *info = from_timer(info, t, timer);
unsigned int iobase = info->p_dev->resource[0]->start;
if (test_bit(CARD_ACTIVITY, &(info->hw_state))) {
@@ -691,8 +691,7 @@ static int bluecard_open(struct bluecard_info *info)
spin_lock_init(&(info->lock));
- setup_timer(&(info->timer), &bluecard_activity_led_timeout,
- (u_long)info);
+ timer_setup(&info->timer, bluecard_activity_led_timeout, 0);
skb_queue_head_init(&(info->txq));
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index cc4bdefa6648..ae1fa390f508 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -327,6 +327,8 @@ static const struct {
{ 0x4406, "BCM4324B3" }, /* 002.004.006 */
{ 0x610c, "BCM4354" }, /* 003.001.012 */
{ 0x2209, "BCM43430A1" }, /* 001.002.009 */
+ { 0x6119, "BCM4345C0" }, /* 003.001.025 */
+ { 0x230f, "BCM4356A2" }, /* 001.003.015 */
{ }
};
@@ -361,6 +363,7 @@ int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len)
switch ((rev & 0xf000) >> 12) {
case 0:
case 1:
+ case 2:
case 3:
for (i = 0; bcm_uart_subver_table[i].name; i++) {
if (subver == bcm_uart_subver_table[i].subver) {
diff --git a/drivers/bluetooth/btmrvl_sdio.c b/drivers/bluetooth/btmrvl_sdio.c
index 03341ce98c32..7dbb4463b539 100644
--- a/drivers/bluetooth/btmrvl_sdio.c
+++ b/drivers/bluetooth/btmrvl_sdio.c
@@ -64,7 +64,7 @@ static irqreturn_t btmrvl_wake_irq_bt(int irq, void *priv)
struct btmrvl_sdio_card *card = priv;
struct btmrvl_plt_wake_cfg *cfg = card->plt_wake_cfg;
- pr_info("%s: wake by bt", __func__);
+ pr_info("%s: wake by bt\n", __func__);
cfg->wake_by_bt = true;
disable_irq_nosync(irq);
@@ -87,7 +87,7 @@ static int btmrvl_sdio_probe_of(struct device *dev,
if (!dev->of_node ||
!of_match_node(btmrvl_sdio_of_match_table, dev->of_node)) {
- pr_err("sdio platform data not available");
+ pr_err("sdio platform data not available\n");
return -1;
}
@@ -99,7 +99,7 @@ static int btmrvl_sdio_probe_of(struct device *dev,
if (cfg && card->plt_of_node) {
cfg->irq_bt = irq_of_parse_and_map(card->plt_of_node, 0);
if (!cfg->irq_bt) {
- dev_err(dev, "fail to parse irq_bt from device tree");
+ dev_err(dev, "fail to parse irq_bt from device tree\n");
cfg->irq_bt = -1;
} else {
ret = devm_request_irq(dev, cfg->irq_bt,
diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c
index d00c4fdae924..663bed63b871 100644
--- a/drivers/bluetooth/btqcomsmd.c
+++ b/drivers/bluetooth/btqcomsmd.c
@@ -15,6 +15,8 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rpmsg.h>
+#include <linux/of.h>
+
#include <linux/soc/qcom/wcnss_ctrl.h>
#include <linux/platform_device.h>
@@ -26,6 +28,7 @@
struct btqcomsmd {
struct hci_dev *hdev;
+ bdaddr_t bdaddr;
struct rpmsg_endpoint *acl_channel;
struct rpmsg_endpoint *cmd_channel;
};
@@ -100,6 +103,38 @@ static int btqcomsmd_close(struct hci_dev *hdev)
return 0;
}
+static int btqcomsmd_setup(struct hci_dev *hdev)
+{
+ struct btqcomsmd *btq = hci_get_drvdata(hdev);
+ struct sk_buff *skb;
+ int err;
+
+ skb = __hci_cmd_sync(hdev, HCI_OP_RESET, 0, NULL, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+ kfree_skb(skb);
+
+ /* Devices do not have persistent storage for BD address. If no
+ * BD address has been retrieved during probe, mark the device
+ * as having an invalid BD address.
+ */
+ if (!bacmp(&btq->bdaddr, BDADDR_ANY)) {
+ set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ return 0;
+ }
+
+ /* When setting a configured BD address fails, mark the device
+ * as having an invalid BD address.
+ */
+ err = qca_set_bdaddr_rome(hdev, &btq->bdaddr);
+ if (err) {
+ set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
+ return 0;
+ }
+
+ return 0;
+}
+
static int btqcomsmd_probe(struct platform_device *pdev)
{
struct btqcomsmd *btq;
@@ -123,6 +158,15 @@ static int btqcomsmd_probe(struct platform_device *pdev)
if (IS_ERR(btq->cmd_channel))
return PTR_ERR(btq->cmd_channel);
+ /* The local-bd-address property is usually injected by the
+ * bootloader which has access to the allocated BD address.
+ */
+ if (!of_property_read_u8_array(pdev->dev.of_node, "local-bd-address",
+ (u8 *)&btq->bdaddr, sizeof(bdaddr_t))) {
+ dev_info(&pdev->dev, "BD address %pMR retrieved from device-tree",
+ &btq->bdaddr);
+ }
+
hdev = hci_alloc_dev();
if (!hdev)
return -ENOMEM;
@@ -135,6 +179,7 @@ static int btqcomsmd_probe(struct platform_device *pdev)
hdev->open = btqcomsmd_open;
hdev->close = btqcomsmd_close;
hdev->send = btqcomsmd_send;
+ hdev->setup = btqcomsmd_setup;
hdev->set_bdaddr = qca_set_bdaddr_rome;
ret = hci_register_dev(hdev);
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 7a5c06aaa181..c054d7bce490 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -66,7 +66,6 @@ static struct usb_driver btusb_driver;
#define BTUSB_BCM2045 0x40000
#define BTUSB_IFNUM_2 0x80000
#define BTUSB_CW6622 0x100000
-#define BTUSB_BCM_NO_PRODID 0x200000
static const struct usb_device_id btusb_table[] = {
/* Generic Bluetooth USB device */
@@ -171,10 +170,6 @@ static const struct usb_device_id btusb_table[] = {
{ USB_VENDOR_AND_INTERFACE_INFO(0x0930, 0xff, 0x01, 0x01),
.driver_info = BTUSB_BCM_PATCHRAM },
- /* Broadcom devices with missing product id */
- { USB_DEVICE_AND_INTERFACE_INFO(0x0000, 0x0000, 0xff, 0x01, 0x01),
- .driver_info = BTUSB_BCM_PATCHRAM | BTUSB_BCM_NO_PRODID },
-
/* Intel Bluetooth USB Bootloader (RAM module) */
{ USB_DEVICE(0x8087, 0x0a5a),
.driver_info = BTUSB_INTEL_BOOT | BTUSB_BROKEN_ISOC },
@@ -2909,19 +2904,6 @@ static int btusb_probe(struct usb_interface *intf,
if (id->driver_info == BTUSB_IGNORE)
return -ENODEV;
- if (id->driver_info & BTUSB_BCM_NO_PRODID) {
- struct usb_device *udev = interface_to_usbdev(intf);
-
- /* For the broken Broadcom devices that show 0000:0000
- * as USB vendor and product information, check that the
- * manufacturer string identifies them as Broadcom based
- * devices.
- */
- if (!udev->manufacturer ||
- strcmp(udev->manufacturer, "Broadcom Corp"))
- return -ENODEV;
- }
-
if (id->driver_info & BTUSB_ATH3012) {
struct usb_device *udev = interface_to_usbdev(intf);
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index e2540113d0da..707c2d1b84c7 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -52,11 +52,13 @@
#define BCM_AUTOSUSPEND_DELAY 5000 /* default autosleep delay */
-/* platform device driver resources */
+/* device driver resources */
struct bcm_device {
+ /* Must be the first member, hci_serdev.c expects this. */
+ struct hci_uart serdev_hu;
struct list_head list;
- struct platform_device *pdev;
+ struct device *dev;
const char *name;
struct gpio_desc *device_wakeup;
@@ -68,7 +70,7 @@ struct bcm_device {
u32 init_speed;
u32 oper_speed;
int irq;
- u8 irq_polarity;
+ bool irq_active_low;
#ifdef CONFIG_PM
struct hci_uart *hu;
@@ -76,11 +78,6 @@ struct bcm_device {
#endif
};
-/* serdev driver resources */
-struct bcm_serdev {
- struct hci_uart hu;
-};
-
/* generic bcm uart resources */
struct bcm_data {
struct sk_buff *rx_skb;
@@ -155,6 +152,12 @@ static bool bcm_device_exists(struct bcm_device *device)
{
struct list_head *p;
+#ifdef CONFIG_PM
+ /* Devices using serdev always exist */
+ if (device && device->hu && device->hu->serdev)
+ return true;
+#endif
+
list_for_each(p, &bcm_device_list) {
struct bcm_device *dev = list_entry(p, struct bcm_device, list);
@@ -188,9 +191,9 @@ static irqreturn_t bcm_host_wake(int irq, void *data)
bt_dev_dbg(bdev, "Host wake IRQ");
- pm_runtime_get(&bdev->pdev->dev);
- pm_runtime_mark_last_busy(&bdev->pdev->dev);
- pm_runtime_put_autosuspend(&bdev->pdev->dev);
+ pm_runtime_get(bdev->dev);
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
return IRQ_HANDLED;
}
@@ -200,7 +203,6 @@ static int bcm_request_irq(struct bcm_data *bcm)
struct bcm_device *bdev = bcm->dev;
int err;
- /* If this is not a platform device, do not enable PM functionalities */
mutex_lock(&bcm_device_lock);
if (!bcm_device_exists(bdev)) {
err = -ENODEV;
@@ -212,18 +214,20 @@ static int bcm_request_irq(struct bcm_data *bcm)
goto unlock;
}
- err = devm_request_irq(&bdev->pdev->dev, bdev->irq, bcm_host_wake,
- IRQF_TRIGGER_RISING, "host_wake", bdev);
+ err = devm_request_irq(bdev->dev, bdev->irq, bcm_host_wake,
+ bdev->irq_active_low ? IRQF_TRIGGER_FALLING :
+ IRQF_TRIGGER_RISING,
+ "host_wake", bdev);
if (err)
goto unlock;
- device_init_wakeup(&bdev->pdev->dev, true);
+ device_init_wakeup(bdev->dev, true);
- pm_runtime_set_autosuspend_delay(&bdev->pdev->dev,
+ pm_runtime_set_autosuspend_delay(bdev->dev,
BCM_AUTOSUSPEND_DELAY);
- pm_runtime_use_autosuspend(&bdev->pdev->dev);
- pm_runtime_set_active(&bdev->pdev->dev);
- pm_runtime_enable(&bdev->pdev->dev);
+ pm_runtime_use_autosuspend(bdev->dev);
+ pm_runtime_set_active(bdev->dev);
+ pm_runtime_enable(bdev->dev);
unlock:
mutex_unlock(&bcm_device_lock);
@@ -253,7 +257,7 @@ static int bcm_setup_sleep(struct hci_uart *hu)
struct sk_buff *skb;
struct bcm_set_sleep_mode sleep_params = default_sleep_params;
- sleep_params.host_wake_active = !bcm->dev->irq_polarity;
+ sleep_params.host_wake_active = !bcm->dev->irq_active_low;
skb = __hci_cmd_sync(hu->hdev, 0xfc27, sizeof(sleep_params),
&sleep_params, HCI_INIT_TIMEOUT);
@@ -311,18 +315,17 @@ static int bcm_open(struct hci_uart *hu)
hu->priv = bcm;
- /* If this is a serdev defined device, then only use
- * serdev open primitive and skip the rest.
- */
+ mutex_lock(&bcm_device_lock);
+
if (hu->serdev) {
serdev_device_open(hu->serdev);
+ bcm->dev = serdev_device_get_drvdata(hu->serdev);
goto out;
}
if (!hu->tty->dev)
goto out;
- mutex_lock(&bcm_device_lock);
list_for_each(p, &bcm_device_list) {
struct bcm_device *dev = list_entry(p, struct bcm_device, list);
@@ -330,50 +333,56 @@ static int bcm_open(struct hci_uart *hu)
* platform device (saved during device probe) and
* parent of tty device used by hci_uart
*/
- if (hu->tty->dev->parent == dev->pdev->dev.parent) {
+ if (hu->tty->dev->parent == dev->dev->parent) {
bcm->dev = dev;
- hu->init_speed = dev->init_speed;
- hu->oper_speed = dev->oper_speed;
#ifdef CONFIG_PM
dev->hu = hu;
#endif
- bcm_gpio_set_power(bcm->dev, true);
break;
}
}
- mutex_unlock(&bcm_device_lock);
out:
+ if (bcm->dev) {
+ hu->init_speed = bcm->dev->init_speed;
+ hu->oper_speed = bcm->dev->oper_speed;
+ bcm_gpio_set_power(bcm->dev, true);
+ }
+
+ mutex_unlock(&bcm_device_lock);
return 0;
}
static int bcm_close(struct hci_uart *hu)
{
struct bcm_data *bcm = hu->priv;
- struct bcm_device *bdev = bcm->dev;
+ struct bcm_device *bdev = NULL;
bt_dev_dbg(hu->hdev, "hu %p", hu);
- /* If this is a serdev defined device, only use serdev
- * close primitive and then continue as usual.
- */
- if (hu->serdev)
- serdev_device_close(hu->serdev);
-
/* Protect bcm->dev against removal of the device or driver */
mutex_lock(&bcm_device_lock);
- if (bcm_device_exists(bdev)) {
+
+ if (hu->serdev) {
+ serdev_device_close(hu->serdev);
+ bdev = serdev_device_get_drvdata(hu->serdev);
+ } else if (bcm_device_exists(bcm->dev)) {
+ bdev = bcm->dev;
+#ifdef CONFIG_PM
+ bdev->hu = NULL;
+#endif
+ }
+
+ if (bdev) {
bcm_gpio_set_power(bdev, false);
#ifdef CONFIG_PM
- pm_runtime_disable(&bdev->pdev->dev);
- pm_runtime_set_suspended(&bdev->pdev->dev);
+ pm_runtime_disable(bdev->dev);
+ pm_runtime_set_suspended(bdev->dev);
- if (device_can_wakeup(&bdev->pdev->dev)) {
- devm_free_irq(&bdev->pdev->dev, bdev->irq, bdev);
- device_init_wakeup(&bdev->pdev->dev, false);
+ if (device_can_wakeup(bdev->dev)) {
+ devm_free_irq(bdev->dev, bdev->irq, bdev);
+ device_init_wakeup(bdev->dev, false);
}
-
- bdev->hu = NULL;
#endif
}
mutex_unlock(&bcm_device_lock);
@@ -504,9 +513,9 @@ static int bcm_recv(struct hci_uart *hu, const void *data, int count)
/* Delay auto-suspend when receiving completed packet */
mutex_lock(&bcm_device_lock);
if (bcm->dev && bcm_device_exists(bcm->dev)) {
- pm_runtime_get(&bcm->dev->pdev->dev);
- pm_runtime_mark_last_busy(&bcm->dev->pdev->dev);
- pm_runtime_put_autosuspend(&bcm->dev->pdev->dev);
+ pm_runtime_get(bcm->dev->dev);
+ pm_runtime_mark_last_busy(bcm->dev->dev);
+ pm_runtime_put_autosuspend(bcm->dev->dev);
}
mutex_unlock(&bcm_device_lock);
}
@@ -537,15 +546,15 @@ static struct sk_buff *bcm_dequeue(struct hci_uart *hu)
if (bcm_device_exists(bcm->dev)) {
bdev = bcm->dev;
- pm_runtime_get_sync(&bdev->pdev->dev);
+ pm_runtime_get_sync(bdev->dev);
/* Shall be resumed here */
}
skb = skb_dequeue(&bcm->txq);
if (bdev) {
- pm_runtime_mark_last_busy(&bdev->pdev->dev);
- pm_runtime_put_autosuspend(&bdev->pdev->dev);
+ pm_runtime_mark_last_busy(bdev->dev);
+ pm_runtime_put_autosuspend(bdev->dev);
}
mutex_unlock(&bcm_device_lock);
@@ -556,7 +565,7 @@ static struct sk_buff *bcm_dequeue(struct hci_uart *hu)
#ifdef CONFIG_PM
static int bcm_suspend_device(struct device *dev)
{
- struct bcm_device *bdev = platform_get_drvdata(to_platform_device(dev));
+ struct bcm_device *bdev = dev_get_drvdata(dev);
bt_dev_dbg(bdev, "");
@@ -579,7 +588,7 @@ static int bcm_suspend_device(struct device *dev)
static int bcm_resume_device(struct device *dev)
{
- struct bcm_device *bdev = platform_get_drvdata(to_platform_device(dev));
+ struct bcm_device *bdev = dev_get_drvdata(dev);
bt_dev_dbg(bdev, "");
@@ -601,16 +610,18 @@ static int bcm_resume_device(struct device *dev)
#endif
#ifdef CONFIG_PM_SLEEP
-/* Platform suspend callback */
+/* suspend callback */
static int bcm_suspend(struct device *dev)
{
- struct bcm_device *bdev = platform_get_drvdata(to_platform_device(dev));
+ struct bcm_device *bdev = dev_get_drvdata(dev);
int error;
bt_dev_dbg(bdev, "suspend: is_suspended %d", bdev->is_suspended);
- /* bcm_suspend can be called at any time as long as platform device is
- * bound, so it should use bcm_device_lock to protect access to hci_uart
+ /*
+ * When used with a device instantiated as platform_device, bcm_suspend
+ * can be called at any time as long as the platform device is bound,
+ * so it should use bcm_device_lock to protect access to hci_uart
* and device_wake-up GPIO.
*/
mutex_lock(&bcm_device_lock);
@@ -621,7 +632,7 @@ static int bcm_suspend(struct device *dev)
if (pm_runtime_active(dev))
bcm_suspend_device(dev);
- if (device_may_wakeup(&bdev->pdev->dev)) {
+ if (device_may_wakeup(dev)) {
error = enable_irq_wake(bdev->irq);
if (!error)
bt_dev_dbg(bdev, "BCM irq: enabled");
@@ -633,15 +644,17 @@ unlock:
return 0;
}
-/* Platform resume callback */
+/* resume callback */
static int bcm_resume(struct device *dev)
{
- struct bcm_device *bdev = platform_get_drvdata(to_platform_device(dev));
+ struct bcm_device *bdev = dev_get_drvdata(dev);
bt_dev_dbg(bdev, "resume: is_suspended %d", bdev->is_suspended);
- /* bcm_resume can be called at any time as long as platform device is
- * bound, so it should use bcm_device_lock to protect access to hci_uart
+ /*
+ * When used with a device instantiated as platform_device, bcm_resume
+ * can be called at any time as long as platform device is bound,
+ * so it should use bcm_device_lock to protect access to hci_uart
* and device_wake-up GPIO.
*/
mutex_lock(&bcm_device_lock);
@@ -649,7 +662,7 @@ static int bcm_resume(struct device *dev)
if (!bdev->hu)
goto unlock;
- if (device_may_wakeup(&bdev->pdev->dev)) {
+ if (device_may_wakeup(dev)) {
disable_irq_wake(bdev->irq);
bt_dev_dbg(bdev, "BCM irq: disabled");
}
@@ -690,10 +703,8 @@ static const struct acpi_gpio_mapping acpi_bcm_int_first_gpios[] = {
};
#ifdef CONFIG_ACPI
-static u8 acpi_active_low = ACPI_ACTIVE_LOW;
-
/* IRQ polarity of some chipsets are not defined correctly in ACPI table. */
-static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
+static const struct dmi_system_id bcm_active_low_irq_dmi_table[] = {
{
.ident = "Asus T100TA",
.matches = {
@@ -701,7 +712,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
"ASUSTeK COMPUTER INC."),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
},
- .driver_data = &acpi_active_low,
},
{
.ident = "Asus T100CHI",
@@ -710,7 +720,6 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
"ASUSTeK COMPUTER INC."),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100CHI"),
},
- .driver_data = &acpi_active_low,
},
{ /* Handle ThinkPad 8 tablets with BCM2E55 chipset ACPI ID */
.ident = "Lenovo ThinkPad 8",
@@ -718,7 +727,13 @@ static const struct dmi_system_id bcm_wrong_irq_dmi_table[] = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "ThinkPad 8"),
},
- .driver_data = &acpi_active_low,
+ },
+ {
+ .ident = "MINIX Z83-4",
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "MINIX"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Z83-4"),
+ },
},
{ }
};
@@ -733,13 +748,13 @@ static int bcm_resource(struct acpi_resource *ares, void *data)
switch (ares->type) {
case ACPI_RESOURCE_TYPE_EXTENDED_IRQ:
irq = &ares->data.extended_irq;
- dev->irq_polarity = irq->polarity;
+ dev->irq_active_low = irq->polarity == ACPI_ACTIVE_LOW;
break;
case ACPI_RESOURCE_TYPE_GPIO:
gpio = &ares->data.gpio;
if (gpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT)
- dev->irq_polarity = gpio->polarity;
+ dev->irq_active_low = gpio->polarity == ACPI_ACTIVE_LOW;
break;
case ACPI_RESOURCE_TYPE_SERIAL_BUS:
@@ -754,36 +769,32 @@ static int bcm_resource(struct acpi_resource *ares, void *data)
break;
}
- /* Always tell the ACPI core to skip this resource */
- return 1;
+ return 0;
}
#endif /* CONFIG_ACPI */
-static int bcm_platform_probe(struct bcm_device *dev)
+static int bcm_get_resources(struct bcm_device *dev)
{
- struct platform_device *pdev = dev->pdev;
-
- dev->name = dev_name(&pdev->dev);
+ dev->name = dev_name(dev->dev);
- dev->clk = devm_clk_get(&pdev->dev, NULL);
+ dev->clk = devm_clk_get(dev->dev, NULL);
- dev->device_wakeup = devm_gpiod_get_optional(&pdev->dev,
+ dev->device_wakeup = devm_gpiod_get_optional(dev->dev,
"device-wakeup",
GPIOD_OUT_LOW);
if (IS_ERR(dev->device_wakeup))
return PTR_ERR(dev->device_wakeup);
- dev->shutdown = devm_gpiod_get_optional(&pdev->dev, "shutdown",
+ dev->shutdown = devm_gpiod_get_optional(dev->dev, "shutdown",
GPIOD_OUT_LOW);
if (IS_ERR(dev->shutdown))
return PTR_ERR(dev->shutdown);
/* IRQ can be declared in ACPI table as Interrupt or GpioInt */
- dev->irq = platform_get_irq(pdev, 0);
if (dev->irq <= 0) {
struct gpio_desc *gpio;
- gpio = devm_gpiod_get_optional(&pdev->dev, "host-wakeup",
+ gpio = devm_gpiod_get_optional(dev->dev, "host-wakeup",
GPIOD_IN);
if (IS_ERR(gpio))
return PTR_ERR(gpio);
@@ -791,54 +802,48 @@ static int bcm_platform_probe(struct bcm_device *dev)
dev->irq = gpiod_to_irq(gpio);
}
- dev_info(&pdev->dev, "BCM irq: %d\n", dev->irq);
-
- /* Make sure at-least one of the GPIO is defined and that
- * a name is specified for this instance
- */
- if ((!dev->device_wakeup && !dev->shutdown) || !dev->name) {
- dev_err(&pdev->dev, "invalid platform data\n");
- return -EINVAL;
- }
-
+ dev_info(dev->dev, "BCM irq: %d\n", dev->irq);
return 0;
}
#ifdef CONFIG_ACPI
static int bcm_acpi_probe(struct bcm_device *dev)
{
- struct platform_device *pdev = dev->pdev;
LIST_HEAD(resources);
const struct dmi_system_id *dmi_id;
const struct acpi_gpio_mapping *gpio_mapping = acpi_bcm_int_last_gpios;
const struct acpi_device_id *id;
+ struct resource_entry *entry;
int ret;
/* Retrieve GPIO data */
- id = acpi_match_device(pdev->dev.driver->acpi_match_table, &pdev->dev);
+ id = acpi_match_device(dev->dev->driver->acpi_match_table, dev->dev);
if (id)
gpio_mapping = (const struct acpi_gpio_mapping *) id->driver_data;
- ret = devm_acpi_dev_add_driver_gpios(&pdev->dev, gpio_mapping);
- if (ret)
- return ret;
-
- ret = bcm_platform_probe(dev);
+ ret = devm_acpi_dev_add_driver_gpios(dev->dev, gpio_mapping);
if (ret)
return ret;
/* Retrieve UART ACPI info */
- ret = acpi_dev_get_resources(ACPI_COMPANION(&dev->pdev->dev),
+ ret = acpi_dev_get_resources(ACPI_COMPANION(dev->dev),
&resources, bcm_resource, dev);
if (ret < 0)
return ret;
+
+ resource_list_for_each_entry(entry, &resources) {
+ if (resource_type(entry->res) == IORESOURCE_IRQ) {
+ dev->irq = entry->res->start;
+ break;
+ }
+ }
acpi_dev_free_resource_list(&resources);
- dmi_id = dmi_first_match(bcm_wrong_irq_dmi_table);
+ dmi_id = dmi_first_match(bcm_active_low_irq_dmi_table);
if (dmi_id) {
- bt_dev_warn(dev, "%s: Overwriting IRQ polarity to active low",
+ dev_warn(dev->dev, "%s: Overwriting IRQ polarity to active low",
dmi_id->ident);
- dev->irq_polarity = *(u8 *)dmi_id->driver_data;
+ dev->irq_active_low = true;
}
return 0;
@@ -850,6 +855,12 @@ static int bcm_acpi_probe(struct bcm_device *dev)
}
#endif /* CONFIG_ACPI */
+static int bcm_of_probe(struct bcm_device *bdev)
+{
+ device_property_read_u32(bdev->dev, "max-speed", &bdev->oper_speed);
+ return 0;
+}
+
static int bcm_probe(struct platform_device *pdev)
{
struct bcm_device *dev;
@@ -859,12 +870,16 @@ static int bcm_probe(struct platform_device *pdev)
if (!dev)
return -ENOMEM;
- dev->pdev = pdev;
+ dev->dev = &pdev->dev;
+ dev->irq = platform_get_irq(pdev, 0);
- if (has_acpi_companion(&pdev->dev))
+ if (has_acpi_companion(&pdev->dev)) {
ret = bcm_acpi_probe(dev);
- else
- ret = bcm_platform_probe(dev);
+ if (ret)
+ return ret;
+ }
+
+ ret = bcm_get_resources(dev);
if (ret)
return ret;
@@ -926,14 +941,16 @@ static const struct acpi_device_id bcm_acpi_match[] = {
{ "BCM2E71", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
{ "BCM2E7B", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
{ "BCM2E7C", (kernel_ulong_t)&acpi_bcm_int_last_gpios },
+ { "BCM2E7E", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
{ "BCM2E95", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
{ "BCM2E96", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
+ { "BCM2EA4", (kernel_ulong_t)&acpi_bcm_int_first_gpios },
{ },
};
MODULE_DEVICE_TABLE(acpi, bcm_acpi_match);
#endif
-/* Platform suspend and resume callbacks */
+/* suspend and resume callbacks */
static const struct dev_pm_ops bcm_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(bcm_suspend, bcm_resume)
SET_RUNTIME_PM_OPS(bcm_suspend_device, bcm_resume_device, NULL)
@@ -951,29 +968,41 @@ static struct platform_driver bcm_driver = {
static int bcm_serdev_probe(struct serdev_device *serdev)
{
- struct bcm_serdev *bcmdev;
- u32 speed;
+ struct bcm_device *bcmdev;
int err;
bcmdev = devm_kzalloc(&serdev->dev, sizeof(*bcmdev), GFP_KERNEL);
if (!bcmdev)
return -ENOMEM;
- bcmdev->hu.serdev = serdev;
+ bcmdev->dev = &serdev->dev;
+#ifdef CONFIG_PM
+ bcmdev->hu = &bcmdev->serdev_hu;
+#endif
+ bcmdev->serdev_hu.serdev = serdev;
serdev_device_set_drvdata(serdev, bcmdev);
- err = device_property_read_u32(&serdev->dev, "max-speed", &speed);
- if (!err)
- bcmdev->hu.oper_speed = speed;
+ if (has_acpi_companion(&serdev->dev))
+ err = bcm_acpi_probe(bcmdev);
+ else
+ err = bcm_of_probe(bcmdev);
+ if (err)
+ return err;
+
+ err = bcm_get_resources(bcmdev);
+ if (err)
+ return err;
+
+ bcm_gpio_set_power(bcmdev, false);
- return hci_uart_register_device(&bcmdev->hu, &bcm_proto);
+ return hci_uart_register_device(&bcmdev->serdev_hu, &bcm_proto);
}
static void bcm_serdev_remove(struct serdev_device *serdev)
{
- struct bcm_serdev *bcmdev = serdev_device_get_drvdata(serdev);
+ struct bcm_device *bcmdev = serdev_device_get_drvdata(serdev);
- hci_uart_unregister_device(&bcmdev->hu);
+ hci_uart_unregister_device(&bcmdev->serdev_hu);
}
#ifdef CONFIG_OF
@@ -990,6 +1019,8 @@ static struct serdev_device_driver bcm_serdev_driver = {
.driver = {
.name = "hci_uart_bcm",
.of_match_table = of_match_ptr(bcm_bluetooth_of_match),
+ .acpi_match_table = ACPI_PTR(bcm_acpi_match),
+ .pm = &bcm_pm_ops,
},
};
diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c
index d880f4e33c75..1a7f0c82fb36 100644
--- a/drivers/bluetooth/hci_bcsp.c
+++ b/drivers/bluetooth/hci_bcsp.c
@@ -65,6 +65,7 @@ struct bcsp_struct {
u8 rxseq_txack; /* rxseq == txack. */
u8 rxack; /* Last packet sent by us that the peer ack'ed */
struct timer_list tbcsp;
+ struct hci_uart *hu;
enum {
BCSP_W4_PKT_DELIMITER,
@@ -697,10 +698,10 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count)
}
/* Arrange to retransmit all messages in the relq. */
-static void bcsp_timed_event(unsigned long arg)
+static void bcsp_timed_event(struct timer_list *t)
{
- struct hci_uart *hu = (struct hci_uart *)arg;
- struct bcsp_struct *bcsp = hu->priv;
+ struct bcsp_struct *bcsp = from_timer(bcsp, t, tbcsp);
+ struct hci_uart *hu = bcsp->hu;
struct sk_buff *skb;
unsigned long flags;
@@ -729,11 +730,12 @@ static int bcsp_open(struct hci_uart *hu)
return -ENOMEM;
hu->priv = bcsp;
+ bcsp->hu = hu;
skb_queue_head_init(&bcsp->unack);
skb_queue_head_init(&bcsp->rel);
skb_queue_head_init(&bcsp->unrel);
- setup_timer(&bcsp->tbcsp, bcsp_timed_event, (u_long)hu);
+ timer_setup(&bcsp->tbcsp, bcsp_timed_event, 0);
bcsp->rx_state = BCSP_W4_PKT_DELIMITER;
diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index c0e4e26dc30d..6a8d0d06aba7 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -78,6 +78,7 @@ struct h5 {
int (*rx_func)(struct hci_uart *hu, u8 c);
struct timer_list timer; /* Retransmission timer */
+ struct hci_uart *hu; /* Parent HCI UART */
u8 tx_seq; /* Next seq number to send */
u8 tx_ack; /* Next ack number to send */
@@ -120,12 +121,12 @@ static u8 h5_cfg_field(struct h5 *h5)
return h5->tx_win & 0x07;
}
-static void h5_timed_event(unsigned long arg)
+static void h5_timed_event(struct timer_list *t)
{
const unsigned char sync_req[] = { 0x01, 0x7e };
unsigned char conf_req[3] = { 0x03, 0xfc };
- struct hci_uart *hu = (struct hci_uart *)arg;
- struct h5 *h5 = hu->priv;
+ struct h5 *h5 = from_timer(h5, t, timer);
+ struct hci_uart *hu = h5->hu;
struct sk_buff *skb;
unsigned long flags;
@@ -197,6 +198,7 @@ static int h5_open(struct hci_uart *hu)
return -ENOMEM;
hu->priv = h5;
+ h5->hu = hu;
skb_queue_head_init(&h5->unack);
skb_queue_head_init(&h5->rel);
@@ -204,7 +206,7 @@ static int h5_open(struct hci_uart *hu)
h5_reset_rx(h5);
- setup_timer(&h5->timer, h5_timed_event, (unsigned long)hu);
+ timer_setup(&h5->timer, h5_timed_event, 0);
h5->tx_win = H5_TX_WIN_MAX;
diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c
index a746627e784e..eec95019f15c 100644
--- a/drivers/bluetooth/hci_ldisc.c
+++ b/drivers/bluetooth/hci_ldisc.c
@@ -41,6 +41,7 @@
#include <linux/ioctl.h>
#include <linux/skbuff.h>
#include <linux/firmware.h>
+#include <linux/serdev.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -298,6 +299,12 @@ void hci_uart_set_flow_control(struct hci_uart *hu, bool enable)
unsigned int set = 0;
unsigned int clear = 0;
+ if (hu->serdev) {
+ serdev_device_set_flow_control(hu->serdev, !enable);
+ serdev_device_set_rts(hu->serdev, !enable);
+ return;
+ }
+
if (enable) {
/* Disable hardware flow control */
ktermios = tty->termios;
diff --git a/drivers/bluetooth/hci_ll.c b/drivers/bluetooth/hci_ll.c
index 424c15aa7bb7..e2c078d61730 100644
--- a/drivers/bluetooth/hci_ll.c
+++ b/drivers/bluetooth/hci_ll.c
@@ -242,7 +242,7 @@ static void ll_device_want_to_wakeup(struct hci_uart *hu)
* perfectly safe to always send one.
*/
BT_DBG("dual wake-up-indication");
- /* deliberate fall-through - do not add break */
+ /* fall through */
case HCILL_ASLEEP:
/* acknowledge device wake up */
if (send_hcill_cmd(HCILL_WAKE_UP_ACK, hu) < 0) {
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 392f412b4575..4a949bb60394 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -307,10 +307,10 @@ static void qca_wq_serial_tx_clock_vote_off(struct work_struct *work)
serial_clock_vote(HCI_IBS_TX_VOTE_CLOCK_OFF, hu);
}
-static void hci_ibs_tx_idle_timeout(unsigned long arg)
+static void hci_ibs_tx_idle_timeout(struct timer_list *t)
{
- struct hci_uart *hu = (struct hci_uart *)arg;
- struct qca_data *qca = hu->priv;
+ struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
+ struct hci_uart *hu = qca->hu;
unsigned long flags;
BT_DBG("hu %p idle timeout in %d state", hu, qca->tx_ibs_state);
@@ -342,10 +342,10 @@ static void hci_ibs_tx_idle_timeout(unsigned long arg)
spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
}
-static void hci_ibs_wake_retrans_timeout(unsigned long arg)
+static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
{
- struct hci_uart *hu = (struct hci_uart *)arg;
- struct qca_data *qca = hu->priv;
+ struct qca_data *qca = from_timer(qca, t, wake_retrans_timer);
+ struct hci_uart *hu = qca->hu;
unsigned long flags, retrans_delay;
bool retransmit = false;
@@ -438,11 +438,10 @@ static int qca_open(struct hci_uart *hu)
hu->priv = qca;
- setup_timer(&qca->wake_retrans_timer, hci_ibs_wake_retrans_timeout,
- (u_long)hu);
+ timer_setup(&qca->wake_retrans_timer, hci_ibs_wake_retrans_timeout, 0);
qca->wake_retrans = IBS_WAKE_RETRANS_TIMEOUT_MS;
- setup_timer(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, (u_long)hu);
+ timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
qca->tx_idle_delay = IBS_TX_IDLE_TIMEOUT_MS;
BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
diff --git a/drivers/connector/cn_queue.c b/drivers/connector/cn_queue.c
index 1f8bf054d11c..9c54fdf7acea 100644
--- a/drivers/connector/cn_queue.c
+++ b/drivers/connector/cn_queue.c
@@ -45,7 +45,7 @@ cn_queue_alloc_callback_entry(struct cn_queue_dev *dev, const char *name,
return NULL;
}
- atomic_set(&cbq->refcnt, 1);
+ refcount_set(&cbq->refcnt, 1);
atomic_inc(&dev->refcnt);
cbq->pdev = dev;
@@ -58,7 +58,7 @@ cn_queue_alloc_callback_entry(struct cn_queue_dev *dev, const char *name,
void cn_queue_release_callback(struct cn_callback_entry *cbq)
{
- if (!atomic_dec_and_test(&cbq->refcnt))
+ if (!refcount_dec_and_test(&cbq->refcnt))
return;
atomic_dec(&cbq->pdev->refcnt);
diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c
index 25693b045371..8615594bd065 100644
--- a/drivers/connector/connector.c
+++ b/drivers/connector/connector.c
@@ -157,7 +157,7 @@ static int cn_call_callback(struct sk_buff *skb)
spin_lock_bh(&dev->cbdev->queue_lock);
list_for_each_entry(i, &dev->cbdev->queue_list, callback_entry) {
if (cn_cb_equal(&i->id.id, &msg->id)) {
- atomic_inc(&i->refcnt);
+ refcount_inc(&i->refcnt);
cbq = i;
break;
}
diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig
index 6c9f3923e838..60e867d80b88 100644
--- a/drivers/infiniband/hw/qedr/Kconfig
+++ b/drivers/infiniband/hw/qedr/Kconfig
@@ -2,6 +2,7 @@ config INFINIBAND_QEDR
tristate "QLogic RoCE driver"
depends on 64BIT && QEDE
select QED_LL2
+ select QED_OOO
select QED_RDMA
---help---
This driver provides low-level InfiniBand over Ethernet
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 6cd61638b441..fe690f82af29 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -893,13 +893,17 @@ dev_stop:
void ipoib_pkey_dev_check_presence(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
if (!(priv->pkey & 0x7fff) ||
ib_find_pkey(priv->ca, priv->port, priv->pkey,
- &priv->pkey_index))
+ &priv->pkey_index)) {
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
- else
+ } else {
+ if (rn->set_id)
+ rn->set_id(dev, priv->pkey_index);
set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
+ }
}
void ipoib_ib_dev_up(struct net_device *dev)
@@ -1203,10 +1207,15 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
+ rtnl_lock();
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
ipoib_ib_dev_stop(dev);
- if (ipoib_ib_dev_open(dev) != 0)
+
+ result = ipoib_ib_dev_open(dev);
+ rtnl_unlock();
+ if (result)
return;
+
if (netif_queue_stopped(dev))
netif_start_queue(dev);
}
diff --git a/drivers/isdn/gigaset/bas-gigaset.c b/drivers/isdn/gigaset/bas-gigaset.c
index 2da3ff650e1d..c990c6bbffc2 100644
--- a/drivers/isdn/gigaset/bas-gigaset.c
+++ b/drivers/isdn/gigaset/bas-gigaset.c
@@ -433,10 +433,11 @@ static void check_pending(struct bas_cardstate *ucs)
* argument:
* controller state structure
*/
-static void cmd_in_timeout(unsigned long data)
+static void cmd_in_timeout(struct timer_list *t)
{
- struct cardstate *cs = (struct cardstate *) data;
- struct bas_cardstate *ucs = cs->hw.bas;
+ struct bas_cardstate *ucs = from_timer(ucs, t, timer_cmd_in);
+ struct urb *urb = ucs->urb_int_in;
+ struct cardstate *cs = urb->context;
int rc;
if (!ucs->rcvbuf_size) {
@@ -639,10 +640,11 @@ static void int_in_work(struct work_struct *work)
* argument:
* controller state structure
*/
-static void int_in_resubmit(unsigned long data)
+static void int_in_resubmit(struct timer_list *t)
{
- struct cardstate *cs = (struct cardstate *) data;
- struct bas_cardstate *ucs = cs->hw.bas;
+ struct bas_cardstate *ucs = from_timer(ucs, t, timer_int_in);
+ struct urb *urb = ucs->urb_int_in;
+ struct cardstate *cs = urb->context;
int rc;
if (ucs->retry_int_in++ >= BAS_RETRY) {
@@ -1441,10 +1443,11 @@ error:
* argument:
* controller state structure
*/
-static void req_timeout(unsigned long data)
+static void req_timeout(struct timer_list *t)
{
- struct cardstate *cs = (struct cardstate *) data;
- struct bas_cardstate *ucs = cs->hw.bas;
+ struct bas_cardstate *ucs = from_timer(ucs, t, timer_ctrl);
+ struct urb *urb = ucs->urb_int_in;
+ struct cardstate *cs = urb->context;
int pending;
unsigned long flags;
@@ -1837,10 +1840,11 @@ static void write_command_callback(struct urb *urb)
* argument:
* controller state structure
*/
-static void atrdy_timeout(unsigned long data)
+static void atrdy_timeout(struct timer_list *t)
{
- struct cardstate *cs = (struct cardstate *) data;
- struct bas_cardstate *ucs = cs->hw.bas;
+ struct bas_cardstate *ucs = from_timer(ucs, t, timer_atrdy);
+ struct urb *urb = ucs->urb_int_in;
+ struct cardstate *cs = urb->context;
dev_warn(cs->dev, "timeout waiting for HD_READY_SEND_ATDATA\n");
@@ -2200,7 +2204,7 @@ static int gigaset_initcshw(struct cardstate *cs)
{
struct bas_cardstate *ucs;
- cs->hw.bas = ucs = kmalloc(sizeof *ucs, GFP_KERNEL);
+ cs->hw.bas = ucs = kzalloc(sizeof(*ucs), GFP_KERNEL);
if (!ucs) {
pr_err("out of memory\n");
return -ENOMEM;
@@ -2212,19 +2216,11 @@ static int gigaset_initcshw(struct cardstate *cs)
return -ENOMEM;
}
- ucs->urb_cmd_in = NULL;
- ucs->urb_cmd_out = NULL;
- ucs->rcvbuf = NULL;
- ucs->rcvbuf_size = 0;
-
spin_lock_init(&ucs->lock);
- ucs->pending = 0;
-
- ucs->basstate = 0;
- setup_timer(&ucs->timer_ctrl, req_timeout, (unsigned long) cs);
- setup_timer(&ucs->timer_atrdy, atrdy_timeout, (unsigned long) cs);
- setup_timer(&ucs->timer_cmd_in, cmd_in_timeout, (unsigned long) cs);
- setup_timer(&ucs->timer_int_in, int_in_resubmit, (unsigned long) cs);
+ timer_setup(&ucs->timer_ctrl, req_timeout, 0);
+ timer_setup(&ucs->timer_atrdy, atrdy_timeout, 0);
+ timer_setup(&ucs->timer_cmd_in, cmd_in_timeout, 0);
+ timer_setup(&ucs->timer_int_in, int_in_resubmit, 0);
init_waitqueue_head(&ucs->waitqueue);
INIT_WORK(&ucs->int_in_wq, int_in_work);
diff --git a/drivers/isdn/gigaset/common.c b/drivers/isdn/gigaset/common.c
index 7c7814497e3e..15482c5de33c 100644
--- a/drivers/isdn/gigaset/common.c
+++ b/drivers/isdn/gigaset/common.c
@@ -153,9 +153,9 @@ static int test_timeout(struct at_state_t *at_state)
return 1;
}
-static void timer_tick(unsigned long data)
+static void timer_tick(struct timer_list *t)
{
- struct cardstate *cs = (struct cardstate *) data;
+ struct cardstate *cs = from_timer(cs, t, timer);
unsigned long flags;
unsigned channel;
struct at_state_t *at_state;
@@ -687,7 +687,7 @@ struct cardstate *gigaset_initcs(struct gigaset_driver *drv, int channels,
cs->ignoreframes = ignoreframes;
INIT_LIST_HEAD(&cs->temp_at_states);
cs->running = 0;
- init_timer(&cs->timer); /* clear next & prev */
+ timer_setup(&cs->timer, timer_tick, 0);
spin_lock_init(&cs->ev_lock);
cs->ev_tail = 0;
cs->ev_head = 0;
@@ -768,7 +768,6 @@ struct cardstate *gigaset_initcs(struct gigaset_driver *drv, int channels,
spin_lock_irqsave(&cs->lock, flags);
cs->running = 1;
spin_unlock_irqrestore(&cs->lock, flags);
- setup_timer(&cs->timer, timer_tick, (unsigned long) cs);
cs->timer.expires = jiffies + msecs_to_jiffies(GIG_TICK);
add_timer(&cs->timer);
diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c
index e240010b93fa..4d78f870435e 100644
--- a/drivers/isdn/hardware/mISDN/mISDNipac.c
+++ b/drivers/isdn/hardware/mISDN/mISDNipac.c
@@ -172,7 +172,6 @@ isac_fill_fifo(struct isac_hw *isac)
pr_debug("%s: %s dbusytimer running\n", isac->name, __func__);
del_timer(&isac->dch.timer);
}
- init_timer(&isac->dch.timer);
isac->dch.timer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000);
add_timer(&isac->dch.timer);
if (isac->dch.debug & DEBUG_HW_DFIFO) {
@@ -727,8 +726,9 @@ isac_release(struct isac_hw *isac)
}
static void
-dbusy_timer_handler(struct isac_hw *isac)
+dbusy_timer_handler(struct timer_list *t)
{
+ struct isac_hw *isac = from_timer(isac, t, dch.timer);
int rbch, star;
u_long flags;
@@ -796,8 +796,7 @@ isac_init(struct isac_hw *isac)
}
isac->mon_tx = NULL;
isac->mon_rx = NULL;
- setup_timer(&isac->dch.timer, (void *)dbusy_timer_handler,
- (long)isac);
+ timer_setup(&isac->dch.timer, dbusy_timer_handler, 0);
isac->mocr = 0xaa;
if (isac->type & IPAC_TYPE_ISACX) {
/* Disable all IRQ */
diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c
index d80072fef434..536d5137f49d 100644
--- a/drivers/isdn/hardware/mISDN/w6692.c
+++ b/drivers/isdn/hardware/mISDN/w6692.c
@@ -311,7 +311,6 @@ W6692_fill_Dfifo(struct w6692_hw *card)
pr_debug("%s: fill_Dfifo dbusytimer running\n", card->name);
del_timer(&dch->timer);
}
- init_timer(&dch->timer);
dch->timer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ) / 1000);
add_timer(&dch->timer);
if (debug & DEBUG_HW_DFIFO) {
@@ -819,8 +818,9 @@ w6692_irq(int intno, void *dev_id)
}
static void
-dbusy_timer_handler(struct dchannel *dch)
+dbusy_timer_handler(struct timer_list *t)
{
+ struct dchannel *dch = from_timer(dch, t, timer);
struct w6692_hw *card = dch->hw;
int rbch, star;
u_long flags;
@@ -852,8 +852,7 @@ static void initW6692(struct w6692_hw *card)
{
u8 val;
- setup_timer(&card->dch.timer, (void *)dbusy_timer_handler,
- (u_long)&card->dch);
+ timer_setup(&card->dch.timer, dbusy_timer_handler, 0);
w6692_mode(&card->bc[0], ISDN_P_NONE);
w6692_mode(&card->bc[1], ISDN_P_NONE);
WriteW6692(card, W_D_CTL, 0x00);
diff --git a/drivers/isdn/hisax/amd7930_fn.c b/drivers/isdn/hisax/amd7930_fn.c
index dcf4c2a9fcea..77debda2221b 100644
--- a/drivers/isdn/hisax/amd7930_fn.c
+++ b/drivers/isdn/hisax/amd7930_fn.c
@@ -398,7 +398,6 @@ Amd7930_fill_Dfifo(struct IsdnCardState *cs)
debugl1(cs, "Amd7930: fill_Dfifo dbusytimer running");
del_timer(&cs->dbusytimer);
}
- init_timer(&cs->dbusytimer);
cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ) / 1000);
add_timer(&cs->dbusytimer);
@@ -686,8 +685,9 @@ DC_Close_Amd7930(struct IsdnCardState *cs) {
static void
-dbusy_timer_handler(struct IsdnCardState *cs)
+dbusy_timer_handler(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, dbusytimer);
u_long flags;
struct PStack *stptr;
WORD dtcr, der;
@@ -790,5 +790,5 @@ void Amd7930_init(struct IsdnCardState *cs)
void setup_Amd7930(struct IsdnCardState *cs)
{
INIT_WORK(&cs->tqueue, Amd7930_bh);
- setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs);
+ timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0);
}
diff --git a/drivers/isdn/hisax/arcofi.c b/drivers/isdn/hisax/arcofi.c
index 9826bad49e2c..2f784f96d439 100644
--- a/drivers/isdn/hisax/arcofi.c
+++ b/drivers/isdn/hisax/arcofi.c
@@ -23,7 +23,6 @@ add_arcofi_timer(struct IsdnCardState *cs) {
if (test_and_set_bit(FLG_ARCOFI_TIMER, &cs->HW_Flags)) {
del_timer(&cs->dc.isac.arcofitimer);
}
- init_timer(&cs->dc.isac.arcofitimer);
cs->dc.isac.arcofitimer.expires = jiffies + ((ARCOFI_TIMER_VALUE * HZ) / 1000);
add_timer(&cs->dc.isac.arcofitimer);
}
@@ -112,7 +111,8 @@ arcofi_fsm(struct IsdnCardState *cs, int event, void *data) {
}
static void
-arcofi_timer(struct IsdnCardState *cs) {
+arcofi_timer(struct timer_list *t) {
+ struct IsdnCardState *cs = from_timer(cs, t, dc.isac.arcofitimer);
arcofi_fsm(cs, ARCOFI_TIMEOUT, NULL);
}
@@ -125,7 +125,7 @@ clear_arcofi(struct IsdnCardState *cs) {
void
init_arcofi(struct IsdnCardState *cs) {
- setup_timer(&cs->dc.isac.arcofitimer, (void *)arcofi_timer, (long)cs);
+ timer_setup(&cs->dc.isac.arcofitimer, arcofi_timer, 0);
init_waitqueue_head(&cs->dc.isac.arcofi_wait);
test_and_set_bit(HW_ARCOFI, &cs->HW_Flags);
}
diff --git a/drivers/isdn/hisax/diva.c b/drivers/isdn/hisax/diva.c
index 3fc94e7741ae..38bdd3f7b960 100644
--- a/drivers/isdn/hisax/diva.c
+++ b/drivers/isdn/hisax/diva.c
@@ -798,8 +798,9 @@ reset_diva(struct IsdnCardState *cs)
#define DIVA_ASSIGN 1
static void
-diva_led_handler(struct IsdnCardState *cs)
+diva_led_handler(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, hw.diva.tl);
int blink = 0;
if ((cs->subtyp == DIVA_IPAC_ISA) ||
@@ -828,7 +829,6 @@ diva_led_handler(struct IsdnCardState *cs)
byteout(cs->hw.diva.ctrl, cs->hw.diva.ctrl_reg);
if (blink) {
- init_timer(&cs->hw.diva.tl);
cs->hw.diva.tl.expires = jiffies + ((blink * HZ) / 1000);
add_timer(&cs->hw.diva.tl);
}
@@ -900,7 +900,7 @@ Diva_card_msg(struct IsdnCardState *cs, int mt, void *arg)
(cs->subtyp != DIVA_IPAC_PCI) &&
(cs->subtyp != DIVA_IPACX_PCI)) {
spin_lock_irqsave(&cs->lock, flags);
- diva_led_handler(cs);
+ diva_led_handler(&cs->hw.diva.tl);
spin_unlock_irqrestore(&cs->lock, flags);
}
return (0);
@@ -978,8 +978,7 @@ static int setup_diva_common(struct IsdnCardState *cs)
printk(KERN_INFO "Diva: IPACX Design Id: %x\n",
MemReadISAC_IPACX(cs, IPACX_ID) & 0x3F);
} else { /* DIVA 2.0 */
- setup_timer(&cs->hw.diva.tl, (void *)diva_led_handler,
- (long)cs);
+ timer_setup(&cs->hw.diva.tl, diva_led_handler, 0);
cs->readisac = &ReadISAC;
cs->writeisac = &WriteISAC;
cs->readisacfifo = &ReadISACfifo;
diff --git a/drivers/isdn/hisax/elsa.c b/drivers/isdn/hisax/elsa.c
index 03bc5d504e22..b21c05820f44 100644
--- a/drivers/isdn/hisax/elsa.c
+++ b/drivers/isdn/hisax/elsa.c
@@ -606,8 +606,9 @@ check_arcofi(struct IsdnCardState *cs)
#endif /* ARCOFI_USE */
static void
-elsa_led_handler(struct IsdnCardState *cs)
+elsa_led_handler(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, hw.elsa.tl);
int blink = 0;
if (cs->subtyp == ELSA_PCMCIA || cs->subtyp == ELSA_PCMCIA_IPAC)
@@ -640,7 +641,6 @@ elsa_led_handler(struct IsdnCardState *cs)
} else
byteout(cs->hw.elsa.ctrl, cs->hw.elsa.ctrl_reg);
if (blink) {
- init_timer(&cs->hw.elsa.tl);
cs->hw.elsa.tl.expires = jiffies + ((blink * HZ) / 1000);
add_timer(&cs->hw.elsa.tl);
}
@@ -715,7 +715,7 @@ Elsa_card_msg(struct IsdnCardState *cs, int mt, void *arg)
init_modem(cs);
}
#endif
- elsa_led_handler(cs);
+ elsa_led_handler(&cs->hw.elsa.tl);
return (ret);
case (MDL_REMOVE | REQUEST):
cs->hw.elsa.status &= 0;
@@ -767,7 +767,7 @@ Elsa_card_msg(struct IsdnCardState *cs, int mt, void *arg)
else
cs->hw.elsa.status &= ~ELSA_BAD_PWR;
}
- elsa_led_handler(cs);
+ elsa_led_handler(&cs->hw.elsa.tl);
return (ret);
}
@@ -1147,7 +1147,7 @@ static int setup_elsa_common(struct IsdnCard *card)
init_arcofi(cs);
#endif
setup_isac(cs);
- setup_timer(&cs->hw.elsa.tl, (void *)elsa_led_handler, (long)cs);
+ timer_setup(&cs->hw.elsa.tl, elsa_led_handler, 0);
/* Teste Timer */
if (cs->hw.elsa.timer) {
byteout(cs->hw.elsa.trig, 0xff);
diff --git a/drivers/isdn/hisax/fsm.c b/drivers/isdn/hisax/fsm.c
index d63266fa8cbd..3e020ec0f65e 100644
--- a/drivers/isdn/hisax/fsm.c
+++ b/drivers/isdn/hisax/fsm.c
@@ -85,8 +85,9 @@ FsmChangeState(struct FsmInst *fi, int newstate)
}
static void
-FsmExpireTimer(struct FsmTimer *ft)
+FsmExpireTimer(struct timer_list *t)
{
+ struct FsmTimer *ft = from_timer(ft, t, tl);
#if FSM_TIMER_DEBUG
if (ft->fi->debug)
ft->fi->printdebug(ft->fi, "FsmExpireTimer %lx", (long) ft);
@@ -102,7 +103,7 @@ FsmInitTimer(struct FsmInst *fi, struct FsmTimer *ft)
if (ft->fi->debug)
ft->fi->printdebug(ft->fi, "FsmInitTimer %lx", (long) ft);
#endif
- setup_timer(&ft->tl, (void *)FsmExpireTimer, (long)ft);
+ timer_setup(&ft->tl, FsmExpireTimer, 0);
}
void
@@ -131,7 +132,6 @@ FsmAddTimer(struct FsmTimer *ft,
ft->fi->printdebug(ft->fi, "FsmAddTimer already active!");
return -1;
}
- init_timer(&ft->tl);
ft->event = event;
ft->arg = arg;
ft->tl.expires = jiffies + (millisec * HZ) / 1000;
@@ -152,7 +152,6 @@ FsmRestartTimer(struct FsmTimer *ft,
if (timer_pending(&ft->tl))
del_timer(&ft->tl);
- init_timer(&ft->tl);
ft->event = event;
ft->arg = arg;
ft->tl.expires = jiffies + (millisec * HZ) / 1000;
diff --git a/drivers/isdn/hisax/hfc4s8s_l1.c b/drivers/isdn/hisax/hfc4s8s_l1.c
index 9090cc1e1f29..e9bb8fb67ad0 100644
--- a/drivers/isdn/hisax/hfc4s8s_l1.c
+++ b/drivers/isdn/hisax/hfc4s8s_l1.c
@@ -591,8 +591,9 @@ bch_l2l1(struct hisax_if *ifc, int pr, void *arg)
/* layer 1 timer function */
/**************************/
static void
-hfc_l1_timer(struct hfc4s8s_l1 *l1)
+hfc_l1_timer(struct timer_list *t)
{
+ struct hfc4s8s_l1 *l1 = from_timer(l1, t, l1_timer);
u_long flags;
if (!l1->enabled)
@@ -1396,8 +1397,7 @@ setup_instance(hfc4s8s_hw *hw)
l1p = hw->l1 + i;
spin_lock_init(&l1p->lock);
l1p->hw = hw;
- setup_timer(&l1p->l1_timer, (void *)hfc_l1_timer,
- (long)(l1p));
+ timer_setup(&l1p->l1_timer, hfc_l1_timer, 0);
l1p->st_num = i;
skb_queue_head_init(&l1p->d_tx_queue);
l1p->d_if.ifc.priv = hw->l1 + i;
diff --git a/drivers/isdn/hisax/hfc_2bds0.c b/drivers/isdn/hisax/hfc_2bds0.c
index ad8597a1a07e..86b82172e992 100644
--- a/drivers/isdn/hisax/hfc_2bds0.c
+++ b/drivers/isdn/hisax/hfc_2bds0.c
@@ -1014,7 +1014,7 @@ setstack_hfcd(struct PStack *st, struct IsdnCardState *cs)
}
static void
-hfc_dbusy_timer(struct IsdnCardState *cs)
+hfc_dbusy_timer(struct timer_list *t)
{
}
@@ -1073,6 +1073,6 @@ set_cs_func(struct IsdnCardState *cs)
cs->writeisacfifo = &dummyf;
cs->BC_Read_Reg = &ReadReg;
cs->BC_Write_Reg = &WriteReg;
- setup_timer(&cs->dbusytimer, (void *)hfc_dbusy_timer, (long)cs);
+ timer_setup(&cs->dbusytimer, hfc_dbusy_timer, 0);
INIT_WORK(&cs->tqueue, hfcd_bh);
}
diff --git a/drivers/isdn/hisax/hfc_pci.c b/drivers/isdn/hisax/hfc_pci.c
index f9ca35cc32b1..8e5b03161b2f 100644
--- a/drivers/isdn/hisax/hfc_pci.c
+++ b/drivers/isdn/hisax/hfc_pci.c
@@ -165,8 +165,9 @@ reset_hfcpci(struct IsdnCardState *cs)
/* Timer function called when kernel timer expires */
/***************************************************/
static void
-hfcpci_Timer(struct IsdnCardState *cs)
+hfcpci_Timer(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, hw.hfcpci.timer);
cs->hw.hfcpci.timer.expires = jiffies + 75;
/* WD RESET */
/* WriteReg(cs, HFCD_DATA, HFCD_CTMT, cs->hw.hfcpci.ctmt | 0x80);
@@ -1095,7 +1096,7 @@ hfcpci_interrupt(int intno, void *dev_id)
/* timer callback for D-chan busy resolution. Currently no function */
/********************************************************************/
static void
-hfcpci_dbusy_timer(struct IsdnCardState *cs)
+hfcpci_dbusy_timer(struct timer_list *t)
{
}
@@ -1582,7 +1583,7 @@ inithfcpci(struct IsdnCardState *cs)
cs->bcs[1].BC_SetStack = setstack_2b;
cs->bcs[0].BC_Close = close_hfcpci;
cs->bcs[1].BC_Close = close_hfcpci;
- setup_timer(&cs->dbusytimer, (void *)hfcpci_dbusy_timer, (long)cs);
+ timer_setup(&cs->dbusytimer, hfcpci_dbusy_timer, 0);
mode_hfcpci(cs->bcs, 0, 0);
mode_hfcpci(cs->bcs + 1, 0, 1);
}
@@ -1744,7 +1745,7 @@ setup_hfcpci(struct IsdnCard *card)
cs->BC_Write_Reg = NULL;
cs->irq_func = &hfcpci_interrupt;
cs->irq_flags |= IRQF_SHARED;
- setup_timer(&cs->hw.hfcpci.timer, (void *)hfcpci_Timer, (long)cs);
+ timer_setup(&cs->hw.hfcpci.timer, hfcpci_Timer, 0);
cs->cardmsg = &hfcpci_card_msg;
cs->auxcmd = &hfcpci_auxcmd;
diff --git a/drivers/isdn/hisax/hfc_sx.c b/drivers/isdn/hisax/hfc_sx.c
index 3aef8e1a90e4..d925f579bc80 100644
--- a/drivers/isdn/hisax/hfc_sx.c
+++ b/drivers/isdn/hisax/hfc_sx.c
@@ -418,8 +418,9 @@ reset_hfcsx(struct IsdnCardState *cs)
/* Timer function called when kernel timer expires */
/***************************************************/
static void
-hfcsx_Timer(struct IsdnCardState *cs)
+hfcsx_Timer(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, hw.hfcsx.timer);
cs->hw.hfcsx.timer.expires = jiffies + 75;
/* WD RESET */
/* WriteReg(cs, HFCD_DATA, HFCD_CTMT, cs->hw.hfcsx.ctmt | 0x80);
@@ -860,7 +861,7 @@ hfcsx_interrupt(int intno, void *dev_id)
/* timer callback for D-chan busy resolution. Currently no function */
/********************************************************************/
static void
-hfcsx_dbusy_timer(struct IsdnCardState *cs)
+hfcsx_dbusy_timer(struct timer_list *t)
{
}
@@ -1495,7 +1496,7 @@ int setup_hfcsx(struct IsdnCard *card)
} else
return (0); /* no valid card type */
- setup_timer(&cs->dbusytimer, (void *)hfcsx_dbusy_timer, (long)cs);
+ timer_setup(&cs->dbusytimer, hfcsx_dbusy_timer, 0);
INIT_WORK(&cs->tqueue, hfcsx_bh);
cs->readisac = NULL;
cs->writeisac = NULL;
@@ -1507,7 +1508,7 @@ int setup_hfcsx(struct IsdnCard *card)
cs->hw.hfcsx.b_fifo_size = 0; /* fifo size still unknown */
cs->hw.hfcsx.cirm = ccd_sp_irqtab[cs->irq & 0xF]; /* RAM not evaluated */
- setup_timer(&cs->hw.hfcsx.timer, (void *)hfcsx_Timer, (long)cs);
+ timer_setup(&cs->hw.hfcsx.timer, hfcsx_Timer, 0);
reset_hfcsx(cs);
cs->cardmsg = &hfcsx_card_msg;
diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c
index e8212185d386..97ecb3073045 100644
--- a/drivers/isdn/hisax/hfc_usb.c
+++ b/drivers/isdn/hisax/hfc_usb.c
@@ -343,8 +343,9 @@ handle_led(hfcusb_data *hfc, int event)
/* ISDN l1 timer T3 expires */
static void
-l1_timer_expire_t3(hfcusb_data *hfc)
+l1_timer_expire_t3(struct timer_list *t)
{
+ hfcusb_data *hfc = from_timer(hfc, t, t3_timer);
hfc->d_if.ifc.l1l2(&hfc->d_if.ifc, PH_DEACTIVATE | INDICATION,
NULL);
@@ -360,8 +361,9 @@ l1_timer_expire_t3(hfcusb_data *hfc)
/* ISDN l1 timer T4 expires */
static void
-l1_timer_expire_t4(hfcusb_data *hfc)
+l1_timer_expire_t4(struct timer_list *t)
{
+ hfcusb_data *hfc = from_timer(hfc, t, t4_timer);
hfc->d_if.ifc.l1l2(&hfc->d_if.ifc, PH_DEACTIVATE | INDICATION,
NULL);
@@ -1165,10 +1167,10 @@ hfc_usb_init(hfcusb_data *hfc)
hfc->old_led_state = 0;
/* init the t3 timer */
- setup_timer(&hfc->t3_timer, (void *)l1_timer_expire_t3, (long)hfc);
+ timer_setup(&hfc->t3_timer, l1_timer_expire_t3, 0);
/* init the t4 timer */
- setup_timer(&hfc->t4_timer, (void *)l1_timer_expire_t4, (long)hfc);
+ timer_setup(&hfc->t4_timer, l1_timer_expire_t4, 0);
/* init the background machinery for control requests */
hfc->ctrl_read.bRequestType = 0xc0;
diff --git a/drivers/isdn/hisax/hfcscard.c b/drivers/isdn/hisax/hfcscard.c
index 467287096918..380bbeda9c74 100644
--- a/drivers/isdn/hisax/hfcscard.c
+++ b/drivers/isdn/hisax/hfcscard.c
@@ -41,8 +41,9 @@ hfcs_interrupt(int intno, void *dev_id)
}
static void
-hfcs_Timer(struct IsdnCardState *cs)
+hfcs_Timer(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, hw.hfcD.timer);
cs->hw.hfcD.timer.expires = jiffies + 75;
/* WD RESET */
/* WriteReg(cs, HFCD_DATA, HFCD_CTMT, cs->hw.hfcD.ctmt | 0x80);
@@ -253,7 +254,7 @@ int setup_hfcs(struct IsdnCard *card)
outb(0x57, cs->hw.hfcD.addr | 1);
}
set_cs_func(cs);
- setup_timer(&cs->hw.hfcD.timer, (void *)hfcs_Timer, (long)cs);
+ timer_setup(&cs->hw.hfcD.timer, hfcs_Timer, 0);
cs->cardmsg = &hfcs_card_msg;
cs->irq_func = &hfcs_interrupt;
return (1);
diff --git a/drivers/isdn/hisax/icc.c b/drivers/isdn/hisax/icc.c
index 8d1804572b32..831dd1bb81ef 100644
--- a/drivers/isdn/hisax/icc.c
+++ b/drivers/isdn/hisax/icc.c
@@ -168,7 +168,6 @@ icc_fill_fifo(struct IsdnCardState *cs)
debugl1(cs, "icc_fill_fifo dbusytimer running");
del_timer(&cs->dbusytimer);
}
- init_timer(&cs->dbusytimer);
cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000);
add_timer(&cs->dbusytimer);
if (cs->debug & L1_DEB_ISAC_FIFO) {
@@ -580,8 +579,9 @@ DC_Close_icc(struct IsdnCardState *cs) {
}
static void
-dbusy_timer_handler(struct IsdnCardState *cs)
+dbusy_timer_handler(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, dbusytimer);
struct PStack *stptr;
int rbch, star;
@@ -676,5 +676,5 @@ clear_pending_icc_ints(struct IsdnCardState *cs)
void setup_icc(struct IsdnCardState *cs)
{
INIT_WORK(&cs->tqueue, icc_bh);
- setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs);
+ timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0);
}
diff --git a/drivers/isdn/hisax/ipacx.c b/drivers/isdn/hisax/ipacx.c
index c426b4fea28a..c7086c1534bd 100644
--- a/drivers/isdn/hisax/ipacx.c
+++ b/drivers/isdn/hisax/ipacx.c
@@ -35,7 +35,7 @@
static void ph_command(struct IsdnCardState *cs, unsigned int command);
static inline void cic_int(struct IsdnCardState *cs);
static void dch_l2l1(struct PStack *st, int pr, void *arg);
-static void dbusy_timer_handler(struct IsdnCardState *cs);
+static void dbusy_timer_handler(struct timer_list *t);
static void dch_empty_fifo(struct IsdnCardState *cs, int count);
static void dch_fill_fifo(struct IsdnCardState *cs);
static inline void dch_int(struct IsdnCardState *cs);
@@ -198,8 +198,9 @@ dch_l2l1(struct PStack *st, int pr, void *arg)
//----------------------------------------------------------
//----------------------------------------------------------
static void
-dbusy_timer_handler(struct IsdnCardState *cs)
+dbusy_timer_handler(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, dbusytimer);
struct PStack *st;
int rbchd, stard;
@@ -298,7 +299,6 @@ dch_fill_fifo(struct IsdnCardState *cs)
debugl1(cs, "dch_fill_fifo dbusytimer running");
del_timer(&cs->dbusytimer);
}
- init_timer(&cs->dbusytimer);
cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000);
add_timer(&cs->dbusytimer);
@@ -424,7 +424,7 @@ dch_init(struct IsdnCardState *cs)
cs->setstack_d = dch_setstack;
- setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs);
+ timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0);
cs->writeisac(cs, IPACX_TR_CONF0, 0x00); // clear LDD
cs->writeisac(cs, IPACX_TR_CONF2, 0x00); // enable transmitter
diff --git a/drivers/isdn/hisax/isac.c b/drivers/isdn/hisax/isac.c
index ea965f29a555..bd40e0671ded 100644
--- a/drivers/isdn/hisax/isac.c
+++ b/drivers/isdn/hisax/isac.c
@@ -171,7 +171,6 @@ isac_fill_fifo(struct IsdnCardState *cs)
debugl1(cs, "isac_fill_fifo dbusytimer running");
del_timer(&cs->dbusytimer);
}
- init_timer(&cs->dbusytimer);
cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ)/1000);
add_timer(&cs->dbusytimer);
if (cs->debug & L1_DEB_ISAC_FIFO) {
@@ -584,8 +583,9 @@ DC_Close_isac(struct IsdnCardState *cs)
}
static void
-dbusy_timer_handler(struct IsdnCardState *cs)
+dbusy_timer_handler(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, dbusytimer);
struct PStack *stptr;
int rbch, star;
@@ -677,5 +677,5 @@ void clear_pending_isac_ints(struct IsdnCardState *cs)
void setup_isac(struct IsdnCardState *cs)
{
INIT_WORK(&cs->tqueue, isac_bh);
- setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler, (long)cs);
+ timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0);
}
diff --git a/drivers/isdn/hisax/isar.c b/drivers/isdn/hisax/isar.c
index 98b4b67ea337..d01ff116797b 100644
--- a/drivers/isdn/hisax/isar.c
+++ b/drivers/isdn/hisax/isar.c
@@ -1267,7 +1267,8 @@ isar_int_main(struct IsdnCardState *cs)
}
static void
-ftimer_handler(struct BCState *bcs) {
+ftimer_handler(struct timer_list *t) {
+ struct BCState *bcs = from_timer(bcs, t, hw.isar.ftimer);
if (bcs->cs->debug)
debugl1(bcs->cs, "ftimer flags %04lx",
bcs->Flag);
@@ -1902,8 +1903,6 @@ void initisar(struct IsdnCardState *cs)
cs->bcs[1].BC_SetStack = setstack_isar;
cs->bcs[0].BC_Close = close_isarstate;
cs->bcs[1].BC_Close = close_isarstate;
- setup_timer(&cs->bcs[0].hw.isar.ftimer, (void *)ftimer_handler,
- (long)&cs->bcs[0]);
- setup_timer(&cs->bcs[1].hw.isar.ftimer, (void *)ftimer_handler,
- (long)&cs->bcs[1]);
+ timer_setup(&cs->bcs[0].hw.isar.ftimer, ftimer_handler, 0);
+ timer_setup(&cs->bcs[1].hw.isar.ftimer, ftimer_handler, 0);
}
diff --git a/drivers/isdn/hisax/isdnl3.c b/drivers/isdn/hisax/isdnl3.c
index 569ce52c567b..bb3f9ec62749 100644
--- a/drivers/isdn/hisax/isdnl3.c
+++ b/drivers/isdn/hisax/isdnl3.c
@@ -160,8 +160,9 @@ newl3state(struct l3_process *pc, int state)
}
static void
-L3ExpireTimer(struct L3Timer *t)
+L3ExpireTimer(struct timer_list *timer)
{
+ struct L3Timer *t = from_timer(t, timer, tl);
t->pc->st->lli.l4l3(t->pc->st, t->event, t->pc);
}
@@ -169,7 +170,7 @@ void
L3InitTimer(struct l3_process *pc, struct L3Timer *t)
{
t->pc = pc;
- setup_timer(&t->tl, (void *)L3ExpireTimer, (long)t);
+ timer_setup(&t->tl, L3ExpireTimer, 0);
}
void
@@ -186,7 +187,6 @@ L3AddTimer(struct L3Timer *t,
printk(KERN_WARNING "L3AddTimer: timer already active!\n");
return -1;
}
- init_timer(&t->tl);
t->event = event;
t->tl.expires = jiffies + (millisec * HZ) / 1000;
add_timer(&t->tl);
diff --git a/drivers/isdn/hisax/saphir.c b/drivers/isdn/hisax/saphir.c
index 6b2d0eccdd56..db906cb37a3f 100644
--- a/drivers/isdn/hisax/saphir.c
+++ b/drivers/isdn/hisax/saphir.c
@@ -159,8 +159,9 @@ Start_ISAC:
}
static void
-SaphirWatchDog(struct IsdnCardState *cs)
+SaphirWatchDog(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, hw.saphir.timer);
u_long flags;
spin_lock_irqsave(&cs->lock, flags);
@@ -268,9 +269,7 @@ int setup_saphir(struct IsdnCard *card)
cs->irq, cs->hw.saphir.cfg_reg);
setup_isac(cs);
- cs->hw.saphir.timer.function = (void *) SaphirWatchDog;
- cs->hw.saphir.timer.data = (long) cs;
- init_timer(&cs->hw.saphir.timer);
+ timer_setup(&cs->hw.saphir.timer, SaphirWatchDog, 0);
cs->hw.saphir.timer.expires = jiffies + 4 * HZ;
add_timer(&cs->hw.saphir.timer);
if (saphir_reset(cs)) {
diff --git a/drivers/isdn/hisax/teleint.c b/drivers/isdn/hisax/teleint.c
index 950399f066ef..247aa33076b1 100644
--- a/drivers/isdn/hisax/teleint.c
+++ b/drivers/isdn/hisax/teleint.c
@@ -179,8 +179,9 @@ Start_ISAC:
}
static void
-TeleInt_Timer(struct IsdnCardState *cs)
+TeleInt_Timer(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, hw.hfc.timer);
int stat = 0;
u_long flags;
@@ -278,7 +279,7 @@ int setup_TeleInt(struct IsdnCard *card)
cs->bcs[0].hw.hfc.send = NULL;
cs->bcs[1].hw.hfc.send = NULL;
cs->hw.hfc.fifosize = 7 * 1024 + 512;
- setup_timer(&cs->hw.hfc.timer, (void *)TeleInt_Timer, (long)cs);
+ timer_setup(&cs->hw.hfc.timer, TeleInt_Timer, 0);
if (!request_region(cs->hw.hfc.addr, 2, "TeleInt isdn")) {
printk(KERN_WARNING
"HiSax: TeleInt config port %x-%x already in use\n",
diff --git a/drivers/isdn/hisax/w6692.c b/drivers/isdn/hisax/w6692.c
index 6f6733b7c1e4..c4be1644f5bb 100644
--- a/drivers/isdn/hisax/w6692.c
+++ b/drivers/isdn/hisax/w6692.c
@@ -188,7 +188,6 @@ W6692_fill_fifo(struct IsdnCardState *cs)
debugl1(cs, "W6692_fill_fifo dbusytimer running");
del_timer(&cs->dbusytimer);
}
- init_timer(&cs->dbusytimer);
cs->dbusytimer.expires = jiffies + ((DBUSY_TIMER_VALUE * HZ) / 1000);
add_timer(&cs->dbusytimer);
if (cs->debug & L1_DEB_ISAC_FIFO) {
@@ -684,8 +683,9 @@ DC_Close_W6692(struct IsdnCardState *cs)
}
static void
-dbusy_timer_handler(struct IsdnCardState *cs)
+dbusy_timer_handler(struct timer_list *t)
{
+ struct IsdnCardState *cs = from_timer(cs, t, dbusytimer);
struct PStack *stptr;
int rbch, star;
u_long flags;
@@ -904,8 +904,7 @@ static void initW6692(struct IsdnCardState *cs, int part)
if (part & 1) {
cs->setstack_d = setstack_W6692;
cs->DC_Close = DC_Close_W6692;
- setup_timer(&cs->dbusytimer, (void *)dbusy_timer_handler,
- (long)cs);
+ timer_setup(&cs->dbusytimer, dbusy_timer_handler, 0);
resetW6692(cs);
ph_command(cs, W_L1CMD_RST);
cs->dc.w6692.ph_state = W_L1CMD_RST;
diff --git a/drivers/isdn/isdnloop/isdnloop.c b/drivers/isdn/isdnloop/isdnloop.c
index e97232646ba1..a4597e96c916 100644
--- a/drivers/isdn/isdnloop/isdnloop.c
+++ b/drivers/isdn/isdnloop/isdnloop.c
@@ -90,9 +90,9 @@ isdnloop_bchan_send(isdnloop_card *card, int ch)
* data = pointer to card struct, set by kernel timer.data
*/
static void
-isdnloop_pollbchan(unsigned long data)
+isdnloop_pollbchan(struct timer_list *t)
{
- isdnloop_card *card = (isdnloop_card *) data;
+ isdnloop_card *card = from_timer(card, t, rb_timer);
unsigned long flags;
if (card->flags & ISDNLOOP_FLAGS_B1ACTIVE)
@@ -305,9 +305,9 @@ isdnloop_putmsg(isdnloop_card *card, unsigned char c)
* data = pointer to card struct
*/
static void
-isdnloop_polldchan(unsigned long data)
+isdnloop_polldchan(struct timer_list *t)
{
- isdnloop_card *card = (isdnloop_card *) data;
+ isdnloop_card *card = from_timer(card, t, st_timer);
struct sk_buff *skb;
int avail;
int left;
@@ -373,8 +373,6 @@ isdnloop_polldchan(unsigned long data)
card->flags |= ISDNLOOP_FLAGS_RBTIMER;
spin_lock_irqsave(&card->isdnloop_lock, flags);
del_timer(&card->rb_timer);
- card->rb_timer.function = isdnloop_pollbchan;
- card->rb_timer.data = (unsigned long) card;
card->rb_timer.expires = jiffies + ISDNLOOP_TIMER_BCREAD;
add_timer(&card->rb_timer);
spin_unlock_irqrestore(&card->isdnloop_lock, flags);
@@ -588,9 +586,10 @@ isdnloop_atimeout(isdnloop_card *card, int ch)
* Wrapper for isdnloop_atimeout().
*/
static void
-isdnloop_atimeout0(unsigned long data)
+isdnloop_atimeout0(struct timer_list *t)
{
- isdnloop_card *card = (isdnloop_card *) data;
+ isdnloop_card *card = from_timer(card, t, c_timer[0]);
+
isdnloop_atimeout(card, 0);
}
@@ -598,9 +597,10 @@ isdnloop_atimeout0(unsigned long data)
* Wrapper for isdnloop_atimeout().
*/
static void
-isdnloop_atimeout1(unsigned long data)
+isdnloop_atimeout1(struct timer_list *t)
{
- isdnloop_card *card = (isdnloop_card *) data;
+ isdnloop_card *card = from_timer(card, t, c_timer[1]);
+
isdnloop_atimeout(card, 1);
}
@@ -617,13 +617,9 @@ isdnloop_start_ctimer(isdnloop_card *card, int ch)
unsigned long flags;
spin_lock_irqsave(&card->isdnloop_lock, flags);
- init_timer(&card->c_timer[ch]);
+ timer_setup(&card->c_timer[ch], ch ? isdnloop_atimeout1
+ : isdnloop_atimeout0, 0);
card->c_timer[ch].expires = jiffies + ISDNLOOP_TIMER_ALERTWAIT;
- if (ch)
- card->c_timer[ch].function = isdnloop_atimeout1;
- else
- card->c_timer[ch].function = isdnloop_atimeout0;
- card->c_timer[ch].data = (unsigned long) card;
add_timer(&card->c_timer[ch]);
spin_unlock_irqrestore(&card->isdnloop_lock, flags);
}
@@ -1113,10 +1109,9 @@ isdnloop_start(isdnloop_card *card, isdnloop_sdef *sdefp)
sdef.ptype);
return -EINVAL;
}
- init_timer(&card->st_timer);
+ timer_setup(&card->rb_timer, isdnloop_pollbchan, 0);
+ timer_setup(&card->st_timer, isdnloop_polldchan, 0);
card->st_timer.expires = jiffies + ISDNLOOP_TIMER_DCREAD;
- card->st_timer.function = isdnloop_polldchan;
- card->st_timer.data = (unsigned long) card;
add_timer(&card->st_timer);
card->flags |= ISDNLOOP_FLAGS_RUNNING;
spin_unlock_irqrestore(&card->isdnloop_lock, flags);
diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h
index fc1733a08845..fa09d511a8ed 100644
--- a/drivers/isdn/mISDN/dsp.h
+++ b/drivers/isdn/mISDN/dsp.h
@@ -259,7 +259,7 @@ extern u8 *dsp_dtmf_goertzel_decode(struct dsp *dsp, u8 *data, int len,
extern int dsp_tone(struct dsp *dsp, int tone);
extern void dsp_tone_copy(struct dsp *dsp, u8 *data, int len);
-extern void dsp_tone_timeout(void *arg);
+extern void dsp_tone_timeout(struct timer_list *t);
extern void dsp_bf_encrypt(struct dsp *dsp, u8 *data, int len);
extern void dsp_bf_decrypt(struct dsp *dsp, u8 *data, int len);
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 880e9d367a39..cd036e87335a 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -1092,7 +1092,7 @@ dspcreate(struct channel_req *crq)
ndsp->pcm_bank_tx = -1;
ndsp->hfc_conf = -1; /* current conference number */
/* set tone timer */
- setup_timer(&ndsp->tone.tl, (void *)dsp_tone_timeout, (long)ndsp);
+ timer_setup(&ndsp->tone.tl, dsp_tone_timeout, 0);
if (dtmfthreshold < 20 || dtmfthreshold > 500)
dtmfthreshold = 200;
@@ -1202,9 +1202,7 @@ static int __init dsp_init(void)
}
/* set sample timer */
- dsp_spl_tl.function = (void *)dsp_cmx_send;
- dsp_spl_tl.data = 0;
- init_timer(&dsp_spl_tl);
+ timer_setup(&dsp_spl_tl, (void *)dsp_cmx_send, 0);
dsp_spl_tl.expires = jiffies + dsp_tics;
dsp_spl_jiffies = dsp_spl_tl.expires;
add_timer(&dsp_spl_tl);
diff --git a/drivers/isdn/mISDN/dsp_tones.c b/drivers/isdn/mISDN/dsp_tones.c
index 057e0d6a369b..8389e2105cdc 100644
--- a/drivers/isdn/mISDN/dsp_tones.c
+++ b/drivers/isdn/mISDN/dsp_tones.c
@@ -457,9 +457,9 @@ dsp_tone_hw_message(struct dsp *dsp, u8 *sample, int len)
* timer expires *
*****************/
void
-dsp_tone_timeout(void *arg)
+dsp_tone_timeout(struct timer_list *t)
{
- struct dsp *dsp = arg;
+ struct dsp *dsp = from_timer(dsp, t, tone.tl);
struct dsp_tone *tone = &dsp->tone;
struct pattern *pat = (struct pattern *)tone->pattern;
int index = tone->index;
@@ -478,7 +478,6 @@ dsp_tone_timeout(void *arg)
else
dsp_tone_hw_message(dsp, pat->data[index], *(pat->siz[index]));
/* set timer */
- init_timer(&tone->tl);
tone->tl.expires = jiffies + (pat->seq[index] * HZ) / 8000;
add_timer(&tone->tl);
}
@@ -541,7 +540,6 @@ dsp_tone(struct dsp *dsp, int tone)
/* set timer */
if (timer_pending(&tonet->tl))
del_timer(&tonet->tl);
- init_timer(&tonet->tl);
tonet->tl.expires = jiffies + (pat->seq[0] * HZ) / 8000;
add_timer(&tonet->tl);
} else {
diff --git a/drivers/isdn/mISDN/fsm.c b/drivers/isdn/mISDN/fsm.c
index 92e6570b1143..cabcb906e0b5 100644
--- a/drivers/isdn/mISDN/fsm.c
+++ b/drivers/isdn/mISDN/fsm.c
@@ -100,8 +100,9 @@ mISDN_FsmChangeState(struct FsmInst *fi, int newstate)
EXPORT_SYMBOL(mISDN_FsmChangeState);
static void
-FsmExpireTimer(struct FsmTimer *ft)
+FsmExpireTimer(struct timer_list *t)
{
+ struct FsmTimer *ft = from_timer(ft, t, tl);
#if FSM_TIMER_DEBUG
if (ft->fi->debug)
ft->fi->printdebug(ft->fi, "FsmExpireTimer %lx", (long) ft);
@@ -117,7 +118,7 @@ mISDN_FsmInitTimer(struct FsmInst *fi, struct FsmTimer *ft)
if (ft->fi->debug)
ft->fi->printdebug(ft->fi, "mISDN_FsmInitTimer %lx", (long) ft);
#endif
- setup_timer(&ft->tl, (void *)FsmExpireTimer, (long)ft);
+ timer_setup(&ft->tl, FsmExpireTimer, 0);
}
EXPORT_SYMBOL(mISDN_FsmInitTimer);
@@ -153,7 +154,6 @@ mISDN_FsmAddTimer(struct FsmTimer *ft,
}
return -1;
}
- init_timer(&ft->tl);
ft->event = event;
ft->arg = arg;
ft->tl.expires = jiffies + (millisec * HZ) / 1000;
@@ -175,7 +175,6 @@ mISDN_FsmRestartTimer(struct FsmTimer *ft,
if (timer_pending(&ft->tl))
del_timer(&ft->tl);
- init_timer(&ft->tl);
ft->event = event;
ft->arg = arg;
ft->tl.expires = jiffies + (millisec * HZ) / 1000;
diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c
index 6be2041248d3..b5d590e378ac 100644
--- a/drivers/isdn/mISDN/l1oip_core.c
+++ b/drivers/isdn/mISDN/l1oip_core.c
@@ -842,17 +842,18 @@ l1oip_send_bh(struct work_struct *work)
* timer stuff
*/
static void
-l1oip_keepalive(void *data)
+l1oip_keepalive(struct timer_list *t)
{
- struct l1oip *hc = (struct l1oip *)data;
+ struct l1oip *hc = from_timer(hc, t, keep_tl);
schedule_work(&hc->workq);
}
static void
-l1oip_timeout(void *data)
+l1oip_timeout(struct timer_list *t)
{
- struct l1oip *hc = (struct l1oip *)data;
+ struct l1oip *hc = from_timer(hc, t,
+ timeout_tl);
struct dchannel *dch = hc->chan[hc->d_idx].dch;
if (debug & DEBUG_L1OIP_MSG)
@@ -1437,13 +1438,11 @@ init_card(struct l1oip *hc, int pri, int bundle)
if (ret)
return ret;
- hc->keep_tl.function = (void *)l1oip_keepalive;
- hc->keep_tl.data = (ulong)hc;
- init_timer(&hc->keep_tl);
+ timer_setup(&hc->keep_tl, l1oip_keepalive, 0);
hc->keep_tl.expires = jiffies + 2 * HZ; /* two seconds first time */
add_timer(&hc->keep_tl);
- setup_timer(&hc->timeout_tl, (void *)l1oip_timeout, (ulong)hc);
+ timer_setup(&hc->timeout_tl, l1oip_timeout, 0);
hc->timeout_on = 0; /* state that we have timer off */
return 0;
diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c
index b1e135fc1fb5..c50a34340f67 100644
--- a/drivers/isdn/mISDN/timerdev.c
+++ b/drivers/isdn/mISDN/timerdev.c
@@ -162,9 +162,9 @@ mISDN_poll(struct file *filep, poll_table *wait)
}
static void
-dev_expire_timer(unsigned long data)
+dev_expire_timer(struct timer_list *t)
{
- struct mISDNtimer *timer = (void *)data;
+ struct mISDNtimer *timer = from_timer(timer, t, tl);
u_long flags;
spin_lock_irqsave(&timer->dev->lock, flags);
@@ -189,7 +189,7 @@ misdn_add_timer(struct mISDNtimerdev *dev, int timeout)
if (!timer)
return -ENOMEM;
timer->dev = dev;
- setup_timer(&timer->tl, dev_expire_timer, (long)timer);
+ timer_setup(&timer->tl, dev_expire_timer, 0);
spin_lock_irq(&dev->lock);
id = timer->id = dev->next_id++;
if (dev->next_id < 0)
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index aba0d652095b..0936da592e12 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -483,6 +483,18 @@ config FUJITSU_ES
This driver provides support for Extended Socket network device
on Extended Partitioning of FUJITSU PRIMEQUEST 2000 E2 series.
+config THUNDERBOLT_NET
+ tristate "Networking over Thunderbolt cable"
+ depends on THUNDERBOLT && INET
+ help
+ Select this if you want to create network between two
+ computers over a Thunderbolt cable. The driver supports Apple
+ ThunderboltIP protocol and allows communication with any host
+ supporting the same protocol including Windows and macOS.
+
+ To compile this driver a module, choose M here. The module will be
+ called thunderbolt-net.
+
source "drivers/net/hyperv/Kconfig"
endif # NETDEVICES
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 8dff900085d6..7c8f4dd3a7c5 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -74,3 +74,6 @@ obj-$(CONFIG_HYPERV_NET) += hyperv/
obj-$(CONFIG_NTB_NETDEV) += ntb_netdev.o
obj-$(CONFIG_FUJITSU_ES) += fjes/
+
+thunderbolt-net-y += thunderbolt.o
+obj-$(CONFIG_THUNDERBOLT_NET) += thunderbolt-net.o
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index 486e1e6997fc..caf04284711a 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -424,9 +424,7 @@ static int cops_open(struct net_device *dev)
*/
if(lp->board==TANGENT) /* Poll 20 times per second */
{
- init_timer(&cops_timer);
- cops_timer.function = cops_poll;
- cops_timer.data = (unsigned long)dev;
+ setup_timer(&cops_timer, cops_poll, (unsigned long)dev);
cops_timer.expires = jiffies + HZ/20;
add_timer(&cops_timer);
}
diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c
index ac755d2950a6..75a5a9b87c5a 100644
--- a/drivers/net/appletalk/ltpc.c
+++ b/drivers/net/appletalk/ltpc.c
@@ -694,6 +694,7 @@ static int do_read(struct net_device *dev, void *cbuf, int cbuflen,
/* end of idle handlers -- what should be seen is do_read, do_write */
static struct timer_list ltpc_timer;
+static struct net_device *ltpc_timer_dev;
static netdev_tx_t ltpc_xmit(struct sk_buff *skb, struct net_device *dev);
@@ -867,10 +868,8 @@ static void set_multicast_list(struct net_device *dev)
static int ltpc_poll_counter;
-static void ltpc_poll(unsigned long l)
+static void ltpc_poll(struct timer_list *unused)
{
- struct net_device *dev = (struct net_device *) l;
-
del_timer(&ltpc_timer);
if(debug & DEBUG_VERBOSE) {
@@ -880,14 +879,10 @@ static void ltpc_poll(unsigned long l)
}
ltpc_poll_counter--;
}
-
- if (!dev)
- return; /* we've been downed */
/* poll 20 times per second */
- idle(dev);
+ idle(ltpc_timer_dev);
ltpc_timer.expires = jiffies + HZ/20;
-
add_timer(&ltpc_timer);
}
@@ -1165,9 +1160,8 @@ struct net_device * __init ltpc_probe(void)
dev->irq = 0;
/* polled mode -- 20 times per second */
/* this is really, really slow... should it poll more often? */
- init_timer(&ltpc_timer);
- ltpc_timer.function=ltpc_poll;
- ltpc_timer.data = (unsigned long) dev;
+ ltpc_timer_dev = dev;
+ timer_setup(&ltpc_timer, ltpc_poll, 0);
ltpc_timer.expires = jiffies + HZ/20;
add_timer(&ltpc_timer);
@@ -1254,8 +1248,6 @@ static void __exit ltpc_cleanup(void)
if(debug & DEBUG_VERBOSE) printk("unregister_netdev\n");
unregister_netdev(dev_ltpc);
- ltpc_timer.data = 0; /* signal the poll routine that we're done */
-
del_timer_sync(&ltpc_timer);
if(debug & DEBUG_VERBOSE) printk("freeing irq\n");
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index fcfccbb3d9a2..13236b2cdf13 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -450,9 +450,7 @@ struct net_device *alloc_arcdev(const char *name)
lp->dev = dev;
spin_lock_init(&lp->lock);
- init_timer(&lp->timer);
- lp->timer.data = (unsigned long) dev;
- lp->timer.function = arcnet_timer;
+ setup_timer(&lp->timer, arcnet_timer, (unsigned long)dev);
}
return dev;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index c99dc59d729b..172eeeb68152 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1217,14 +1217,15 @@ static enum netdev_lag_tx_type bond_lag_tx_type(struct bonding *bond)
}
}
-static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave)
+static int bond_master_upper_dev_link(struct bonding *bond, struct slave *slave,
+ struct netlink_ext_ack *extack)
{
struct netdev_lag_upper_info lag_upper_info;
int err;
lag_upper_info.tx_type = bond_lag_tx_type(bond);
err = netdev_master_upper_dev_link(slave->dev, bond->dev, slave,
- &lag_upper_info);
+ &lag_upper_info, extack);
if (err)
return err;
rtmsg_ifinfo(RTM_NEWLINK, slave->dev, IFF_SLAVE, GFP_KERNEL);
@@ -1328,7 +1329,8 @@ void bond_lower_state_changed(struct slave *slave)
}
/* enslave device <slave> to bond device <master> */
-int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct bonding *bond = netdev_priv(bond_dev);
const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
@@ -1346,12 +1348,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
/* already in-use? */
if (netdev_is_rx_handler_busy(slave_dev)) {
+ NL_SET_ERR_MSG(extack, "Device is in use and cannot be enslaved");
netdev_err(bond_dev,
"Error: Device is in use and cannot be enslaved\n");
return -EBUSY;
}
if (bond_dev == slave_dev) {
+ NL_SET_ERR_MSG(extack, "Cannot enslave bond to itself.");
netdev_err(bond_dev, "cannot enslave bond to itself.\n");
return -EPERM;
}
@@ -1362,6 +1366,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
netdev_dbg(bond_dev, "%s is NETIF_F_VLAN_CHALLENGED\n",
slave_dev->name);
if (vlan_uses_dev(bond_dev)) {
+ NL_SET_ERR_MSG(extack, "Can not enslave VLAN challenged device to VLAN enabled bond");
netdev_err(bond_dev, "Error: cannot enslave VLAN challenged slave %s on VLAN enabled bond %s\n",
slave_dev->name, bond_dev->name);
return -EPERM;
@@ -1381,6 +1386,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
* enslaving it; the old ifenslave will not.
*/
if (slave_dev->flags & IFF_UP) {
+ NL_SET_ERR_MSG(extack, "Device can not be enslaved while up");
netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n",
slave_dev->name);
return -EPERM;
@@ -1421,6 +1427,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond_dev);
}
} else if (bond_dev->type != slave_dev->type) {
+ NL_SET_ERR_MSG(extack, "Device type is different from other slaves");
netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n",
slave_dev->name, slave_dev->type, bond_dev->type);
return -EINVAL;
@@ -1428,6 +1435,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
if (slave_dev->type == ARPHRD_INFINIBAND &&
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
+ NL_SET_ERR_MSG(extack, "Only active-backup mode is supported for infiniband slaves");
netdev_warn(bond_dev, "Type (%d) supports only active-backup mode\n",
slave_dev->type);
res = -EOPNOTSUPP;
@@ -1443,6 +1451,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
bond->params.fail_over_mac = BOND_FOM_ACTIVE;
netdev_warn(bond_dev, "Setting fail_over_mac to active for active-backup mode\n");
} else {
+ NL_SET_ERR_MSG(extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
netdev_err(bond_dev, "The slave device specified does not support setting the MAC address, but fail_over_mac is not set to active\n");
res = -EOPNOTSUPP;
goto err_undo_flags;
@@ -1709,7 +1718,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
goto err_detach;
}
- res = bond_master_upper_dev_link(bond, new_slave);
+ res = bond_master_upper_dev_link(bond, new_slave, extack);
if (res) {
netdev_dbg(bond_dev, "Error %d calling bond_master_upper_dev_link\n", res);
goto err_unregister;
@@ -2491,7 +2500,8 @@ int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
struct slave *curr_active_slave, *curr_arp_slave;
unsigned char *arp_ptr;
__be32 sip, tip;
- int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+ int is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP);
+ unsigned int alen;
if (!slave_do_arp_validate(bond, slave)) {
if ((slave_do_arp_validate_only(bond) && is_arp) ||
@@ -3072,7 +3082,16 @@ static int bond_slave_netdev_event(unsigned long event,
break;
case NETDEV_UP:
case NETDEV_CHANGE:
- bond_update_speed_duplex(slave);
+ /* For 802.3ad mode only:
+ * Getting invalid Speed/Duplex values here will put slave
+ * in weird state. So mark it as link-down for the time
+ * being and let link-monitoring (miimon) set it right when
+ * correct speeds/duplex are available.
+ */
+ if (bond_update_speed_duplex(slave) &&
+ BOND_MODE(bond) == BOND_MODE_8023AD)
+ slave->link = BOND_LINK_DOWN;
+
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_adapter_speed_duplex_changed(slave);
/* Fallthrough */
@@ -3482,7 +3501,7 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
switch (cmd) {
case BOND_ENSLAVE_OLD:
case SIOCBONDENSLAVE:
- res = bond_enslave(bond_dev, slave_dev);
+ res = bond_enslave(bond_dev, slave_dev, NULL);
break;
case BOND_RELEASE_OLD:
case SIOCBONDRELEASE:
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 5931aa2fe997..8a9b085c2a98 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1383,7 +1383,7 @@ static int bond_option_slaves_set(struct bonding *bond,
switch (command[0]) {
case '+':
netdev_dbg(bond->dev, "Adding slave %s\n", dev->name);
- ret = bond_enslave(bond->dev, dev);
+ ret = bond_enslave(bond->dev, dev, NULL);
break;
case '-':
diff --git a/drivers/net/caif/caif_hsi.c b/drivers/net/caif/caif_hsi.c
index 438966bf51c2..fed75e75207a 100644
--- a/drivers/net/caif/caif_hsi.c
+++ b/drivers/net/caif/caif_hsi.c
@@ -1211,17 +1211,14 @@ static int cfhsi_open(struct net_device *ndev)
init_waitqueue_head(&cfhsi->flush_fifo_wait);
/* Setup the inactivity timer. */
- init_timer(&cfhsi->inactivity_timer);
- cfhsi->inactivity_timer.data = (unsigned long)cfhsi;
- cfhsi->inactivity_timer.function = cfhsi_inactivity_tout;
+ setup_timer(&cfhsi->inactivity_timer, cfhsi_inactivity_tout,
+ (unsigned long)cfhsi);
/* Setup the slowpath RX timer. */
- init_timer(&cfhsi->rx_slowpath_timer);
- cfhsi->rx_slowpath_timer.data = (unsigned long)cfhsi;
- cfhsi->rx_slowpath_timer.function = cfhsi_rx_slowpath;
+ setup_timer(&cfhsi->rx_slowpath_timer, cfhsi_rx_slowpath,
+ (unsigned long)cfhsi);
/* Setup the aggregation timer. */
- init_timer(&cfhsi->aggregation_timer);
- cfhsi->aggregation_timer.data = (unsigned long)cfhsi;
- cfhsi->aggregation_timer.function = cfhsi_aggregation_tout;
+ setup_timer(&cfhsi->aggregation_timer, cfhsi_aggregation_tout,
+ (unsigned long)cfhsi);
/* Activate HSI interface. */
res = cfhsi->ops->cfhsi_up(cfhsi->ops);
diff --git a/drivers/net/can/grcan.c b/drivers/net/can/grcan.c
index a7be12d9a139..8570cfdaea75 100644
--- a/drivers/net/can/grcan.c
+++ b/drivers/net/can/grcan.c
@@ -1626,13 +1626,11 @@ static int grcan_setup_netdev(struct platform_device *ofdev,
spin_lock_init(&priv->lock);
if (priv->need_txbug_workaround) {
- init_timer(&priv->rr_timer);
- priv->rr_timer.function = grcan_running_reset;
- priv->rr_timer.data = (unsigned long)dev;
+ setup_timer(&priv->rr_timer, grcan_running_reset,
+ (unsigned long)dev);
- init_timer(&priv->hang_timer);
- priv->hang_timer.function = grcan_initiate_running_reset;
- priv->hang_timer.data = (unsigned long)dev;
+ setup_timer(&priv->hang_timer, grcan_initiate_running_reset,
+ (unsigned long)dev);
}
netif_napi_add(dev, &priv->napi, grcan_poll, GRCAN_NAPI_WEIGHT);
diff --git a/drivers/net/can/sja1000/peak_pcmcia.c b/drivers/net/can/sja1000/peak_pcmcia.c
index dd56133cc461..4b8758e10bd4 100644
--- a/drivers/net/can/sja1000/peak_pcmcia.c
+++ b/drivers/net/can/sja1000/peak_pcmcia.c
@@ -692,9 +692,7 @@ static int pcan_probe(struct pcmcia_device *pdev)
}
/* init the timer which controls the leds */
- init_timer(&card->led_timer);
- card->led_timer.function = pcan_led_timer;
- card->led_timer.data = (unsigned long)card;
+ setup_timer(&card->led_timer, pcan_led_timer, (unsigned long)card);
/* request the given irq */
err = request_irq(pdev->irq, &pcan_isr, IRQF_SHARED, PCC_NAME, card);
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c
index 838545ce468d..7e10dbdded28 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb.c
@@ -798,9 +798,8 @@ static int pcan_usb_init(struct peak_usb_device *dev)
int err;
/* initialize a timer needed to wait for hardware restart */
- init_timer(&pdev->restart_timer);
- pdev->restart_timer.function = pcan_usb_restart;
- pdev->restart_timer.data = (unsigned long)dev;
+ setup_timer(&pdev->restart_timer, pcan_usb_restart,
+ (unsigned long)dev);
/*
* explicit use of dev_xxx() instead of netdev_xxx() here:
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 274f3679f33d..b48cf0487b43 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -484,7 +484,7 @@ static int b53_fast_age_vlan(struct b53_device *dev, u16 vid)
return b53_flush_arl(dev, FAST_AGE_VLAN);
}
-static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
+void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
{
struct b53_device *dev = ds->priv;
unsigned int i;
@@ -500,9 +500,9 @@ static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(i), pvlan);
}
}
+EXPORT_SYMBOL(b53_imp_vlan_setup);
-static int b53_enable_port(struct dsa_switch *ds, int port,
- struct phy_device *phy)
+int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
{
struct b53_device *dev = ds->priv;
unsigned int cpu_port = dev->cpu_port;
@@ -523,11 +523,15 @@ static int b53_enable_port(struct dsa_switch *ds, int port,
b53_imp_vlan_setup(ds, cpu_port);
+ /* If EEE was enabled, restore it */
+ if (dev->ports[port].eee.eee_enabled)
+ b53_eee_enable_set(ds, port, true);
+
return 0;
}
+EXPORT_SYMBOL(b53_enable_port);
-static void b53_disable_port(struct dsa_switch *ds, int port,
- struct phy_device *phy)
+void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
{
struct b53_device *dev = ds->priv;
u8 reg;
@@ -537,20 +541,67 @@ static void b53_disable_port(struct dsa_switch *ds, int port,
reg |= PORT_CTRL_RX_DISABLE | PORT_CTRL_TX_DISABLE;
b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg);
}
+EXPORT_SYMBOL(b53_disable_port);
-static void b53_enable_cpu_port(struct b53_device *dev)
+void b53_brcm_hdr_setup(struct dsa_switch *ds, int port)
+{
+ struct b53_device *dev = ds->priv;
+ u8 hdr_ctl, val;
+ u16 reg;
+
+ /* Resolve which bit controls the Broadcom tag */
+ switch (port) {
+ case 8:
+ val = BRCM_HDR_P8_EN;
+ break;
+ case 7:
+ val = BRCM_HDR_P7_EN;
+ break;
+ case 5:
+ val = BRCM_HDR_P5_EN;
+ break;
+ default:
+ val = 0;
+ break;
+ }
+
+ /* Enable Broadcom tags for IMP port */
+ b53_read8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, &hdr_ctl);
+ hdr_ctl |= val;
+ b53_write8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, hdr_ctl);
+
+ /* Registers below are only accessible on newer devices */
+ if (!is58xx(dev))
+ return;
+
+ /* Enable reception Broadcom tag for CPU TX (switch RX) to
+ * allow us to tag outgoing frames
+ */
+ b53_read16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_RX_DIS, &reg);
+ reg &= ~BIT(port);
+ b53_write16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_RX_DIS, reg);
+
+ /* Enable transmission of Broadcom tags from the switch (CPU RX) to
+ * allow delivering frames to the per-port net_devices
+ */
+ b53_read16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_TX_DIS, &reg);
+ reg &= ~BIT(port);
+ b53_write16(dev, B53_MGMT_PAGE, B53_BRCM_HDR_TX_DIS, reg);
+}
+EXPORT_SYMBOL(b53_brcm_hdr_setup);
+
+static void b53_enable_cpu_port(struct b53_device *dev, int port)
{
- unsigned int cpu_port = dev->cpu_port;
u8 port_ctrl;
/* BCM5325 CPU port is at 8 */
- if ((is5325(dev) || is5365(dev)) && cpu_port == B53_CPU_PORT_25)
- cpu_port = B53_CPU_PORT;
+ if ((is5325(dev) || is5365(dev)) && port == B53_CPU_PORT_25)
+ port = B53_CPU_PORT;
port_ctrl = PORT_CTRL_RX_BCST_EN |
PORT_CTRL_RX_MCST_EN |
PORT_CTRL_RX_UCST_EN;
- b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(cpu_port), port_ctrl);
+ b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl);
}
static void b53_enable_mib(struct b53_device *dev)
@@ -816,12 +867,13 @@ static int b53_setup(struct dsa_switch *ds)
if (ret)
dev_err(ds->dev, "failed to apply configuration\n");
+ /* Configure IMP/CPU port, disable unused ports. Enabled
+ * ports will be configured with .port_enable
+ */
for (port = 0; port < dev->num_ports; port++) {
- if (BIT(port) & ds->enabled_port_mask)
- b53_enable_port(ds, port, NULL);
- else if (dsa_is_cpu_port(ds, port))
- b53_enable_cpu_port(dev);
- else
+ if (dsa_is_cpu_port(ds, port))
+ b53_enable_cpu_port(dev, port);
+ else if (!(BIT(port) & ds->enabled_port_mask))
b53_disable_port(ds, port, NULL);
}
@@ -832,6 +884,7 @@ static void b53_adjust_link(struct dsa_switch *ds, int port,
struct phy_device *phydev)
{
struct b53_device *dev = ds->priv;
+ struct ethtool_eee *p = &dev->ports[port].eee;
u8 rgmii_ctrl = 0, reg = 0, off;
if (!phy_is_pseudo_fixed_link(phydev))
@@ -953,6 +1006,9 @@ static void b53_adjust_link(struct dsa_switch *ds, int port,
b53_write8(dev, B53_CTRL_PAGE, po_reg, gmii_po);
}
}
+
+ /* Re-negotiate EEE if it was enabled already */
+ p->eee_enabled = b53_eee_init(ds, port, phydev);
}
int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering)
@@ -1280,7 +1336,7 @@ EXPORT_SYMBOL(b53_fdb_dump);
int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br)
{
struct b53_device *dev = ds->priv;
- s8 cpu_port = ds->dst->cpu_dp->index;
+ s8 cpu_port = ds->ports[port].cpu_dp->index;
u16 pvlan, reg;
unsigned int i;
@@ -1298,7 +1354,7 @@ int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br)
b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan);
b53_for_each_port(dev, i) {
- if (ds->ports[i].bridge_dev != br)
+ if (dsa_to_port(ds, i)->bridge_dev != br)
continue;
/* Add this local port to the remote port VLAN control
@@ -1326,7 +1382,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br)
{
struct b53_device *dev = ds->priv;
struct b53_vlan *vl = &dev->vlans[0];
- s8 cpu_port = ds->dst->cpu_dp->index;
+ s8 cpu_port = ds->ports[port].cpu_dp->index;
unsigned int i;
u16 pvlan, reg, pvid;
@@ -1334,7 +1390,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br)
b53_for_each_port(dev, i) {
/* Don't touch the remaining ports */
- if (ds->ports[i].bridge_dev != br)
+ if (dsa_to_port(ds, i)->bridge_dev != br)
continue;
b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(i), &reg);
@@ -1484,6 +1540,69 @@ void b53_mirror_del(struct dsa_switch *ds, int port,
}
EXPORT_SYMBOL(b53_mirror_del);
+void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
+{
+ struct b53_device *dev = ds->priv;
+ u16 reg;
+
+ b53_read16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, &reg);
+ if (enable)
+ reg |= BIT(port);
+ else
+ reg &= ~BIT(port);
+ b53_write16(dev, B53_EEE_PAGE, B53_EEE_EN_CTRL, reg);
+}
+EXPORT_SYMBOL(b53_eee_enable_set);
+
+
+/* Returns 0 if EEE was not enabled, or 1 otherwise
+ */
+int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy)
+{
+ int ret;
+
+ ret = phy_init_eee(phy, 0);
+ if (ret)
+ return 0;
+
+ b53_eee_enable_set(ds, port, true);
+
+ return 1;
+}
+EXPORT_SYMBOL(b53_eee_init);
+
+int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e)
+{
+ struct b53_device *dev = ds->priv;
+ struct ethtool_eee *p = &dev->ports[port].eee;
+ u16 reg;
+
+ if (is5325(dev) || is5365(dev))
+ return -EOPNOTSUPP;
+
+ b53_read16(dev, B53_EEE_PAGE, B53_EEE_LPI_INDICATE, &reg);
+ e->eee_enabled = p->eee_enabled;
+ e->eee_active = !!(reg & BIT(port));
+
+ return 0;
+}
+EXPORT_SYMBOL(b53_get_mac_eee);
+
+int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e)
+{
+ struct b53_device *dev = ds->priv;
+ struct ethtool_eee *p = &dev->ports[port].eee;
+
+ if (is5325(dev) || is5365(dev))
+ return -EOPNOTSUPP;
+
+ p->eee_enabled = e->eee_enabled;
+ b53_eee_enable_set(ds, port, e->eee_enabled);
+
+ return 0;
+}
+EXPORT_SYMBOL(b53_set_mac_eee);
+
static const struct dsa_switch_ops b53_switch_ops = {
.get_tag_protocol = b53_get_tag_protocol,
.setup = b53_setup,
@@ -1495,6 +1614,8 @@ static const struct dsa_switch_ops b53_switch_ops = {
.adjust_link = b53_adjust_link,
.port_enable = b53_enable_port,
.port_disable = b53_disable_port,
+ .get_mac_eee = b53_get_mac_eee,
+ .set_mac_eee = b53_set_mac_eee,
.port_bridge_join = b53_br_join,
.port_bridge_leave = b53_br_leave,
.port_stp_state_set = b53_br_set_stp_state,
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 01bd8cbe9a3f..603c66d240d8 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -70,6 +70,7 @@ enum {
struct b53_port {
u16 vlan_ctl_mask;
+ struct ethtool_eee eee;
};
struct b53_vlan {
@@ -186,11 +187,6 @@ static inline int is58xx(struct b53_device *dev)
#define B53_CPU_PORT_25 5
#define B53_CPU_PORT 8
-static inline int is_cpu_port(struct b53_device *dev, int port)
-{
- return dev->cpu_port;
-}
-
struct b53_device *b53_switch_alloc(struct device *base,
const struct b53_io_ops *ops,
void *priv);
@@ -204,119 +200,30 @@ static inline void b53_switch_remove(struct b53_device *dev)
dsa_unregister_switch(dev->ds);
}
-static inline int b53_read8(struct b53_device *dev, u8 page, u8 reg, u8 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read8(dev, page, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
+#define b53_build_op(type_op_size, val_type) \
+static inline int b53_##type_op_size(struct b53_device *dev, u8 page, \
+ u8 reg, val_type val) \
+{ \
+ int ret; \
+ \
+ mutex_lock(&dev->reg_mutex); \
+ ret = dev->ops->type_op_size(dev, page, reg, val); \
+ mutex_unlock(&dev->reg_mutex); \
+ \
+ return ret; \
}
-static inline int b53_read16(struct b53_device *dev, u8 page, u8 reg, u16 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read16(dev, page, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int b53_read32(struct b53_device *dev, u8 page, u8 reg, u32 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read32(dev, page, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int b53_read48(struct b53_device *dev, u8 page, u8 reg, u64 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read48(dev, page, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int b53_read64(struct b53_device *dev, u8 page, u8 reg, u64 *val)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->read64(dev, page, reg, val);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
+b53_build_op(read8, u8 *);
+b53_build_op(read16, u16 *);
+b53_build_op(read32, u32 *);
+b53_build_op(read48, u64 *);
+b53_build_op(read64, u64 *);
-static inline int b53_write8(struct b53_device *dev, u8 page, u8 reg, u8 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write8(dev, page, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int b53_write16(struct b53_device *dev, u8 page, u8 reg,
- u16 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write16(dev, page, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int b53_write32(struct b53_device *dev, u8 page, u8 reg,
- u32 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write32(dev, page, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int b53_write48(struct b53_device *dev, u8 page, u8 reg,
- u64 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write48(dev, page, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
-
-static inline int b53_write64(struct b53_device *dev, u8 page, u8 reg,
- u64 value)
-{
- int ret;
-
- mutex_lock(&dev->reg_mutex);
- ret = dev->ops->write64(dev, page, reg, value);
- mutex_unlock(&dev->reg_mutex);
-
- return ret;
-}
+b53_build_op(write8, u8);
+b53_build_op(write16, u16);
+b53_build_op(write32, u32);
+b53_build_op(write48, u64);
+b53_build_op(write64, u64);
struct b53_arl_entry {
u8 port;
@@ -377,6 +284,7 @@ static inline int b53_switch_get_reset_gpio(struct b53_device *dev)
#endif
/* Exported functions towards other drivers */
+void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port);
void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data);
void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data);
int b53_get_sset_count(struct dsa_switch *ds);
@@ -403,5 +311,12 @@ int b53_mirror_add(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror, bool ingress);
void b53_mirror_del(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror);
+int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
+void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
+void b53_brcm_hdr_setup(struct dsa_switch *ds, int port);
+void b53_eee_enable_set(struct dsa_switch *ds, int port, bool enable);
+int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy);
+int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e);
+int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e);
#endif
diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h
index e5c86d44667a..2a9f421680aa 100644
--- a/drivers/net/dsa/b53/b53_regs.h
+++ b/drivers/net/dsa/b53/b53_regs.h
@@ -50,6 +50,9 @@
/* Jumbo Frame Registers */
#define B53_JUMBO_PAGE 0x40
+/* EEE Control Registers Page */
+#define B53_EEE_PAGE 0x92
+
/* CFP Configuration Registers Page */
#define B53_CFP_PAGE 0xa1
@@ -210,6 +213,7 @@
#define B53_BRCM_HDR 0x03
#define BRCM_HDR_P8_EN BIT(0) /* Enable tagging on port 8 */
#define BRCM_HDR_P5_EN BIT(1) /* Enable tagging on port 5 */
+#define BRCM_HDR_P7_EN BIT(2) /* Enable tagging on port 7 */
/* Mirror capture control register (16 bit) */
#define B53_MIR_CAP_CTL 0x10
@@ -249,6 +253,12 @@
/* Revision ID register (8 bit) */
#define B53_REV_ID 0x40
+/* Broadcom header RX control (16 bit) */
+#define B53_BRCM_HDR_RX_DIS 0x60
+
+/* Broadcom header TX control (16 bit) */
+#define B53_BRCM_HDR_TX_DIS 0x62
+
/*************************************************************************
* ARL Access Page Registers
*************************************************************************/
@@ -465,6 +475,44 @@
#define JMS_MAX_SIZE 9724
/*************************************************************************
+ * EEE Configuration Page Registers
+ *************************************************************************/
+
+/* EEE Enable control register (16 bit) */
+#define B53_EEE_EN_CTRL 0x00
+
+/* EEE LPI assert status register (16 bit) */
+#define B53_EEE_LPI_ASSERT_STS 0x02
+
+/* EEE LPI indicate status register (16 bit) */
+#define B53_EEE_LPI_INDICATE 0x4
+
+/* EEE Receiving idle symbols status register (16 bit) */
+#define B53_EEE_RX_IDLE_SYM_STS 0x6
+
+/* EEE Pipeline timer register (32 bit) */
+#define B53_EEE_PIP_TIMER 0xC
+
+/* EEE Sleep timer Gig register (32 bit) */
+#define B53_EEE_SLEEP_TIMER_GIG(i) (0x10 + 4 * (i))
+
+/* EEE Sleep timer FE register (32 bit) */
+#define B53_EEE_SLEEP_TIMER_FE(i) (0x34 + 4 * (i))
+
+/* EEE Minimum LP timer Gig register (32 bit) */
+#define B53_EEE_MIN_LP_TIMER_GIG(i) (0x58 + 4 * (i))
+
+/* EEE Minimum LP timer FE register (32 bit) */
+#define B53_EEE_MIN_LP_TIMER_FE(i) (0x7c + 4 * (i))
+
+/* EEE Wake timer Gig register (16 bit) */
+#define B53_EEE_WAKE_TIMER_GIG(i) (0xa0 + 2 * (i))
+
+/* EEE Wake timer FE register (16 bit) */
+#define B53_EEE_WAKE_TIMER_FE(i) (0xb2 + 2 * (i))
+
+
+/*************************************************************************
* CFP Configuration Page Registers
*************************************************************************/
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index d7b53d53c116..b43c063b9634 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -40,66 +40,6 @@ static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds)
return DSA_TAG_PROTO_BRCM;
}
-static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port)
-{
- struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- unsigned int i;
- u32 reg;
-
- /* Enable the IMP Port to be in the same VLAN as the other ports
- * on a per-port basis such that we only have Port i and IMP in
- * the same VLAN.
- */
- for (i = 0; i < priv->hw_params.num_ports; i++) {
- if (!((1 << i) & ds->enabled_port_mask))
- continue;
-
- reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i));
- reg |= (1 << cpu_port);
- core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i));
- }
-}
-
-static void bcm_sf2_brcm_hdr_setup(struct bcm_sf2_priv *priv, int port)
-{
- u32 reg, val;
-
- /* Resolve which bit controls the Broadcom tag */
- switch (port) {
- case 8:
- val = BRCM_HDR_EN_P8;
- break;
- case 7:
- val = BRCM_HDR_EN_P7;
- break;
- case 5:
- val = BRCM_HDR_EN_P5;
- break;
- default:
- val = 0;
- break;
- }
-
- /* Enable Broadcom tags for IMP port */
- reg = core_readl(priv, CORE_BRCM_HDR_CTRL);
- reg |= val;
- core_writel(priv, reg, CORE_BRCM_HDR_CTRL);
-
- /* Enable reception Broadcom tag for CPU TX (switch RX) to
- * allow us to tag outgoing frames
- */
- reg = core_readl(priv, CORE_BRCM_HDR_RX_DIS);
- reg &= ~(1 << port);
- core_writel(priv, reg, CORE_BRCM_HDR_RX_DIS);
-
- /* Enable transmission of Broadcom tags from the switch (CPU RX) to
- * allow delivering frames to the per-port net_devices
- */
- reg = core_readl(priv, CORE_BRCM_HDR_TX_DIS);
- reg &= ~(1 << port);
- core_writel(priv, reg, CORE_BRCM_HDR_TX_DIS);
-}
-
static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
@@ -138,7 +78,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
reg |= i << (PRT_TO_QID_SHIFT * i);
core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port));
- bcm_sf2_brcm_hdr_setup(priv, port);
+ b53_brcm_hdr_setup(ds, port);
/* Force link status for IMP port */
reg = core_readl(priv, offset);
@@ -146,19 +86,6 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
core_writel(priv, reg, offset);
}
-static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable)
-{
- struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- u32 reg;
-
- reg = core_readl(priv, CORE_EEE_EN_CTRL);
- if (enable)
- reg |= 1 << port;
- else
- reg &= ~(1 << port);
- core_writel(priv, reg, CORE_EEE_EN_CTRL);
-}
-
static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
@@ -236,7 +163,6 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
struct phy_device *phy)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- s8 cpu_port = ds->dst->cpu_dp->index;
unsigned int i;
u32 reg;
@@ -247,7 +173,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
/* Enable Broadcom tags for that port if requested */
if (priv->brcm_tag_mask & BIT(port))
- bcm_sf2_brcm_hdr_setup(priv, port);
+ b53_brcm_hdr_setup(ds, port);
/* Configure Traffic Class to QoS mapping, allow each priority to map
* to a different queue number
@@ -257,9 +183,6 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
reg |= i << (PRT_TO_QID_SHIFT * i);
core_writel(priv, reg, CORE_PORT_TC2_QOS_MAP_PORT(port));
- /* Clear the Rx and Tx disable bits and set to no spanning tree */
- core_writel(priv, 0, CORE_G_PCTL_PORT(port));
-
/* Re-enable the GPHY and re-apply workarounds */
if (priv->int_phy_mask & 1 << port && priv->hw_params.num_gphy == 1) {
bcm_sf2_gphy_enable_set(ds, true);
@@ -282,23 +205,20 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
if (port == priv->moca_port)
bcm_sf2_port_intr_enable(priv, port);
- /* Set this port, and only this one to be in the default VLAN,
- * if member of a bridge, restore its membership prior to
- * bringing down this port.
- */
- reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port));
- reg &= ~PORT_VLAN_CTRL_MASK;
- reg |= (1 << port);
- reg |= priv->dev->ports[port].vlan_ctl_mask;
- core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port));
-
- bcm_sf2_imp_vlan_setup(ds, cpu_port);
-
- /* If EEE was enabled, restore it */
- if (priv->port_sts[port].eee.eee_enabled)
- bcm_sf2_eee_enable_set(ds, port, true);
+ /* Set per-queue pause threshold to 32 */
+ core_writel(priv, 32, CORE_TXQ_THD_PAUSE_QN_PORT(port));
+
+ /* Set ACB threshold to 24 */
+ for (i = 0; i < SF2_NUM_EGRESS_QUEUES; i++) {
+ reg = acb_readl(priv, ACB_QUEUE_CFG(port *
+ SF2_NUM_EGRESS_QUEUES + i));
+ reg &= ~XOFF_THRESHOLD_MASK;
+ reg |= 24;
+ acb_writel(priv, reg, ACB_QUEUE_CFG(port *
+ SF2_NUM_EGRESS_QUEUES + i));
+ }
- return 0;
+ return b53_enable_port(ds, port, phy);
}
static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
@@ -321,9 +241,7 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
else
off = CORE_G_PCTL_PORT(port);
- reg = core_readl(priv, off);
- reg |= RX_DIS | TX_DIS;
- core_writel(priv, reg, off);
+ b53_disable_port(ds, port, phy);
/* Power down the port memory */
reg = core_readl(priv, CORE_MEM_PSM_VDD_CTRL);
@@ -331,47 +249,6 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port,
core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
}
-/* Returns 0 if EEE was not enabled, or 1 otherwise
- */
-static int bcm_sf2_eee_init(struct dsa_switch *ds, int port,
- struct phy_device *phy)
-{
- int ret;
-
- ret = phy_init_eee(phy, 0);
- if (ret)
- return 0;
-
- bcm_sf2_eee_enable_set(ds, port, true);
-
- return 1;
-}
-
-static int bcm_sf2_sw_get_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
-{
- struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- struct ethtool_eee *p = &priv->port_sts[port].eee;
- u32 reg;
-
- reg = core_readl(priv, CORE_EEE_LPI_INDICATE);
- e->eee_enabled = p->eee_enabled;
- e->eee_active = !!(reg & (1 << port));
-
- return 0;
-}
-
-static int bcm_sf2_sw_set_mac_eee(struct dsa_switch *ds, int port,
- struct ethtool_eee *e)
-{
- struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- struct ethtool_eee *p = &priv->port_sts[port].eee;
-
- p->eee_enabled = e->eee_enabled;
- bcm_sf2_eee_enable_set(ds, port, e->eee_enabled);
-
- return 0;
-}
static int bcm_sf2_sw_indir_rw(struct bcm_sf2_priv *priv, int op, int addr,
int regnum, u16 val)
@@ -606,7 +483,7 @@ static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port,
struct phy_device *phydev)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- struct ethtool_eee *p = &priv->port_sts[port].eee;
+ struct ethtool_eee *p = &priv->dev->ports[port].eee;
u32 id_mode_dis = 0, port_mode;
const char *str = NULL;
u32 reg, offset;
@@ -688,7 +565,7 @@ force_link:
core_writel(priv, reg, offset);
if (!phydev->is_pseudo_fixed_link)
- p->eee_enabled = bcm_sf2_eee_init(ds, port, phydev);
+ p->eee_enabled = b53_eee_init(ds, port, phydev);
}
static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
@@ -724,7 +601,7 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
* state machine and make it go in PHY_FORCING state instead.
*/
if (!status->link)
- netif_carrier_off(ds->ports[port].netdev);
+ netif_carrier_off(ds->ports[port].slave);
status->duplex = 1;
} else {
status->link = 1;
@@ -749,6 +626,20 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port,
status->pause = 1;
}
+static void bcm_sf2_enable_acb(struct dsa_switch *ds)
+{
+ struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
+ u32 reg;
+
+ /* Enable ACB globally */
+ reg = acb_readl(priv, ACB_CONTROL);
+ reg |= (ACB_FLUSH_MASK << ACB_FLUSH_SHIFT);
+ acb_writel(priv, reg, ACB_CONTROL);
+ reg &= ~(ACB_FLUSH_MASK << ACB_FLUSH_SHIFT);
+ reg |= ACB_EN | ACB_ALGORITHM;
+ acb_writel(priv, reg, ACB_CONTROL);
+}
+
static int bcm_sf2_sw_suspend(struct dsa_switch *ds)
{
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
@@ -791,13 +682,15 @@ static int bcm_sf2_sw_resume(struct dsa_switch *ds)
bcm_sf2_imp_setup(ds, port);
}
+ bcm_sf2_enable_acb(ds);
+
return 0;
}
static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
struct ethtool_wolinfo *wol)
{
- struct net_device *p = ds->dst->cpu_dp->netdev;
+ struct net_device *p = ds->ports[port].cpu_dp->master;
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
struct ethtool_wolinfo pwol;
@@ -820,9 +713,9 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port,
static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port,
struct ethtool_wolinfo *wol)
{
- struct net_device *p = ds->dst->cpu_dp->netdev;
+ struct net_device *p = ds->ports[port].cpu_dp->master;
struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds);
- s8 cpu_port = ds->dst->cpu_dp->index;
+ s8 cpu_port = ds->ports[port].cpu_dp->index;
struct ethtool_wolinfo pwol;
p->ethtool_ops->get_wol(p, &pwol);
@@ -902,6 +795,7 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds)
}
bcm_sf2_sw_configure_vlan(ds);
+ bcm_sf2_enable_acb(ds);
return 0;
}
@@ -1020,8 +914,8 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
.set_wol = bcm_sf2_sw_set_wol,
.port_enable = bcm_sf2_port_setup,
.port_disable = bcm_sf2_port_disable,
- .get_mac_eee = bcm_sf2_sw_get_mac_eee,
- .set_mac_eee = bcm_sf2_sw_set_mac_eee,
+ .get_mac_eee = b53_get_mac_eee,
+ .set_mac_eee = b53_set_mac_eee,
.port_bridge_join = b53_br_join,
.port_bridge_leave = b53_br_leave,
.port_stp_state_set = b53_br_set_stp_state,
diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h
index 02c499f9c56b..1922e027ff59 100644
--- a/drivers/net/dsa/bcm_sf2.h
+++ b/drivers/net/dsa/bcm_sf2.h
@@ -48,8 +48,6 @@ struct bcm_sf2_hw_params {
struct bcm_sf2_port_status {
unsigned int link;
-
- struct ethtool_eee eee;
};
struct bcm_sf2_cfp_priv {
diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
index 8a1da7e67707..94649e1481ec 100644
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -144,7 +144,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port,
* destination port is enabled and that we are within the
* number of ports supported by the switch
*/
- port_num = fs->ring_cookie / 8;
+ port_num = fs->ring_cookie / SF2_NUM_EGRESS_QUEUES;
if (fs->ring_cookie == RX_CLS_FLOW_DISC ||
!(BIT(port_num) & ds->enabled_port_mask) ||
@@ -280,7 +280,7 @@ static int bcm_sf2_cfp_rule_set(struct dsa_switch *ds, int port,
* We have a small oddity where Port 6 just does not have a
* valid bit here (so we subtract by one).
*/
- queue_num = fs->ring_cookie % 8;
+ queue_num = fs->ring_cookie % SF2_NUM_EGRESS_QUEUES;
if (port_num >= 7)
port_num -= 1;
@@ -401,7 +401,7 @@ static int bcm_sf2_cfp_rule_get(struct bcm_sf2_priv *priv, int port,
/* There is no Port 6, so we compensate for that here */
if (nfc->fs.ring_cookie >= 6)
nfc->fs.ring_cookie++;
- nfc->fs.ring_cookie *= 8;
+ nfc->fs.ring_cookie *= SF2_NUM_EGRESS_QUEUES;
/* Extract the destination queue */
queue_num = (reg >> NEW_TC_SHIFT) & NEW_TC_MASK;
diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h
index 49695fcc2ea8..d1596dfca323 100644
--- a/drivers/net/dsa/bcm_sf2_regs.h
+++ b/drivers/net/dsa/bcm_sf2_regs.h
@@ -115,6 +115,24 @@ enum bcm_sf2_reg_offs {
#define P7_IRQ_OFF 0
#define P_IRQ_OFF(x) ((6 - (x)) * P_NUM_IRQ)
+/* Register set relative to 'ACB' */
+#define ACB_CONTROL 0x00
+#define ACB_EN (1 << 0)
+#define ACB_ALGORITHM (1 << 1)
+#define ACB_FLUSH_SHIFT 2
+#define ACB_FLUSH_MASK 0x3
+
+#define ACB_QUEUE_0_CFG 0x08
+#define XOFF_THRESHOLD_MASK 0x7ff
+#define XON_EN (1 << 11)
+#define TOTAL_XOFF_THRESHOLD_SHIFT 12
+#define TOTAL_XOFF_THRESHOLD_MASK 0x7ff
+#define TOTAL_XOFF_EN (1 << 23)
+#define TOTAL_XON_EN (1 << 24)
+#define PKTLEN_SHIFT 25
+#define PKTLEN_MASK 0x3f
+#define ACB_QUEUE_CFG(x) (ACB_QUEUE_0_CFG + ((x) * 0x4))
+
/* Register set relative to 'CORE' */
#define CORE_G_PCTL_PORT0 0x00000
#define CORE_G_PCTL_PORT(x) (CORE_G_PCTL_PORT0 + (x * 0x4))
@@ -205,16 +223,8 @@ enum bcm_sf2_reg_offs {
#define CORE_IMP0_PRT_ID 0x0804
-#define CORE_BRCM_HDR_CTRL 0x0080c
-#define BRCM_HDR_EN_P8 (1 << 0)
-#define BRCM_HDR_EN_P5 (1 << 1)
-#define BRCM_HDR_EN_P7 (1 << 2)
-
#define CORE_RST_MIB_CNT_EN 0x0950
-#define CORE_BRCM_HDR_RX_DIS 0x0980
-#define CORE_BRCM_HDR_TX_DIS 0x0988
-
#define CORE_ARLA_VTBL_RWCTRL 0x1600
#define ARLA_VTBL_CMD_WRITE 0
#define ARLA_VTBL_CMD_READ 1
@@ -245,6 +255,11 @@ enum bcm_sf2_reg_offs {
#define CORE_PORT_VLAN_CTL_PORT(x) (0xc400 + ((x) * 0x8))
#define PORT_VLAN_CTRL_MASK 0x1ff
+#define CORE_TXQ_THD_PAUSE_QN_PORT_0 0x2c80
+#define TXQ_PAUSE_THD_MASK 0x7ff
+#define CORE_TXQ_THD_PAUSE_QN_PORT(x) (CORE_TXQ_THD_PAUSE_QN_PORT_0 + \
+ (x) * 0x8)
+
#define CORE_DEFAULT_1Q_TAG_P(x) (0xd040 + ((x) * 8))
#define CFI_SHIFT 12
#define PRI_SHIFT 13
@@ -252,9 +267,6 @@ enum bcm_sf2_reg_offs {
#define CORE_JOIN_ALL_VLAN_EN 0xd140
-#define CORE_EEE_EN_CTRL 0x24800
-#define CORE_EEE_LPI_INDICATE 0x24810
-
#define CORE_CFP_ACC 0x28000
#define OP_STR_DONE (1 << 0)
#define OP_SEL_SHIFT 1
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index d55051abf4ed..3a3f4f7ba364 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -110,13 +110,6 @@ static void dsa_loop_get_ethtool_stats(struct dsa_switch *ds, int port,
data[i] = ps->ports[port].mib[i].val;
}
-static int dsa_loop_set_addr(struct dsa_switch *ds, u8 *addr)
-{
- dev_dbg(ds->dev, "%s\n", __func__);
-
- return 0;
-}
-
static int dsa_loop_phy_read(struct dsa_switch *ds, int port, int regnum)
{
struct dsa_loop_priv *ps = ds->priv;
@@ -263,7 +256,6 @@ static const struct dsa_switch_ops dsa_loop_driver = {
.get_strings = dsa_loop_get_strings,
.get_ethtool_stats = dsa_loop_get_ethtool_stats,
.get_sset_count = dsa_loop_get_sset_count,
- .set_addr = dsa_loop_set_addr,
.phy_read = dsa_loop_phy_read,
.phy_write = dsa_loop_phy_write,
.port_bridge_join = dsa_loop_port_bridge_join,
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index b471413d3df9..87f919f0e641 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -17,6 +17,9 @@
#include <linux/regmap.h>
#include <linux/mutex.h>
#include <linux/mii.h>
+#include <linux/phy.h>
+#include <linux/if_bridge.h>
+#include <linux/etherdevice.h>
#include "lan9303.h"
@@ -57,6 +60,7 @@
#define LAN9303_SWITCH_CSR_CMD_LANES (BIT(19) | BIT(18) | BIT(17) | BIT(16))
#define LAN9303_VIRT_PHY_BASE 0x70
#define LAN9303_VIRT_SPECIAL_CTRL 0x77
+#define LAN9303_VIRT_SPECIAL_TURBO BIT(10) /*Turbo MII Enable*/
/*13.4 Switch Fabric Control and Status Registers
* Accessed indirectly via SWITCH_CSR_CMD, SWITCH_CSR_DATA.
@@ -121,6 +125,21 @@
#define LAN9303_MAC_RX_CFG_2 0x0c01
#define LAN9303_MAC_TX_CFG_2 0x0c40
#define LAN9303_SWE_ALR_CMD 0x1800
+# define LAN9303_ALR_CMD_MAKE_ENTRY BIT(2)
+# define LAN9303_ALR_CMD_GET_FIRST BIT(1)
+# define LAN9303_ALR_CMD_GET_NEXT BIT(0)
+#define LAN9303_SWE_ALR_WR_DAT_0 0x1801
+#define LAN9303_SWE_ALR_WR_DAT_1 0x1802
+# define LAN9303_ALR_DAT1_VALID BIT(26)
+# define LAN9303_ALR_DAT1_END_OF_TABL BIT(25)
+# define LAN9303_ALR_DAT1_AGE_OVERRID BIT(25)
+# define LAN9303_ALR_DAT1_STATIC BIT(24)
+# define LAN9303_ALR_DAT1_PORT_BITOFFS 16
+# define LAN9303_ALR_DAT1_PORT_MASK (7 << LAN9303_ALR_DAT1_PORT_BITOFFS)
+#define LAN9303_SWE_ALR_RD_DAT_0 0x1805
+#define LAN9303_SWE_ALR_RD_DAT_1 0x1806
+#define LAN9303_SWE_ALR_CMD_STS 0x1808
+# define ALR_STS_MAKE_PEND BIT(0)
#define LAN9303_SWE_VLAN_CMD 0x180b
# define LAN9303_SWE_VLAN_CMD_RNW BIT(5)
# define LAN9303_SWE_VLAN_CMD_PVIDNVLAN BIT(4)
@@ -144,6 +163,7 @@
# define LAN9303_SWE_PORT_STATE_FORWARDING_PORT0 (0)
# define LAN9303_SWE_PORT_STATE_LEARNING_PORT0 BIT(1)
# define LAN9303_SWE_PORT_STATE_BLOCKING_PORT0 BIT(0)
+# define LAN9303_SWE_PORT_STATE_DISABLED_PORT0 (3)
#define LAN9303_SWE_PORT_MIRROR 0x1846
# define LAN9303_SWE_PORT_MIRROR_SNIFF_ALL BIT(8)
# define LAN9303_SWE_PORT_MIRROR_SNIFFER_PORT2 BIT(7)
@@ -154,7 +174,9 @@
# define LAN9303_SWE_PORT_MIRROR_MIRRORED_PORT0 BIT(2)
# define LAN9303_SWE_PORT_MIRROR_ENABLE_RX_MIRRORING BIT(1)
# define LAN9303_SWE_PORT_MIRROR_ENABLE_TX_MIRRORING BIT(0)
+# define LAN9303_SWE_PORT_MIRROR_DISABLED 0
#define LAN9303_SWE_INGRESS_PORT_TYPE 0x1847
+#define LAN9303_SWE_INGRESS_PORT_TYPE_VLAN 3
#define LAN9303_BM_CFG 0x1c00
#define LAN9303_BM_EGRSS_PORT_TYPE 0x1c0c
# define LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT2 (BIT(17) | BIT(16))
@@ -472,6 +494,220 @@ static int lan9303_detect_phy_setup(struct lan9303 *chip)
return 0;
}
+/* Map ALR-port bits to port bitmap, and back */
+static const int alrport_2_portmap[] = {1, 2, 4, 0, 3, 5, 6, 7 };
+static const int portmap_2_alrport[] = {3, 0, 1, 4, 2, 5, 6, 7 };
+
+/* Return pointer to first free ALR cache entry, return NULL if none */
+static struct lan9303_alr_cache_entry *
+lan9303_alr_cache_find_free(struct lan9303 *chip)
+{
+ int i;
+ struct lan9303_alr_cache_entry *entr = chip->alr_cache;
+
+ for (i = 0; i < LAN9303_NUM_ALR_RECORDS; i++, entr++)
+ if (entr->port_map == 0)
+ return entr;
+
+ return NULL;
+}
+
+/* Return pointer to ALR cache entry matching MAC address */
+static struct lan9303_alr_cache_entry *
+lan9303_alr_cache_find_mac(struct lan9303 *chip, const u8 *mac_addr)
+{
+ int i;
+ struct lan9303_alr_cache_entry *entr = chip->alr_cache;
+
+ BUILD_BUG_ON_MSG(sizeof(struct lan9303_alr_cache_entry) & 1,
+ "ether_addr_equal require u16 alignment");
+
+ for (i = 0; i < LAN9303_NUM_ALR_RECORDS; i++, entr++)
+ if (ether_addr_equal(entr->mac_addr, mac_addr))
+ return entr;
+
+ return NULL;
+}
+
+/* Wait a while until mask & reg == value. Otherwise return timeout. */
+static int lan9303_csr_reg_wait(struct lan9303 *chip, int regno,
+ int mask, char value)
+{
+ int i;
+
+ for (i = 0; i < 0x1000; i++) {
+ u32 reg;
+
+ lan9303_read_switch_reg(chip, regno, &reg);
+ if ((reg & mask) == value)
+ return 0;
+ usleep_range(1000, 2000);
+ }
+ return -ETIMEDOUT;
+}
+
+static int lan9303_alr_make_entry_raw(struct lan9303 *chip, u32 dat0, u32 dat1)
+{
+ lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_WR_DAT_0, dat0);
+ lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_WR_DAT_1, dat1);
+ lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD,
+ LAN9303_ALR_CMD_MAKE_ENTRY);
+ lan9303_csr_reg_wait(chip, LAN9303_SWE_ALR_CMD_STS, ALR_STS_MAKE_PEND,
+ 0);
+ lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0);
+
+ return 0;
+}
+
+typedef void alr_loop_cb_t(struct lan9303 *chip, u32 dat0, u32 dat1,
+ int portmap, void *ctx);
+
+static void lan9303_alr_loop(struct lan9303 *chip, alr_loop_cb_t *cb, void *ctx)
+{
+ int i;
+
+ lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD,
+ LAN9303_ALR_CMD_GET_FIRST);
+ lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0);
+
+ for (i = 1; i < LAN9303_NUM_ALR_RECORDS; i++) {
+ u32 dat0, dat1;
+ int alrport, portmap;
+
+ lan9303_read_switch_reg(chip, LAN9303_SWE_ALR_RD_DAT_0, &dat0);
+ lan9303_read_switch_reg(chip, LAN9303_SWE_ALR_RD_DAT_1, &dat1);
+ if (dat1 & LAN9303_ALR_DAT1_END_OF_TABL)
+ break;
+
+ alrport = (dat1 & LAN9303_ALR_DAT1_PORT_MASK) >>
+ LAN9303_ALR_DAT1_PORT_BITOFFS;
+ portmap = alrport_2_portmap[alrport];
+
+ cb(chip, dat0, dat1, portmap, ctx);
+
+ lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD,
+ LAN9303_ALR_CMD_GET_NEXT);
+ lan9303_write_switch_reg(chip, LAN9303_SWE_ALR_CMD, 0);
+ }
+}
+
+static void alr_reg_to_mac(u32 dat0, u32 dat1, u8 mac[6])
+{
+ mac[0] = (dat0 >> 0) & 0xff;
+ mac[1] = (dat0 >> 8) & 0xff;
+ mac[2] = (dat0 >> 16) & 0xff;
+ mac[3] = (dat0 >> 24) & 0xff;
+ mac[4] = (dat1 >> 0) & 0xff;
+ mac[5] = (dat1 >> 8) & 0xff;
+}
+
+struct del_port_learned_ctx {
+ int port;
+};
+
+/* Clear learned (non-static) entry on given port */
+static void alr_loop_cb_del_port_learned(struct lan9303 *chip, u32 dat0,
+ u32 dat1, int portmap, void *ctx)
+{
+ struct del_port_learned_ctx *del_ctx = ctx;
+ int port = del_ctx->port;
+
+ if (((BIT(port) & portmap) == 0) || (dat1 & LAN9303_ALR_DAT1_STATIC))
+ return;
+
+ /* learned entries has only one port, we can just delete */
+ dat1 &= ~LAN9303_ALR_DAT1_VALID; /* delete entry */
+ lan9303_alr_make_entry_raw(chip, dat0, dat1);
+}
+
+struct port_fdb_dump_ctx {
+ int port;
+ void *data;
+ dsa_fdb_dump_cb_t *cb;
+};
+
+static void alr_loop_cb_fdb_port_dump(struct lan9303 *chip, u32 dat0,
+ u32 dat1, int portmap, void *ctx)
+{
+ struct port_fdb_dump_ctx *dump_ctx = ctx;
+ u8 mac[ETH_ALEN];
+ bool is_static;
+
+ if ((BIT(dump_ctx->port) & portmap) == 0)
+ return;
+
+ alr_reg_to_mac(dat0, dat1, mac);
+ is_static = !!(dat1 & LAN9303_ALR_DAT1_STATIC);
+ dump_ctx->cb(mac, 0, is_static, dump_ctx->data);
+}
+
+/* Set a static ALR entry. Delete entry if port_map is zero */
+static void lan9303_alr_set_entry(struct lan9303 *chip, const u8 *mac,
+ u8 port_map, bool stp_override)
+{
+ u32 dat0, dat1, alr_port;
+
+ dev_dbg(chip->dev, "%s(%pM, %d)\n", __func__, mac, port_map);
+ dat1 = LAN9303_ALR_DAT1_STATIC;
+ if (port_map)
+ dat1 |= LAN9303_ALR_DAT1_VALID;
+ /* otherwise no ports: delete entry */
+ if (stp_override)
+ dat1 |= LAN9303_ALR_DAT1_AGE_OVERRID;
+
+ alr_port = portmap_2_alrport[port_map & 7];
+ dat1 &= ~LAN9303_ALR_DAT1_PORT_MASK;
+ dat1 |= alr_port << LAN9303_ALR_DAT1_PORT_BITOFFS;
+
+ dat0 = 0;
+ dat0 |= (mac[0] << 0);
+ dat0 |= (mac[1] << 8);
+ dat0 |= (mac[2] << 16);
+ dat0 |= (mac[3] << 24);
+
+ dat1 |= (mac[4] << 0);
+ dat1 |= (mac[5] << 8);
+
+ lan9303_alr_make_entry_raw(chip, dat0, dat1);
+}
+
+/* Add port to static ALR entry, create new static entry if needed */
+static int lan9303_alr_add_port(struct lan9303 *chip, const u8 *mac, int port,
+ bool stp_override)
+{
+ struct lan9303_alr_cache_entry *entr;
+
+ entr = lan9303_alr_cache_find_mac(chip, mac);
+ if (!entr) { /*New entry */
+ entr = lan9303_alr_cache_find_free(chip);
+ if (!entr)
+ return -ENOSPC;
+ ether_addr_copy(entr->mac_addr, mac);
+ }
+ entr->port_map |= BIT(port);
+ entr->stp_override = stp_override;
+ lan9303_alr_set_entry(chip, mac, entr->port_map, stp_override);
+
+ return 0;
+}
+
+/* Delete static port from ALR entry, delete entry if last port */
+static int lan9303_alr_del_port(struct lan9303 *chip, const u8 *mac, int port)
+{
+ struct lan9303_alr_cache_entry *entr;
+
+ entr = lan9303_alr_cache_find_mac(chip, mac);
+ if (!entr)
+ return 0; /* no static entry found */
+
+ entr->port_map &= ~BIT(port);
+ if (entr->port_map == 0) /* zero means its free again */
+ eth_zero_addr(&entr->port_map);
+ lan9303_alr_set_entry(chip, mac, entr->port_map, entr->stp_override);
+
+ return 0;
+}
+
static int lan9303_disable_processing_port(struct lan9303 *chip,
unsigned int port)
{
@@ -508,11 +744,30 @@ static int lan9303_enable_processing_port(struct lan9303 *chip,
LAN9303_MAC_TX_CFG_X_TX_ENABLE);
}
+/* forward special tagged packets from port 0 to port 1 *or* port 2 */
+static int lan9303_setup_tagging(struct lan9303 *chip)
+{
+ int ret;
+ u32 val;
+ /* enable defining the destination port via special VLAN tagging
+ * for port 0
+ */
+ ret = lan9303_write_switch_reg(chip, LAN9303_SWE_INGRESS_PORT_TYPE,
+ LAN9303_SWE_INGRESS_PORT_TYPE_VLAN);
+ if (ret)
+ return ret;
+
+ /* tag incoming packets at port 1 and 2 on their way to port 0 to be
+ * able to discover their source port
+ */
+ val = LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT0;
+ return lan9303_write_switch_reg(chip, LAN9303_BM_EGRSS_PORT_TYPE, val);
+}
+
/* We want a special working switch:
* - do not forward packets between port 1 and 2
* - forward everything from port 1 to port 0
* - forward everything from port 2 to port 0
- * - forward special tagged packets from port 0 to port 1 *or* port 2
*/
static int lan9303_separate_ports(struct lan9303 *chip)
{
@@ -527,22 +782,6 @@ static int lan9303_separate_ports(struct lan9303 *chip)
if (ret)
return ret;
- /* enable defining the destination port via special VLAN tagging
- * for port 0
- */
- ret = lan9303_write_switch_reg(chip, LAN9303_SWE_INGRESS_PORT_TYPE,
- 0x03);
- if (ret)
- return ret;
-
- /* tag incoming packets at port 1 and 2 on their way to port 0 to be
- * able to discover their source port
- */
- ret = lan9303_write_switch_reg(chip, LAN9303_BM_EGRSS_PORT_TYPE,
- LAN9303_BM_EGRSS_PORT_TYPE_SPECIAL_TAG_PORT0);
- if (ret)
- return ret;
-
/* prevent port 1 and 2 from forwarding packets by their own */
return lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_STATE,
LAN9303_SWE_PORT_STATE_FORWARDING_PORT0 |
@@ -550,6 +789,16 @@ static int lan9303_separate_ports(struct lan9303 *chip)
LAN9303_SWE_PORT_STATE_BLOCKING_PORT2);
}
+static void lan9303_bridge_ports(struct lan9303 *chip)
+{
+ /* ports bridged: remove mirroring */
+ lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_MIRROR,
+ LAN9303_SWE_PORT_MIRROR_DISABLED);
+
+ lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_STATE,
+ chip->swe_port_state);
+}
+
static int lan9303_handle_reset(struct lan9303 *chip)
{
if (!chip->reset_gpio)
@@ -642,6 +891,10 @@ static int lan9303_setup(struct dsa_switch *ds)
return -EINVAL;
}
+ ret = lan9303_setup_tagging(chip);
+ if (ret)
+ dev_err(chip->dev, "failed to setup port tagging %d\n", ret);
+
ret = lan9303_separate_ports(chip);
if (ret)
dev_err(chip->dev, "failed to separate ports %d\n", ret);
@@ -760,6 +1013,43 @@ static int lan9303_phy_write(struct dsa_switch *ds, int phy, int regnum,
return chip->ops->phy_write(chip, phy, regnum, val);
}
+static void lan9303_adjust_link(struct dsa_switch *ds, int port,
+ struct phy_device *phydev)
+{
+ struct lan9303 *chip = ds->priv;
+ int ctl, res;
+
+ if (!phy_is_pseudo_fixed_link(phydev))
+ return;
+
+ ctl = lan9303_phy_read(ds, port, MII_BMCR);
+
+ ctl &= ~BMCR_ANENABLE;
+
+ if (phydev->speed == SPEED_100)
+ ctl |= BMCR_SPEED100;
+ else if (phydev->speed == SPEED_10)
+ ctl &= ~BMCR_SPEED100;
+ else
+ dev_err(ds->dev, "unsupported speed: %d\n", phydev->speed);
+
+ if (phydev->duplex == DUPLEX_FULL)
+ ctl |= BMCR_FULLDPLX;
+ else
+ ctl &= ~BMCR_FULLDPLX;
+
+ res = lan9303_phy_write(ds, port, MII_BMCR, ctl);
+
+ if (port == chip->phy_addr_sel_strap) {
+ /* Virtual Phy: Remove Turbo 200Mbit mode */
+ lan9303_read(chip->regmap, LAN9303_VIRT_SPECIAL_CTRL, &ctl);
+
+ ctl &= ~LAN9303_VIRT_SPECIAL_TURBO;
+ res = regmap_write(chip->regmap,
+ LAN9303_VIRT_SPECIAL_CTRL, ctl);
+ }
+}
+
static int lan9303_port_enable(struct dsa_switch *ds, int port,
struct phy_device *phy)
{
@@ -797,16 +1087,189 @@ static void lan9303_port_disable(struct dsa_switch *ds, int port,
}
}
+static int lan9303_port_bridge_join(struct dsa_switch *ds, int port,
+ struct net_device *br)
+{
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(port %d)\n", __func__, port);
+ if (dsa_to_port(ds, 1)->bridge_dev == dsa_to_port(ds, 2)->bridge_dev) {
+ lan9303_bridge_ports(chip);
+ chip->is_bridged = true; /* unleash stp_state_set() */
+ }
+
+ return 0;
+}
+
+static void lan9303_port_bridge_leave(struct dsa_switch *ds, int port,
+ struct net_device *br)
+{
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(port %d)\n", __func__, port);
+ if (chip->is_bridged) {
+ lan9303_separate_ports(chip);
+ chip->is_bridged = false;
+ }
+}
+
+static void lan9303_port_stp_state_set(struct dsa_switch *ds, int port,
+ u8 state)
+{
+ int portmask, portstate;
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(port %d, state %d)\n",
+ __func__, port, state);
+
+ switch (state) {
+ case BR_STATE_DISABLED:
+ portstate = LAN9303_SWE_PORT_STATE_DISABLED_PORT0;
+ break;
+ case BR_STATE_BLOCKING:
+ case BR_STATE_LISTENING:
+ portstate = LAN9303_SWE_PORT_STATE_BLOCKING_PORT0;
+ break;
+ case BR_STATE_LEARNING:
+ portstate = LAN9303_SWE_PORT_STATE_LEARNING_PORT0;
+ break;
+ case BR_STATE_FORWARDING:
+ portstate = LAN9303_SWE_PORT_STATE_FORWARDING_PORT0;
+ break;
+ default:
+ portstate = LAN9303_SWE_PORT_STATE_DISABLED_PORT0;
+ dev_err(chip->dev, "unknown stp state: port %d, state %d\n",
+ port, state);
+ }
+
+ portmask = 0x3 << (port * 2);
+ portstate <<= (port * 2);
+
+ chip->swe_port_state = (chip->swe_port_state & ~portmask) | portstate;
+
+ if (chip->is_bridged)
+ lan9303_write_switch_reg(chip, LAN9303_SWE_PORT_STATE,
+ chip->swe_port_state);
+ /* else: touching SWE_PORT_STATE would break port separation */
+}
+
+static void lan9303_port_fast_age(struct dsa_switch *ds, int port)
+{
+ struct lan9303 *chip = ds->priv;
+ struct del_port_learned_ctx del_ctx = {
+ .port = port,
+ };
+
+ dev_dbg(chip->dev, "%s(%d)\n", __func__, port);
+ lan9303_alr_loop(chip, alr_loop_cb_del_port_learned, &del_ctx);
+}
+
+static int lan9303_port_fdb_add(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
+{
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid);
+ if (vid)
+ return -EOPNOTSUPP;
+
+ return lan9303_alr_add_port(chip, addr, port, false);
+}
+
+static int lan9303_port_fdb_del(struct dsa_switch *ds, int port,
+ const unsigned char *addr, u16 vid)
+
+{
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, addr, vid);
+ if (vid)
+ return -EOPNOTSUPP;
+ lan9303_alr_del_port(chip, addr, port);
+
+ return 0;
+}
+
+static int lan9303_port_fdb_dump(struct dsa_switch *ds, int port,
+ dsa_fdb_dump_cb_t *cb, void *data)
+{
+ struct lan9303 *chip = ds->priv;
+ struct port_fdb_dump_ctx dump_ctx = {
+ .port = port,
+ .data = data,
+ .cb = cb,
+ };
+
+ dev_dbg(chip->dev, "%s(%d)\n", __func__, port);
+ lan9303_alr_loop(chip, alr_loop_cb_fdb_port_dump, &dump_ctx);
+
+ return 0;
+}
+
+static int lan9303_port_mdb_prepare(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
+{
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, mdb->addr,
+ mdb->vid);
+ if (mdb->vid)
+ return -EOPNOTSUPP;
+ if (lan9303_alr_cache_find_mac(chip, mdb->addr))
+ return 0;
+ if (!lan9303_alr_cache_find_free(chip))
+ return -ENOSPC;
+
+ return 0;
+}
+
+static void lan9303_port_mdb_add(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb,
+ struct switchdev_trans *trans)
+{
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, mdb->addr,
+ mdb->vid);
+ lan9303_alr_add_port(chip, mdb->addr, port, false);
+}
+
+static int lan9303_port_mdb_del(struct dsa_switch *ds, int port,
+ const struct switchdev_obj_port_mdb *mdb)
+{
+ struct lan9303 *chip = ds->priv;
+
+ dev_dbg(chip->dev, "%s(%d, %pM, %d)\n", __func__, port, mdb->addr,
+ mdb->vid);
+ if (mdb->vid)
+ return -EOPNOTSUPP;
+ lan9303_alr_del_port(chip, mdb->addr, port);
+
+ return 0;
+}
+
static const struct dsa_switch_ops lan9303_switch_ops = {
.get_tag_protocol = lan9303_get_tag_protocol,
.setup = lan9303_setup,
.get_strings = lan9303_get_strings,
.phy_read = lan9303_phy_read,
.phy_write = lan9303_phy_write,
+ .adjust_link = lan9303_adjust_link,
.get_ethtool_stats = lan9303_get_ethtool_stats,
.get_sset_count = lan9303_get_sset_count,
.port_enable = lan9303_port_enable,
.port_disable = lan9303_port_disable,
+ .port_bridge_join = lan9303_port_bridge_join,
+ .port_bridge_leave = lan9303_port_bridge_leave,
+ .port_stp_state_set = lan9303_port_stp_state_set,
+ .port_fast_age = lan9303_port_fast_age,
+ .port_fdb_add = lan9303_port_fdb_add,
+ .port_fdb_del = lan9303_port_fdb_del,
+ .port_fdb_dump = lan9303_port_fdb_dump,
+ .port_mdb_prepare = lan9303_port_mdb_prepare,
+ .port_mdb_add = lan9303_port_mdb_add,
+ .port_mdb_del = lan9303_port_mdb_del,
};
static int lan9303_register_switch(struct lan9303 *chip)
diff --git a/drivers/net/dsa/lan9303.h b/drivers/net/dsa/lan9303.h
index 4d8be555ff4d..d807b1be35f2 100644
--- a/drivers/net/dsa/lan9303.h
+++ b/drivers/net/dsa/lan9303.h
@@ -11,6 +11,13 @@ struct lan9303_phy_ops {
int regnum, u16 val);
};
+#define LAN9303_NUM_ALR_RECORDS 512
+struct lan9303_alr_cache_entry {
+ u8 mac_addr[ETH_ALEN];
+ u8 port_map; /* Bitmap of ports. Zero if unused entry */
+ u8 stp_override; /* non zero if set ALR_DAT1_AGE_OVERRID */
+};
+
struct lan9303 {
struct device *dev;
struct regmap *regmap;
@@ -21,6 +28,12 @@ struct lan9303 {
struct dsa_switch *ds;
struct mutex indirect_mutex; /* protect indexed register access */
const struct lan9303_phy_ops *ops;
+ bool is_bridged; /* true if port 1 and 2 are bridged */
+ u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */
+ /* LAN9303 do not offer reading specific ALR entry. Cache all
+ * static entries in a flat table
+ **/
+ struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS];
};
extern const struct regmap_access_table lan9303_register_set;
diff --git a/drivers/net/dsa/lan9303_mdio.c b/drivers/net/dsa/lan9303_mdio.c
index fc16668a487f..0bc56b9900f9 100644
--- a/drivers/net/dsa/lan9303_mdio.c
+++ b/drivers/net/dsa/lan9303_mdio.c
@@ -67,14 +67,15 @@ static int lan9303_mdio_read(void *ctx, uint32_t reg, uint32_t *val)
return 0;
}
-int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg, u16 val)
+static int lan9303_mdio_phy_write(struct lan9303 *chip, int phy, int reg,
+ u16 val)
{
struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev);
return mdiobus_write_nested(sw_dev->device->bus, phy, reg, val);
}
-int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg)
+static int lan9303_mdio_phy_read(struct lan9303 *chip, int phy, int reg)
{
struct lan9303_mdio *sw_dev = dev_get_drvdata(chip->dev);
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index c142b97add2c..21431be2831e 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -564,7 +564,8 @@ static int mt7530_phy_read(struct dsa_switch *ds, int port, int regnum)
return mdiobus_read_nested(priv->bus, port, regnum);
}
-int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val)
+static int mt7530_phy_write(struct dsa_switch *ds, int port, int regnum,
+ u16 val)
{
struct mt7530_priv *priv = ds->priv;
@@ -781,7 +782,7 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port,
* and not being setup until the port becomes enabled.
*/
if (ds->enabled_port_mask & BIT(i) && i != port) {
- if (ds->ports[i].bridge_dev != bridge)
+ if (dsa_to_port(ds, i)->bridge_dev != bridge)
continue;
if (priv->ports[i].enable)
mt7530_set(priv, MT7530_PCR_P(i),
@@ -818,7 +819,7 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port,
* is kept and not being setup until the port becomes enabled.
*/
if (ds->enabled_port_mask & BIT(i) && i != port) {
- if (ds->ports[i].bridge_dev != bridge)
+ if (dsa_to_port(ds, i)->bridge_dev != bridge)
continue;
if (priv->ports[i].enable)
mt7530_clear(priv, MT7530_PCR_P(i),
@@ -928,11 +929,11 @@ mt7530_setup(struct dsa_switch *ds)
struct device_node *dn;
struct mt7530_dummy_poll p;
- /* The parent node of cpu_dp->netdev which holds the common system
+ /* The parent node of master netdev which holds the common system
* controller also is the container for two GMACs nodes representing
* as two netdev instances.
*/
- dn = ds->dst->cpu_dp->netdev->dev.of_node->parent;
+ dn = ds->ports[MT7530_CPU_PORT].master->dev.of_node->parent;
priv->ethernet = syscon_node_to_regmap(dn);
if (IS_ERR(priv->ethernet))
return PTR_ERR(priv->ethernet);
diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index f123ed57630d..f78b9e13be1c 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -9,6 +9,7 @@
*/
#include <linux/delay.h>
+#include <linux/etherdevice.h>
#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/module.h>
@@ -176,7 +177,7 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
((p & 0xf) << PORT_VLAN_MAP_DBNUM_SHIFT) |
(dsa_is_cpu_port(ds, p) ?
ds->enabled_port_mask :
- BIT(ds->dst->cpu_dp->index)));
+ BIT(dsa_to_port(ds, p)->cpu_dp->index)));
/* Port Association Vector: when learning source addresses
* of packets, add the address to the address database using
@@ -188,6 +189,27 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
return 0;
}
+static int mv88e6060_setup_addr(struct dsa_switch *ds)
+{
+ u8 addr[ETH_ALEN];
+ u16 val;
+
+ eth_random_addr(addr);
+
+ val = addr[0] << 8 | addr[1];
+
+ /* The multicast bit is always transmitted as a zero, so the switch uses
+ * bit 8 for "DiffAddr", where 0 means all ports transmit the same SA.
+ */
+ val &= 0xfeff;
+
+ REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, val);
+ REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]);
+ REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]);
+
+ return 0;
+}
+
static int mv88e6060_setup(struct dsa_switch *ds)
{
int ret;
@@ -203,6 +225,10 @@ static int mv88e6060_setup(struct dsa_switch *ds)
if (ret < 0)
return ret;
+ ret = mv88e6060_setup_addr(ds);
+ if (ret < 0)
+ return ret;
+
for (i = 0; i < MV88E6060_PORTS; i++) {
ret = mv88e6060_setup_port(ds, i);
if (ret < 0)
@@ -212,22 +238,6 @@ static int mv88e6060_setup(struct dsa_switch *ds)
return 0;
}
-static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr)
-{
- u16 val = addr[0] << 8 | addr[1];
-
- /* The multicast bit is always transmitted as a zero, so the switch uses
- * bit 8 for "DiffAddr", where 0 means all ports transmit the same SA.
- */
- val &= 0xfeff;
-
- REG_WRITE(REG_GLOBAL, GLOBAL_MAC_01, val);
- REG_WRITE(REG_GLOBAL, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]);
- REG_WRITE(REG_GLOBAL, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]);
-
- return 0;
-}
-
static int mv88e6060_port_to_phy_addr(int port)
{
if (port >= 0 && port < MV88E6060_PORTS)
@@ -262,7 +272,6 @@ static const struct dsa_switch_ops mv88e6060_switch_ops = {
.get_tag_protocol = mv88e6060_get_tag_protocol,
.probe = mv88e6060_drv_probe,
.setup = mv88e6060_setup,
- .set_addr = mv88e6060_set_addr,
.phy_read = mv88e6060_phy_read,
.phy_write = mv88e6060_phy_write,
};
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index d74c7335c512..677d6902807e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -851,7 +851,7 @@ static u16 mv88e6xxx_port_vlan(struct mv88e6xxx_chip *chip, int dev, int port)
for (i = 0; i < mv88e6xxx_num_ports(chip); ++i)
if (dsa_is_cpu_port(chip->ds, i) ||
dsa_is_dsa_port(chip->ds, i) ||
- (br && chip->ds->ports[i].bridge_dev == br))
+ (br && dsa_to_port(chip->ds, i)->bridge_dev == br))
pvlan |= BIT(i);
return pvlan;
@@ -932,6 +932,19 @@ static int mv88e6xxx_irl_setup(struct mv88e6xxx_chip *chip)
return 0;
}
+static int mv88e6xxx_mac_setup(struct mv88e6xxx_chip *chip)
+{
+ if (chip->info->ops->set_switch_mac) {
+ u8 addr[ETH_ALEN];
+
+ eth_random_addr(addr);
+
+ return chip->info->ops->set_switch_mac(chip, addr);
+ }
+
+ return 0;
+}
+
static int mv88e6xxx_pvt_map(struct mv88e6xxx_chip *chip, int dev, int port)
{
u16 pvlan = 0;
@@ -1124,23 +1137,23 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port,
if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i))
continue;
- if (!ds->ports[port].netdev)
+ if (!ds->ports[port].slave)
continue;
if (vlan.member[i] ==
MV88E6XXX_G1_VTU_DATA_MEMBER_TAG_NON_MEMBER)
continue;
- if (ds->ports[i].bridge_dev ==
+ if (dsa_to_port(ds, i)->bridge_dev ==
ds->ports[port].bridge_dev)
break; /* same bridge, check next VLAN */
- if (!ds->ports[i].bridge_dev)
+ if (!dsa_to_port(ds, i)->bridge_dev)
continue;
dev_err(ds->dev, "p%d: hw VLAN %d already used by %s\n",
port, vlan.vid,
- netdev_name(ds->ports[i].bridge_dev));
+ netdev_name(dsa_to_port(ds, i)->bridge_dev));
err = -EOPNOTSUPP;
goto unlock;
}
@@ -2013,6 +2026,10 @@ static int mv88e6xxx_setup(struct dsa_switch *ds)
if (err)
goto unlock;
+ err = mv88e6xxx_mac_setup(chip);
+ if (err)
+ goto unlock;
+
err = mv88e6xxx_phy_setup(chip);
if (err)
goto unlock;
@@ -2043,21 +2060,6 @@ unlock:
return err;
}
-static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr)
-{
- struct mv88e6xxx_chip *chip = ds->priv;
- int err;
-
- if (!chip->info->ops->set_switch_mac)
- return -EOPNOTSUPP;
-
- mutex_lock(&chip->reg_lock);
- err = chip->info->ops->set_switch_mac(chip, addr);
- mutex_unlock(&chip->reg_lock);
-
- return err;
-}
-
static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg)
{
struct mv88e6xxx_mdio_bus *mdio_bus = bus->priv;
@@ -3785,7 +3787,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = {
.probe = mv88e6xxx_drv_probe,
.get_tag_protocol = mv88e6xxx_get_tag_protocol,
.setup = mv88e6xxx_setup,
- .set_addr = mv88e6xxx_set_addr,
.adjust_link = mv88e6xxx_adjust_link,
.get_strings = mv88e6xxx_get_strings,
.get_ethtool_stats = mv88e6xxx_get_ethtool_stats,
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index 5ada7a41449c..d1b0b1fb632f 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -506,7 +506,7 @@ qca8k_setup(struct dsa_switch *ds)
pr_warn("regmap initialization failed");
/* Initialize CPU port pad mode (xMII type, delays...) */
- phy_mode = of_get_phy_mode(ds->dst->cpu_dp->dn);
+ phy_mode = of_get_phy_mode(ds->ports[QCA8K_CPU_PORT].dn);
if (phy_mode < 0) {
pr_err("Can't find phy-mode for master device\n");
return phy_mode;
@@ -700,7 +700,7 @@ qca8k_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *br)
int i;
for (i = 1; i < QCA8K_NUM_PORTS; i++) {
- if (ds->ports[i].bridge_dev != br)
+ if (dsa_to_port(ds, i)->bridge_dev != br)
continue;
/* Add this port to the portvlan mask of the other ports
* in the bridge
@@ -725,7 +725,7 @@ qca8k_port_bridge_leave(struct dsa_switch *ds, int port, struct net_device *br)
int i;
for (i = 1; i < QCA8K_NUM_PORTS; i++) {
- if (ds->ports[i].bridge_dev != br)
+ if (dsa_to_port(ds, i)->bridge_dev != br)
continue;
/* Remove this port to the portvlan mask of the other ports
* in the bridge
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index d0a1f9ce3168..58483af80bdb 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -353,7 +353,7 @@ static void dummy_setup(struct net_device *dev)
eth_hw_addr_random(dev);
dev->min_mtu = 0;
- dev->max_mtu = ETH_MAX_MTU;
+ dev->max_mtu = 0;
}
static int dummy_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -388,7 +388,7 @@ static int __init dummy_init_one(void)
int err;
dev_dummy = alloc_netdev(sizeof(struct dummy_priv),
- "dummy%d", NET_NAME_UNKNOWN, dummy_setup);
+ "dummy%d", NET_NAME_ENUM, dummy_setup);
if (!dev_dummy)
return -ENOMEM;
diff --git a/drivers/net/eql.c b/drivers/net/eql.c
index fe13bfea30ac..fccce4b47778 100644
--- a/drivers/net/eql.c
+++ b/drivers/net/eql.c
@@ -178,10 +178,8 @@ static void __init eql_setup(struct net_device *dev)
{
equalizer_t *eql = netdev_priv(dev);
- init_timer(&eql->timer);
- eql->timer.data = (unsigned long) eql;
+ setup_timer(&eql->timer, eql_timer, (unsigned long)eql);
eql->timer.expires = jiffies + EQL_DEFAULT_RESCHED_IVAL;
- eql->timer.function = eql_timer;
spin_lock_init(&eql->queue.lock);
INIT_LIST_HEAD(&eql->queue.all_slaves);
diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c
index 47c844cc9d27..48bc7fa0258c 100644
--- a/drivers/net/ethernet/3com/3c574_cs.c
+++ b/drivers/net/ethernet/3com/3c574_cs.c
@@ -225,7 +225,7 @@ static unsigned short read_eeprom(unsigned int ioaddr, int index);
static void tc574_wait_for_completion(struct net_device *dev, int cmd);
static void tc574_reset(struct net_device *dev);
-static void media_check(unsigned long arg);
+static void media_check(struct timer_list *t);
static int el3_open(struct net_device *dev);
static netdev_tx_t el3_start_xmit(struct sk_buff *skb,
struct net_device *dev);
@@ -377,7 +377,7 @@ static int tc574_config(struct pcmcia_device *link)
lp->autoselect = config & Autoselect ? 1 : 0;
}
- init_timer(&lp->media);
+ timer_setup(&lp->media, media_check, 0);
{
int phy;
@@ -681,8 +681,6 @@ static int el3_open(struct net_device *dev)
netif_start_queue(dev);
tc574_reset(dev);
- lp->media.function = media_check;
- lp->media.data = (unsigned long) dev;
lp->media.expires = jiffies + HZ;
add_timer(&lp->media);
@@ -859,10 +857,10 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id)
(and as a last resort, poll the NIC for events), and to monitor
the MII, reporting changes in cable status.
*/
-static void media_check(unsigned long arg)
+static void media_check(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) arg;
- struct el3_private *lp = netdev_priv(dev);
+ struct el3_private *lp = from_timer(lp, t, media);
+ struct net_device *dev = lp->p_dev->priv;
unsigned int ioaddr = dev->base_addr;
unsigned long flags;
unsigned short /* cable, */ media, partner;
diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index e28254a00599..2b2695311bda 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c
@@ -163,7 +163,7 @@ static void tc589_release(struct pcmcia_device *link);
static u16 read_eeprom(unsigned int ioaddr, int index);
static void tc589_reset(struct net_device *dev);
-static void media_check(unsigned long arg);
+static void media_check(struct timer_list *t);
static int el3_config(struct net_device *dev, struct ifmap *map);
static int el3_open(struct net_device *dev);
static netdev_tx_t el3_start_xmit(struct sk_buff *skb,
@@ -517,7 +517,7 @@ static int el3_open(struct net_device *dev)
netif_start_queue(dev);
tc589_reset(dev);
- setup_timer(&lp->media, media_check, (unsigned long)dev);
+ timer_setup(&lp->media, media_check, 0);
mod_timer(&lp->media, jiffies + HZ);
dev_dbg(&link->dev, "%s: opened, status %4.4x.\n",
@@ -676,10 +676,10 @@ static irqreturn_t el3_interrupt(int irq, void *dev_id)
return IRQ_RETVAL(handled);
}
-static void media_check(unsigned long arg)
+static void media_check(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)(arg);
- struct el3_private *lp = netdev_priv(dev);
+ struct el3_private *lp = from_timer(lp, t, media);
+ struct net_device *dev = lp->p_dev->priv;
unsigned int ioaddr = dev->base_addr;
u16 media, errs;
unsigned long flags;
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 402d9090ad29..f4e13a7014bd 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -759,8 +759,8 @@ static int vortex_open(struct net_device *dev);
static void mdio_sync(struct vortex_private *vp, int bits);
static int mdio_read(struct net_device *dev, int phy_id, int location);
static void mdio_write(struct net_device *vp, int phy_id, int location, int value);
-static void vortex_timer(unsigned long arg);
-static void rx_oom_timer(unsigned long arg);
+static void vortex_timer(struct timer_list *t);
+static void rx_oom_timer(struct timer_list *t);
static netdev_tx_t vortex_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static netdev_tx_t boomerang_start_xmit(struct sk_buff *skb,
@@ -1599,9 +1599,9 @@ vortex_up(struct net_device *dev)
dev->name, media_tbl[dev->if_port].name);
}
- setup_timer(&vp->timer, vortex_timer, (unsigned long)dev);
+ timer_setup(&vp->timer, vortex_timer, 0);
mod_timer(&vp->timer, RUN_AT(media_tbl[dev->if_port].wait));
- setup_timer(&vp->rx_oom_timer, rx_oom_timer, (unsigned long)dev);
+ timer_setup(&vp->rx_oom_timer, rx_oom_timer, 0);
if (vortex_debug > 1)
pr_debug("%s: Initial media type %s.\n",
@@ -1784,10 +1784,10 @@ out:
}
static void
-vortex_timer(unsigned long data)
+vortex_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct vortex_private *vp = netdev_priv(dev);
+ struct vortex_private *vp = from_timer(vp, t, timer);
+ struct net_device *dev = vp->mii.dev;
void __iomem *ioaddr = vp->ioaddr;
int next_tick = 60*HZ;
int ok = 0;
@@ -2687,10 +2687,10 @@ boomerang_rx(struct net_device *dev)
* for some memory. Otherwise there is no way to restart the rx process.
*/
static void
-rx_oom_timer(unsigned long arg)
+rx_oom_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
- struct vortex_private *vp = netdev_priv(dev);
+ struct vortex_private *vp = from_timer(vp, t, rx_oom_timer);
+ struct net_device *dev = vp->mii.dev;
spin_lock_irq(&vp->lock);
if ((vp->cur_rx - vp->dirty_rx) == RX_RING_SIZE) /* This test is redundant, but makes me feel good */
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index a251de8d9a91..0658cde1586a 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -1650,9 +1650,8 @@ static int bfin_mac_probe(struct platform_device *pdev)
ndev->netdev_ops = &bfin_mac_netdev_ops;
ndev->ethtool_ops = &bfin_mac_ethtool_ops;
- init_timer(&lp->tx_reclaim_timer);
- lp->tx_reclaim_timer.data = (unsigned long)lp;
- lp->tx_reclaim_timer.function = tx_reclaim_skb_timeout;
+ setup_timer(&lp->tx_reclaim_timer, tx_reclaim_skb_timeout,
+ (unsigned long)lp);
lp->flags = 0;
netif_napi_add(ndev, &lp->napi, bfin_mac_poll, CONFIG_BFIN_RX_DESC_NUM);
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 54eff90e2f02..658e92f79d36 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -3624,11 +3624,10 @@ static int et131x_open(struct net_device *netdev)
int result;
/* Start the timer to track NIC errors */
- init_timer(&adapter->error_timer);
+ setup_timer(&adapter->error_timer, et131x_error_timer_handler,
+ (unsigned long)adapter);
adapter->error_timer.expires = jiffies +
msecs_to_jiffies(TX_ERROR_PERIOD);
- adapter->error_timer.function = et131x_error_timer_handler;
- adapter->error_timer.data = (unsigned long)adapter;
add_timer(&adapter->error_timer);
result = request_irq(irq, et131x_isr,
diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 305dc1996b4e..4532e574ebcd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -627,6 +627,12 @@ enum ena_admin_flow_hash_proto {
ENA_ADMIN_RSS_NOT_IP = 7,
+ /* TCPv6 with extension header */
+ ENA_ADMIN_RSS_TCP6_EX = 8,
+
+ /* IPv6 with extension header */
+ ENA_ADMIN_RSS_IP6_EX = 9,
+
ENA_ADMIN_RSS_PROTO_NUM = 16,
};
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 52beba8c7a39..bf2de5298005 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -63,6 +63,8 @@
#define ENA_REGS_ADMIN_INTR_MASK 1
+#define ENA_POLL_MS 5
+
/*****************************************************************************/
/*****************************************************************************/
/*****************************************************************************/
@@ -315,7 +317,7 @@ static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue
cmd_size_in_bytes,
comp,
comp_size_in_bytes);
- if (unlikely(IS_ERR(comp_ctx)))
+ if (IS_ERR(comp_ctx))
admin_queue->running_state = false;
spin_unlock_irqrestore(&admin_queue->q_lock, flags);
@@ -533,7 +535,7 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
goto err;
}
- msleep(100);
+ msleep(ENA_POLL_MS);
}
if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
@@ -746,6 +748,9 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
{
u32 val, i;
+ /* Convert timeout from resolution of 100ms to ENA_POLL_MS */
+ timeout = (timeout * 100) / ENA_POLL_MS;
+
for (i = 0; i < timeout; i++) {
val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
@@ -758,8 +763,7 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
exp_state)
return 0;
- /* The resolution of the timeout is 100ms */
- msleep(100);
+ msleep(ENA_POLL_MS);
}
return -ETIME;
@@ -1130,7 +1134,7 @@ int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue,
comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size,
comp, comp_size);
- if (unlikely(IS_ERR(comp_ctx))) {
+ if (IS_ERR(comp_ctx)) {
if (comp_ctx == ERR_PTR(-ENODEV))
pr_debug("Failed to submit command [%ld]\n",
PTR_ERR(comp_ctx));
@@ -1253,7 +1257,7 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
spin_lock_irqsave(&admin_queue->q_lock, flags);
while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
spin_unlock_irqrestore(&admin_queue->q_lock, flags);
- msleep(20);
+ msleep(ENA_POLL_MS);
spin_lock_irqsave(&admin_queue->q_lock, flags);
}
spin_unlock_irqrestore(&admin_queue->q_lock, flags);
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 967020fb26ee..060cb18fa659 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -60,8 +60,8 @@ struct ena_stats {
static const struct ena_stats ena_stats_global_strings[] = {
ENA_STAT_GLOBAL_ENTRY(tx_timeout),
- ENA_STAT_GLOBAL_ENTRY(io_suspend),
- ENA_STAT_GLOBAL_ENTRY(io_resume),
+ ENA_STAT_GLOBAL_ENTRY(suspend),
+ ENA_STAT_GLOBAL_ENTRY(resume),
ENA_STAT_GLOBAL_ENTRY(wd_expired),
ENA_STAT_GLOBAL_ENTRY(interface_up),
ENA_STAT_GLOBAL_ENTRY(interface_down),
@@ -81,6 +81,7 @@ static const struct ena_stats ena_stats_tx_strings[] = {
ENA_STAT_TX_ENTRY(doorbells),
ENA_STAT_TX_ENTRY(prepare_ctx_err),
ENA_STAT_TX_ENTRY(bad_req_id),
+ ENA_STAT_TX_ENTRY(missed_tx),
};
static const struct ena_stats ena_stats_rx_strings[] = {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index c6bd5e24005d..5417e4da64ca 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2361,38 +2361,6 @@ static const struct net_device_ops ena_netdev_ops = {
#endif /* CONFIG_NET_POLL_CONTROLLER */
};
-static void ena_device_io_suspend(struct work_struct *work)
-{
- struct ena_adapter *adapter =
- container_of(work, struct ena_adapter, suspend_io_task);
- struct net_device *netdev = adapter->netdev;
-
- /* ena_napi_disable_all disables only the IO handling.
- * We are still subject to AENQ keep alive watchdog.
- */
- u64_stats_update_begin(&adapter->syncp);
- adapter->dev_stats.io_suspend++;
- u64_stats_update_begin(&adapter->syncp);
- ena_napi_disable_all(adapter);
- netif_tx_lock(netdev);
- netif_device_detach(netdev);
- netif_tx_unlock(netdev);
-}
-
-static void ena_device_io_resume(struct work_struct *work)
-{
- struct ena_adapter *adapter =
- container_of(work, struct ena_adapter, resume_io_task);
- struct net_device *netdev = adapter->netdev;
-
- u64_stats_update_begin(&adapter->syncp);
- adapter->dev_stats.io_resume++;
- u64_stats_update_end(&adapter->syncp);
-
- netif_device_attach(netdev);
- ena_napi_enable_all(adapter);
-}
-
static int ena_device_validate_params(struct ena_adapter *adapter,
struct ena_com_dev_get_features_ctx *get_feat_ctx)
{
@@ -2561,38 +2529,31 @@ err_disable_msix:
return rc;
}
-static void ena_fw_reset_device(struct work_struct *work)
+static void ena_destroy_device(struct ena_adapter *adapter)
{
- struct ena_com_dev_get_features_ctx get_feat_ctx;
- struct ena_adapter *adapter =
- container_of(work, struct ena_adapter, reset_task);
struct net_device *netdev = adapter->netdev;
struct ena_com_dev *ena_dev = adapter->ena_dev;
- struct pci_dev *pdev = adapter->pdev;
- bool dev_up, wd_state;
- int rc;
-
- if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
- dev_err(&pdev->dev,
- "device reset schedule while reset bit is off\n");
- return;
- }
+ bool dev_up;
netif_carrier_off(netdev);
del_timer_sync(&adapter->timer_service);
- rtnl_lock();
-
dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ adapter->dev_up_before_reset = dev_up;
+
ena_com_set_admin_running_state(ena_dev, false);
- /* After calling ena_close the tx queues and the napi
- * are disabled so no one can interfere or touch the
- * data structures
- */
ena_close(netdev);
+ /* Before releasing the ENA resources, a device reset is required.
+ * (to prevent the device from accessing them).
+ * In case the reset flag is set and the device is up, ena_close
+ * already perform the reset, so it can be skipped.
+ */
+ if (!(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags) && dev_up))
+ ena_com_dev_reset(adapter->ena_dev, adapter->reset_reason);
+
ena_free_mgmnt_irq(adapter);
ena_disable_msix(adapter);
@@ -2606,9 +2567,17 @@ static void ena_fw_reset_device(struct work_struct *work)
ena_com_mmio_reg_read_request_destroy(ena_dev);
adapter->reset_reason = ENA_REGS_RESET_NORMAL;
+
clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+}
- /* Finish with the destroy part. Start the init part */
+static int ena_restore_device(struct ena_adapter *adapter)
+{
+ struct ena_com_dev_get_features_ctx get_feat_ctx;
+ struct ena_com_dev *ena_dev = adapter->ena_dev;
+ struct pci_dev *pdev = adapter->pdev;
+ bool wd_state;
+ int rc;
rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state);
if (rc) {
@@ -2630,7 +2599,7 @@ static void ena_fw_reset_device(struct work_struct *work)
goto err_device_destroy;
}
/* If the interface was up before the reset bring it up */
- if (dev_up) {
+ if (adapter->dev_up_before_reset) {
rc = ena_up(adapter);
if (rc) {
dev_err(&pdev->dev, "Failed to create I/O queues\n");
@@ -2639,24 +2608,38 @@ static void ena_fw_reset_device(struct work_struct *work)
}
mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
-
- rtnl_unlock();
-
dev_err(&pdev->dev, "Device reset completed successfully\n");
- return;
+ return rc;
err_disable_msix:
ena_free_mgmnt_irq(adapter);
ena_disable_msix(adapter);
err_device_destroy:
ena_com_admin_destroy(ena_dev);
err:
- rtnl_unlock();
-
clear_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags);
dev_err(&pdev->dev,
"Reset attempt failed. Can not reset the device\n");
+
+ return rc;
+}
+
+static void ena_fw_reset_device(struct work_struct *work)
+{
+ struct ena_adapter *adapter =
+ container_of(work, struct ena_adapter, reset_task);
+ struct pci_dev *pdev = adapter->pdev;
+
+ if (unlikely(!test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ dev_err(&pdev->dev,
+ "device reset schedule while reset bit is off\n");
+ return;
+ }
+ rtnl_lock();
+ ena_destroy_device(adapter);
+ ena_restore_device(adapter);
+ rtnl_unlock();
}
static int check_missing_comp_in_queue(struct ena_adapter *adapter,
@@ -2665,7 +2648,7 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
struct ena_tx_buffer *tx_buf;
unsigned long last_jiffies;
u32 missed_tx = 0;
- int i;
+ int i, rc = 0;
for (i = 0; i < tx_ring->ring_size; i++) {
tx_buf = &tx_ring->tx_buffer_info[i];
@@ -2679,21 +2662,25 @@ static int check_missing_comp_in_queue(struct ena_adapter *adapter,
tx_buf->print_once = 1;
missed_tx++;
-
- if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
- netif_err(adapter, tx_err, adapter->netdev,
- "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
- missed_tx,
- adapter->missing_tx_completion_threshold);
- adapter->reset_reason =
- ENA_REGS_RESET_MISS_TX_CMPL;
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
- return -EIO;
- }
}
}
- return 0;
+ if (unlikely(missed_tx > adapter->missing_tx_completion_threshold)) {
+ netif_err(adapter, tx_err, adapter->netdev,
+ "The number of lost tx completions is above the threshold (%d > %d). Reset the device\n",
+ missed_tx,
+ adapter->missing_tx_completion_threshold);
+ adapter->reset_reason =
+ ENA_REGS_RESET_MISS_TX_CMPL;
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ rc = -EIO;
+ }
+
+ u64_stats_update_begin(&tx_ring->syncp);
+ tx_ring->tx_stats.missed_tx = missed_tx;
+ u64_stats_update_end(&tx_ring->syncp);
+
+ return rc;
}
static void check_for_missing_tx_completions(struct ena_adapter *adapter)
@@ -3276,8 +3263,6 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_rss;
}
- INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend);
- INIT_WORK(&adapter->resume_io_task, ena_device_io_resume);
INIT_WORK(&adapter->reset_task, ena_fw_reset_device);
adapter->last_keep_alive_jiffies = jiffies;
@@ -3311,8 +3296,6 @@ err_free_msix:
err_worker_destroy:
ena_com_destroy_interrupt_moderation(ena_dev);
del_timer(&adapter->timer_service);
- cancel_work_sync(&adapter->suspend_io_task);
- cancel_work_sync(&adapter->resume_io_task);
err_netdev_destroy:
free_netdev(netdev);
err_device_destroy:
@@ -3382,10 +3365,6 @@ static void ena_remove(struct pci_dev *pdev)
cancel_work_sync(&adapter->reset_task);
- cancel_work_sync(&adapter->suspend_io_task);
-
- cancel_work_sync(&adapter->resume_io_task);
-
/* Reset the device only if the device is running. */
if (test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))
ena_com_dev_reset(ena_dev, adapter->reset_reason);
@@ -3419,11 +3398,59 @@ static void ena_remove(struct pci_dev *pdev)
vfree(ena_dev);
}
+#ifdef CONFIG_PM
+/* ena_suspend - PM suspend callback
+ * @pdev: PCI device information struct
+ * @state:power state
+ */
+static int ena_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct ena_adapter *adapter = pci_get_drvdata(pdev);
+
+ u64_stats_update_begin(&adapter->syncp);
+ adapter->dev_stats.suspend++;
+ u64_stats_update_end(&adapter->syncp);
+
+ rtnl_lock();
+ if (unlikely(test_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) {
+ dev_err(&pdev->dev,
+ "ignoring device reset request as the device is being suspended\n");
+ clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ }
+ ena_destroy_device(adapter);
+ rtnl_unlock();
+ return 0;
+}
+
+/* ena_resume - PM resume callback
+ * @pdev: PCI device information struct
+ *
+ */
+static int ena_resume(struct pci_dev *pdev)
+{
+ struct ena_adapter *adapter = pci_get_drvdata(pdev);
+ int rc;
+
+ u64_stats_update_begin(&adapter->syncp);
+ adapter->dev_stats.resume++;
+ u64_stats_update_end(&adapter->syncp);
+
+ rtnl_lock();
+ rc = ena_restore_device(adapter);
+ rtnl_unlock();
+ return rc;
+}
+#endif
+
static struct pci_driver ena_pci_driver = {
.name = DRV_MODULE_NAME,
.id_table = ena_pci_tbl,
.probe = ena_probe,
.remove = ena_remove,
+#ifdef CONFIG_PM
+ .suspend = ena_suspend,
+ .resume = ena_resume,
+#endif
.sriov_configure = ena_sriov_configure,
};
@@ -3504,16 +3531,6 @@ static void ena_notification(void *adapter_data,
ENA_ADMIN_NOTIFICATION);
switch (aenq_e->aenq_common_desc.syndrom) {
- case ENA_ADMIN_SUSPEND:
- /* Suspend just the IO queues.
- * We deliberately don't suspend admin so the timer and
- * the keep_alive events should remain.
- */
- queue_work(ena_wq, &adapter->suspend_io_task);
- break;
- case ENA_ADMIN_RESUME:
- queue_work(ena_wq, &adapter->resume_io_task);
- break;
case ENA_ADMIN_UPDATE_HINTS:
hints = (struct ena_admin_ena_hw_hints *)
(&aenq_e->inline_data_w4);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 29bb5704260b..ed8bd0a579c4 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -44,7 +44,7 @@
#include "ena_eth_com.h"
#define DRV_MODULE_VER_MAJOR 1
-#define DRV_MODULE_VER_MINOR 2
+#define DRV_MODULE_VER_MINOR 3
#define DRV_MODULE_VER_SUBMINOR 0
#define DRV_MODULE_NAME "ena"
@@ -52,7 +52,7 @@
#define DRV_MODULE_VERSION \
__stringify(DRV_MODULE_VER_MAJOR) "." \
__stringify(DRV_MODULE_VER_MINOR) "." \
- __stringify(DRV_MODULE_VER_SUBMINOR) "k"
+ __stringify(DRV_MODULE_VER_SUBMINOR) "K"
#endif
#define DEVICE_NAME "Elastic Network Adapter (ENA)"
@@ -185,6 +185,7 @@ struct ena_stats_tx {
u64 tx_poll;
u64 doorbells;
u64 bad_req_id;
+ u64 missed_tx;
};
struct ena_stats_rx {
@@ -257,8 +258,8 @@ struct ena_ring {
struct ena_stats_dev {
u64 tx_timeout;
- u64 io_suspend;
- u64 io_resume;
+ u64 suspend;
+ u64 resume;
u64 wd_expired;
u64 interface_up;
u64 interface_down;
@@ -326,11 +327,10 @@ struct ena_adapter {
/* timer service */
struct work_struct reset_task;
- struct work_struct suspend_io_task;
- struct work_struct resume_io_task;
struct timer_list timer_service;
bool wd_state;
+ bool dev_up_before_reset;
unsigned long last_keep_alive_jiffies;
struct u64_stats_sync syncp;
diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c
index e22f976a0d18..998d30e050a6 100644
--- a/drivers/net/ethernet/amd/a2065.c
+++ b/drivers/net/ethernet/amd/a2065.c
@@ -733,10 +733,9 @@ static int a2065_init_one(struct zorro_dev *z,
dev->watchdog_timeo = 5*HZ;
dev->dma = 0;
- init_timer(&priv->multicast_timer);
- priv->multicast_timer.data = (unsigned long) dev;
- priv->multicast_timer.function =
- (void (*)(unsigned long))lance_set_multicast;
+ setup_timer(&priv->multicast_timer,
+ (void(*)(unsigned long))lance_set_multicast,
+ (unsigned long)dev);
err = register_netdev(dev);
if (err) {
diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c
index b11e910850f7..0612dbee00d2 100644
--- a/drivers/net/ethernet/amd/am79c961a.c
+++ b/drivers/net/ethernet/amd/am79c961a.c
@@ -728,9 +728,7 @@ static int am79c961_probe(struct platform_device *pdev)
am79c961_banner();
spin_lock_init(&priv->chip_lock);
- init_timer(&priv->timer);
- priv->timer.data = (unsigned long)dev;
- priv->timer.function = am79c961_timer;
+ setup_timer(&priv->timer, am79c961_timer, (unsigned long)dev);
if (am79c961_hw_init(dev))
goto release;
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 7b5df562f30f..358f7ab77c70 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1669,9 +1669,9 @@ static int amd8111e_resume(struct pci_dev *pci_dev)
return 0;
}
-static void amd8111e_config_ipg(struct net_device *dev)
+static void amd8111e_config_ipg(struct timer_list *t)
{
- struct amd8111e_priv *lp = netdev_priv(dev);
+ struct amd8111e_priv *lp = from_timer(lp, t, ipg_data.ipg_timer);
struct ipg_info *ipg_data = &lp->ipg_data;
void __iomem *mmio = lp->mmio;
unsigned int prev_col_cnt = ipg_data->col_cnt;
@@ -1883,9 +1883,7 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
/* Initialize software ipg timer */
if(lp->options & OPTION_DYN_IPG_ENABLE){
- init_timer(&lp->ipg_data.ipg_timer);
- lp->ipg_data.ipg_timer.data = (unsigned long) dev;
- lp->ipg_data.ipg_timer.function = (void *)&amd8111e_config_ipg;
+ timer_setup(&lp->ipg_data.ipg_timer, amd8111e_config_ipg, 0);
lp->ipg_data.ipg_timer.expires = jiffies +
IPG_CONVERGE_JIFFIES;
lp->ipg_data.ipg = DEFAULT_IPG;
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index 82cc81385033..9bdf81c2cd00 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -1246,9 +1246,9 @@ static int dec_lance_probe(struct device *bdev, const int type)
* can occur from interrupts (ex. IPv6). So we
* use a timer to try again later when necessary. -DaveM
*/
- init_timer(&lp->multicast_timer);
- lp->multicast_timer.data = (unsigned long) dev;
- lp->multicast_timer.function = lance_set_multicast_retry;
+ setup_timer(&lp->multicast_timer, lance_set_multicast_retry,
+ (unsigned long)dev);
+
ret = register_netdev(dev);
if (ret) {
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index 7f60d17819ce..e46153654016 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -1970,9 +1970,8 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
lp->options |= PCNET32_PORT_MII;
}
- init_timer(&lp->watchdog_timer);
- lp->watchdog_timer.data = (unsigned long)dev;
- lp->watchdog_timer.function = (void *)&pcnet32_watchdog;
+ setup_timer(&lp->watchdog_timer, (void *)&pcnet32_watchdog,
+ (unsigned long)dev);
/* The PCNET32-specific entries in the device structure. */
dev->netdev_ops = &pcnet32_netdev_ops;
diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c
index 291ca5187f12..0183ffb9d3ba 100644
--- a/drivers/net/ethernet/amd/sunlance.c
+++ b/drivers/net/ethernet/amd/sunlance.c
@@ -1459,9 +1459,8 @@ no_link_test:
* can occur from interrupts (ex. IPv6). So we
* use a timer to try again later when necessary. -DaveM
*/
- init_timer(&lp->multicast_timer);
- lp->multicast_timer.data = (unsigned long) dev;
- lp->multicast_timer.function = lance_set_multicast_retry;
+ setup_timer(&lp->multicast_timer, lance_set_multicast_retry,
+ (unsigned long)dev);
if (register_netdev(dev)) {
printk(KERN_ERR "SunLance: Cannot register device.\n");
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index eac740c476ce..5a655d289dd5 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -157,7 +157,7 @@ static irqreturn_t bmac_misc_intr(int irq, void *dev_id);
static irqreturn_t bmac_txdma_intr(int irq, void *dev_id);
static irqreturn_t bmac_rxdma_intr(int irq, void *dev_id);
static void bmac_set_timeout(struct net_device *dev);
-static void bmac_tx_timeout(unsigned long data);
+static void bmac_tx_timeout(struct timer_list *t);
static int bmac_output(struct sk_buff *skb, struct net_device *dev);
static void bmac_start(struct net_device *dev);
@@ -555,8 +555,6 @@ static inline void bmac_set_timeout(struct net_device *dev)
if (bp->timeout_active)
del_timer(&bp->tx_timeout);
bp->tx_timeout.expires = jiffies + TX_TIMEOUT;
- bp->tx_timeout.function = bmac_tx_timeout;
- bp->tx_timeout.data = (unsigned long) dev;
add_timer(&bp->tx_timeout);
bp->timeout_active = 1;
spin_unlock_irqrestore(&bp->lock, flags);
@@ -1321,7 +1319,7 @@ static int bmac_probe(struct macio_dev *mdev, const struct of_device_id *match)
bp->queue = (struct sk_buff_head *)(bp->rx_cmds + N_RX_RING + 1);
skb_queue_head_init(bp->queue);
- init_timer(&bp->tx_timeout);
+ timer_setup(&bp->tx_timeout, bmac_tx_timeout, 0);
ret = request_irq(dev->irq, bmac_misc_intr, 0, "BMAC-misc", dev);
if (ret) {
@@ -1471,10 +1469,10 @@ bmac_output(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static void bmac_tx_timeout(unsigned long data)
+static void bmac_tx_timeout(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) data;
- struct bmac_data *bp = netdev_priv(dev);
+ struct bmac_data *bp = from_timer(bp, t, tx_timeout);
+ struct net_device *dev = macio_get_drvdata(bp->mdev);
volatile struct dbdma_regs __iomem *td = bp->tx_dma;
volatile struct dbdma_regs __iomem *rd = bp->rx_dma;
volatile struct dbdma_cmd *cp;
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index e58b157b7d7c..0b5429d76bcf 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -86,7 +86,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id);
static irqreturn_t mace_txdma_intr(int irq, void *dev_id);
static irqreturn_t mace_rxdma_intr(int irq, void *dev_id);
static void mace_set_timeout(struct net_device *dev);
-static void mace_tx_timeout(unsigned long data);
+static void mace_tx_timeout(struct timer_list *t);
static inline void dbdma_reset(volatile struct dbdma_regs __iomem *dma);
static inline void mace_clean_rings(struct mace_data *mp);
static void __mace_set_address(struct net_device *dev, void *addr);
@@ -196,7 +196,7 @@ static int mace_probe(struct macio_dev *mdev, const struct of_device_id *match)
memset((char *) mp->tx_cmds, 0,
(NCMDS_TX*N_TX_RING + N_RX_RING + 2) * sizeof(struct dbdma_cmd));
- init_timer(&mp->tx_timeout);
+ timer_setup(&mp->tx_timeout, mace_tx_timeout, 0);
spin_lock_init(&mp->lock);
mp->timeout_active = 0;
@@ -521,8 +521,6 @@ static inline void mace_set_timeout(struct net_device *dev)
if (mp->timeout_active)
del_timer(&mp->tx_timeout);
mp->tx_timeout.expires = jiffies + TX_TIMEOUT;
- mp->tx_timeout.function = mace_tx_timeout;
- mp->tx_timeout.data = (unsigned long) dev;
add_timer(&mp->tx_timeout);
mp->timeout_active = 1;
}
@@ -801,10 +799,10 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void mace_tx_timeout(unsigned long data)
+static void mace_tx_timeout(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) data;
- struct mace_data *mp = netdev_priv(dev);
+ struct mace_data *mp = from_timer(mp, t, tx_timeout);
+ struct net_device *dev = macio_get_drvdata(mp->mdev);
volatile struct mace __iomem *mb = mp->mace;
volatile struct dbdma_regs __iomem *td = mp->tx_dma;
volatile struct dbdma_regs __iomem *rd = mp->rx_dma;
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 67134ece1107..af75156919ed 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -184,6 +184,7 @@ config BGMAC_PLATFORM
config SYSTEMPORT
tristate "Broadcom SYSTEMPORT internal MAC support"
depends on OF
+ depends on NET_DSA || !NET_DSA
select MII
select PHYLIB
select FIXED_PHY
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index a1125d10c825..42e44fc03a18 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -1474,10 +1474,8 @@ static int b44_open(struct net_device *dev)
goto out;
}
- init_timer(&bp->timer);
+ setup_timer(&bp->timer, b44_timer, (unsigned long)bp);
bp->timer.expires = jiffies + HZ;
- bp->timer.data = (unsigned long) bp;
- bp->timer.function = b44_timer;
add_timer(&bp->timer);
b44_enable_ints(bp);
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 4f3845a58126..d9346e2ac720 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -295,16 +295,13 @@ static int bcm_enet_refill_rx(struct net_device *dev)
/*
* timer callback to defer refill rx queue in case we're OOM
*/
-static void bcm_enet_refill_rx_timer(unsigned long data)
+static void bcm_enet_refill_rx_timer(struct timer_list *t)
{
- struct net_device *dev;
- struct bcm_enet_priv *priv;
-
- dev = (struct net_device *)data;
- priv = netdev_priv(dev);
+ struct bcm_enet_priv *priv = from_timer(priv, t, rx_timeout);
+ struct net_device *dev = priv->net_dev;
spin_lock(&priv->rx_lock);
- bcm_enet_refill_rx((struct net_device *)data);
+ bcm_enet_refill_rx(dev);
spin_unlock(&priv->rx_lock);
}
@@ -1062,7 +1059,8 @@ static int bcm_enet_open(struct net_device *dev)
val = enet_readl(priv, ENET_CTL_REG);
val |= ENET_CTL_ENABLE_MASK;
enet_writel(priv, val, ENET_CTL_REG);
- enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG);
+ if (priv->dma_has_sram)
+ enet_dma_writel(priv, ENETDMA_CFG_EN_MASK, ENETDMA_CFG_REG);
enet_dmac_writel(priv, priv->dma_chan_en_mask,
ENETDMAC_CHANCFG, priv->rx_chan);
@@ -1721,10 +1719,8 @@ static int bcm_enet_probe(struct platform_device *pdev)
const char *clk_name;
int i, ret;
- /* stop if shared driver failed, assume driver->probe will be
- * called in the same order we register devices (correct ?) */
if (!bcm_enet_shared_base[0])
- return -ENODEV;
+ return -EPROBE_DEFER;
res_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
res_irq_rx = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
@@ -1768,12 +1764,14 @@ static int bcm_enet_probe(struct platform_device *pdev)
clk_name = "enet1";
}
- priv->mac_clk = clk_get(&pdev->dev, clk_name);
+ priv->mac_clk = devm_clk_get(&pdev->dev, clk_name);
if (IS_ERR(priv->mac_clk)) {
ret = PTR_ERR(priv->mac_clk);
goto out;
}
- clk_prepare_enable(priv->mac_clk);
+ ret = clk_prepare_enable(priv->mac_clk);
+ if (ret)
+ goto out;
/* initialize default and fetch platform data */
priv->rx_ring_size = BCMENET_DEF_RX_DESC;
@@ -1801,13 +1799,15 @@ static int bcm_enet_probe(struct platform_device *pdev)
if (priv->mac_id == 0 && priv->has_phy && !priv->use_external_mii) {
/* using internal PHY, enable clock */
- priv->phy_clk = clk_get(&pdev->dev, "ephy");
+ priv->phy_clk = devm_clk_get(&pdev->dev, "ephy");
if (IS_ERR(priv->phy_clk)) {
ret = PTR_ERR(priv->phy_clk);
priv->phy_clk = NULL;
- goto out_put_clk_mac;
+ goto out_disable_clk_mac;
}
- clk_prepare_enable(priv->phy_clk);
+ ret = clk_prepare_enable(priv->phy_clk);
+ if (ret)
+ goto out_disable_clk_mac;
}
/* do minimal hardware init to be able to probe mii bus */
@@ -1857,9 +1857,7 @@ static int bcm_enet_probe(struct platform_device *pdev)
spin_lock_init(&priv->rx_lock);
/* init rx timeout (used for oom) */
- init_timer(&priv->rx_timeout);
- priv->rx_timeout.function = bcm_enet_refill_rx_timer;
- priv->rx_timeout.data = (unsigned long)dev;
+ timer_setup(&priv->rx_timeout, bcm_enet_refill_rx_timer, 0);
/* init the mib update lock&work */
mutex_init(&priv->mib_update_lock);
@@ -1901,14 +1899,10 @@ out_free_mdio:
out_uninit_hw:
/* turn off mdc clock */
enet_writel(priv, 0, ENET_MIISC_REG);
- if (priv->phy_clk) {
- clk_disable_unprepare(priv->phy_clk);
- clk_put(priv->phy_clk);
- }
+ clk_disable_unprepare(priv->phy_clk);
-out_put_clk_mac:
+out_disable_clk_mac:
clk_disable_unprepare(priv->mac_clk);
- clk_put(priv->mac_clk);
out:
free_netdev(dev);
return ret;
@@ -1944,12 +1938,8 @@ static int bcm_enet_remove(struct platform_device *pdev)
}
/* disable hw block clocks */
- if (priv->phy_clk) {
- clk_disable_unprepare(priv->phy_clk);
- clk_put(priv->phy_clk);
- }
+ clk_disable_unprepare(priv->phy_clk);
clk_disable_unprepare(priv->mac_clk);
- clk_put(priv->mac_clk);
free_netdev(dev);
return 0;
@@ -2021,9 +2011,9 @@ static inline int bcm_enet_port_is_rgmii(int portid)
/*
* enet sw PHY polling
*/
-static void swphy_poll_timer(unsigned long data)
+static void swphy_poll_timer(struct timer_list *t)
{
- struct bcm_enet_priv *priv = (struct bcm_enet_priv *)data;
+ struct bcm_enet_priv *priv = from_timer(priv, t, swphy_poll);
unsigned int i;
for (i = 0; i < priv->num_ports; i++) {
@@ -2332,11 +2322,8 @@ static int bcm_enetsw_open(struct net_device *dev)
}
/* start phy polling timer */
- init_timer(&priv->swphy_poll);
- priv->swphy_poll.function = swphy_poll_timer;
- priv->swphy_poll.data = (unsigned long)priv;
- priv->swphy_poll.expires = jiffies;
- add_timer(&priv->swphy_poll);
+ timer_setup(&priv->swphy_poll, swphy_poll_timer, 0);
+ mod_timer(&priv->swphy_poll, jiffies);
return 0;
out:
@@ -2692,11 +2679,8 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
struct resource *res_mem;
int ret, irq_rx, irq_tx;
- /* stop if shared driver failed, assume driver->probe will be
- * called in the same order we register devices (correct ?)
- */
if (!bcm_enet_shared_base[0])
- return -ENODEV;
+ return -EPROBE_DEFER;
res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
irq_rx = platform_get_irq(pdev, 0);
@@ -2735,33 +2719,27 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
if (ret)
goto out;
- if (!request_mem_region(res_mem->start, resource_size(res_mem),
- "bcm63xx_enetsw")) {
- ret = -EBUSY;
+ priv->base = devm_ioremap_resource(&pdev->dev, res_mem);
+ if (IS_ERR(priv->base)) {
+ ret = PTR_ERR(priv->base);
goto out;
}
- priv->base = ioremap(res_mem->start, resource_size(res_mem));
- if (priv->base == NULL) {
- ret = -ENOMEM;
- goto out_release_mem;
- }
-
- priv->mac_clk = clk_get(&pdev->dev, "enetsw");
+ priv->mac_clk = devm_clk_get(&pdev->dev, "enetsw");
if (IS_ERR(priv->mac_clk)) {
ret = PTR_ERR(priv->mac_clk);
- goto out_unmap;
+ goto out;
}
- clk_enable(priv->mac_clk);
+ ret = clk_prepare_enable(priv->mac_clk);
+ if (ret)
+ goto out;
priv->rx_chan = 0;
priv->tx_chan = 1;
spin_lock_init(&priv->rx_lock);
/* init rx timeout (used for oom) */
- init_timer(&priv->rx_timeout);
- priv->rx_timeout.function = bcm_enet_refill_rx_timer;
- priv->rx_timeout.data = (unsigned long)dev;
+ timer_setup(&priv->rx_timeout, bcm_enet_refill_rx_timer, 0);
/* register netdevice */
dev->netdev_ops = &bcm_enetsw_ops;
@@ -2773,7 +2751,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
ret = register_netdev(dev);
if (ret)
- goto out_put_clk;
+ goto out_disable_clk;
netif_carrier_off(dev);
platform_set_drvdata(pdev, dev);
@@ -2782,14 +2760,8 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
return 0;
-out_put_clk:
- clk_put(priv->mac_clk);
-
-out_unmap:
- iounmap(priv->base);
-
-out_release_mem:
- release_mem_region(res_mem->start, resource_size(res_mem));
+out_disable_clk:
+ clk_disable_unprepare(priv->mac_clk);
out:
free_netdev(dev);
return ret;
@@ -2801,17 +2773,13 @@ static int bcm_enetsw_remove(struct platform_device *pdev)
{
struct bcm_enet_priv *priv;
struct net_device *dev;
- struct resource *res;
/* stop netdevice */
dev = platform_get_drvdata(pdev);
priv = netdev_priv(dev);
unregister_netdev(dev);
- /* release device resources */
- iounmap(priv->base);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- release_mem_region(res->start, resource_size(res));
+ clk_disable_unprepare(priv->mac_clk);
free_netdev(dev);
return 0;
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h
index 0a1b7b2e55bd..dd6ae3077433 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h
@@ -8,7 +8,6 @@
#include <linux/platform_device.h>
#include <bcm63xx_regs.h>
-#include <bcm63xx_irq.h>
#include <bcm63xx_io.h>
#include <bcm63xx_iudma.h>
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 83eec9a8c275..dafc26690555 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1416,9 +1416,20 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
tdma_writel(priv, 0, TDMA_DESC_RING_COUNT(index));
tdma_writel(priv, 1, TDMA_DESC_RING_INTR_CONTROL(index));
tdma_writel(priv, 0, TDMA_DESC_RING_PROD_CONS_INDEX(index));
- tdma_writel(priv, RING_IGNORE_STATUS, TDMA_DESC_RING_MAPPING(index));
+
+ /* Configure QID and port mapping */
+ reg = tdma_readl(priv, TDMA_DESC_RING_MAPPING(index));
+ reg &= ~(RING_QID_MASK | RING_PORT_ID_MASK << RING_PORT_ID_SHIFT);
+ reg |= ring->switch_queue & RING_QID_MASK;
+ reg |= ring->switch_port << RING_PORT_ID_SHIFT;
+ tdma_writel(priv, reg, TDMA_DESC_RING_MAPPING(index));
tdma_writel(priv, 0, TDMA_DESC_RING_PCP_DEI_VID(index));
+ /* Enable ACB algorithm 2 */
+ reg = tdma_readl(priv, TDMA_CONTROL);
+ reg |= tdma_control_bit(priv, ACB_ALGO);
+ tdma_writel(priv, reg, TDMA_CONTROL);
+
/* Do not use tdma_control_bit() here because TSB_SWAP1 collides
* with the original definition of ACB_ALGO
*/
@@ -1447,8 +1458,9 @@ static int bcm_sysport_init_tx_ring(struct bcm_sysport_priv *priv,
napi_enable(&ring->napi);
netif_dbg(priv, hw, priv->netdev,
- "TDMA cfg, size=%d, desc_cpu=%p\n",
- ring->size, ring->desc_cpu);
+ "TDMA cfg, size=%d, desc_cpu=%p switch q=%d,port=%d\n",
+ ring->size, ring->desc_cpu, ring->switch_queue,
+ ring->switch_port);
return 0;
}
@@ -2011,6 +2023,92 @@ static const struct ethtool_ops bcm_sysport_ethtool_ops = {
.set_link_ksettings = phy_ethtool_set_link_ksettings,
};
+static u16 bcm_sysport_select_queue(struct net_device *dev, struct sk_buff *skb,
+ void *accel_priv,
+ select_queue_fallback_t fallback)
+{
+ struct bcm_sysport_priv *priv = netdev_priv(dev);
+ u16 queue = skb_get_queue_mapping(skb);
+ struct bcm_sysport_tx_ring *tx_ring;
+ unsigned int q, port;
+
+ if (!netdev_uses_dsa(dev))
+ return fallback(dev, skb);
+
+ /* DSA tagging layer will have configured the correct queue */
+ q = BRCM_TAG_GET_QUEUE(queue);
+ port = BRCM_TAG_GET_PORT(queue);
+ tx_ring = priv->ring_map[q + port * priv->per_port_num_tx_queues];
+
+ return tx_ring->index;
+}
+
+static int bcm_sysport_map_queues(struct net_device *dev,
+ struct dsa_notifier_register_info *info)
+{
+ struct bcm_sysport_priv *priv = netdev_priv(dev);
+ struct bcm_sysport_tx_ring *ring;
+ struct net_device *slave_dev;
+ unsigned int num_tx_queues;
+ unsigned int q, start, port;
+
+ /* We can't be setting up queue inspection for non directly attached
+ * switches
+ */
+ if (info->switch_number)
+ return 0;
+
+ port = info->port_number;
+ slave_dev = info->info.dev;
+
+ /* On SYSTEMPORT Lite we have twice as less queues, so we cannot do a
+ * 1:1 mapping, we can only do a 2:1 mapping. By reducing the number of
+ * per-port (slave_dev) network devices queue, we achieve just that.
+ * This need to happen now before any slave network device is used such
+ * it accurately reflects the number of real TX queues.
+ */
+ if (priv->is_lite)
+ netif_set_real_num_tx_queues(slave_dev,
+ slave_dev->num_tx_queues / 2);
+ num_tx_queues = slave_dev->real_num_tx_queues;
+
+ if (priv->per_port_num_tx_queues &&
+ priv->per_port_num_tx_queues != num_tx_queues)
+ netdev_warn(slave_dev, "asymetric number of per-port queues\n");
+
+ priv->per_port_num_tx_queues = num_tx_queues;
+
+ start = find_first_zero_bit(&priv->queue_bitmap, dev->num_tx_queues);
+ for (q = 0; q < num_tx_queues; q++) {
+ ring = &priv->tx_rings[q + start];
+
+ /* Just remember the mapping actual programming done
+ * during bcm_sysport_init_tx_ring
+ */
+ ring->switch_queue = q;
+ ring->switch_port = port;
+ priv->ring_map[q + port * num_tx_queues] = ring;
+
+ /* Set all queues as being used now */
+ set_bit(q + start, &priv->queue_bitmap);
+ }
+
+ return 0;
+}
+
+static int bcm_sysport_dsa_notifier(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct dsa_notifier_register_info *info;
+
+ if (event != DSA_PORT_REGISTER)
+ return NOTIFY_DONE;
+
+ info = ptr;
+
+ return notifier_from_errno(bcm_sysport_map_queues(info->master, info));
+}
+
static const struct net_device_ops bcm_sysport_netdev_ops = {
.ndo_start_xmit = bcm_sysport_xmit,
.ndo_tx_timeout = bcm_sysport_tx_timeout,
@@ -2023,6 +2121,7 @@ static const struct net_device_ops bcm_sysport_netdev_ops = {
.ndo_poll_controller = bcm_sysport_poll_controller,
#endif
.ndo_get_stats64 = bcm_sysport_get_stats64,
+ .ndo_select_queue = bcm_sysport_select_queue,
};
#define REV_FMT "v%2x.%02x"
@@ -2172,10 +2271,18 @@ static int bcm_sysport_probe(struct platform_device *pdev)
u64_stats_init(&priv->syncp);
+ priv->dsa_notifier.notifier_call = bcm_sysport_dsa_notifier;
+
+ ret = register_dsa_notifier(&priv->dsa_notifier);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register DSA notifier\n");
+ goto err_deregister_fixed_link;
+ }
+
ret = register_netdev(dev);
if (ret) {
dev_err(&pdev->dev, "failed to register net_device\n");
- goto err_deregister_fixed_link;
+ goto err_deregister_notifier;
}
priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK;
@@ -2188,6 +2295,8 @@ static int bcm_sysport_probe(struct platform_device *pdev)
return 0;
+err_deregister_notifier:
+ unregister_dsa_notifier(&priv->dsa_notifier);
err_deregister_fixed_link:
if (of_phy_is_fixed_link(dn))
of_phy_deregister_fixed_link(dn);
@@ -2199,11 +2308,13 @@ err_free_netdev:
static int bcm_sysport_remove(struct platform_device *pdev)
{
struct net_device *dev = dev_get_drvdata(&pdev->dev);
+ struct bcm_sysport_priv *priv = netdev_priv(dev);
struct device_node *dn = pdev->dev.of_node;
/* Not much to do, ndo_close has been called
* and we use managed allocations
*/
+ unregister_dsa_notifier(&priv->dsa_notifier);
unregister_netdev(dev);
if (of_phy_is_fixed_link(dn))
of_phy_deregister_fixed_link(dn);
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h
index 82e401df199e..82f70a6783cb 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.h
+++ b/drivers/net/ethernet/broadcom/bcmsysport.h
@@ -404,7 +404,7 @@ struct bcm_rsb {
#define RING_CONS_INDEX_MASK 0xffff
#define RING_MAPPING 0x14
-#define RING_QID_MASK 0x3
+#define RING_QID_MASK 0x7
#define RING_PORT_ID_SHIFT 3
#define RING_PORT_ID_MASK 0x7
#define RING_IGNORE_STATUS (1 << 6)
@@ -712,6 +712,8 @@ struct bcm_sysport_tx_ring {
struct bcm_sysport_priv *priv; /* private context backpointer */
unsigned long packets; /* packets statistics */
unsigned long bytes; /* bytes statistics */
+ unsigned int switch_queue; /* switch port queue number */
+ unsigned int switch_port; /* switch port queue number */
};
/* Driver private structure */
@@ -765,5 +767,12 @@ struct bcm_sysport_priv {
/* For atomic update generic 64bit value on 32bit Machine */
struct u64_stats_sync syncp;
+
+ /* map information between switch port queues and local queues */
+ struct notifier_block dsa_notifier;
+ unsigned int per_port_num_tx_queues;
+ unsigned long queue_bitmap;
+ struct bcm_sysport_tx_ring *ring_map[DSA_MAX_PORTS * 8];
+
};
#endif /* __BCM_SYSPORT_H */
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index 6322594ab260..6fe074c1588b 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -184,13 +184,19 @@ static int bgmac_probe(struct bcma_device *core)
if (!bgmac_is_bcm4707_family(core) &&
!(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) {
+ struct phy_device *phydev;
+
mii_bus = bcma_mdio_mii_register(bgmac);
if (IS_ERR(mii_bus)) {
err = PTR_ERR(mii_bus);
goto err;
}
-
bgmac->mii_bus = mii_bus;
+
+ phydev = mdiobus_get_phy(bgmac->mii_bus, bgmac->phyaddr);
+ if (ci->id == BCMA_CHIP_ID_BCM53573 && phydev &&
+ (phydev->drv->phy_id & phydev->drv->phy_id_mask) == PHY_ID_BCM54210E)
+ phydev->dev_flags |= PHY_BRCM_EN_MASTER_MODE;
}
if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index e3af1f3cb61f..b3055a76dfbf 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -8462,10 +8462,8 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
bnx2_set_default_link(bp);
bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX;
- init_timer(&bp->timer);
+ setup_timer(&bp->timer, bnx2_timer, (unsigned long)bp);
bp->timer.expires = RUN_AT(BNX2_TIMER_INTERVAL);
- bp->timer.data = (unsigned long) bp;
- bp->timer.function = bnx2_timer;
#ifdef BCM_CNIC
if (bnx2_shmem_rd(bp, BNX2_ISCSI_INITIATOR) & BNX2_ISCSI_INITIATOR_EN)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index c12b4d3e946e..54d1571384a0 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12414,10 +12414,8 @@ static int bnx2x_init_bp(struct bnx2x *bp)
bp->current_interval = CHIP_REV_IS_SLOW(bp) ? 5*HZ : HZ;
- init_timer(&bp->timer);
+ setup_timer(&bp->timer, bnx2x_timer, (unsigned long)bp);
bp->timer.expires = jiffies + bp->current_interval;
- bp->timer.data = (unsigned long) bp;
- bp->timer.function = bnx2x_timer;
if (SHMEM2_HAS(bp, dcbx_lldp_params_offset) &&
SHMEM2_HAS(bp, dcbx_lldp_dcbx_stat_offset) &&
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 9ca994d0bab6..3591077a5f6b 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -1074,11 +1074,6 @@ static void bnx2x_vf_set_bars(struct bnx2x *bp, struct bnx2x_virtf *vf)
}
}
-static int bnx2x_ari_enabled(struct pci_dev *dev)
-{
- return dev->bus->self && dev->bus->self->ari_enabled;
-}
-
static int
bnx2x_get_vf_igu_cam_info(struct bnx2x *bp)
{
@@ -1212,7 +1207,7 @@ int bnx2x_iov_init_one(struct bnx2x *bp, int int_mode_param,
err = -EIO;
/* verify ari is enabled */
- if (!bnx2x_ari_enabled(bp->pdev)) {
+ if (!pci_ari_enabled(bp->pdev->bus)) {
BNX2X_ERR("ARI not supported (check pci bridge ARI forwarding), SRIOV can not be enabled\n");
return 0;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 4f0cb8e1ffc0..59c8ec9c1cad 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_BNXT) += bnxt_en.o
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_tc.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o bnxt_xdp.o bnxt_vfr.o bnxt_devlink.o
+bnxt_en-$(CONFIG_BNXT_FLOWER_OFFLOAD) += bnxt_tc.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dc5de275352a..24d55724ceff 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -61,6 +61,7 @@
#include "bnxt_xdp.h"
#include "bnxt_vfr.h"
#include "bnxt_tc.h"
+#include "bnxt_devlink.h"
#define BNXT_TX_TIMEOUT (5 * HZ)
@@ -1509,7 +1510,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
(struct rx_tpa_end_cmp *)rxcmp,
(struct rx_tpa_end_cmp_ext *)rxcmp1, event);
- if (unlikely(IS_ERR(skb)))
+ if (IS_ERR(skb))
return -EBUSY;
rc = -ENOMEM;
@@ -7214,9 +7215,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
- init_timer(&bp->timer);
- bp->timer.data = (unsigned long)bp;
- bp->timer.function = bnxt_timer;
+ setup_timer(&bp->timer, bnxt_timer, (unsigned long)bp);
bp->current_interval = BNXT_TIMER_INTERVAL;
clear_bit(BNXT_STATE_OPEN, &bp->state);
@@ -7321,23 +7320,48 @@ int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
return 0;
}
-static int bnxt_setup_flower(struct net_device *dev,
- struct tc_cls_flower_offload *cls_flower)
+static int bnxt_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
{
- struct bnxt *bp = netdev_priv(dev);
+ struct bnxt *bp = cb_priv;
if (BNXT_VF(bp))
return -EOPNOTSUPP;
- return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, cls_flower);
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int bnxt_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct bnxt *bp = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, bnxt_setup_tc_block_cb,
+ bp, bp);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, bnxt_setup_tc_block_cb, bp);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
}
static int bnxt_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
switch (type) {
- case TC_SETUP_CLSFLOWER:
- return bnxt_setup_flower(dev, type_data);
+ case TC_SETUP_BLOCK:
+ return bnxt_setup_tc_block(dev, type_data);
case TC_SETUP_MQPRIO: {
struct tc_mqprio_qopt *mqprio = type_data;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
new file mode 100644
index 000000000000..f3f6aa868d6c
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -0,0 +1,65 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_vfr.h"
+#include "bnxt_devlink.h"
+
+static const struct devlink_ops bnxt_dl_ops = {
+#ifdef CONFIG_BNXT_SRIOV
+ .eswitch_mode_set = bnxt_dl_eswitch_mode_set,
+ .eswitch_mode_get = bnxt_dl_eswitch_mode_get,
+#endif /* CONFIG_BNXT_SRIOV */
+};
+
+int bnxt_dl_register(struct bnxt *bp)
+{
+ struct devlink *dl;
+ int rc;
+
+ if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
+ return 0;
+
+ if (bp->hwrm_spec_code < 0x10800) {
+ netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n");
+ return -ENOTSUPP;
+ }
+
+ dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+ if (!dl) {
+ netdev_warn(bp->dev, "devlink_alloc failed");
+ return -ENOMEM;
+ }
+
+ bnxt_link_bp_to_dl(bp, dl);
+ bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
+ rc = devlink_register(dl, &bp->pdev->dev);
+ if (rc) {
+ bnxt_link_bp_to_dl(bp, NULL);
+ devlink_free(dl);
+ netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+void bnxt_dl_unregister(struct bnxt *bp)
+{
+ struct devlink *dl = bp->dl;
+
+ if (!dl)
+ return;
+
+ devlink_unregister(dl);
+ devlink_free(dl);
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
new file mode 100644
index 000000000000..e92a35d8b642
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
@@ -0,0 +1,39 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2017 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_DEVLINK_H
+#define BNXT_DEVLINK_H
+
+/* Struct to hold housekeeping info needed by devlink interface */
+struct bnxt_dl {
+ struct bnxt *bp; /* back ptr to the controlling dev */
+};
+
+static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
+{
+ return ((struct bnxt_dl *)devlink_priv(dl))->bp;
+}
+
+/* To clear devlink pointer from bp, pass NULL dl */
+static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
+{
+ bp->dl = dl;
+
+ /* add a back pointer in dl to bp */
+ if (dl) {
+ struct bnxt_dl *bp_dl = devlink_priv(dl);
+
+ bp_dl->bp = bp;
+ }
+}
+
+int bnxt_dl_register(struct bnxt *bp);
+void bnxt_dl_unregister(struct bnxt *bp);
+
+#endif /* BNXT_DEVLINK_H */
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 7dd3d131043a..a9cb653b4d29 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -23,8 +23,6 @@
#include "bnxt_tc.h"
#include "bnxt_vfr.h"
-#ifdef CONFIG_BNXT_FLOWER_OFFLOAD
-
#define BNXT_FID_INVALID 0xffff
#define VLAN_TCI(vid, prio) ((vid) | ((prio) << VLAN_PRIO_SHIFT))
@@ -750,8 +748,7 @@ int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
{
int rc = 0;
- if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
- cls_flower->common.chain_index)
+ if (cls_flower->common.chain_index)
return -EOPNOTSUPP;
switch (cls_flower->command) {
@@ -833,6 +830,3 @@ void bnxt_shutdown_tc(struct bnxt *bp)
rhashtable_destroy(&tc_info->flow_table);
rhashtable_destroy(&tc_info->l2_table);
}
-
-#else
-#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
index e75db04c6cdc..4ae935999ebe 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
@@ -16,6 +16,7 @@
#include "bnxt_hsi.h"
#include "bnxt.h"
#include "bnxt_vfr.h"
+#include "bnxt_devlink.h"
#include "bnxt_tc.h"
#ifdef CONFIG_BNXT_SRIOV
@@ -115,10 +116,11 @@ bnxt_vf_rep_get_stats64(struct net_device *dev,
stats->tx_bytes = vf_rep->tx_stats.bytes;
}
-static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
+static int bnxt_vf_rep_setup_tc_block_cb(enum tc_setup_type type,
+ void *type_data,
+ void *cb_priv)
{
- struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+ struct bnxt_vf_rep *vf_rep = cb_priv;
struct bnxt *bp = vf_rep->bp;
int vf_fid = bp->pf.vf[vf_rep->vf_idx].fw_fid;
@@ -130,6 +132,40 @@ static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
}
}
+static int bnxt_vf_rep_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block,
+ bnxt_vf_rep_setup_tc_block_cb,
+ vf_rep, vf_rep);
+ return 0;
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block,
+ bnxt_vf_rep_setup_tc_block_cb, vf_rep);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int bnxt_vf_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return bnxt_vf_rep_setup_tc_block(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code)
{
u16 vf_idx;
@@ -416,7 +452,7 @@ err:
}
/* Devlink related routines */
-static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode)
+int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode)
{
struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
@@ -424,7 +460,7 @@ static int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode)
return 0;
}
-static int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode)
+int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode)
{
struct bnxt *bp = bnxt_get_bp_from_dl(devlink);
int rc = 0;
@@ -462,52 +498,4 @@ done:
return rc;
}
-static const struct devlink_ops bnxt_dl_ops = {
- .eswitch_mode_set = bnxt_dl_eswitch_mode_set,
- .eswitch_mode_get = bnxt_dl_eswitch_mode_get
-};
-
-int bnxt_dl_register(struct bnxt *bp)
-{
- struct devlink *dl;
- int rc;
-
- if (!pci_find_ext_capability(bp->pdev, PCI_EXT_CAP_ID_SRIOV))
- return 0;
-
- if (bp->hwrm_spec_code < 0x10800) {
- netdev_warn(bp->dev, "Firmware does not support SR-IOV E-Switch SWITCHDEV mode.\n");
- return -ENOTSUPP;
- }
-
- dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
- if (!dl) {
- netdev_warn(bp->dev, "devlink_alloc failed");
- return -ENOMEM;
- }
-
- bnxt_link_bp_to_dl(bp, dl);
- bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
- rc = devlink_register(dl, &bp->pdev->dev);
- if (rc) {
- bnxt_link_bp_to_dl(bp, NULL);
- devlink_free(dl);
- netdev_warn(bp->dev, "devlink_register failed. rc=%d", rc);
- return rc;
- }
-
- return 0;
-}
-
-void bnxt_dl_unregister(struct bnxt *bp)
-{
- struct devlink *dl = bp->dl;
-
- if (!dl)
- return;
-
- devlink_unregister(dl);
- devlink_free(dl);
-}
-
#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
index 7787cd24606a..fb06bbe70e42 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h
@@ -14,31 +14,6 @@
#define MAX_CFA_CODE 65536
-/* Struct to hold housekeeping info needed by devlink interface */
-struct bnxt_dl {
- struct bnxt *bp; /* back ptr to the controlling dev */
-};
-
-static inline struct bnxt *bnxt_get_bp_from_dl(struct devlink *dl)
-{
- return ((struct bnxt_dl *)devlink_priv(dl))->bp;
-}
-
-/* To clear devlink pointer from bp, pass NULL dl */
-static inline void bnxt_link_bp_to_dl(struct bnxt *bp, struct devlink *dl)
-{
- bp->dl = dl;
-
- /* add a back pointer in dl to bp */
- if (dl) {
- struct bnxt_dl *bp_dl = devlink_priv(dl);
-
- bp_dl->bp = bp;
- }
-}
-
-int bnxt_dl_register(struct bnxt *bp);
-void bnxt_dl_unregister(struct bnxt *bp);
void bnxt_vf_reps_destroy(struct bnxt *bp);
void bnxt_vf_reps_close(struct bnxt *bp);
void bnxt_vf_reps_open(struct bnxt *bp);
@@ -53,16 +28,10 @@ static inline u16 bnxt_vf_rep_get_fid(struct net_device *dev)
return bp->pf.vf[vf_rep->vf_idx].fw_fid;
}
-#else
-
-static inline int bnxt_dl_register(struct bnxt *bp)
-{
- return 0;
-}
+int bnxt_dl_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int bnxt_dl_eswitch_mode_set(struct devlink *devlink, u16 mode);
-static inline void bnxt_dl_unregister(struct bnxt *bp)
-{
-}
+#else
static inline void bnxt_vf_reps_close(struct bnxt *bp)
{
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index d8f0c837b72c..06ce63c00821 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -94,6 +94,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
xdp.data_hard_start = *data_ptr - offset;
xdp.data = *data_ptr;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = *data_ptr + *len;
orig_data = xdp.data;
mapping = rx_buf->mapping - bp->rx_dma_offset;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 656e6af70f0a..d8d5f207c759 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -11087,9 +11087,7 @@ static void tg3_timer_init(struct tg3 *tp)
tp->asf_multiplier = (HZ / tp->timer_offset) *
TG3_FW_UPDATE_FREQ_SEC;
- init_timer(&tp->timer);
- tp->timer.data = (unsigned long) tp;
- tp->timer.function = tg3_timer;
+ setup_timer(&tp->timer, tg3_timer, (unsigned long)tp);
}
static void tg3_timer_start(struct tg3 *tp)
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 6e13c937d715..a843076597ec 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -1693,9 +1693,9 @@ err_return:
/* Timer callbacks */
/* a) IOC timer */
static void
-bnad_ioc_timeout(unsigned long data)
+bnad_ioc_timeout(struct timer_list *t)
{
- struct bnad *bnad = (struct bnad *)data;
+ struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.ioc_timer);
unsigned long flags;
spin_lock_irqsave(&bnad->bna_lock, flags);
@@ -1704,9 +1704,9 @@ bnad_ioc_timeout(unsigned long data)
}
static void
-bnad_ioc_hb_check(unsigned long data)
+bnad_ioc_hb_check(struct timer_list *t)
{
- struct bnad *bnad = (struct bnad *)data;
+ struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.hb_timer);
unsigned long flags;
spin_lock_irqsave(&bnad->bna_lock, flags);
@@ -1715,9 +1715,9 @@ bnad_ioc_hb_check(unsigned long data)
}
static void
-bnad_iocpf_timeout(unsigned long data)
+bnad_iocpf_timeout(struct timer_list *t)
{
- struct bnad *bnad = (struct bnad *)data;
+ struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.iocpf_timer);
unsigned long flags;
spin_lock_irqsave(&bnad->bna_lock, flags);
@@ -1726,9 +1726,9 @@ bnad_iocpf_timeout(unsigned long data)
}
static void
-bnad_iocpf_sem_timeout(unsigned long data)
+bnad_iocpf_sem_timeout(struct timer_list *t)
{
- struct bnad *bnad = (struct bnad *)data;
+ struct bnad *bnad = from_timer(bnad, t, bna.ioceth.ioc.sem_timer);
unsigned long flags;
spin_lock_irqsave(&bnad->bna_lock, flags);
@@ -1748,9 +1748,9 @@ bnad_iocpf_sem_timeout(unsigned long data)
/* b) Dynamic Interrupt Moderation Timer */
static void
-bnad_dim_timeout(unsigned long data)
+bnad_dim_timeout(struct timer_list *t)
{
- struct bnad *bnad = (struct bnad *)data;
+ struct bnad *bnad = from_timer(bnad, t, dim_timer);
struct bnad_rx_info *rx_info;
struct bnad_rx_ctrl *rx_ctrl;
int i, j;
@@ -1781,9 +1781,9 @@ bnad_dim_timeout(unsigned long data)
/* c) Statistics Timer */
static void
-bnad_stats_timeout(unsigned long data)
+bnad_stats_timeout(struct timer_list *t)
{
- struct bnad *bnad = (struct bnad *)data;
+ struct bnad *bnad = from_timer(bnad, t, stats_timer);
unsigned long flags;
if (!netif_running(bnad->netdev) ||
@@ -1804,8 +1804,7 @@ bnad_dim_timer_start(struct bnad *bnad)
{
if (bnad->cfg_flags & BNAD_CF_DIM_ENABLED &&
!test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags)) {
- setup_timer(&bnad->dim_timer, bnad_dim_timeout,
- (unsigned long)bnad);
+ timer_setup(&bnad->dim_timer, bnad_dim_timeout, 0);
set_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags);
mod_timer(&bnad->dim_timer,
jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ));
@@ -1823,8 +1822,7 @@ bnad_stats_timer_start(struct bnad *bnad)
spin_lock_irqsave(&bnad->bna_lock, flags);
if (!test_and_set_bit(BNAD_RF_STATS_TIMER_RUNNING, &bnad->run_flags)) {
- setup_timer(&bnad->stats_timer, bnad_stats_timeout,
- (unsigned long)bnad);
+ timer_setup(&bnad->stats_timer, bnad_stats_timeout, 0);
mod_timer(&bnad->stats_timer,
jiffies + msecs_to_jiffies(BNAD_STATS_TIMER_FREQ));
}
@@ -3692,14 +3690,11 @@ bnad_pci_probe(struct pci_dev *pdev,
goto res_free;
/* Set up timers */
- setup_timer(&bnad->bna.ioceth.ioc.ioc_timer, bnad_ioc_timeout,
- (unsigned long)bnad);
- setup_timer(&bnad->bna.ioceth.ioc.hb_timer, bnad_ioc_hb_check,
- (unsigned long)bnad);
- setup_timer(&bnad->bna.ioceth.ioc.iocpf_timer, bnad_iocpf_timeout,
- (unsigned long)bnad);
- setup_timer(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout,
- (unsigned long)bnad);
+ timer_setup(&bnad->bna.ioceth.ioc.ioc_timer, bnad_ioc_timeout, 0);
+ timer_setup(&bnad->bna.ioceth.ioc.hb_timer, bnad_ioc_hb_check, 0);
+ timer_setup(&bnad->bna.ioceth.ioc.iocpf_timer, bnad_iocpf_timeout, 0);
+ timer_setup(&bnad->bna.ioceth.ioc.sem_timer, bnad_iocpf_sem_timeout,
+ 0);
/*
* Start the chip
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 5b19826a7e16..eafae3eb4fed 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -59,9 +59,9 @@ static int debug = -1;
module_param(debug, int, 0644);
MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
-static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_NIC;
+static char fw_type[LIO_MAX_FW_TYPE_LEN] = LIO_FW_NAME_TYPE_AUTO;
module_param_string(fw_type, fw_type, sizeof(fw_type), 0444);
-MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\". Use \"none\" to load firmware from flash.");
+MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded (default is \"auto\"), which uses firmware in flash, if present, else loads \"nic\".");
static u32 console_bitmask;
module_param(console_bitmask, int, 0644);
@@ -1115,10 +1115,10 @@ liquidio_probe(struct pci_dev *pdev,
return 0;
}
-static bool fw_type_is_none(void)
+static bool fw_type_is_auto(void)
{
- return strncmp(fw_type, LIO_FW_NAME_TYPE_NONE,
- sizeof(LIO_FW_NAME_TYPE_NONE)) == 0;
+ return strncmp(fw_type, LIO_FW_NAME_TYPE_AUTO,
+ sizeof(LIO_FW_NAME_TYPE_AUTO)) == 0;
}
/**
@@ -1302,7 +1302,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
* Implementation note: only soft-reset the device
* if it is a CN6XXX OR the LAST CN23XX device.
*/
- if (fw_type_is_none())
+ if (atomic_read(oct->adapter_fw_state) == FW_IS_PRELOADED)
octeon_pci_flr(oct);
else if (OCTEON_CN6XXX(oct) || !refcount)
oct->fn_list.soft_reset(oct);
@@ -1934,10 +1934,12 @@ static int load_firmware(struct octeon_device *oct)
char fw_name[LIO_MAX_FW_FILENAME_LEN];
char *tmp_fw_type;
- if (fw_type[0] == '\0')
+ if (fw_type_is_auto()) {
tmp_fw_type = LIO_FW_NAME_TYPE_NIC;
- else
+ strncpy(fw_type, tmp_fw_type, sizeof(fw_type));
+ } else {
tmp_fw_type = fw_type;
+ }
sprintf(fw_name, "%s%s%s_%s%s", LIO_FW_DIR, LIO_FW_BASE_NAME,
octeon_get_conf(oct)->card_name, tmp_fw_type,
@@ -3303,7 +3305,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
{
struct lio *lio = NULL;
struct net_device *netdev;
- u8 mac[6], i, j;
+ u8 mac[6], i, j, *fw_ver;
struct octeon_soft_command *sc;
struct liquidio_if_cfg_context *ctx;
struct liquidio_if_cfg_resp *resp;
@@ -3414,6 +3416,22 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
goto setup_nic_dev_fail;
}
+ /* Verify f/w version (in case of 'auto' loading from flash) */
+ fw_ver = octeon_dev->fw_info.liquidio_firmware_version;
+ if (memcmp(LIQUIDIO_BASE_VERSION,
+ fw_ver,
+ strlen(LIQUIDIO_BASE_VERSION))) {
+ dev_err(&octeon_dev->pci_dev->dev,
+ "Unmatched firmware version. Expected %s.x, got %s.\n",
+ LIQUIDIO_BASE_VERSION, fw_ver);
+ goto setup_nic_dev_fail;
+ } else if (atomic_read(octeon_dev->adapter_fw_state) ==
+ FW_IS_PRELOADED) {
+ dev_info(&octeon_dev->pci_dev->dev,
+ "Using auto-loaded firmware version %s.\n",
+ fw_ver);
+ }
+
octeon_swap_8B_data((u64 *)(&resp->cfg_info),
(sizeof(struct liquidio_if_cfg_info)) >> 3);
@@ -3882,9 +3900,9 @@ octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf)
static int octeon_device_init(struct octeon_device *octeon_dev)
{
int j, ret;
- int fw_loaded = 0;
char bootcmd[] = "\n";
char *dbg_enb = NULL;
+ enum lio_fw_state fw_state;
struct octeon_device_priv *oct_priv =
(struct octeon_device_priv *)octeon_dev->priv;
atomic_set(&octeon_dev->status, OCT_DEV_BEGIN_STATE);
@@ -3916,24 +3934,40 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
octeon_dev->app_mode = CVM_DRV_INVALID_APP;
- if (OCTEON_CN23XX_PF(octeon_dev)) {
- if (!cn23xx_fw_loaded(octeon_dev) && !fw_type_is_none()) {
- fw_loaded = 0;
- /* Do a soft reset of the Octeon device. */
- if (octeon_dev->fn_list.soft_reset(octeon_dev))
- return 1;
- /* things might have changed */
- if (!cn23xx_fw_loaded(octeon_dev))
- fw_loaded = 0;
- else
- fw_loaded = 1;
- } else {
- fw_loaded = 1;
- }
- } else if (octeon_dev->fn_list.soft_reset(octeon_dev)) {
- return 1;
+ /* CN23XX supports preloaded firmware if the following is true:
+ *
+ * The adapter indicates that firmware is currently running AND
+ * 'fw_type' is 'auto'.
+ *
+ * (default state is NEEDS_TO_BE_LOADED, override it if appropriate).
+ */
+ if (OCTEON_CN23XX_PF(octeon_dev) &&
+ cn23xx_fw_loaded(octeon_dev) && fw_type_is_auto()) {
+ atomic_cmpxchg(octeon_dev->adapter_fw_state,
+ FW_NEEDS_TO_BE_LOADED, FW_IS_PRELOADED);
}
+ /* If loading firmware, only first device of adapter needs to do so. */
+ fw_state = atomic_cmpxchg(octeon_dev->adapter_fw_state,
+ FW_NEEDS_TO_BE_LOADED,
+ FW_IS_BEING_LOADED);
+
+ /* Here, [local variable] 'fw_state' is set to one of:
+ *
+ * FW_IS_PRELOADED: No firmware is to be loaded (see above)
+ * FW_NEEDS_TO_BE_LOADED: The driver's first instance will load
+ * firmware to the adapter.
+ * FW_IS_BEING_LOADED: The driver's second instance will not load
+ * firmware to the adapter.
+ */
+
+ /* Prior to f/w load, perform a soft reset of the Octeon device;
+ * if error resetting, return w/error.
+ */
+ if (fw_state == FW_NEEDS_TO_BE_LOADED)
+ if (octeon_dev->fn_list.soft_reset(octeon_dev))
+ return 1;
+
/* Initialize the dispatch mechanism used to push packets arriving on
* Octeon Output queues.
*/
@@ -4063,7 +4097,7 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE);
- if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) {
+ if (fw_state == FW_NEEDS_TO_BE_LOADED) {
dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n");
if (!ddr_timeout) {
dev_info(&octeon_dev->pci_dev->dev,
@@ -4125,6 +4159,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n");
return 1;
}
+
+ atomic_set(octeon_dev->adapter_fw_state, FW_HAS_BEEN_LOADED);
}
handshake[octeon_dev->octeon_id].init_ok = 1;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 2e993ce43b66..4c3b5688529b 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -435,8 +435,7 @@ static void delete_glists(struct lio *lio)
do {
g = (struct octnic_gather *)
list_delete_head(&lio->glist[i]);
- if (g)
- kfree(g);
+ kfree(g);
} while (g);
if (lio->glists_virt_base && lio->glists_virt_base[i] &&
@@ -748,7 +747,7 @@ static void octeon_destroy_resources(struct octeon_device *oct)
if (lio_wait_for_oq_pkts(oct))
dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
-
+ /* fall through */
case OCT_DEV_INTR_SET_DONE:
/* Disable interrupts */
oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
index 78a3685f6fe0..5bf5e8791dfb 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
@@ -24,6 +24,7 @@
#define LIO_FW_BASE_NAME "lio_"
#define LIO_FW_NAME_SUFFIX ".bin"
#define LIO_FW_NAME_TYPE_NIC "nic"
+#define LIO_FW_NAME_TYPE_AUTO "auto"
#define LIO_FW_NAME_TYPE_NONE "none"
#define LIO_MAX_FIRMWARE_VERSION_LEN 16
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 29d53b1763a7..e4aa3395a578 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -541,6 +541,7 @@ static char oct_dev_app_str[CVM_DRV_APP_COUNT + 1][32] = {
static struct octeon_device *octeon_device[MAX_OCTEON_DEVICES];
static atomic_t adapter_refcounts[MAX_OCTEON_DEVICES];
+static atomic_t adapter_fw_states[MAX_OCTEON_DEVICES];
static u32 octeon_device_count;
/* locks device array (i.e. octeon_device[]) */
@@ -770,6 +771,10 @@ int octeon_register_device(struct octeon_device *oct,
oct->adapter_refcount = &adapter_refcounts[oct->octeon_id];
atomic_set(oct->adapter_refcount, 0);
+ /* Like the reference count, the f/w state is shared 'per-adapter' */
+ oct->adapter_fw_state = &adapter_fw_states[oct->octeon_id];
+ atomic_set(oct->adapter_fw_state, FW_NEEDS_TO_BE_LOADED);
+
spin_lock(&octeon_devices_lock);
for (idx = (int)oct->octeon_id - 1; idx >= 0; idx--) {
if (!octeon_device[idx]) {
@@ -780,11 +785,15 @@ int octeon_register_device(struct octeon_device *oct,
atomic_inc(oct->adapter_refcount);
return 1; /* here, refcount is guaranteed to be 1 */
}
- /* if another device is at same bus/dev, use its refcounter */
+ /* If another device is at same bus/dev, use its refcounter
+ * (and f/w state variable).
+ */
if ((octeon_device[idx]->loc.bus == bus) &&
(octeon_device[idx]->loc.dev == dev)) {
oct->adapter_refcount =
octeon_device[idx]->adapter_refcount;
+ oct->adapter_fw_state =
+ octeon_device[idx]->adapter_fw_state;
break;
}
}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 894af199ddef..33d19c4509bc 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -50,6 +50,13 @@ enum octeon_pci_swap_mode {
OCTEON_PCI_32BIT_LW_SWAP = 3
};
+enum lio_fw_state {
+ FW_IS_PRELOADED = 0,
+ FW_NEEDS_TO_BE_LOADED = 1,
+ FW_IS_BEING_LOADED = 2,
+ FW_HAS_BEEN_LOADED = 3,
+};
+
enum {
OCTEON_CONFIG_TYPE_DEFAULT = 0,
NUM_OCTEON_CONFS,
@@ -557,6 +564,9 @@ struct octeon_device {
} loc;
atomic_t *adapter_refcount; /* reference count of adapter */
+
+ atomic_t *adapter_fw_state; /* per-adapter, lio_fw_state */
+
bool ptp_enable;
};
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 805ab45e9b5a..71989e180289 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -523,6 +523,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
xdp.data_hard_start = page_address(page);
xdp.data = (void *)cpu_addr;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
orig_data = xdp.data;
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 0f13a7f7c1d3..30de26ef3da4 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -1882,10 +1882,10 @@ send:
/*
* Callback for the Tx buffer reclaim timer. Runs with softirqs disabled.
*/
-static void sge_tx_reclaim_cb(unsigned long data)
+static void sge_tx_reclaim_cb(struct timer_list *t)
{
int i;
- struct sge *sge = (struct sge *)data;
+ struct sge *sge = from_timer(sge, t, tx_reclaim_timer);
for (i = 0; i < SGE_CMDQ_N; ++i) {
struct cmdQ *q = &sge->cmdQ[i];
@@ -1978,10 +1978,10 @@ void t1_sge_start(struct sge *sge)
/*
* Callback for the T2 ESPI 'stuck packet feature' workaorund
*/
-static void espibug_workaround_t204(unsigned long data)
+static void espibug_workaround_t204(struct timer_list *t)
{
- struct adapter *adapter = (struct adapter *)data;
- struct sge *sge = adapter->sge;
+ struct sge *sge = from_timer(sge, t, espibug_timer);
+ struct adapter *adapter = sge->adapter;
unsigned int nports = adapter->params.nports;
u32 seop[MAX_NPORTS];
@@ -2021,10 +2021,10 @@ static void espibug_workaround_t204(unsigned long data)
mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout);
}
-static void espibug_workaround(unsigned long data)
+static void espibug_workaround(struct timer_list *t)
{
- struct adapter *adapter = (struct adapter *)data;
- struct sge *sge = adapter->sge;
+ struct sge *sge = from_timer(sge, t, espibug_timer);
+ struct adapter *adapter = sge->adapter;
if (netif_running(adapter->port[0].dev)) {
struct sk_buff *skb = sge->espibug_skb[0];
@@ -2075,19 +2075,15 @@ struct sge *t1_sge_create(struct adapter *adapter, struct sge_params *p)
goto nomem_port;
}
- init_timer(&sge->tx_reclaim_timer);
- sge->tx_reclaim_timer.data = (unsigned long)sge;
- sge->tx_reclaim_timer.function = sge_tx_reclaim_cb;
+ timer_setup(&sge->tx_reclaim_timer, sge_tx_reclaim_cb, 0);
if (is_T2(sge->adapter)) {
- init_timer(&sge->espibug_timer);
+ timer_setup(&sge->espibug_timer,
+ adapter->params.nports > 1 ? espibug_workaround_t204 : espibug_workaround,
+ 0);
- if (adapter->params.nports > 1) {
+ if (adapter->params.nports > 1)
tx_sched_init(sge);
- sge->espibug_timer.function = espibug_workaround_t204;
- } else
- sge->espibug_timer.function = espibug_workaround;
- sge->espibug_timer.data = (unsigned long)sge->adapter;
sge->espibug_timeout = 1;
/* for T204, every 10ms */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile
index 817212702f0a..43c86b74dfb8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/Makefile
+++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile
@@ -4,7 +4,10 @@
obj-$(CONFIG_CHELSIO_T4) += cxgb4.o
-cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4_ptp.o
+cxgb4-objs := cxgb4_main.o l2t.o smt.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o \
+ cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o \
+ cxgb4_ptp.o cxgb4_tc_flower.o cxgb4_cudbg.o \
+ cudbg_common.o cudbg_lib.o
cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o
cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o
cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
index 3103ef9b561d..290039026ece 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.c
@@ -96,7 +96,8 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
if (!ret) {
ce = cte;
read_unlock_bh(&ctbl->lock);
- goto found;
+ refcount_inc(&ce->refcnt);
+ return 0;
}
}
read_unlock_bh(&ctbl->lock);
@@ -108,7 +109,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
list_del(&ce->list);
INIT_LIST_HEAD(&ce->list);
spin_lock_init(&ce->lock);
- atomic_set(&ce->refcnt, 0);
+ refcount_set(&ce->refcnt, 0);
atomic_dec(&ctbl->nfree);
list_add_tail(&ce->list, &ctbl->hash_list[hash]);
if (v6) {
@@ -138,9 +139,7 @@ int cxgb4_clip_get(const struct net_device *dev, const u32 *lip, u8 v6)
return -ENOMEM;
}
write_unlock_bh(&ctbl->lock);
-found:
- atomic_inc(&ce->refcnt);
-
+ refcount_set(&ce->refcnt, 1);
return 0;
}
EXPORT_SYMBOL(cxgb4_clip_get);
@@ -179,7 +178,7 @@ void cxgb4_clip_release(const struct net_device *dev, const u32 *lip, u8 v6)
found:
write_lock_bh(&ctbl->lock);
spin_lock_bh(&ce->lock);
- if (atomic_dec_and_test(&ce->refcnt)) {
+ if (refcount_dec_and_test(&ce->refcnt)) {
list_del(&ce->list);
INIT_LIST_HEAD(&ce->list);
list_add_tail(&ce->list, &ctbl->ce_free_head);
@@ -266,7 +265,7 @@ int clip_tbl_show(struct seq_file *seq, void *v)
ip[0] = '\0';
sprintf(ip, "%pISc", &ce->addr);
seq_printf(seq, "%-25s %u\n", ip,
- atomic_read(&ce->refcnt));
+ refcount_read(&ce->refcnt));
}
}
seq_printf(seq, "Free clip entries : %d\n", atomic_read(&ctbl->nfree));
diff --git a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h
index 35eb43c6bcbb..a0e0ae19649f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/clip_tbl.h
@@ -10,9 +10,11 @@
* release for licensing terms and conditions.
*/
+#include <linux/refcount.h>
+
struct clip_entry {
spinlock_t lock; /* Hold while modifying clip reference */
- atomic_t refcnt;
+ refcount_t refcnt;
struct list_head list;
union {
struct sockaddr_in addr;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c
new file mode 100644
index 000000000000..f78ba1743b5a
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_common.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2017 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include "cxgb4.h"
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+
+int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size,
+ struct cudbg_buffer *pin_buff)
+{
+ u32 offset;
+
+ offset = pdbg_buff->offset;
+ if (offset + size > pdbg_buff->size)
+ return CUDBG_STATUS_NO_MEM;
+
+ pin_buff->data = (char *)pdbg_buff->data + offset;
+ pin_buff->offset = offset;
+ pin_buff->size = size;
+ pdbg_buff->size -= size;
+ return 0;
+}
+
+void cudbg_put_buff(struct cudbg_buffer *pin_buff,
+ struct cudbg_buffer *pdbg_buff)
+{
+ pdbg_buff->size += pin_buff->size;
+ pin_buff->data = NULL;
+ pin_buff->offset = 0;
+ pin_buff->size = 0;
+}
+
+void cudbg_update_buff(struct cudbg_buffer *pin_buff,
+ struct cudbg_buffer *pout_buff)
+{
+ /* We already write to buffer provided by ethool, so just
+ * increment offset to next free space.
+ */
+ pout_buff->offset += pin_buff->size;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
new file mode 100644
index 000000000000..d7f3392f618f
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (C) 2017 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_ENTITY_H__
+#define __CUDBG_ENTITY_H__
+
+#define EDC0_FLAG 3
+#define EDC1_FLAG 4
+
+struct card_mem {
+ u16 size_edc0;
+ u16 size_edc1;
+ u16 mem_flag;
+};
+
+struct cudbg_mbox_log {
+ struct mbox_cmd entry;
+ u32 hi[MBOX_LEN / 8];
+ u32 lo[MBOX_LEN / 8];
+};
+
+struct ireg_field {
+ u32 ireg_addr;
+ u32 ireg_data;
+ u32 ireg_local_offset;
+ u32 ireg_offset_range;
+};
+
+struct ireg_buf {
+ struct ireg_field tp_pio;
+ u32 outbuf[32];
+};
+
+#define IREG_NUM_ELEM 4
+
+static const u32 t6_tp_pio_array[][IREG_NUM_ELEM] = {
+ {0x7e40, 0x7e44, 0x020, 28}, /* t6_tp_pio_regs_20_to_3b */
+ {0x7e40, 0x7e44, 0x040, 10}, /* t6_tp_pio_regs_40_to_49 */
+ {0x7e40, 0x7e44, 0x050, 10}, /* t6_tp_pio_regs_50_to_59 */
+ {0x7e40, 0x7e44, 0x060, 14}, /* t6_tp_pio_regs_60_to_6d */
+ {0x7e40, 0x7e44, 0x06F, 1}, /* t6_tp_pio_regs_6f */
+ {0x7e40, 0x7e44, 0x070, 6}, /* t6_tp_pio_regs_70_to_75 */
+ {0x7e40, 0x7e44, 0x130, 18}, /* t6_tp_pio_regs_130_to_141 */
+ {0x7e40, 0x7e44, 0x145, 19}, /* t6_tp_pio_regs_145_to_157 */
+ {0x7e40, 0x7e44, 0x160, 1}, /* t6_tp_pio_regs_160 */
+ {0x7e40, 0x7e44, 0x230, 25}, /* t6_tp_pio_regs_230_to_248 */
+ {0x7e40, 0x7e44, 0x24a, 3}, /* t6_tp_pio_regs_24c */
+ {0x7e40, 0x7e44, 0x8C0, 1} /* t6_tp_pio_regs_8c0 */
+};
+
+static const u32 t5_tp_pio_array[][IREG_NUM_ELEM] = {
+ {0x7e40, 0x7e44, 0x020, 28}, /* t5_tp_pio_regs_20_to_3b */
+ {0x7e40, 0x7e44, 0x040, 19}, /* t5_tp_pio_regs_40_to_52 */
+ {0x7e40, 0x7e44, 0x054, 2}, /* t5_tp_pio_regs_54_to_55 */
+ {0x7e40, 0x7e44, 0x060, 13}, /* t5_tp_pio_regs_60_to_6c */
+ {0x7e40, 0x7e44, 0x06F, 1}, /* t5_tp_pio_regs_6f */
+ {0x7e40, 0x7e44, 0x120, 4}, /* t5_tp_pio_regs_120_to_123 */
+ {0x7e40, 0x7e44, 0x12b, 2}, /* t5_tp_pio_regs_12b_to_12c */
+ {0x7e40, 0x7e44, 0x12f, 21}, /* t5_tp_pio_regs_12f_to_143 */
+ {0x7e40, 0x7e44, 0x145, 19}, /* t5_tp_pio_regs_145_to_157 */
+ {0x7e40, 0x7e44, 0x230, 25}, /* t5_tp_pio_regs_230_to_248 */
+ {0x7e40, 0x7e44, 0x8C0, 1} /* t5_tp_pio_regs_8c0 */
+};
+
+static const u32 t6_tp_tm_pio_array[][IREG_NUM_ELEM] = {
+ {0x7e18, 0x7e1c, 0x0, 12}
+};
+
+static const u32 t5_tp_tm_pio_array[][IREG_NUM_ELEM] = {
+ {0x7e18, 0x7e1c, 0x0, 12}
+};
+
+static const u32 t6_tp_mib_index_array[6][IREG_NUM_ELEM] = {
+ {0x7e50, 0x7e54, 0x0, 13},
+ {0x7e50, 0x7e54, 0x10, 6},
+ {0x7e50, 0x7e54, 0x18, 21},
+ {0x7e50, 0x7e54, 0x30, 32},
+ {0x7e50, 0x7e54, 0x50, 22},
+ {0x7e50, 0x7e54, 0x68, 12}
+};
+
+static const u32 t5_tp_mib_index_array[9][IREG_NUM_ELEM] = {
+ {0x7e50, 0x7e54, 0x0, 13},
+ {0x7e50, 0x7e54, 0x10, 6},
+ {0x7e50, 0x7e54, 0x18, 8},
+ {0x7e50, 0x7e54, 0x20, 13},
+ {0x7e50, 0x7e54, 0x30, 16},
+ {0x7e50, 0x7e54, 0x40, 16},
+ {0x7e50, 0x7e54, 0x50, 16},
+ {0x7e50, 0x7e54, 0x60, 6},
+ {0x7e50, 0x7e54, 0x68, 4}
+};
+
+static const u32 t5_sge_dbg_index_array[2][IREG_NUM_ELEM] = {
+ {0x10cc, 0x10d0, 0x0, 16},
+ {0x10cc, 0x10d4, 0x0, 16},
+};
+
+static const u32 t5_pcie_pdbg_array[][IREG_NUM_ELEM] = {
+ {0x5a04, 0x5a0c, 0x00, 0x20}, /* t5_pcie_pdbg_regs_00_to_20 */
+ {0x5a04, 0x5a0c, 0x21, 0x20}, /* t5_pcie_pdbg_regs_21_to_40 */
+ {0x5a04, 0x5a0c, 0x41, 0x10}, /* t5_pcie_pdbg_regs_41_to_50 */
+};
+
+static const u32 t5_pcie_cdbg_array[][IREG_NUM_ELEM] = {
+ {0x5a10, 0x5a18, 0x00, 0x20}, /* t5_pcie_cdbg_regs_00_to_20 */
+ {0x5a10, 0x5a18, 0x21, 0x18}, /* t5_pcie_cdbg_regs_21_to_37 */
+};
+
+static const u32 t5_pm_rx_array[][IREG_NUM_ELEM] = {
+ {0x8FD0, 0x8FD4, 0x10000, 0x20}, /* t5_pm_rx_regs_10000_to_10020 */
+ {0x8FD0, 0x8FD4, 0x10021, 0x0D}, /* t5_pm_rx_regs_10021_to_1002c */
+};
+
+static const u32 t5_pm_tx_array[][IREG_NUM_ELEM] = {
+ {0x8FF0, 0x8FF4, 0x10000, 0x20}, /* t5_pm_tx_regs_10000_to_10020 */
+ {0x8FF0, 0x8FF4, 0x10021, 0x1D}, /* t5_pm_tx_regs_10021_to_1003c */
+};
+
+static const u32 t6_ma_ireg_array[][IREG_NUM_ELEM] = {
+ {0x78f8, 0x78fc, 0xa000, 23}, /* t6_ma_regs_a000_to_a016 */
+ {0x78f8, 0x78fc, 0xa400, 30}, /* t6_ma_regs_a400_to_a41e */
+ {0x78f8, 0x78fc, 0xa800, 20} /* t6_ma_regs_a800_to_a813 */
+};
+
+static const u32 t6_ma_ireg_array2[][IREG_NUM_ELEM] = {
+ {0x78f8, 0x78fc, 0xe400, 17}, /* t6_ma_regs_e400_to_e600 */
+ {0x78f8, 0x78fc, 0xe640, 13} /* t6_ma_regs_e640_to_e7c0 */
+};
+
+static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM] = {
+ {0x7b50, 0x7b54, 0x2000, 0x20}, /* up_cim_2000_to_207c */
+ {0x7b50, 0x7b54, 0x2080, 0x1d}, /* up_cim_2080_to_20fc */
+ {0x7b50, 0x7b54, 0x00, 0x20}, /* up_cim_00_to_7c */
+ {0x7b50, 0x7b54, 0x80, 0x20}, /* up_cim_80_to_fc */
+ {0x7b50, 0x7b54, 0x100, 0x11}, /* up_cim_100_to_14c */
+ {0x7b50, 0x7b54, 0x200, 0x10}, /* up_cim_200_to_23c */
+ {0x7b50, 0x7b54, 0x240, 0x2}, /* up_cim_240_to_244 */
+ {0x7b50, 0x7b54, 0x250, 0x2}, /* up_cim_250_to_254 */
+ {0x7b50, 0x7b54, 0x260, 0x2}, /* up_cim_260_to_264 */
+ {0x7b50, 0x7b54, 0x270, 0x2}, /* up_cim_270_to_274 */
+ {0x7b50, 0x7b54, 0x280, 0x20}, /* up_cim_280_to_2fc */
+ {0x7b50, 0x7b54, 0x300, 0x20}, /* up_cim_300_to_37c */
+ {0x7b50, 0x7b54, 0x380, 0x14}, /* up_cim_380_to_3cc */
+
+};
+
+static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM] = {
+ {0x7b50, 0x7b54, 0x2000, 0x20}, /* up_cim_2000_to_207c */
+ {0x7b50, 0x7b54, 0x2080, 0x19}, /* up_cim_2080_to_20ec */
+ {0x7b50, 0x7b54, 0x00, 0x20}, /* up_cim_00_to_7c */
+ {0x7b50, 0x7b54, 0x80, 0x20}, /* up_cim_80_to_fc */
+ {0x7b50, 0x7b54, 0x100, 0x11}, /* up_cim_100_to_14c */
+ {0x7b50, 0x7b54, 0x200, 0x10}, /* up_cim_200_to_23c */
+ {0x7b50, 0x7b54, 0x240, 0x2}, /* up_cim_240_to_244 */
+ {0x7b50, 0x7b54, 0x250, 0x2}, /* up_cim_250_to_254 */
+ {0x7b50, 0x7b54, 0x260, 0x2}, /* up_cim_260_to_264 */
+ {0x7b50, 0x7b54, 0x270, 0x2}, /* up_cim_270_to_274 */
+ {0x7b50, 0x7b54, 0x280, 0x20}, /* up_cim_280_to_2fc */
+ {0x7b50, 0x7b54, 0x300, 0x20}, /* up_cim_300_to_37c */
+ {0x7b50, 0x7b54, 0x380, 0x14}, /* up_cim_380_to_3cc */
+};
+
+static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = {
+ {0x51320, 0x51324, 0xa000, 32} /* t6_hma_regs_a000_to_a01f */
+};
+#endif /* __CUDBG_ENTITY_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
new file mode 100644
index 000000000000..9b8005e67811
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_if.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2017 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_IF_H__
+#define __CUDBG_IF_H__
+
+/* Error codes */
+#define CUDBG_STATUS_NO_MEM -19
+#define CUDBG_STATUS_ENTITY_NOT_FOUND -24
+#define CUDBG_SYSTEM_ERROR -29
+
+#define CUDBG_MAJOR_VERSION 1
+#define CUDBG_MINOR_VERSION 14
+
+enum cudbg_dbg_entity_type {
+ CUDBG_REG_DUMP = 1,
+ CUDBG_DEV_LOG = 2,
+ CUDBG_CIM_IBQ_TP0 = 6,
+ CUDBG_CIM_IBQ_TP1 = 7,
+ CUDBG_CIM_IBQ_ULP = 8,
+ CUDBG_CIM_IBQ_SGE0 = 9,
+ CUDBG_CIM_IBQ_SGE1 = 10,
+ CUDBG_CIM_IBQ_NCSI = 11,
+ CUDBG_CIM_OBQ_ULP0 = 12,
+ CUDBG_CIM_OBQ_ULP1 = 13,
+ CUDBG_CIM_OBQ_ULP2 = 14,
+ CUDBG_CIM_OBQ_ULP3 = 15,
+ CUDBG_CIM_OBQ_SGE = 16,
+ CUDBG_CIM_OBQ_NCSI = 17,
+ CUDBG_EDC0 = 18,
+ CUDBG_EDC1 = 19,
+ CUDBG_TP_INDIRECT = 36,
+ CUDBG_SGE_INDIRECT = 37,
+ CUDBG_CIM_OBQ_RXQ0 = 47,
+ CUDBG_CIM_OBQ_RXQ1 = 48,
+ CUDBG_PCIE_INDIRECT = 50,
+ CUDBG_PM_INDIRECT = 51,
+ CUDBG_MA_INDIRECT = 61,
+ CUDBG_UP_CIM_INDIRECT = 64,
+ CUDBG_MBOX_LOG = 66,
+ CUDBG_HMA_INDIRECT = 67,
+ CUDBG_MAX_ENTITY = 70,
+};
+
+struct cudbg_init {
+ struct adapter *adap; /* Pointer to adapter structure */
+ void *outbuf; /* Output buffer */
+ u32 outbuf_size; /* Output buffer size */
+};
+
+static inline unsigned int cudbg_mbytes_to_bytes(unsigned int size)
+{
+ return size * 1024 * 1024;
+}
+#endif /* __CUDBG_IF_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
new file mode 100644
index 000000000000..c451b2e42a6c
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@ -0,0 +1,867 @@
+/*
+ * Copyright (C) 2017 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include "t4_regs.h"
+#include "cxgb4.h"
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+#include "cudbg_lib.h"
+#include "cudbg_entity.h"
+
+static void cudbg_write_and_release_buff(struct cudbg_buffer *pin_buff,
+ struct cudbg_buffer *dbg_buff)
+{
+ cudbg_update_buff(pin_buff, dbg_buff);
+ cudbg_put_buff(pin_buff, dbg_buff);
+}
+
+static int is_fw_attached(struct cudbg_init *pdbg_init)
+{
+ struct adapter *padap = pdbg_init->adap;
+
+ if (!(padap->flags & FW_OK) || padap->use_bd)
+ return 0;
+
+ return 1;
+}
+
+/* This function will add additional padding bytes into debug_buffer to make it
+ * 4 byte aligned.
+ */
+void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff,
+ struct cudbg_entity_hdr *entity_hdr)
+{
+ u8 zero_buf[4] = {0};
+ u8 padding, remain;
+
+ remain = (dbg_buff->offset - entity_hdr->start_offset) % 4;
+ padding = 4 - remain;
+ if (remain) {
+ memcpy(((u8 *)dbg_buff->data) + dbg_buff->offset, &zero_buf,
+ padding);
+ dbg_buff->offset += padding;
+ entity_hdr->num_pad = padding;
+ }
+ entity_hdr->size = dbg_buff->offset - entity_hdr->start_offset;
+}
+
+struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i)
+{
+ struct cudbg_hdr *cudbg_hdr = (struct cudbg_hdr *)outbuf;
+
+ return (struct cudbg_entity_hdr *)
+ ((char *)outbuf + cudbg_hdr->hdr_len +
+ (sizeof(struct cudbg_entity_hdr) * (i - 1)));
+}
+
+int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ u32 buf_size = 0;
+ int rc = 0;
+
+ if (is_t4(padap->params.chip))
+ buf_size = T4_REGMAP_SIZE;
+ else if (is_t5(padap->params.chip) || is_t6(padap->params.chip))
+ buf_size = T5_REGMAP_SIZE;
+
+ rc = cudbg_get_buff(dbg_buff, buf_size, &temp_buff);
+ if (rc)
+ return rc;
+ t4_get_regs(padap, (void *)temp_buff.data, temp_buff.size);
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct devlog_params *dparams;
+ int rc = 0;
+
+ rc = t4_init_devlog_params(padap);
+ if (rc < 0) {
+ cudbg_err->sys_err = rc;
+ return rc;
+ }
+
+ dparams = &padap->params.devlog;
+ rc = cudbg_get_buff(dbg_buff, dparams->size, &temp_buff);
+ if (rc)
+ return rc;
+
+ /* Collect FW devlog */
+ if (dparams->start != 0) {
+ spin_lock(&padap->win0_lock);
+ rc = t4_memory_rw(padap, padap->params.drv_memwin,
+ dparams->memtype, dparams->start,
+ dparams->size,
+ (__be32 *)(char *)temp_buff.data,
+ 1);
+ spin_unlock(&padap->win0_lock);
+ if (rc) {
+ cudbg_err->sys_err = rc;
+ cudbg_put_buff(&temp_buff, dbg_buff);
+ return rc;
+ }
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+static int cudbg_read_cim_ibq(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err, int qid)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ int no_of_read_words, rc = 0;
+ u32 qsize;
+
+ /* collect CIM IBQ */
+ qsize = CIM_IBQ_SIZE * 4 * sizeof(u32);
+ rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff);
+ if (rc)
+ return rc;
+
+ /* t4_read_cim_ibq will return no. of read words or error */
+ no_of_read_words = t4_read_cim_ibq(padap, qid,
+ (u32 *)((u32 *)temp_buff.data +
+ temp_buff.offset), qsize);
+ /* no_of_read_words is less than or equal to 0 means error */
+ if (no_of_read_words <= 0) {
+ if (!no_of_read_words)
+ rc = CUDBG_SYSTEM_ERROR;
+ else
+ rc = no_of_read_words;
+ cudbg_err->sys_err = rc;
+ cudbg_put_buff(&temp_buff, dbg_buff);
+ return rc;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 0);
+}
+
+int cudbg_collect_cim_ibq_tp1(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 1);
+}
+
+int cudbg_collect_cim_ibq_ulp(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 2);
+}
+
+int cudbg_collect_cim_ibq_sge0(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 3);
+}
+
+int cudbg_collect_cim_ibq_sge1(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 4);
+}
+
+int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_ibq(pdbg_init, dbg_buff, cudbg_err, 5);
+}
+
+static int cudbg_read_cim_obq(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err, int qid)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ int no_of_read_words, rc = 0;
+ u32 qsize;
+
+ /* collect CIM OBQ */
+ qsize = 6 * CIM_OBQ_SIZE * 4 * sizeof(u32);
+ rc = cudbg_get_buff(dbg_buff, qsize, &temp_buff);
+ if (rc)
+ return rc;
+
+ /* t4_read_cim_obq will return no. of read words or error */
+ no_of_read_words = t4_read_cim_obq(padap, qid,
+ (u32 *)((u32 *)temp_buff.data +
+ temp_buff.offset), qsize);
+ /* no_of_read_words is less than or equal to 0 means error */
+ if (no_of_read_words <= 0) {
+ if (!no_of_read_words)
+ rc = CUDBG_SYSTEM_ERROR;
+ else
+ rc = no_of_read_words;
+ cudbg_err->sys_err = rc;
+ cudbg_put_buff(&temp_buff, dbg_buff);
+ return rc;
+ }
+ temp_buff.size = no_of_read_words * 4;
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 0);
+}
+
+int cudbg_collect_cim_obq_ulp1(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 1);
+}
+
+int cudbg_collect_cim_obq_ulp2(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 2);
+}
+
+int cudbg_collect_cim_obq_ulp3(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 3);
+}
+
+int cudbg_collect_cim_obq_sge(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 4);
+}
+
+int cudbg_collect_cim_obq_ncsi(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 5);
+}
+
+int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 6);
+}
+
+int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_read_cim_obq(pdbg_init, dbg_buff, cudbg_err, 7);
+}
+
+static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff, u8 mem_type,
+ unsigned long tot_len,
+ struct cudbg_error *cudbg_err)
+{
+ unsigned long bytes, bytes_left, bytes_read = 0;
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ int rc = 0;
+
+ bytes_left = tot_len;
+ while (bytes_left > 0) {
+ bytes = min_t(unsigned long, bytes_left,
+ (unsigned long)CUDBG_CHUNK_SIZE);
+ rc = cudbg_get_buff(dbg_buff, bytes, &temp_buff);
+ if (rc)
+ return rc;
+ spin_lock(&padap->win0_lock);
+ rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type,
+ bytes_read, bytes,
+ (__be32 *)temp_buff.data,
+ 1);
+ spin_unlock(&padap->win0_lock);
+ if (rc) {
+ cudbg_err->sys_err = rc;
+ cudbg_put_buff(&temp_buff, dbg_buff);
+ return rc;
+ }
+ bytes_left -= bytes;
+ bytes_read += bytes;
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ }
+ return rc;
+}
+
+static void cudbg_collect_mem_info(struct cudbg_init *pdbg_init,
+ struct card_mem *mem_info)
+{
+ struct adapter *padap = pdbg_init->adap;
+ u32 value;
+
+ value = t4_read_reg(padap, MA_EDRAM0_BAR_A);
+ value = EDRAM0_SIZE_G(value);
+ mem_info->size_edc0 = (u16)value;
+
+ value = t4_read_reg(padap, MA_EDRAM1_BAR_A);
+ value = EDRAM1_SIZE_G(value);
+ mem_info->size_edc1 = (u16)value;
+
+ value = t4_read_reg(padap, MA_TARGET_MEM_ENABLE_A);
+ if (value & EDRAM0_ENABLE_F)
+ mem_info->mem_flag |= (1 << EDC0_FLAG);
+ if (value & EDRAM1_ENABLE_F)
+ mem_info->mem_flag |= (1 << EDC1_FLAG);
+}
+
+static void cudbg_t4_fwcache(struct cudbg_init *pdbg_init,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ int rc;
+
+ if (is_fw_attached(pdbg_init)) {
+ /* Flush uP dcache before reading edcX/mcX */
+ rc = t4_fwcache(padap, FW_PARAM_DEV_FWCACHE_FLUSH);
+ if (rc)
+ cudbg_err->sys_warn = rc;
+ }
+}
+
+static int cudbg_collect_mem_region(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err,
+ u8 mem_type)
+{
+ struct card_mem mem_info = {0};
+ unsigned long flag, size;
+ int rc;
+
+ cudbg_t4_fwcache(pdbg_init, cudbg_err);
+ cudbg_collect_mem_info(pdbg_init, &mem_info);
+ switch (mem_type) {
+ case MEM_EDC0:
+ flag = (1 << EDC0_FLAG);
+ size = cudbg_mbytes_to_bytes(mem_info.size_edc0);
+ break;
+ case MEM_EDC1:
+ flag = (1 << EDC1_FLAG);
+ size = cudbg_mbytes_to_bytes(mem_info.size_edc1);
+ break;
+ default:
+ rc = CUDBG_STATUS_ENTITY_NOT_FOUND;
+ goto err;
+ }
+
+ if (mem_info.mem_flag & flag) {
+ rc = cudbg_read_fw_mem(pdbg_init, dbg_buff, mem_type,
+ size, cudbg_err);
+ if (rc)
+ goto err;
+ } else {
+ rc = CUDBG_STATUS_ENTITY_NOT_FOUND;
+ goto err;
+ }
+err:
+ return rc;
+}
+
+int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err,
+ MEM_EDC0);
+}
+
+int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ return cudbg_collect_mem_region(pdbg_init, dbg_buff, cudbg_err,
+ MEM_EDC1);
+}
+
+int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct ireg_buf *ch_tp_pio;
+ int i, rc, n = 0;
+ u32 size;
+
+ if (is_t5(padap->params.chip))
+ n = sizeof(t5_tp_pio_array) +
+ sizeof(t5_tp_tm_pio_array) +
+ sizeof(t5_tp_mib_index_array);
+ else
+ n = sizeof(t6_tp_pio_array) +
+ sizeof(t6_tp_tm_pio_array) +
+ sizeof(t6_tp_mib_index_array);
+
+ n = n / (IREG_NUM_ELEM * sizeof(u32));
+ size = sizeof(struct ireg_buf) * n;
+ rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+ if (rc)
+ return rc;
+
+ ch_tp_pio = (struct ireg_buf *)temp_buff.data;
+
+ /* TP_PIO */
+ if (is_t5(padap->params.chip))
+ n = sizeof(t5_tp_pio_array) / (IREG_NUM_ELEM * sizeof(u32));
+ else if (is_t6(padap->params.chip))
+ n = sizeof(t6_tp_pio_array) / (IREG_NUM_ELEM * sizeof(u32));
+
+ for (i = 0; i < n; i++) {
+ struct ireg_field *tp_pio = &ch_tp_pio->tp_pio;
+ u32 *buff = ch_tp_pio->outbuf;
+
+ if (is_t5(padap->params.chip)) {
+ tp_pio->ireg_addr = t5_tp_pio_array[i][0];
+ tp_pio->ireg_data = t5_tp_pio_array[i][1];
+ tp_pio->ireg_local_offset = t5_tp_pio_array[i][2];
+ tp_pio->ireg_offset_range = t5_tp_pio_array[i][3];
+ } else if (is_t6(padap->params.chip)) {
+ tp_pio->ireg_addr = t6_tp_pio_array[i][0];
+ tp_pio->ireg_data = t6_tp_pio_array[i][1];
+ tp_pio->ireg_local_offset = t6_tp_pio_array[i][2];
+ tp_pio->ireg_offset_range = t6_tp_pio_array[i][3];
+ }
+ t4_tp_pio_read(padap, buff, tp_pio->ireg_offset_range,
+ tp_pio->ireg_local_offset, true);
+ ch_tp_pio++;
+ }
+
+ /* TP_TM_PIO */
+ if (is_t5(padap->params.chip))
+ n = sizeof(t5_tp_tm_pio_array) / (IREG_NUM_ELEM * sizeof(u32));
+ else if (is_t6(padap->params.chip))
+ n = sizeof(t6_tp_tm_pio_array) / (IREG_NUM_ELEM * sizeof(u32));
+
+ for (i = 0; i < n; i++) {
+ struct ireg_field *tp_pio = &ch_tp_pio->tp_pio;
+ u32 *buff = ch_tp_pio->outbuf;
+
+ if (is_t5(padap->params.chip)) {
+ tp_pio->ireg_addr = t5_tp_tm_pio_array[i][0];
+ tp_pio->ireg_data = t5_tp_tm_pio_array[i][1];
+ tp_pio->ireg_local_offset = t5_tp_tm_pio_array[i][2];
+ tp_pio->ireg_offset_range = t5_tp_tm_pio_array[i][3];
+ } else if (is_t6(padap->params.chip)) {
+ tp_pio->ireg_addr = t6_tp_tm_pio_array[i][0];
+ tp_pio->ireg_data = t6_tp_tm_pio_array[i][1];
+ tp_pio->ireg_local_offset = t6_tp_tm_pio_array[i][2];
+ tp_pio->ireg_offset_range = t6_tp_tm_pio_array[i][3];
+ }
+ t4_tp_tm_pio_read(padap, buff, tp_pio->ireg_offset_range,
+ tp_pio->ireg_local_offset, true);
+ ch_tp_pio++;
+ }
+
+ /* TP_MIB_INDEX */
+ if (is_t5(padap->params.chip))
+ n = sizeof(t5_tp_mib_index_array) /
+ (IREG_NUM_ELEM * sizeof(u32));
+ else if (is_t6(padap->params.chip))
+ n = sizeof(t6_tp_mib_index_array) /
+ (IREG_NUM_ELEM * sizeof(u32));
+
+ for (i = 0; i < n ; i++) {
+ struct ireg_field *tp_pio = &ch_tp_pio->tp_pio;
+ u32 *buff = ch_tp_pio->outbuf;
+
+ if (is_t5(padap->params.chip)) {
+ tp_pio->ireg_addr = t5_tp_mib_index_array[i][0];
+ tp_pio->ireg_data = t5_tp_mib_index_array[i][1];
+ tp_pio->ireg_local_offset =
+ t5_tp_mib_index_array[i][2];
+ tp_pio->ireg_offset_range =
+ t5_tp_mib_index_array[i][3];
+ } else if (is_t6(padap->params.chip)) {
+ tp_pio->ireg_addr = t6_tp_mib_index_array[i][0];
+ tp_pio->ireg_data = t6_tp_mib_index_array[i][1];
+ tp_pio->ireg_local_offset =
+ t6_tp_mib_index_array[i][2];
+ tp_pio->ireg_offset_range =
+ t6_tp_mib_index_array[i][3];
+ }
+ t4_tp_mib_read(padap, buff, tp_pio->ireg_offset_range,
+ tp_pio->ireg_local_offset, true);
+ ch_tp_pio++;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct ireg_buf *ch_sge_dbg;
+ int i, rc;
+
+ rc = cudbg_get_buff(dbg_buff, sizeof(*ch_sge_dbg) * 2, &temp_buff);
+ if (rc)
+ return rc;
+
+ ch_sge_dbg = (struct ireg_buf *)temp_buff.data;
+ for (i = 0; i < 2; i++) {
+ struct ireg_field *sge_pio = &ch_sge_dbg->tp_pio;
+ u32 *buff = ch_sge_dbg->outbuf;
+
+ sge_pio->ireg_addr = t5_sge_dbg_index_array[i][0];
+ sge_pio->ireg_data = t5_sge_dbg_index_array[i][1];
+ sge_pio->ireg_local_offset = t5_sge_dbg_index_array[i][2];
+ sge_pio->ireg_offset_range = t5_sge_dbg_index_array[i][3];
+ t4_read_indirect(padap,
+ sge_pio->ireg_addr,
+ sge_pio->ireg_data,
+ buff,
+ sge_pio->ireg_offset_range,
+ sge_pio->ireg_local_offset);
+ ch_sge_dbg++;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct ireg_buf *ch_pcie;
+ int i, rc, n;
+ u32 size;
+
+ n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32));
+ size = sizeof(struct ireg_buf) * n * 2;
+ rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+ if (rc)
+ return rc;
+
+ ch_pcie = (struct ireg_buf *)temp_buff.data;
+ /* PCIE_PDBG */
+ for (i = 0; i < n; i++) {
+ struct ireg_field *pcie_pio = &ch_pcie->tp_pio;
+ u32 *buff = ch_pcie->outbuf;
+
+ pcie_pio->ireg_addr = t5_pcie_pdbg_array[i][0];
+ pcie_pio->ireg_data = t5_pcie_pdbg_array[i][1];
+ pcie_pio->ireg_local_offset = t5_pcie_pdbg_array[i][2];
+ pcie_pio->ireg_offset_range = t5_pcie_pdbg_array[i][3];
+ t4_read_indirect(padap,
+ pcie_pio->ireg_addr,
+ pcie_pio->ireg_data,
+ buff,
+ pcie_pio->ireg_offset_range,
+ pcie_pio->ireg_local_offset);
+ ch_pcie++;
+ }
+
+ /* PCIE_CDBG */
+ n = sizeof(t5_pcie_cdbg_array) / (IREG_NUM_ELEM * sizeof(u32));
+ for (i = 0; i < n; i++) {
+ struct ireg_field *pcie_pio = &ch_pcie->tp_pio;
+ u32 *buff = ch_pcie->outbuf;
+
+ pcie_pio->ireg_addr = t5_pcie_cdbg_array[i][0];
+ pcie_pio->ireg_data = t5_pcie_cdbg_array[i][1];
+ pcie_pio->ireg_local_offset = t5_pcie_cdbg_array[i][2];
+ pcie_pio->ireg_offset_range = t5_pcie_cdbg_array[i][3];
+ t4_read_indirect(padap,
+ pcie_pio->ireg_addr,
+ pcie_pio->ireg_data,
+ buff,
+ pcie_pio->ireg_offset_range,
+ pcie_pio->ireg_local_offset);
+ ch_pcie++;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct ireg_buf *ch_pm;
+ int i, rc, n;
+ u32 size;
+
+ n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32));
+ size = sizeof(struct ireg_buf) * n * 2;
+ rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+ if (rc)
+ return rc;
+
+ ch_pm = (struct ireg_buf *)temp_buff.data;
+ /* PM_RX */
+ for (i = 0; i < n; i++) {
+ struct ireg_field *pm_pio = &ch_pm->tp_pio;
+ u32 *buff = ch_pm->outbuf;
+
+ pm_pio->ireg_addr = t5_pm_rx_array[i][0];
+ pm_pio->ireg_data = t5_pm_rx_array[i][1];
+ pm_pio->ireg_local_offset = t5_pm_rx_array[i][2];
+ pm_pio->ireg_offset_range = t5_pm_rx_array[i][3];
+ t4_read_indirect(padap,
+ pm_pio->ireg_addr,
+ pm_pio->ireg_data,
+ buff,
+ pm_pio->ireg_offset_range,
+ pm_pio->ireg_local_offset);
+ ch_pm++;
+ }
+
+ /* PM_TX */
+ n = sizeof(t5_pm_tx_array) / (IREG_NUM_ELEM * sizeof(u32));
+ for (i = 0; i < n; i++) {
+ struct ireg_field *pm_pio = &ch_pm->tp_pio;
+ u32 *buff = ch_pm->outbuf;
+
+ pm_pio->ireg_addr = t5_pm_tx_array[i][0];
+ pm_pio->ireg_data = t5_pm_tx_array[i][1];
+ pm_pio->ireg_local_offset = t5_pm_tx_array[i][2];
+ pm_pio->ireg_offset_range = t5_pm_tx_array[i][3];
+ t4_read_indirect(padap,
+ pm_pio->ireg_addr,
+ pm_pio->ireg_data,
+ buff,
+ pm_pio->ireg_offset_range,
+ pm_pio->ireg_local_offset);
+ ch_pm++;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct ireg_buf *ma_indr;
+ int i, rc, n;
+ u32 size, j;
+
+ if (CHELSIO_CHIP_VERSION(padap->params.chip) < CHELSIO_T6)
+ return CUDBG_STATUS_ENTITY_NOT_FOUND;
+
+ n = sizeof(t6_ma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32));
+ size = sizeof(struct ireg_buf) * n * 2;
+ rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+ if (rc)
+ return rc;
+
+ ma_indr = (struct ireg_buf *)temp_buff.data;
+ for (i = 0; i < n; i++) {
+ struct ireg_field *ma_fli = &ma_indr->tp_pio;
+ u32 *buff = ma_indr->outbuf;
+
+ ma_fli->ireg_addr = t6_ma_ireg_array[i][0];
+ ma_fli->ireg_data = t6_ma_ireg_array[i][1];
+ ma_fli->ireg_local_offset = t6_ma_ireg_array[i][2];
+ ma_fli->ireg_offset_range = t6_ma_ireg_array[i][3];
+ t4_read_indirect(padap, ma_fli->ireg_addr, ma_fli->ireg_data,
+ buff, ma_fli->ireg_offset_range,
+ ma_fli->ireg_local_offset);
+ ma_indr++;
+ }
+
+ n = sizeof(t6_ma_ireg_array2) / (IREG_NUM_ELEM * sizeof(u32));
+ for (i = 0; i < n; i++) {
+ struct ireg_field *ma_fli = &ma_indr->tp_pio;
+ u32 *buff = ma_indr->outbuf;
+
+ ma_fli->ireg_addr = t6_ma_ireg_array2[i][0];
+ ma_fli->ireg_data = t6_ma_ireg_array2[i][1];
+ ma_fli->ireg_local_offset = t6_ma_ireg_array2[i][2];
+ for (j = 0; j < t6_ma_ireg_array2[i][3]; j++) {
+ t4_read_indirect(padap, ma_fli->ireg_addr,
+ ma_fli->ireg_data, buff, 1,
+ ma_fli->ireg_local_offset);
+ buff++;
+ ma_fli->ireg_local_offset += 0x20;
+ }
+ ma_indr++;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct ireg_buf *up_cim;
+ int i, rc, n;
+ u32 size;
+
+ n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32));
+ size = sizeof(struct ireg_buf) * n;
+ rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+ if (rc)
+ return rc;
+
+ up_cim = (struct ireg_buf *)temp_buff.data;
+ for (i = 0; i < n; i++) {
+ struct ireg_field *up_cim_reg = &up_cim->tp_pio;
+ u32 *buff = up_cim->outbuf;
+
+ if (is_t5(padap->params.chip)) {
+ up_cim_reg->ireg_addr = t5_up_cim_reg_array[i][0];
+ up_cim_reg->ireg_data = t5_up_cim_reg_array[i][1];
+ up_cim_reg->ireg_local_offset =
+ t5_up_cim_reg_array[i][2];
+ up_cim_reg->ireg_offset_range =
+ t5_up_cim_reg_array[i][3];
+ } else if (is_t6(padap->params.chip)) {
+ up_cim_reg->ireg_addr = t6_up_cim_reg_array[i][0];
+ up_cim_reg->ireg_data = t6_up_cim_reg_array[i][1];
+ up_cim_reg->ireg_local_offset =
+ t6_up_cim_reg_array[i][2];
+ up_cim_reg->ireg_offset_range =
+ t6_up_cim_reg_array[i][3];
+ }
+
+ rc = t4_cim_read(padap, up_cim_reg->ireg_local_offset,
+ up_cim_reg->ireg_offset_range, buff);
+ if (rc) {
+ cudbg_put_buff(&temp_buff, dbg_buff);
+ return rc;
+ }
+ up_cim++;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_mbox_log *mboxlog = NULL;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct mbox_cmd_log *log = NULL;
+ struct mbox_cmd *entry;
+ unsigned int entry_idx;
+ u16 mbox_cmds;
+ int i, k, rc;
+ u64 flit;
+ u32 size;
+
+ log = padap->mbox_log;
+ mbox_cmds = padap->mbox_log->size;
+ size = sizeof(struct cudbg_mbox_log) * mbox_cmds;
+ rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+ if (rc)
+ return rc;
+
+ mboxlog = (struct cudbg_mbox_log *)temp_buff.data;
+ for (k = 0; k < mbox_cmds; k++) {
+ entry_idx = log->cursor + k;
+ if (entry_idx >= log->size)
+ entry_idx -= log->size;
+
+ entry = mbox_cmd_log_entry(log, entry_idx);
+ /* skip over unused entries */
+ if (entry->timestamp == 0)
+ continue;
+
+ memcpy(&mboxlog->entry, entry, sizeof(struct mbox_cmd));
+ for (i = 0; i < MBOX_LEN / 8; i++) {
+ flit = entry->cmd[i];
+ mboxlog->hi[i] = (u32)(flit >> 32);
+ mboxlog->lo[i] = (u32)flit;
+ }
+ mboxlog++;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
+
+int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err)
+{
+ struct adapter *padap = pdbg_init->adap;
+ struct cudbg_buffer temp_buff = { 0 };
+ struct ireg_buf *hma_indr;
+ int i, rc, n;
+ u32 size;
+
+ if (CHELSIO_CHIP_VERSION(padap->params.chip) < CHELSIO_T6)
+ return CUDBG_STATUS_ENTITY_NOT_FOUND;
+
+ n = sizeof(t6_hma_ireg_array) / (IREG_NUM_ELEM * sizeof(u32));
+ size = sizeof(struct ireg_buf) * n;
+ rc = cudbg_get_buff(dbg_buff, size, &temp_buff);
+ if (rc)
+ return rc;
+
+ hma_indr = (struct ireg_buf *)temp_buff.data;
+ for (i = 0; i < n; i++) {
+ struct ireg_field *hma_fli = &hma_indr->tp_pio;
+ u32 *buff = hma_indr->outbuf;
+
+ hma_fli->ireg_addr = t6_hma_ireg_array[i][0];
+ hma_fli->ireg_data = t6_hma_ireg_array[i][1];
+ hma_fli->ireg_local_offset = t6_hma_ireg_array[i][2];
+ hma_fli->ireg_offset_range = t6_hma_ireg_array[i][3];
+ t4_read_indirect(padap, hma_fli->ireg_addr, hma_fli->ireg_data,
+ buff, hma_fli->ireg_offset_range,
+ hma_fli->ireg_local_offset);
+ hma_indr++;
+ }
+ cudbg_write_and_release_buff(&temp_buff, dbg_buff);
+ return rc;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
new file mode 100644
index 000000000000..c4440c1d0142
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2017 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_LIB_H__
+#define __CUDBG_LIB_H__
+
+int cudbg_collect_reg_dump(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_fw_devlog(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_tp0(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_tp1(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_ulp(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_sge0(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_sge1(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_ibq_ncsi(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ulp0(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ulp1(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ulp2(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ulp3(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_sge(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_cim_obq_ncsi(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_edc0_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_edc1_meminfo(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_tp_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_sge_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_obq_sge_rx_q0(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_obq_sge_rx_q1(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_pcie_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_pm_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_ma_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_up_cim_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_mbox_log(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+int cudbg_collect_hma_indirect(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+
+struct cudbg_entity_hdr *cudbg_get_entity_hdr(void *outbuf, int i);
+void cudbg_align_debug_buffer(struct cudbg_buffer *dbg_buff,
+ struct cudbg_entity_hdr *entity_hdr);
+#endif /* __CUDBG_LIB_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h
new file mode 100644
index 000000000000..b150c5d1f7c0
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib_common.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2017 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#ifndef __CUDBG_LIB_COMMON_H__
+#define __CUDBG_LIB_COMMON_H__
+
+#define CUDBG_SIGNATURE 67856866 /* CUDB in ascii */
+
+enum cudbg_dump_type {
+ CUDBG_DUMP_TYPE_MINI = 1,
+};
+
+enum cudbg_compression_type {
+ CUDBG_COMPRESSION_NONE = 1,
+};
+
+struct cudbg_hdr {
+ u32 signature;
+ u32 hdr_len;
+ u16 major_ver;
+ u16 minor_ver;
+ u32 data_len;
+ u32 hdr_flags;
+ u16 max_entities;
+ u8 chip_ver;
+ u8 dump_type:3;
+ u8 reserved1:1;
+ u8 compress_type:4;
+ u32 reserved[8];
+};
+
+struct cudbg_entity_hdr {
+ u32 entity_type;
+ u32 start_offset;
+ u32 size;
+ int hdr_flags;
+ u32 sys_warn;
+ u32 sys_err;
+ u8 num_pad;
+ u8 flag; /* bit 0 is used to indicate ext data */
+ u8 reserved1[2];
+ u32 next_ext_offset; /* pointer to next extended entity meta data */
+ u32 reserved[5];
+};
+
+struct cudbg_buffer {
+ u32 size;
+ u32 offset;
+ char *data;
+};
+
+struct cudbg_error {
+ int sys_err;
+ int sys_warn;
+ int app_err;
+};
+
+#define CDUMP_MAX_COMP_BUF_SIZE ((64 * 1024) - 1)
+#define CUDBG_CHUNK_SIZE ((CDUMP_MAX_COMP_BUF_SIZE / 1024) * 1024)
+
+int cudbg_get_buff(struct cudbg_buffer *pdbg_buff, u32 size,
+ struct cudbg_buffer *pin_buff);
+void cudbg_put_buff(struct cudbg_buffer *pin_buff,
+ struct cudbg_buffer *pdbg_buff);
+void cudbg_update_buff(struct cudbg_buffer *pin_buff,
+ struct cudbg_buffer *pout_buff);
+#endif /* __CUDBG_LIB_COMMON_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index ea72d2d2e1b4..92a0b022687e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -367,6 +367,7 @@ struct adapter_params {
unsigned int max_ird_adapter; /* Max read depth per adapter */
bool fr_nsmr_tpte_wr_support; /* FW support for FR_NSMR_TPTE_WR */
u8 fw_caps_support; /* 32-bit Port Capabilities */
+ bool filter2_wr_support; /* FW support for FILTER2_WR */
/* MPS Buffer Group Map[per Port]. Bit i is set if buffer group i is
* used by the Port
@@ -549,6 +550,7 @@ enum { /* adapter flags */
MASTER_PF = (1 << 7),
FW_OFLD_CONN = (1 << 9),
ROOT_NO_RELAXED_ORDERING = (1 << 10),
+ SHUTTING_DOWN = (1 << 11),
};
enum {
@@ -857,6 +859,7 @@ struct adapter {
unsigned int clipt_start;
unsigned int clipt_end;
struct clip_tbl *clipt;
+ struct smt_data *smt;
struct cxgb4_uld_info *uld;
void *uld_handle[CXGB4_ULD_MAX];
unsigned int num_uld;
@@ -904,6 +907,13 @@ struct adapter {
/* TC u32 offload */
struct cxgb4_tc_u32_table *tc_u32;
struct chcr_stats_debug chcr_stats;
+
+ /* TC flower offload */
+ DECLARE_HASHTABLE(flower_anymatch_tbl, 9);
+ struct timer_list flower_stats_timer;
+
+ /* Ethtool Dump */
+ struct ethtool_dump eth_dump;
};
/* Support for "sched-class" command to allow a TX Scheduling Class to be
@@ -1055,10 +1065,19 @@ struct ch_filter_specification {
uint32_t newdmac:1; /* rewrite destination MAC address */
uint32_t newsmac:1; /* rewrite source MAC address */
uint32_t newvlan:2; /* rewrite VLAN Tag */
+ uint32_t nat_mode:3; /* specify NAT operation mode */
uint8_t dmac[ETH_ALEN]; /* new destination MAC address */
uint8_t smac[ETH_ALEN]; /* new source MAC address */
uint16_t vlan; /* VLAN Tag to insert */
+ u8 nat_lip[16]; /* local IP to use after NAT'ing */
+ u8 nat_fip[16]; /* foreign IP to use after NAT'ing */
+ u16 nat_lport; /* local port to use after NAT'ing */
+ u16 nat_fport; /* foreign port to use after NAT'ing */
+
+ /* reservation for future additions */
+ u8 rsvd[24];
+
/* Filter rule value/mask pairs.
*/
struct ch_filter_tuple val;
@@ -1078,6 +1097,10 @@ enum {
VLAN_REWRITE
};
+enum {
+ NAT_MODE_ALL = 7, /* NAT on entire 4-tuple */
+};
+
/* Host shadow copy of ingress filter entry. This is in host native format
* and doesn't match the ordering or bit order, etc. of the hardware of the
* firmware command. The use of bit-field structure elements is purely to
@@ -1090,9 +1113,9 @@ struct filter_entry {
u32 locked:1; /* filter is administratively locked */
u32 pending:1; /* filter action is pending firmware reply */
- u32 smtidx:8; /* Source MAC Table index for smac */
struct filter_ctx *ctx; /* Caller's completion hook */
struct l2t_entry *l2t; /* Layer Two Table entry for dmac */
+ struct smt_entry *smt; /* Source Mac Table entry for smac */
struct net_device *dev; /* Associated net device */
u32 tid; /* This will store the actual tid */
@@ -1451,7 +1474,7 @@ unsigned int qtimer_val(const struct adapter *adap,
int t4_init_devlog_params(struct adapter *adapter);
int t4_init_sge_params(struct adapter *adapter);
-int t4_init_tp_params(struct adapter *adap);
+int t4_init_tp_params(struct adapter *adap, bool sleep_ok);
int t4_filter_field_shift(const struct adapter *adap, int filter_sel);
int t4_init_rss_mode(struct adapter *adap, int mbox);
int t4_init_portinfo(struct port_info *pi, int mbox,
@@ -1465,14 +1488,15 @@ int t4_config_glbl_rss(struct adapter *adapter, int mbox, unsigned int mode,
int t4_config_vi_rss(struct adapter *adapter, int mbox, unsigned int viid,
unsigned int flags, unsigned int defq);
int t4_read_rss(struct adapter *adapter, u16 *entries);
-void t4_read_rss_key(struct adapter *adapter, u32 *key);
-void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx);
+void t4_read_rss_key(struct adapter *adapter, u32 *key, bool sleep_ok);
+void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx,
+ bool sleep_ok);
void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index,
- u32 *valp);
+ u32 *valp, bool sleep_ok);
void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index,
- u32 *vfl, u32 *vfh);
-u32 t4_read_rss_pf_map(struct adapter *adapter);
-u32 t4_read_rss_pf_mask(struct adapter *adapter);
+ u32 *vfl, u32 *vfh, bool sleep_ok);
+u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok);
+u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok);
unsigned int t4_get_mps_bg_map(struct adapter *adapter, int pidx);
unsigned int t4_get_tp_ch_map(struct adapter *adapter, int pidx);
@@ -1503,14 +1527,18 @@ void t4_read_cong_tbl(struct adapter *adap, u16 incr[NMTUS][NCCTRL_WIN]);
void t4_tp_wr_bits_indirect(struct adapter *adap, unsigned int addr,
unsigned int mask, unsigned int val);
void t4_tp_read_la(struct adapter *adap, u64 *la_buf, unsigned int *wrptr);
-void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st);
-void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st);
-void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st);
-void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st);
+void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st,
+ bool sleep_ok);
+void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st,
+ bool sleep_ok);
+void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st,
+ bool sleep_ok);
+void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st,
+ bool sleep_ok);
void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
- struct tp_tcp_stats *v6);
+ struct tp_tcp_stats *v6, bool sleep_ok);
void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx,
- struct tp_fcoe_stats *st);
+ struct tp_fcoe_stats *st, bool sleep_ok);
void t4_load_mtus(struct adapter *adap, const unsigned short *mtus,
const unsigned short *alpha, const unsigned short *beta);
@@ -1619,6 +1647,13 @@ void t4_idma_monitor(struct adapter *adapter,
int hz, int ticks);
int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
unsigned int naddr, u8 *addr);
+void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
+ u32 start_index, bool sleep_ok);
+void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
+ u32 start_index, bool sleep_ok);
+void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs,
+ u32 start_index, bool sleep_ok);
+
void t4_uld_mem_free(struct adapter *adap);
int t4_uld_mem_alloc(struct adapter *adap);
void t4_uld_clean_up(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
new file mode 100644
index 000000000000..9d97080a9d17
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (C) 2017 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include "t4_regs.h"
+#include "cxgb4.h"
+#include "cxgb4_cudbg.h"
+#include "cudbg_entity.h"
+
+static const struct cxgb4_collect_entity cxgb4_collect_mem_dump[] = {
+ { CUDBG_EDC0, cudbg_collect_edc0_meminfo },
+ { CUDBG_EDC1, cudbg_collect_edc1_meminfo },
+};
+
+static const struct cxgb4_collect_entity cxgb4_collect_hw_dump[] = {
+ { CUDBG_MBOX_LOG, cudbg_collect_mbox_log },
+ { CUDBG_DEV_LOG, cudbg_collect_fw_devlog },
+ { CUDBG_REG_DUMP, cudbg_collect_reg_dump },
+ { CUDBG_CIM_IBQ_TP0, cudbg_collect_cim_ibq_tp0 },
+ { CUDBG_CIM_IBQ_TP1, cudbg_collect_cim_ibq_tp1 },
+ { CUDBG_CIM_IBQ_ULP, cudbg_collect_cim_ibq_ulp },
+ { CUDBG_CIM_IBQ_SGE0, cudbg_collect_cim_ibq_sge0 },
+ { CUDBG_CIM_IBQ_SGE1, cudbg_collect_cim_ibq_sge1 },
+ { CUDBG_CIM_IBQ_NCSI, cudbg_collect_cim_ibq_ncsi },
+ { CUDBG_CIM_OBQ_ULP0, cudbg_collect_cim_obq_ulp0 },
+ { CUDBG_CIM_OBQ_ULP1, cudbg_collect_cim_obq_ulp1 },
+ { CUDBG_CIM_OBQ_ULP2, cudbg_collect_cim_obq_ulp2 },
+ { CUDBG_CIM_OBQ_ULP3, cudbg_collect_cim_obq_ulp3 },
+ { CUDBG_CIM_OBQ_SGE, cudbg_collect_cim_obq_sge },
+ { CUDBG_CIM_OBQ_NCSI, cudbg_collect_cim_obq_ncsi },
+ { CUDBG_TP_INDIRECT, cudbg_collect_tp_indirect },
+ { CUDBG_SGE_INDIRECT, cudbg_collect_sge_indirect },
+ { CUDBG_CIM_OBQ_RXQ0, cudbg_collect_obq_sge_rx_q0 },
+ { CUDBG_CIM_OBQ_RXQ1, cudbg_collect_obq_sge_rx_q1 },
+ { CUDBG_PCIE_INDIRECT, cudbg_collect_pcie_indirect },
+ { CUDBG_PM_INDIRECT, cudbg_collect_pm_indirect },
+ { CUDBG_MA_INDIRECT, cudbg_collect_ma_indirect },
+ { CUDBG_UP_CIM_INDIRECT, cudbg_collect_up_cim_indirect },
+ { CUDBG_HMA_INDIRECT, cudbg_collect_hma_indirect },
+};
+
+static u32 cxgb4_get_entity_length(struct adapter *adap, u32 entity)
+{
+ u32 value, n = 0, len = 0;
+
+ switch (entity) {
+ case CUDBG_REG_DUMP:
+ switch (CHELSIO_CHIP_VERSION(adap->params.chip)) {
+ case CHELSIO_T4:
+ len = T4_REGMAP_SIZE;
+ break;
+ case CHELSIO_T5:
+ case CHELSIO_T6:
+ len = T5_REGMAP_SIZE;
+ break;
+ default:
+ break;
+ }
+ break;
+ case CUDBG_DEV_LOG:
+ len = adap->params.devlog.size;
+ break;
+ case CUDBG_CIM_IBQ_TP0:
+ case CUDBG_CIM_IBQ_TP1:
+ case CUDBG_CIM_IBQ_ULP:
+ case CUDBG_CIM_IBQ_SGE0:
+ case CUDBG_CIM_IBQ_SGE1:
+ case CUDBG_CIM_IBQ_NCSI:
+ len = CIM_IBQ_SIZE * 4 * sizeof(u32);
+ break;
+ case CUDBG_CIM_OBQ_ULP0:
+ case CUDBG_CIM_OBQ_ULP1:
+ case CUDBG_CIM_OBQ_ULP2:
+ case CUDBG_CIM_OBQ_ULP3:
+ case CUDBG_CIM_OBQ_SGE:
+ case CUDBG_CIM_OBQ_NCSI:
+ case CUDBG_CIM_OBQ_RXQ0:
+ case CUDBG_CIM_OBQ_RXQ1:
+ len = 6 * CIM_OBQ_SIZE * 4 * sizeof(u32);
+ break;
+ case CUDBG_EDC0:
+ value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
+ if (value & EDRAM0_ENABLE_F) {
+ value = t4_read_reg(adap, MA_EDRAM0_BAR_A);
+ len = EDRAM0_SIZE_G(value);
+ }
+ len = cudbg_mbytes_to_bytes(len);
+ break;
+ case CUDBG_EDC1:
+ value = t4_read_reg(adap, MA_TARGET_MEM_ENABLE_A);
+ if (value & EDRAM1_ENABLE_F) {
+ value = t4_read_reg(adap, MA_EDRAM1_BAR_A);
+ len = EDRAM1_SIZE_G(value);
+ }
+ len = cudbg_mbytes_to_bytes(len);
+ break;
+ case CUDBG_TP_INDIRECT:
+ switch (CHELSIO_CHIP_VERSION(adap->params.chip)) {
+ case CHELSIO_T5:
+ n = sizeof(t5_tp_pio_array) +
+ sizeof(t5_tp_tm_pio_array) +
+ sizeof(t5_tp_mib_index_array);
+ break;
+ case CHELSIO_T6:
+ n = sizeof(t6_tp_pio_array) +
+ sizeof(t6_tp_tm_pio_array) +
+ sizeof(t6_tp_mib_index_array);
+ break;
+ default:
+ break;
+ }
+ n = n / (IREG_NUM_ELEM * sizeof(u32));
+ len = sizeof(struct ireg_buf) * n;
+ break;
+ case CUDBG_SGE_INDIRECT:
+ len = sizeof(struct ireg_buf) * 2;
+ break;
+ case CUDBG_PCIE_INDIRECT:
+ n = sizeof(t5_pcie_pdbg_array) / (IREG_NUM_ELEM * sizeof(u32));
+ len = sizeof(struct ireg_buf) * n * 2;
+ break;
+ case CUDBG_PM_INDIRECT:
+ n = sizeof(t5_pm_rx_array) / (IREG_NUM_ELEM * sizeof(u32));
+ len = sizeof(struct ireg_buf) * n * 2;
+ break;
+ case CUDBG_MA_INDIRECT:
+ if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) {
+ n = sizeof(t6_ma_ireg_array) /
+ (IREG_NUM_ELEM * sizeof(u32));
+ len = sizeof(struct ireg_buf) * n * 2;
+ }
+ break;
+ case CUDBG_UP_CIM_INDIRECT:
+ n = sizeof(t5_up_cim_reg_array) / (IREG_NUM_ELEM * sizeof(u32));
+ len = sizeof(struct ireg_buf) * n;
+ break;
+ case CUDBG_MBOX_LOG:
+ len = sizeof(struct cudbg_mbox_log) * adap->mbox_log->size;
+ break;
+ case CUDBG_HMA_INDIRECT:
+ if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) {
+ n = sizeof(t6_hma_ireg_array) /
+ (IREG_NUM_ELEM * sizeof(u32));
+ len = sizeof(struct ireg_buf) * n;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return len;
+}
+
+u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag)
+{
+ u32 i, entity;
+ u32 len = 0;
+
+ if (flag & CXGB4_ETH_DUMP_HW) {
+ for (i = 0; i < ARRAY_SIZE(cxgb4_collect_hw_dump); i++) {
+ entity = cxgb4_collect_hw_dump[i].entity;
+ len += cxgb4_get_entity_length(adap, entity);
+ }
+ }
+
+ if (flag & CXGB4_ETH_DUMP_MEM) {
+ for (i = 0; i < ARRAY_SIZE(cxgb4_collect_mem_dump); i++) {
+ entity = cxgb4_collect_mem_dump[i].entity;
+ len += cxgb4_get_entity_length(adap, entity);
+ }
+ }
+
+ return len;
+}
+
+static void cxgb4_cudbg_collect_entity(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ const struct cxgb4_collect_entity *e_arr,
+ u32 arr_size, void *buf, u32 *tot_size)
+{
+ struct adapter *adap = pdbg_init->adap;
+ struct cudbg_error cudbg_err = { 0 };
+ struct cudbg_entity_hdr *entity_hdr;
+ u32 entity_size, i;
+ u32 total_size = 0;
+ int ret;
+
+ for (i = 0; i < arr_size; i++) {
+ const struct cxgb4_collect_entity *e = &e_arr[i];
+
+ /* Skip entities that won't fit in output buffer */
+ entity_size = cxgb4_get_entity_length(adap, e->entity);
+ if (entity_size >
+ pdbg_init->outbuf_size - *tot_size - total_size)
+ continue;
+
+ entity_hdr = cudbg_get_entity_hdr(buf, e->entity);
+ entity_hdr->entity_type = e->entity;
+ entity_hdr->start_offset = dbg_buff->offset;
+ memset(&cudbg_err, 0, sizeof(struct cudbg_error));
+ ret = e->collect_cb(pdbg_init, dbg_buff, &cudbg_err);
+ if (ret) {
+ entity_hdr->size = 0;
+ dbg_buff->offset = entity_hdr->start_offset;
+ } else {
+ cudbg_align_debug_buffer(dbg_buff, entity_hdr);
+ }
+
+ /* Log error and continue with next entity */
+ if (cudbg_err.sys_err)
+ ret = CUDBG_SYSTEM_ERROR;
+
+ entity_hdr->hdr_flags = ret;
+ entity_hdr->sys_err = cudbg_err.sys_err;
+ entity_hdr->sys_warn = cudbg_err.sys_warn;
+ total_size += entity_hdr->size;
+ }
+
+ *tot_size += total_size;
+}
+
+int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
+ u32 flag)
+{
+ struct cudbg_init cudbg_init = { 0 };
+ struct cudbg_buffer dbg_buff = { 0 };
+ u32 size, min_size, total_size = 0;
+ struct cudbg_hdr *cudbg_hdr;
+
+ size = *buf_size;
+
+ cudbg_init.adap = adap;
+ cudbg_init.outbuf = buf;
+ cudbg_init.outbuf_size = size;
+
+ dbg_buff.data = buf;
+ dbg_buff.size = size;
+ dbg_buff.offset = 0;
+
+ cudbg_hdr = (struct cudbg_hdr *)buf;
+ cudbg_hdr->signature = CUDBG_SIGNATURE;
+ cudbg_hdr->hdr_len = sizeof(struct cudbg_hdr);
+ cudbg_hdr->major_ver = CUDBG_MAJOR_VERSION;
+ cudbg_hdr->minor_ver = CUDBG_MINOR_VERSION;
+ cudbg_hdr->max_entities = CUDBG_MAX_ENTITY;
+ cudbg_hdr->chip_ver = adap->params.chip;
+ cudbg_hdr->dump_type = CUDBG_DUMP_TYPE_MINI;
+ cudbg_hdr->compress_type = CUDBG_COMPRESSION_NONE;
+
+ min_size = sizeof(struct cudbg_hdr) +
+ sizeof(struct cudbg_entity_hdr) *
+ cudbg_hdr->max_entities;
+ if (size < min_size)
+ return -ENOMEM;
+
+ dbg_buff.offset += min_size;
+ total_size = dbg_buff.offset;
+
+ if (flag & CXGB4_ETH_DUMP_HW)
+ cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff,
+ cxgb4_collect_hw_dump,
+ ARRAY_SIZE(cxgb4_collect_hw_dump),
+ buf,
+ &total_size);
+
+ if (flag & CXGB4_ETH_DUMP_MEM)
+ cxgb4_cudbg_collect_entity(&cudbg_init, &dbg_buff,
+ cxgb4_collect_mem_dump,
+ ARRAY_SIZE(cxgb4_collect_mem_dump),
+ buf,
+ &total_size);
+
+ cudbg_hdr->data_len = total_size;
+ *buf_size = total_size;
+ return 0;
+}
+
+void cxgb4_init_ethtool_dump(struct adapter *adapter)
+{
+ adapter->eth_dump.flag = CXGB4_ETH_DUMP_NONE;
+ adapter->eth_dump.version = adapter->params.fw_vers;
+ adapter->eth_dump.len = 0;
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
new file mode 100644
index 000000000000..c099b5aa2214
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_cudbg.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2017 Chelsio Communications. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#ifndef __CXGB4_CUDBG_H__
+#define __CXGB4_CUDBG_H__
+
+#include "cudbg_if.h"
+#include "cudbg_lib_common.h"
+#include "cudbg_lib.h"
+
+typedef int (*cudbg_collect_callback_t)(struct cudbg_init *pdbg_init,
+ struct cudbg_buffer *dbg_buff,
+ struct cudbg_error *cudbg_err);
+
+struct cxgb4_collect_entity {
+ enum cudbg_dbg_entity_type entity;
+ cudbg_collect_callback_t collect_cb;
+};
+
+enum CXGB4_ETHTOOL_DUMP_FLAGS {
+ CXGB4_ETH_DUMP_NONE = ETH_FW_DUMP_DISABLE,
+ CXGB4_ETH_DUMP_MEM = (1 << 0), /* On-Chip Memory Dumps */
+ CXGB4_ETH_DUMP_HW = (1 << 1), /* various FW and HW dumps */
+};
+
+u32 cxgb4_get_dump_length(struct adapter *adap, u32 flag);
+int cxgb4_cudbg_collect(struct adapter *adap, void *buf, u32 *buf_size,
+ u32 flag);
+void cxgb4_init_ethtool_dump(struct adapter *adapter);
+#endif /* __CXGB4_CUDBG_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
index 6ee2ed30626b..4e7f72b17e82 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.c
@@ -40,8 +40,7 @@ static inline bool cxgb4_dcb_state_synced(enum cxgb4_dcb_state state)
return false;
}
-/* Initialize a port's Data Center Bridging state. Typically used after a
- * Link Down event.
+/* Initialize a port's Data Center Bridging state.
*/
void cxgb4_dcb_state_init(struct net_device *dev)
{
@@ -106,6 +105,15 @@ static void cxgb4_dcb_cleanup_apps(struct net_device *dev)
}
}
+/* Reset a port's Data Center Bridging state. Typically used after a
+ * Link Down event.
+ */
+void cxgb4_dcb_reset(struct net_device *dev)
+{
+ cxgb4_dcb_cleanup_apps(dev);
+ cxgb4_dcb_state_init(dev);
+}
+
/* Finite State machine for Data Center Bridging.
*/
void cxgb4_dcb_state_fsm(struct net_device *dev,
@@ -194,8 +202,7 @@ void cxgb4_dcb_state_fsm(struct net_device *dev,
* state. We need to reset back to a ground state
* of incomplete.
*/
- cxgb4_dcb_cleanup_apps(dev);
- cxgb4_dcb_state_init(dev);
+ cxgb4_dcb_reset(dev);
dcb->state = CXGB4_DCB_STATE_FW_INCOMPLETE;
dcb->supported = CXGB4_DCBX_FW_SUPPORT;
linkwatch_fire_event(dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
index ccf24d3dc982..02040b99c78a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_dcb.h
@@ -131,6 +131,7 @@ struct port_dcb_info {
void cxgb4_dcb_state_init(struct net_device *);
void cxgb4_dcb_version_init(struct net_device *);
+void cxgb4_dcb_reset(struct net_device *dev);
void cxgb4_dcb_state_fsm(struct net_device *, enum cxgb4_dcb_state_input);
void cxgb4_dcb_handle_fw_update(struct adapter *, const struct fw_port_cmd *);
void cxgb4_dcb_set_caps(struct adapter *, const struct fw_port_cmd *);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 76540b0e082d..917663b35603 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2211,7 +2211,7 @@ static int rss_key_show(struct seq_file *seq, void *v)
{
u32 key[10];
- t4_read_rss_key(seq->private, key);
+ t4_read_rss_key(seq->private, key, true);
seq_printf(seq, "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x\n",
key[9], key[8], key[7], key[6], key[5], key[4], key[3],
key[2], key[1], key[0]);
@@ -2248,7 +2248,7 @@ static ssize_t rss_key_write(struct file *file, const char __user *buf,
}
}
- t4_write_rss_key(adap, key, -1);
+ t4_write_rss_key(adap, key, -1, true);
return count;
}
@@ -2325,12 +2325,13 @@ static int rss_pf_config_open(struct inode *inode, struct file *file)
return -ENOMEM;
pfconf = (struct rss_pf_conf *)p->data;
- rss_pf_map = t4_read_rss_pf_map(adapter);
- rss_pf_mask = t4_read_rss_pf_mask(adapter);
+ rss_pf_map = t4_read_rss_pf_map(adapter, true);
+ rss_pf_mask = t4_read_rss_pf_mask(adapter, true);
for (pf = 0; pf < 8; pf++) {
pfconf[pf].rss_pf_map = rss_pf_map;
pfconf[pf].rss_pf_mask = rss_pf_mask;
- t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config);
+ t4_read_rss_pf_config(adapter, pf, &pfconf[pf].rss_pf_config,
+ true);
}
return 0;
}
@@ -2393,7 +2394,7 @@ static int rss_vf_config_open(struct inode *inode, struct file *file)
vfconf = (struct rss_vf_conf *)p->data;
for (vf = 0; vf < vfcount; vf++) {
t4_read_rss_vf_config(adapter, vf, &vfconf[vf].rss_vf_vfl,
- &vfconf[vf].rss_vf_vfh);
+ &vfconf[vf].rss_vf_vfh, true);
}
return 0;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index a71af1e587e2..1b7f6b9ccc8b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -21,6 +21,7 @@
#include "cxgb4.h"
#include "t4_regs.h"
#include "t4fw_api.h"
+#include "cxgb4_cudbg.h"
#define EEPROM_MAGIC 0x38E2F10C
@@ -335,10 +336,10 @@ static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
memset(s, 0, sizeof(*s));
spin_lock(&adap->stats_lock);
- t4_tp_get_tcp_stats(adap, &v4, &v6);
- t4_tp_get_rdma_stats(adap, &rdma_stats);
- t4_get_usm_stats(adap, &usm_stats);
- t4_tp_get_err_stats(adap, &err_stats);
+ t4_tp_get_tcp_stats(adap, &v4, &v6, false);
+ t4_tp_get_rdma_stats(adap, &rdma_stats, false);
+ t4_get_usm_stats(adap, &usm_stats, false);
+ t4_tp_get_err_stats(adap, &err_stats, false);
spin_unlock(&adap->stats_lock);
s->db_drop = adap->db_stats.db_drop;
@@ -388,9 +389,9 @@ static void collect_channel_stats(struct adapter *adap, struct channel_stats *s,
memset(s, 0, sizeof(*s));
spin_lock(&adap->stats_lock);
- t4_tp_get_cpl_stats(adap, &cpl_stats);
- t4_tp_get_err_stats(adap, &err_stats);
- t4_get_fcoe_stats(adap, i, &fcoe_stats);
+ t4_tp_get_cpl_stats(adap, &cpl_stats, false);
+ t4_tp_get_err_stats(adap, &err_stats, false);
+ t4_get_fcoe_stats(adap, i, &fcoe_stats, false);
spin_unlock(&adap->stats_lock);
s->cpl_req = cpl_stats.req[i];
@@ -1374,6 +1375,56 @@ static int get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
return -EOPNOTSUPP;
}
+static int set_dump(struct net_device *dev, struct ethtool_dump *eth_dump)
+{
+ struct adapter *adapter = netdev2adap(dev);
+ u32 len = 0;
+
+ len = sizeof(struct cudbg_hdr) +
+ sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
+ len += cxgb4_get_dump_length(adapter, eth_dump->flag);
+
+ adapter->eth_dump.flag = eth_dump->flag;
+ adapter->eth_dump.len = len;
+ return 0;
+}
+
+static int get_dump_flag(struct net_device *dev, struct ethtool_dump *eth_dump)
+{
+ struct adapter *adapter = netdev2adap(dev);
+
+ eth_dump->flag = adapter->eth_dump.flag;
+ eth_dump->len = adapter->eth_dump.len;
+ eth_dump->version = adapter->eth_dump.version;
+ return 0;
+}
+
+static int get_dump_data(struct net_device *dev, struct ethtool_dump *eth_dump,
+ void *buf)
+{
+ struct adapter *adapter = netdev2adap(dev);
+ u32 len = 0;
+ int ret = 0;
+
+ if (adapter->eth_dump.flag == CXGB4_ETH_DUMP_NONE)
+ return -ENOENT;
+
+ len = sizeof(struct cudbg_hdr) +
+ sizeof(struct cudbg_entity_hdr) * CUDBG_MAX_ENTITY;
+ len += cxgb4_get_dump_length(adapter, adapter->eth_dump.flag);
+ if (eth_dump->len < len)
+ return -ENOMEM;
+
+ ret = cxgb4_cudbg_collect(adapter, buf, &len, adapter->eth_dump.flag);
+ if (ret)
+ return ret;
+
+ eth_dump->flag = adapter->eth_dump.flag;
+ eth_dump->len = len;
+ eth_dump->version = adapter->eth_dump.version;
+ return 0;
+}
+
static const struct ethtool_ops cxgb_ethtool_ops = {
.get_link_ksettings = get_link_ksettings,
.set_link_ksettings = set_link_ksettings,
@@ -1404,7 +1455,10 @@ static const struct ethtool_ops cxgb_ethtool_ops = {
.get_rxfh = get_rss_table,
.set_rxfh = set_rss_table,
.flash_device = set_flash,
- .get_ts_info = get_ts_info
+ .get_ts_info = get_ts_info,
+ .set_dump = set_dump,
+ .get_dump_flag = get_dump_flag,
+ .get_dump_data = get_dump_data,
};
void cxgb4_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 45b5853ca2f1..89272f29f807 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -34,7 +34,9 @@
#include "cxgb4.h"
#include "t4_regs.h"
+#include "t4_tcb.h"
#include "l2t.h"
+#include "smt.h"
#include "t4fw_api.h"
#include "cxgb4_filter.h"
@@ -148,6 +150,106 @@ static int get_filter_steerq(struct net_device *dev,
return iq;
}
+static int get_filter_count(struct adapter *adapter, unsigned int fidx,
+ u64 *pkts, u64 *bytes)
+{
+ unsigned int tcb_base, tcbaddr;
+ unsigned int word_offset;
+ struct filter_entry *f;
+ __be64 be64_byte_count;
+ int ret;
+
+ tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A);
+ if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids - 1)) &&
+ fidx >= adapter->tids.nftids)
+ return -E2BIG;
+
+ f = &adapter->tids.ftid_tab[fidx];
+ if (!f->valid)
+ return -EINVAL;
+
+ tcbaddr = tcb_base + f->tid * TCB_SIZE;
+
+ spin_lock(&adapter->win0_lock);
+ if (is_t4(adapter->params.chip)) {
+ __be64 be64_count;
+
+ /* T4 doesn't maintain byte counts in hw */
+ *bytes = 0;
+
+ /* Get pkts */
+ word_offset = 4;
+ ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
+ tcbaddr + (word_offset * sizeof(__be32)),
+ sizeof(be64_count),
+ (__be32 *)&be64_count,
+ T4_MEMORY_READ);
+ if (ret < 0)
+ goto out;
+ *pkts = be64_to_cpu(be64_count);
+ } else {
+ __be32 be32_count;
+
+ /* Get bytes */
+ word_offset = 4;
+ ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
+ tcbaddr + (word_offset * sizeof(__be32)),
+ sizeof(be64_byte_count),
+ &be64_byte_count,
+ T4_MEMORY_READ);
+ if (ret < 0)
+ goto out;
+ *bytes = be64_to_cpu(be64_byte_count);
+
+ /* Get pkts */
+ word_offset = 6;
+ ret = t4_memory_rw(adapter, MEMWIN_NIC, MEM_EDC0,
+ tcbaddr + (word_offset * sizeof(__be32)),
+ sizeof(be32_count),
+ &be32_count,
+ T4_MEMORY_READ);
+ if (ret < 0)
+ goto out;
+ *pkts = (u64)be32_to_cpu(be32_count);
+ }
+
+out:
+ spin_unlock(&adapter->win0_lock);
+ return ret;
+}
+
+int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx,
+ u64 *hitcnt, u64 *bytecnt)
+{
+ struct adapter *adapter = netdev2adap(dev);
+
+ return get_filter_count(adapter, fidx, hitcnt, bytecnt);
+}
+
+int cxgb4_get_free_ftid(struct net_device *dev, int family)
+{
+ struct adapter *adap = netdev2adap(dev);
+ struct tid_info *t = &adap->tids;
+ int ftid;
+
+ spin_lock_bh(&t->ftid_lock);
+ if (family == PF_INET) {
+ ftid = find_first_zero_bit(t->ftid_bmap, t->nftids);
+ if (ftid >= t->nftids)
+ ftid = -1;
+ } else {
+ ftid = bitmap_find_free_region(t->ftid_bmap, t->nftids, 2);
+ if (ftid < 0)
+ goto out_unlock;
+
+ /* this is only a lookup, keep the found region unallocated */
+ bitmap_release_region(t->ftid_bmap, ftid, 2);
+ }
+out_unlock:
+ spin_unlock_bh(&t->ftid_lock);
+ return ftid;
+}
+
static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family)
{
spin_lock_bh(&t->ftid_lock);
@@ -191,7 +293,8 @@ static int del_filter_wr(struct adapter *adapter, int fidx)
return -ENOMEM;
fwr = __skb_put(skb, len);
- t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id);
+ t4_mk_filtdelwr(f->tid, fwr, (adapter->flags & SHUTTING_DOWN) ? -1
+ : adapter->sge.fw_evtq.abs_id);
/* Mark the filter as "pending" and ship off the Filter Work Request.
* When we get the Work Request Reply we'll clear the pending status.
@@ -210,7 +313,7 @@ static int del_filter_wr(struct adapter *adapter, int fidx)
int set_filter_wr(struct adapter *adapter, int fidx)
{
struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
- struct fw_filter_wr *fwr;
+ struct fw_filter2_wr *fwr;
struct sk_buff *skb;
skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
@@ -231,6 +334,21 @@ int set_filter_wr(struct adapter *adapter, int fidx)
}
}
+ /* If the new filter requires loopback Source MAC rewriting then
+ * we need to allocate a SMT entry for the filter.
+ */
+ if (f->fs.newsmac) {
+ f->smt = cxgb4_smt_alloc_switching(f->dev, f->fs.smac);
+ if (!f->smt) {
+ if (f->l2t) {
+ cxgb4_l2t_release(f->l2t);
+ f->l2t = NULL;
+ }
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ }
+
fwr = __skb_put_zero(skb, sizeof(*fwr));
/* It would be nice to put most of the following in t4_hw.c but most
@@ -241,7 +359,10 @@ int set_filter_wr(struct adapter *adapter, int fidx)
* filter specification structure but for now it's easiest to simply
* put this fairly direct code in line ...
*/
- fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
+ if (adapter->params.filter2_wr_support)
+ fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER2_WR));
+ else
+ fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16));
fwr->tid_to_iq =
htonl(FW_FILTER_WR_TID_V(f->tid) |
@@ -256,7 +377,6 @@ int set_filter_wr(struct adapter *adapter, int fidx)
FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
- FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
f->fs.newvlan == VLAN_REWRITE) |
FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
@@ -303,8 +423,18 @@ int set_filter_wr(struct adapter *adapter, int fidx)
fwr->lpm = htons(f->fs.mask.lport);
fwr->fp = htons(f->fs.val.fport);
fwr->fpm = htons(f->fs.mask.fport);
- if (f->fs.newsmac)
- memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
+
+ if (adapter->params.filter2_wr_support) {
+ fwr->natmode_to_ulp_type =
+ FW_FILTER2_WR_ULP_TYPE_V(f->fs.nat_mode ?
+ ULP_MODE_TCPDDP :
+ ULP_MODE_NONE) |
+ FW_FILTER2_WR_NATMODE_V(f->fs.nat_mode);
+ memcpy(fwr->newlip, f->fs.nat_lip, sizeof(fwr->newlip));
+ memcpy(fwr->newfip, f->fs.nat_fip, sizeof(fwr->newfip));
+ fwr->newlport = htons(f->fs.nat_lport);
+ fwr->newfport = htons(f->fs.nat_fport);
+ }
/* Mark the filter as "pending" and ship off the Filter Work Request.
* When we get the Work Request Reply we'll clear the pending status.
@@ -362,6 +492,9 @@ void clear_filter(struct adapter *adap, struct filter_entry *f)
if (f->l2t)
cxgb4_l2t_release(f->l2t);
+ if (f->smt)
+ cxgb4_smt_release(f->smt);
+
/* The zeroing of the filter rule below clears the filter valid,
* pending, locked flags, l2t pointer, etc. so it's all we need for
* this operation.
@@ -636,6 +769,10 @@ int cxgb4_del_filter(struct net_device *dev, int filter_id)
struct filter_ctx ctx;
int ret;
+ /* If we are shutting down the adapter do not wait for completion */
+ if (netdev2adap(dev)->flags & SHUTTING_DOWN)
+ return __cxgb4_del_filter(dev, filter_id, NULL);
+
init_completion(&ctx.completion);
ret = __cxgb4_del_filter(dev, filter_id, &ctx);
@@ -652,6 +789,62 @@ out:
return ret;
}
+static int set_tcb_field(struct adapter *adap, struct filter_entry *f,
+ unsigned int ftid, u16 word, u64 mask, u64 val,
+ int no_reply)
+{
+ struct cpl_set_tcb_field *req;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(sizeof(struct cpl_set_tcb_field), GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+
+ req = (struct cpl_set_tcb_field *)__skb_put(skb, sizeof(*req));
+ memset(req, 0, sizeof(*req));
+ INIT_TP_WR_CPL(req, CPL_SET_TCB_FIELD, ftid);
+ req->reply_ctrl = htons(REPLY_CHAN_V(0) |
+ QUEUENO_V(adap->sge.fw_evtq.abs_id) |
+ NO_REPLY_V(no_reply));
+ req->word_cookie = htons(TCB_WORD_V(word) | TCB_COOKIE_V(ftid));
+ req->mask = cpu_to_be64(mask);
+ req->val = cpu_to_be64(val);
+ set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
+ t4_ofld_send(adap, skb);
+ return 0;
+}
+
+/* Set one of the t_flags bits in the TCB.
+ */
+static int set_tcb_tflag(struct adapter *adap, struct filter_entry *f,
+ unsigned int ftid, unsigned int bit_pos,
+ unsigned int val, int no_reply)
+{
+ return set_tcb_field(adap, f, ftid, TCB_T_FLAGS_W, 1ULL << bit_pos,
+ (unsigned long long)val << bit_pos, no_reply);
+}
+
+static int configure_filter_smac(struct adapter *adap, struct filter_entry *f)
+{
+ int err;
+
+ /* do a set-tcb for smac-sel and CWR bit.. */
+ err = set_tcb_tflag(adap, f, f->tid, TF_CCTRL_CWR_S, 1, 1);
+ if (err)
+ goto smac_err;
+
+ err = set_tcb_field(adap, f, f->tid, TCB_SMAC_SEL_W,
+ TCB_SMAC_SEL_V(TCB_SMAC_SEL_M),
+ TCB_SMAC_SEL_V(f->smt->idx), 1);
+ if (!err)
+ return 0;
+
+smac_err:
+ dev_err(adap->pdev_dev, "filter %u smac config failed with error %u\n",
+ f->tid, err);
+ return err;
+}
+
/* Handle a filter write/deletion reply. */
void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
{
@@ -690,19 +883,23 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
clear_filter(adap, f);
if (ctx)
ctx->result = 0;
- } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
- dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
- idx);
- clear_filter(adap, f);
- if (ctx)
- ctx->result = -ENOMEM;
} else if (ret == FW_FILTER_WR_FLT_ADDED) {
- f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
- f->pending = 0; /* asynchronous setup completed */
- f->valid = 1;
- if (ctx) {
- ctx->result = 0;
- ctx->tid = idx;
+ int err = 0;
+
+ if (f->fs.newsmac)
+ err = configure_filter_smac(adap, f);
+
+ if (!err) {
+ f->pending = 0; /* async setup completed */
+ f->valid = 1;
+ if (ctx) {
+ ctx->result = 0;
+ ctx->tid = idx;
+ }
+ } else {
+ clear_filter(adap, f);
+ if (ctx)
+ ctx->result = err;
}
} else {
/* Something went wrong. Issue a warning about the
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 92d9d795d874..e16078ddb39f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -77,9 +77,12 @@
#include "cxgb4_debugfs.h"
#include "clip_tbl.h"
#include "l2t.h"
+#include "smt.h"
#include "sched.h"
#include "cxgb4_tc_u32.h"
+#include "cxgb4_tc_flower.h"
#include "cxgb4_ptp.h"
+#include "cxgb4_cudbg.h"
char cxgb4_driver_name[] = KBUILD_MODNAME;
@@ -280,7 +283,7 @@ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat)
else {
#ifdef CONFIG_CHELSIO_T4_DCB
if (cxgb4_dcb_enabled(dev)) {
- cxgb4_dcb_state_init(dev);
+ cxgb4_dcb_reset(dev);
dcb_tx_queue_prio_enable(dev, false);
}
#endif /* CONFIG_CHELSIO_T4_DCB */
@@ -561,6 +564,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
const struct cpl_l2t_write_rpl *p = (void *)rsp;
do_l2t_write_rpl(q->adap, p);
+ } else if (opcode == CPL_SMT_WRITE_RPL) {
+ const struct cpl_smt_write_rpl *p = (void *)rsp;
+
+ do_smt_write_rpl(q->adap, p);
} else if (opcode == CPL_SET_TCB_RPL) {
const struct cpl_set_tcb_rpl *p = (void *)rsp;
@@ -1637,7 +1644,7 @@ void cxgb4_get_tcp_stats(struct pci_dev *pdev, struct tp_tcp_stats *v4,
struct adapter *adap = pci_get_drvdata(pdev);
spin_lock(&adap->stats_lock);
- t4_tp_get_tcp_stats(adap, v4, v6);
+ t4_tp_get_tcp_stats(adap, v4, v6, false);
spin_unlock(&adap->stats_lock);
}
EXPORT_SYMBOL(cxgb4_get_tcp_stats);
@@ -2303,10 +2310,16 @@ static int cxgb_close(struct net_device *dev)
{
struct port_info *pi = netdev_priv(dev);
struct adapter *adapter = pi->adapter;
+ int ret;
netif_tx_stop_all_queues(dev);
netif_carrier_off(dev);
- return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
+ ret = t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
+#ifdef CONFIG_CHELSIO_T4_DCB
+ cxgb4_dcb_reset(dev);
+ dcb_tx_queue_prio_enable(dev, false);
+#endif
+ return ret;
}
int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
@@ -2873,11 +2886,28 @@ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
return err;
}
+static int cxgb_setup_tc_flower(struct net_device *dev,
+ struct tc_cls_flower_offload *cls_flower)
+{
+ if (cls_flower->common.chain_index)
+ return -EOPNOTSUPP;
+
+ switch (cls_flower->command) {
+ case TC_CLSFLOWER_REPLACE:
+ return cxgb4_tc_flower_replace(dev, cls_flower);
+ case TC_CLSFLOWER_DESTROY:
+ return cxgb4_tc_flower_destroy(dev, cls_flower);
+ case TC_CLSFLOWER_STATS:
+ return cxgb4_tc_flower_stats(dev, cls_flower);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int cxgb_setup_tc_cls_u32(struct net_device *dev,
struct tc_cls_u32_offload *cls_u32)
{
- if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
- cls_u32->common.chain_index)
+ if (cls_u32->common.chain_index)
return -EOPNOTSUPP;
switch (cls_u32->command) {
@@ -2891,9 +2921,10 @@ static int cxgb_setup_tc_cls_u32(struct net_device *dev,
}
}
-static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
+static int cxgb_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
{
+ struct net_device *dev = cb_priv;
struct port_info *pi = netdev2pinfo(dev);
struct adapter *adap = netdev2adap(dev);
@@ -2907,6 +2938,39 @@ static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
switch (type) {
case TC_SETUP_CLSU32:
return cxgb_setup_tc_cls_u32(dev, type_data);
+ case TC_SETUP_CLSFLOWER:
+ return cxgb_setup_tc_flower(dev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int cxgb_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct port_info *pi = netdev2pinfo(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, cxgb_setup_tc_block_cb,
+ pi, dev);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, cxgb_setup_tc_block_cb, pi);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int cxgb_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return cxgb_setup_tc_block(dev, type_data);
default:
return -EOPNOTSUPP;
}
@@ -3876,6 +3940,16 @@ static int adap_init0(struct adapter *adap)
1, params, val);
adap->params.fr_nsmr_tpte_wr_support = (ret == 0 && val[0] != 0);
+ /* See if FW supports FW_FILTER2 work request */
+ if (is_t4(adap->params.chip)) {
+ adap->params.filter2_wr_support = 0;
+ } else {
+ params[0] = FW_PARAM_DEV(FILTER2_WR);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+ 1, params, val);
+ adap->params.filter2_wr_support = (ret == 0 && val[0] != 0);
+ }
+
/*
* Get device capabilities so we can determine what resources we need
* to manage.
@@ -4048,7 +4122,7 @@ static int adap_init0(struct adapter *adap)
}
t4_init_sge_params(adap);
adap->flags |= FW_OK;
- t4_init_tp_params(adap);
+ t4_init_tp_params(adap, true);
return 0;
/*
@@ -4612,9 +4686,11 @@ static void free_some_resources(struct adapter *adapter)
{
unsigned int i;
+ kvfree(adapter->smt);
kvfree(adapter->l2t);
t4_cleanup_sched(adapter);
kvfree(adapter->tids.tid_tab);
+ cxgb4_cleanup_tc_flower(adapter);
cxgb4_cleanup_tc_u32(adapter);
kfree(adapter->sge.egr_map);
kfree(adapter->sge.ingr_map);
@@ -4995,7 +5071,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->priv_flags |= IFF_UNICAST_FLT;
/* MTU range: 81 - 9600 */
- netdev->min_mtu = 81;
+ netdev->min_mtu = 81; /* accommodate SACK */
netdev->max_mtu = MAX_MTU;
netdev->netdev_ops = &cxgb4_netdev_ops;
@@ -5006,6 +5082,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
cxgb4_set_ethtool_ops(netdev);
}
+ cxgb4_init_ethtool_dump(adapter);
+
pci_set_drvdata(pdev, adapter);
if (adapter->flags & FW_OK) {
@@ -5035,6 +5113,12 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
*/
cfg_queues(adapter);
+ adapter->smt = t4_init_smt();
+ if (!adapter->smt) {
+ /* We tolerate a lack of SMT, giving up some functionality */
+ dev_warn(&pdev->dev, "could not allocate SMT, continuing\n");
+ }
+
adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end);
if (!adapter->l2t) {
/* We tolerate a lack of L2T, giving up some functionality */
@@ -5083,6 +5167,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!adapter->tc_u32)
dev_warn(&pdev->dev,
"could not offload tc u32, continuing\n");
+
+ cxgb4_init_tc_flower(adapter);
}
if (is_offload(adapter)) {
@@ -5254,6 +5340,8 @@ static void remove_one(struct pci_dev *pdev)
return;
}
+ adapter->flags |= SHUTTING_DOWN;
+
if (adapter->pf == 4) {
int i;
@@ -5339,6 +5427,8 @@ static void shutdown_one(struct pci_dev *pdev)
return;
}
+ adapter->flags |= SHUTTING_DOWN;
+
if (adapter->pf == 4) {
int i;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
new file mode 100644
index 000000000000..9b6aabe4f963
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -0,0 +1,827 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_vlan.h>
+
+#include "cxgb4.h"
+#include "cxgb4_tc_flower.h"
+
+#define STATS_CHECK_PERIOD (HZ / 2)
+
+struct ch_tc_pedit_fields pedits[] = {
+ PEDIT_FIELDS(ETH_, DMAC_31_0, 4, dmac, 0),
+ PEDIT_FIELDS(ETH_, DMAC_47_32, 2, dmac, 4),
+ PEDIT_FIELDS(ETH_, SMAC_15_0, 2, smac, 0),
+ PEDIT_FIELDS(ETH_, SMAC_47_16, 4, smac, 2),
+ PEDIT_FIELDS(IP4_, SRC, 4, nat_fip, 0),
+ PEDIT_FIELDS(IP4_, DST, 4, nat_lip, 0),
+ PEDIT_FIELDS(IP6_, SRC_31_0, 4, nat_fip, 0),
+ PEDIT_FIELDS(IP6_, SRC_63_32, 4, nat_fip, 4),
+ PEDIT_FIELDS(IP6_, SRC_95_64, 4, nat_fip, 8),
+ PEDIT_FIELDS(IP6_, SRC_127_96, 4, nat_fip, 12),
+ PEDIT_FIELDS(IP6_, DST_31_0, 4, nat_lip, 0),
+ PEDIT_FIELDS(IP6_, DST_63_32, 4, nat_lip, 4),
+ PEDIT_FIELDS(IP6_, DST_95_64, 4, nat_lip, 8),
+ PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12),
+ PEDIT_FIELDS(TCP_, SPORT, 2, nat_fport, 0),
+ PEDIT_FIELDS(TCP_, DPORT, 2, nat_lport, 0),
+ PEDIT_FIELDS(UDP_, SPORT, 2, nat_fport, 0),
+ PEDIT_FIELDS(UDP_, DPORT, 2, nat_lport, 0),
+};
+
+static struct ch_tc_flower_entry *allocate_flower_entry(void)
+{
+ struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
+ spin_lock_init(&new->lock);
+ return new;
+}
+
+/* Must be called with either RTNL or rcu_read_lock */
+static struct ch_tc_flower_entry *ch_flower_lookup(struct adapter *adap,
+ unsigned long flower_cookie)
+{
+ struct ch_tc_flower_entry *flower_entry;
+
+ hash_for_each_possible_rcu(adap->flower_anymatch_tbl, flower_entry,
+ link, flower_cookie)
+ if (flower_entry->tc_flower_cookie == flower_cookie)
+ return flower_entry;
+ return NULL;
+}
+
+static void cxgb4_process_flow_match(struct net_device *dev,
+ struct tc_cls_flower_offload *cls,
+ struct ch_filter_specification *fs)
+{
+ u16 addr_type = 0;
+
+ if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_dissector_key_control *key =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_CONTROL,
+ cls->key);
+
+ addr_type = key->addr_type;
+ }
+
+ if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ cls->key);
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ cls->mask);
+ u16 ethtype_key = ntohs(key->n_proto);
+ u16 ethtype_mask = ntohs(mask->n_proto);
+
+ if (ethtype_key == ETH_P_ALL) {
+ ethtype_key = 0;
+ ethtype_mask = 0;
+ }
+
+ fs->val.ethtype = ethtype_key;
+ fs->mask.ethtype = ethtype_mask;
+ fs->val.proto = key->ip_proto;
+ fs->mask.proto = mask->ip_proto;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_dissector_key_ipv4_addrs *key =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ cls->key);
+ struct flow_dissector_key_ipv4_addrs *mask =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ cls->mask);
+ fs->type = 0;
+ memcpy(&fs->val.lip[0], &key->dst, sizeof(key->dst));
+ memcpy(&fs->val.fip[0], &key->src, sizeof(key->src));
+ memcpy(&fs->mask.lip[0], &mask->dst, sizeof(mask->dst));
+ memcpy(&fs->mask.fip[0], &mask->src, sizeof(mask->src));
+
+ /* also initialize nat_lip/fip to same values */
+ memcpy(&fs->nat_lip[0], &key->dst, sizeof(key->dst));
+ memcpy(&fs->nat_fip[0], &key->src, sizeof(key->src));
+
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_dissector_key_ipv6_addrs *key =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ cls->key);
+ struct flow_dissector_key_ipv6_addrs *mask =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ cls->mask);
+
+ fs->type = 1;
+ memcpy(&fs->val.lip[0], key->dst.s6_addr, sizeof(key->dst));
+ memcpy(&fs->val.fip[0], key->src.s6_addr, sizeof(key->src));
+ memcpy(&fs->mask.lip[0], mask->dst.s6_addr, sizeof(mask->dst));
+ memcpy(&fs->mask.fip[0], mask->src.s6_addr, sizeof(mask->src));
+
+ /* also initialize nat_lip/fip to same values */
+ memcpy(&fs->nat_lip[0], key->dst.s6_addr, sizeof(key->dst));
+ memcpy(&fs->nat_fip[0], key->src.s6_addr, sizeof(key->src));
+ }
+
+ if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_dissector_key_ports *key, *mask;
+
+ key = skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ cls->key);
+ mask = skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ cls->mask);
+ fs->val.lport = cpu_to_be16(key->dst);
+ fs->mask.lport = cpu_to_be16(mask->dst);
+ fs->val.fport = cpu_to_be16(key->src);
+ fs->mask.fport = cpu_to_be16(mask->src);
+
+ /* also initialize nat_lport/fport to same values */
+ fs->nat_lport = cpu_to_be16(key->dst);
+ fs->nat_fport = cpu_to_be16(key->src);
+ }
+
+ if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) {
+ struct flow_dissector_key_ip *key, *mask;
+
+ key = skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ cls->key);
+ mask = skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ cls->mask);
+ fs->val.tos = key->tos;
+ fs->mask.tos = mask->tos;
+ }
+
+ if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_VLAN)) {
+ struct flow_dissector_key_vlan *key, *mask;
+ u16 vlan_tci, vlan_tci_mask;
+
+ key = skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ cls->key);
+ mask = skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_VLAN,
+ cls->mask);
+ vlan_tci = key->vlan_id | (key->vlan_priority <<
+ VLAN_PRIO_SHIFT);
+ vlan_tci_mask = mask->vlan_id | (mask->vlan_priority <<
+ VLAN_PRIO_SHIFT);
+ fs->val.ivlan = cpu_to_be16(vlan_tci);
+ fs->mask.ivlan = cpu_to_be16(vlan_tci_mask);
+
+ /* Chelsio adapters use ivlan_vld bit to match vlan packets
+ * as 802.1Q. Also, when vlan tag is present in packets,
+ * ethtype match is used then to match on ethtype of inner
+ * header ie. the header following the vlan header.
+ * So, set the ivlan_vld based on ethtype info supplied by
+ * TC for vlan packets if its 802.1Q. And then reset the
+ * ethtype value else, hw will try to match the supplied
+ * ethtype value with ethtype of inner header.
+ */
+ if (fs->val.ethtype == ETH_P_8021Q) {
+ fs->val.ivlan_vld = 1;
+ fs->mask.ivlan_vld = 1;
+ fs->val.ethtype = 0;
+ fs->mask.ethtype = 0;
+ }
+ }
+
+ /* Match only packets coming from the ingress port where this
+ * filter will be created.
+ */
+ fs->val.iport = netdev2pinfo(dev)->port_id;
+ fs->mask.iport = ~0;
+}
+
+static int cxgb4_validate_flow_match(struct net_device *dev,
+ struct tc_cls_flower_offload *cls)
+{
+ u16 ethtype_mask = 0;
+ u16 ethtype_key = 0;
+
+ if (cls->dissector->used_keys &
+ ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
+ BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
+ BIT(FLOW_DISSECTOR_KEY_PORTS) |
+ BIT(FLOW_DISSECTOR_KEY_VLAN) |
+ BIT(FLOW_DISSECTOR_KEY_IP))) {
+ netdev_warn(dev, "Unsupported key used: 0x%x\n",
+ cls->dissector->used_keys);
+ return -EOPNOTSUPP;
+ }
+
+ if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_dissector_key_basic *key =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ cls->key);
+ struct flow_dissector_key_basic *mask =
+ skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_BASIC,
+ cls->mask);
+ ethtype_key = ntohs(key->n_proto);
+ ethtype_mask = ntohs(mask->n_proto);
+ }
+
+ if (dissector_uses_key(cls->dissector, FLOW_DISSECTOR_KEY_IP)) {
+ u16 eth_ip_type = ethtype_key & ethtype_mask;
+ struct flow_dissector_key_ip *mask;
+
+ if (eth_ip_type != ETH_P_IP && eth_ip_type != ETH_P_IPV6) {
+ netdev_err(dev, "IP Key supported only with IPv4/v6");
+ return -EINVAL;
+ }
+
+ mask = skb_flow_dissector_target(cls->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ cls->mask);
+ if (mask->ttl) {
+ netdev_warn(dev, "ttl match unsupported for offload");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ return 0;
+}
+
+static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask,
+ u8 field)
+{
+ u32 set_val = val & ~mask;
+ u32 offset = 0;
+ u8 size = 1;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pedits); i++) {
+ if (pedits[i].field == field) {
+ offset = pedits[i].offset;
+ size = pedits[i].size;
+ break;
+ }
+ }
+ memcpy((u8 *)fs + offset, &set_val, size);
+}
+
+static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
+ u32 mask, u32 offset, u8 htype)
+{
+ switch (htype) {
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+ switch (offset) {
+ case PEDIT_ETH_DMAC_31_0:
+ fs->newdmac = 1;
+ offload_pedit(fs, val, mask, ETH_DMAC_31_0);
+ break;
+ case PEDIT_ETH_DMAC_47_32_SMAC_15_0:
+ if (~mask & PEDIT_ETH_DMAC_MASK)
+ offload_pedit(fs, val, mask, ETH_DMAC_47_32);
+ else
+ offload_pedit(fs, val >> 16, mask >> 16,
+ ETH_SMAC_15_0);
+ break;
+ case PEDIT_ETH_SMAC_47_16:
+ fs->newsmac = 1;
+ offload_pedit(fs, val, mask, ETH_SMAC_47_16);
+ }
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+ switch (offset) {
+ case PEDIT_IP4_SRC:
+ offload_pedit(fs, val, mask, IP4_SRC);
+ break;
+ case PEDIT_IP4_DST:
+ offload_pedit(fs, val, mask, IP4_DST);
+ }
+ fs->nat_mode = NAT_MODE_ALL;
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+ switch (offset) {
+ case PEDIT_IP6_SRC_31_0:
+ offload_pedit(fs, val, mask, IP6_SRC_31_0);
+ break;
+ case PEDIT_IP6_SRC_63_32:
+ offload_pedit(fs, val, mask, IP6_SRC_63_32);
+ break;
+ case PEDIT_IP6_SRC_95_64:
+ offload_pedit(fs, val, mask, IP6_SRC_95_64);
+ break;
+ case PEDIT_IP6_SRC_127_96:
+ offload_pedit(fs, val, mask, IP6_SRC_127_96);
+ break;
+ case PEDIT_IP6_DST_31_0:
+ offload_pedit(fs, val, mask, IP6_DST_31_0);
+ break;
+ case PEDIT_IP6_DST_63_32:
+ offload_pedit(fs, val, mask, IP6_DST_63_32);
+ break;
+ case PEDIT_IP6_DST_95_64:
+ offload_pedit(fs, val, mask, IP6_DST_95_64);
+ break;
+ case PEDIT_IP6_DST_127_96:
+ offload_pedit(fs, val, mask, IP6_DST_127_96);
+ }
+ fs->nat_mode = NAT_MODE_ALL;
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+ switch (offset) {
+ case PEDIT_TCP_SPORT_DPORT:
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ offload_pedit(fs, cpu_to_be32(val) >> 16,
+ cpu_to_be32(mask) >> 16,
+ TCP_SPORT);
+ else
+ offload_pedit(fs, cpu_to_be32(val),
+ cpu_to_be32(mask), TCP_DPORT);
+ }
+ fs->nat_mode = NAT_MODE_ALL;
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+ switch (offset) {
+ case PEDIT_UDP_SPORT_DPORT:
+ if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
+ offload_pedit(fs, cpu_to_be32(val) >> 16,
+ cpu_to_be32(mask) >> 16,
+ UDP_SPORT);
+ else
+ offload_pedit(fs, cpu_to_be32(val),
+ cpu_to_be32(mask), UDP_DPORT);
+ }
+ fs->nat_mode = NAT_MODE_ALL;
+ }
+}
+
+static void cxgb4_process_flow_actions(struct net_device *in,
+ struct tc_cls_flower_offload *cls,
+ struct ch_filter_specification *fs)
+{
+ const struct tc_action *a;
+ LIST_HEAD(actions);
+
+ tcf_exts_to_list(cls->exts, &actions);
+ list_for_each_entry(a, &actions, list) {
+ if (is_tcf_gact_ok(a)) {
+ fs->action = FILTER_PASS;
+ } else if (is_tcf_gact_shot(a)) {
+ fs->action = FILTER_DROP;
+ } else if (is_tcf_mirred_egress_redirect(a)) {
+ int ifindex = tcf_mirred_ifindex(a);
+ struct net_device *out = __dev_get_by_index(dev_net(in),
+ ifindex);
+ struct port_info *pi = netdev_priv(out);
+
+ fs->action = FILTER_SWITCH;
+ fs->eport = pi->port_id;
+ } else if (is_tcf_vlan(a)) {
+ u32 vlan_action = tcf_vlan_action(a);
+ u8 prio = tcf_vlan_push_prio(a);
+ u16 vid = tcf_vlan_push_vid(a);
+ u16 vlan_tci = (prio << VLAN_PRIO_SHIFT) | vid;
+
+ switch (vlan_action) {
+ case TCA_VLAN_ACT_POP:
+ fs->newvlan |= VLAN_REMOVE;
+ break;
+ case TCA_VLAN_ACT_PUSH:
+ fs->newvlan |= VLAN_INSERT;
+ fs->vlan = vlan_tci;
+ break;
+ case TCA_VLAN_ACT_MODIFY:
+ fs->newvlan |= VLAN_REWRITE;
+ fs->vlan = vlan_tci;
+ break;
+ default:
+ break;
+ }
+ } else if (is_tcf_pedit(a)) {
+ u32 mask, val, offset;
+ int nkeys, i;
+ u8 htype;
+
+ nkeys = tcf_pedit_nkeys(a);
+ for (i = 0; i < nkeys; i++) {
+ htype = tcf_pedit_htype(a, i);
+ mask = tcf_pedit_mask(a, i);
+ val = tcf_pedit_val(a, i);
+ offset = tcf_pedit_offset(a, i);
+
+ process_pedit_field(fs, val, mask, offset,
+ htype);
+ }
+ }
+ }
+}
+
+static bool valid_l4_mask(u32 mask)
+{
+ u16 hi, lo;
+
+ /* Either the upper 16-bits (SPORT) OR the lower
+ * 16-bits (DPORT) can be set, but NOT BOTH.
+ */
+ hi = (mask >> 16) & 0xFFFF;
+ lo = mask & 0xFFFF;
+
+ return hi && lo ? false : true;
+}
+
+static bool valid_pedit_action(struct net_device *dev,
+ const struct tc_action *a)
+{
+ u32 mask, offset;
+ u8 cmd, htype;
+ int nkeys, i;
+
+ nkeys = tcf_pedit_nkeys(a);
+ for (i = 0; i < nkeys; i++) {
+ htype = tcf_pedit_htype(a, i);
+ cmd = tcf_pedit_cmd(a, i);
+ mask = tcf_pedit_mask(a, i);
+ offset = tcf_pedit_offset(a, i);
+
+ if (cmd != TCA_PEDIT_KEY_EX_CMD_SET) {
+ netdev_err(dev, "%s: Unsupported pedit cmd\n",
+ __func__);
+ return false;
+ }
+
+ switch (htype) {
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+ switch (offset) {
+ case PEDIT_ETH_DMAC_31_0:
+ case PEDIT_ETH_DMAC_47_32_SMAC_15_0:
+ case PEDIT_ETH_SMAC_47_16:
+ break;
+ default:
+ netdev_err(dev, "%s: Unsupported pedit field\n",
+ __func__);
+ return false;
+ }
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+ switch (offset) {
+ case PEDIT_IP4_SRC:
+ case PEDIT_IP4_DST:
+ break;
+ default:
+ netdev_err(dev, "%s: Unsupported pedit field\n",
+ __func__);
+ return false;
+ }
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+ switch (offset) {
+ case PEDIT_IP6_SRC_31_0:
+ case PEDIT_IP6_SRC_63_32:
+ case PEDIT_IP6_SRC_95_64:
+ case PEDIT_IP6_SRC_127_96:
+ case PEDIT_IP6_DST_31_0:
+ case PEDIT_IP6_DST_63_32:
+ case PEDIT_IP6_DST_95_64:
+ case PEDIT_IP6_DST_127_96:
+ break;
+ default:
+ netdev_err(dev, "%s: Unsupported pedit field\n",
+ __func__);
+ return false;
+ }
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+ switch (offset) {
+ case PEDIT_TCP_SPORT_DPORT:
+ if (!valid_l4_mask(~mask)) {
+ netdev_err(dev, "%s: Unsupported mask for TCP L4 ports\n",
+ __func__);
+ return false;
+ }
+ break;
+ default:
+ netdev_err(dev, "%s: Unsupported pedit field\n",
+ __func__);
+ return false;
+ }
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+ switch (offset) {
+ case PEDIT_UDP_SPORT_DPORT:
+ if (!valid_l4_mask(~mask)) {
+ netdev_err(dev, "%s: Unsupported mask for UDP L4 ports\n",
+ __func__);
+ return false;
+ }
+ break;
+ default:
+ netdev_err(dev, "%s: Unsupported pedit field\n",
+ __func__);
+ return false;
+ }
+ break;
+ default:
+ netdev_err(dev, "%s: Unsupported pedit type\n",
+ __func__);
+ return false;
+ }
+ }
+ return true;
+}
+
+static int cxgb4_validate_flow_actions(struct net_device *dev,
+ struct tc_cls_flower_offload *cls)
+{
+ const struct tc_action *a;
+ bool act_redir = false;
+ bool act_pedit = false;
+ bool act_vlan = false;
+ LIST_HEAD(actions);
+
+ tcf_exts_to_list(cls->exts, &actions);
+ list_for_each_entry(a, &actions, list) {
+ if (is_tcf_gact_ok(a)) {
+ /* Do nothing */
+ } else if (is_tcf_gact_shot(a)) {
+ /* Do nothing */
+ } else if (is_tcf_mirred_egress_redirect(a)) {
+ struct adapter *adap = netdev2adap(dev);
+ struct net_device *n_dev;
+ unsigned int i, ifindex;
+ bool found = false;
+
+ ifindex = tcf_mirred_ifindex(a);
+ for_each_port(adap, i) {
+ n_dev = adap->port[i];
+ if (ifindex == n_dev->ifindex) {
+ found = true;
+ break;
+ }
+ }
+
+ /* If interface doesn't belong to our hw, then
+ * the provided output port is not valid
+ */
+ if (!found) {
+ netdev_err(dev, "%s: Out port invalid\n",
+ __func__);
+ return -EINVAL;
+ }
+ act_redir = true;
+ } else if (is_tcf_vlan(a)) {
+ u16 proto = be16_to_cpu(tcf_vlan_push_proto(a));
+ u32 vlan_action = tcf_vlan_action(a);
+
+ switch (vlan_action) {
+ case TCA_VLAN_ACT_POP:
+ break;
+ case TCA_VLAN_ACT_PUSH:
+ case TCA_VLAN_ACT_MODIFY:
+ if (proto != ETH_P_8021Q) {
+ netdev_err(dev, "%s: Unsupported vlan proto\n",
+ __func__);
+ return -EOPNOTSUPP;
+ }
+ break;
+ default:
+ netdev_err(dev, "%s: Unsupported vlan action\n",
+ __func__);
+ return -EOPNOTSUPP;
+ }
+ act_vlan = true;
+ } else if (is_tcf_pedit(a)) {
+ bool pedit_valid = valid_pedit_action(dev, a);
+
+ if (!pedit_valid)
+ return -EOPNOTSUPP;
+ act_pedit = true;
+ } else {
+ netdev_err(dev, "%s: Unsupported action\n", __func__);
+ return -EOPNOTSUPP;
+ }
+ }
+
+ if ((act_pedit || act_vlan) && !act_redir) {
+ netdev_err(dev, "%s: pedit/vlan rewrite invalid without egress redirect\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int cxgb4_tc_flower_replace(struct net_device *dev,
+ struct tc_cls_flower_offload *cls)
+{
+ struct adapter *adap = netdev2adap(dev);
+ struct ch_tc_flower_entry *ch_flower;
+ struct ch_filter_specification *fs;
+ struct filter_ctx ctx;
+ int fidx;
+ int ret;
+
+ if (cxgb4_validate_flow_actions(dev, cls))
+ return -EOPNOTSUPP;
+
+ if (cxgb4_validate_flow_match(dev, cls))
+ return -EOPNOTSUPP;
+
+ ch_flower = allocate_flower_entry();
+ if (!ch_flower) {
+ netdev_err(dev, "%s: ch_flower alloc failed.\n", __func__);
+ return -ENOMEM;
+ }
+
+ fs = &ch_flower->fs;
+ fs->hitcnts = 1;
+ cxgb4_process_flow_match(dev, cls, fs);
+ cxgb4_process_flow_actions(dev, cls, fs);
+
+ fidx = cxgb4_get_free_ftid(dev, fs->type ? PF_INET6 : PF_INET);
+ if (fidx < 0) {
+ netdev_err(dev, "%s: No fidx for offload.\n", __func__);
+ ret = -ENOMEM;
+ goto free_entry;
+ }
+
+ init_completion(&ctx.completion);
+ ret = __cxgb4_set_filter(dev, fidx, fs, &ctx);
+ if (ret) {
+ netdev_err(dev, "%s: filter creation err %d\n",
+ __func__, ret);
+ goto free_entry;
+ }
+
+ /* Wait for reply */
+ ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ);
+ if (!ret) {
+ ret = -ETIMEDOUT;
+ goto free_entry;
+ }
+
+ ret = ctx.result;
+ /* Check if hw returned error for filter creation */
+ if (ret) {
+ netdev_err(dev, "%s: filter creation err %d\n",
+ __func__, ret);
+ goto free_entry;
+ }
+
+ INIT_HLIST_NODE(&ch_flower->link);
+ ch_flower->tc_flower_cookie = cls->cookie;
+ ch_flower->filter_id = ctx.tid;
+ hash_add_rcu(adap->flower_anymatch_tbl, &ch_flower->link, cls->cookie);
+
+ return ret;
+
+free_entry:
+ kfree(ch_flower);
+ return ret;
+}
+
+int cxgb4_tc_flower_destroy(struct net_device *dev,
+ struct tc_cls_flower_offload *cls)
+{
+ struct adapter *adap = netdev2adap(dev);
+ struct ch_tc_flower_entry *ch_flower;
+ int ret;
+
+ ch_flower = ch_flower_lookup(adap, cls->cookie);
+ if (!ch_flower)
+ return -ENOENT;
+
+ ret = cxgb4_del_filter(dev, ch_flower->filter_id);
+ if (ret)
+ goto err;
+
+ hash_del_rcu(&ch_flower->link);
+ kfree_rcu(ch_flower, rcu);
+
+err:
+ return ret;
+}
+
+static void ch_flower_stats_cb(unsigned long data)
+{
+ struct adapter *adap = (struct adapter *)data;
+ struct ch_tc_flower_entry *flower_entry;
+ struct ch_tc_flower_stats *ofld_stats;
+ unsigned int i;
+ u64 packets;
+ u64 bytes;
+ int ret;
+
+ rcu_read_lock();
+ hash_for_each_rcu(adap->flower_anymatch_tbl, i, flower_entry, link) {
+ ret = cxgb4_get_filter_counters(adap->port[0],
+ flower_entry->filter_id,
+ &packets, &bytes);
+ if (!ret) {
+ spin_lock(&flower_entry->lock);
+ ofld_stats = &flower_entry->stats;
+
+ if (ofld_stats->prev_packet_count != packets) {
+ ofld_stats->prev_packet_count = packets;
+ ofld_stats->last_used = jiffies;
+ }
+ spin_unlock(&flower_entry->lock);
+ }
+ }
+ rcu_read_unlock();
+ mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
+}
+
+int cxgb4_tc_flower_stats(struct net_device *dev,
+ struct tc_cls_flower_offload *cls)
+{
+ struct adapter *adap = netdev2adap(dev);
+ struct ch_tc_flower_stats *ofld_stats;
+ struct ch_tc_flower_entry *ch_flower;
+ u64 packets;
+ u64 bytes;
+ int ret;
+
+ ch_flower = ch_flower_lookup(adap, cls->cookie);
+ if (!ch_flower) {
+ ret = -ENOENT;
+ goto err;
+ }
+
+ ret = cxgb4_get_filter_counters(dev, ch_flower->filter_id,
+ &packets, &bytes);
+ if (ret < 0)
+ goto err;
+
+ spin_lock_bh(&ch_flower->lock);
+ ofld_stats = &ch_flower->stats;
+ if (ofld_stats->packet_count != packets) {
+ if (ofld_stats->prev_packet_count != packets)
+ ofld_stats->last_used = jiffies;
+ tcf_exts_stats_update(cls->exts, bytes - ofld_stats->byte_count,
+ packets - ofld_stats->packet_count,
+ ofld_stats->last_used);
+
+ ofld_stats->packet_count = packets;
+ ofld_stats->byte_count = bytes;
+ ofld_stats->prev_packet_count = packets;
+ }
+ spin_unlock_bh(&ch_flower->lock);
+ return 0;
+
+err:
+ return ret;
+}
+
+void cxgb4_init_tc_flower(struct adapter *adap)
+{
+ hash_init(adap->flower_anymatch_tbl);
+ setup_timer(&adap->flower_stats_timer, ch_flower_stats_cb,
+ (unsigned long)adap);
+ mod_timer(&adap->flower_stats_timer, jiffies + STATS_CHECK_PERIOD);
+}
+
+void cxgb4_cleanup_tc_flower(struct adapter *adap)
+{
+ if (adap->flower_stats_timer.function)
+ del_timer_sync(&adap->flower_stats_timer);
+}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
new file mode 100644
index 000000000000..202d5c9ec303
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.h
@@ -0,0 +1,120 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_TC_FLOWER_H
+#define __CXGB4_TC_FLOWER_H
+
+#include <net/pkt_cls.h>
+
+struct ch_tc_flower_stats {
+ u64 prev_packet_count;
+ u64 packet_count;
+ u64 byte_count;
+ u64 last_used;
+};
+
+struct ch_tc_flower_entry {
+ struct ch_filter_specification fs;
+ struct ch_tc_flower_stats stats;
+ unsigned long tc_flower_cookie;
+ struct hlist_node link;
+ struct rcu_head rcu;
+ spinlock_t lock; /* lock for stats */
+ u32 filter_id;
+};
+
+enum {
+ ETH_DMAC_31_0, /* dmac bits 0.. 31 */
+ ETH_DMAC_47_32, /* dmac bits 32..47 */
+ ETH_SMAC_15_0, /* smac bits 0.. 15 */
+ ETH_SMAC_47_16, /* smac bits 16..47 */
+
+ IP4_SRC, /* 32-bit IPv4 src */
+ IP4_DST, /* 32-bit IPv4 dst */
+
+ IP6_SRC_31_0, /* src bits 0.. 31 */
+ IP6_SRC_63_32, /* src bits 63.. 32 */
+ IP6_SRC_95_64, /* src bits 95.. 64 */
+ IP6_SRC_127_96, /* src bits 127..96 */
+
+ IP6_DST_31_0, /* dst bits 0.. 31 */
+ IP6_DST_63_32, /* dst bits 63.. 32 */
+ IP6_DST_95_64, /* dst bits 95.. 64 */
+ IP6_DST_127_96, /* dst bits 127..96 */
+
+ TCP_SPORT, /* 16-bit TCP sport */
+ TCP_DPORT, /* 16-bit TCP dport */
+
+ UDP_SPORT, /* 16-bit UDP sport */
+ UDP_DPORT, /* 16-bit UDP dport */
+};
+
+struct ch_tc_pedit_fields {
+ u8 field;
+ u8 size;
+ u32 offset;
+};
+
+#define PEDIT_FIELDS(type, field, size, fs_field, offset) \
+ { type## field, size, \
+ offsetof(struct ch_filter_specification, fs_field) + (offset) }
+
+#define PEDIT_ETH_DMAC_MASK 0xffff
+#define PEDIT_TCP_UDP_SPORT_MASK 0xffff
+#define PEDIT_ETH_DMAC_31_0 0x0
+#define PEDIT_ETH_DMAC_47_32_SMAC_15_0 0x4
+#define PEDIT_ETH_SMAC_47_16 0x8
+#define PEDIT_IP4_SRC 0xC
+#define PEDIT_IP4_DST 0x10
+#define PEDIT_IP6_SRC_31_0 0x8
+#define PEDIT_IP6_SRC_63_32 0xC
+#define PEDIT_IP6_SRC_95_64 0x10
+#define PEDIT_IP6_SRC_127_96 0x14
+#define PEDIT_IP6_DST_31_0 0x18
+#define PEDIT_IP6_DST_63_32 0x1C
+#define PEDIT_IP6_DST_95_64 0x20
+#define PEDIT_IP6_DST_127_96 0x24
+#define PEDIT_TCP_SPORT_DPORT 0x0
+#define PEDIT_UDP_SPORT_DPORT 0x0
+
+int cxgb4_tc_flower_replace(struct net_device *dev,
+ struct tc_cls_flower_offload *cls);
+int cxgb4_tc_flower_destroy(struct net_device *dev,
+ struct tc_cls_flower_offload *cls);
+int cxgb4_tc_flower_stats(struct net_device *dev,
+ struct tc_cls_flower_offload *cls);
+
+void cxgb4_init_tc_flower(struct adapter *adap);
+void cxgb4_cleanup_tc_flower(struct adapter *adap);
+#endif /* __CXGB4_TC_FLOWER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 84541fce94c5..52324c77a4fe 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -212,6 +212,7 @@ struct filter_ctx {
struct ch_filter_specification;
+int cxgb4_get_free_ftid(struct net_device *dev, int family);
int __cxgb4_set_filter(struct net_device *dev, int filter_id,
struct ch_filter_specification *fs,
struct filter_ctx *ctx);
@@ -220,6 +221,8 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id,
int cxgb4_set_filter(struct net_device *dev, int filter_id,
struct ch_filter_specification *fs);
int cxgb4_del_filter(struct net_device *dev, int filter_id);
+int cxgb4_get_filter_counters(struct net_device *dev, unsigned int fidx,
+ u64 *hitcnt, u64 *bytecnt);
static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue)
{
diff --git a/drivers/net/ethernet/chelsio/cxgb4/l2t.c b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
index f7ef8871dd0b..1817a0307d26 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/l2t.c
@@ -422,7 +422,7 @@ struct l2t_entry *cxgb4_l2t_get(struct l2t_data *d, struct neighbour *neigh,
u8 lport;
u16 vlan;
struct l2t_entry *e;
- int addr_len = neigh->tbl->key_len;
+ unsigned int addr_len = neigh->tbl->key_len;
u32 *addr = (u32 *)neigh->primary_key;
int ifidx = neigh->dev->ifindex;
int hash = addr_hash(d, addr, addr_len, ifidx);
@@ -536,7 +536,7 @@ void t4_l2t_update(struct adapter *adap, struct neighbour *neigh)
struct l2t_entry *e;
struct sk_buff_head *arpq = NULL;
struct l2t_data *d = adap->l2t;
- int addr_len = neigh->tbl->key_len;
+ unsigned int addr_len = neigh->tbl->key_len;
u32 *addr = (u32 *) neigh->primary_key;
int ifidx = neigh->dev->ifindex;
int hash = addr_hash(d, addr, addr_len, ifidx);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.c b/drivers/net/ethernet/chelsio/cxgb4/smt.c
new file mode 100644
index 000000000000..7b2207a2a130
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/smt.c
@@ -0,0 +1,247 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cxgb4.h"
+#include "smt.h"
+#include "t4_msg.h"
+#include "t4fw_api.h"
+#include "t4_regs.h"
+#include "t4_values.h"
+
+struct smt_data *t4_init_smt(void)
+{
+ unsigned int smt_size;
+ struct smt_data *s;
+ int i;
+
+ smt_size = SMT_SIZE;
+
+ s = kvzalloc(sizeof(*s) + smt_size * sizeof(struct smt_entry),
+ GFP_KERNEL);
+ if (!s)
+ return NULL;
+ s->smt_size = smt_size;
+ rwlock_init(&s->lock);
+ for (i = 0; i < s->smt_size; ++i) {
+ s->smtab[i].idx = i;
+ s->smtab[i].state = SMT_STATE_UNUSED;
+ memset(&s->smtab[i].src_mac, 0, ETH_ALEN);
+ spin_lock_init(&s->smtab[i].lock);
+ atomic_set(&s->smtab[i].refcnt, 0);
+ }
+ return s;
+}
+
+static struct smt_entry *find_or_alloc_smte(struct smt_data *s, u8 *smac)
+{
+ struct smt_entry *first_free = NULL;
+ struct smt_entry *e, *end;
+
+ for (e = &s->smtab[0], end = &s->smtab[s->smt_size]; e != end; ++e) {
+ if (atomic_read(&e->refcnt) == 0) {
+ if (!first_free)
+ first_free = e;
+ } else {
+ if (e->state == SMT_STATE_SWITCHING) {
+ /* This entry is actually in use. See if we can
+ * re-use it ?
+ */
+ if (memcmp(e->src_mac, smac, ETH_ALEN) == 0)
+ goto found_reuse;
+ }
+ }
+ }
+
+ if (first_free) {
+ e = first_free;
+ goto found;
+ }
+ return NULL;
+
+found:
+ e->state = SMT_STATE_UNUSED;
+
+found_reuse:
+ return e;
+}
+
+static void t4_smte_free(struct smt_entry *e)
+{
+ spin_lock_bh(&e->lock);
+ if (atomic_read(&e->refcnt) == 0) { /* hasn't been recycled */
+ e->state = SMT_STATE_UNUSED;
+ }
+ spin_unlock_bh(&e->lock);
+}
+
+/**
+ * @e: smt entry to release
+ *
+ * Releases ref count and frees up an smt entry from SMT table
+ */
+void cxgb4_smt_release(struct smt_entry *e)
+{
+ if (atomic_dec_and_test(&e->refcnt))
+ t4_smte_free(e);
+}
+EXPORT_SYMBOL(cxgb4_smt_release);
+
+void do_smt_write_rpl(struct adapter *adap, const struct cpl_smt_write_rpl *rpl)
+{
+ unsigned int smtidx = TID_TID_G(GET_TID(rpl));
+ struct smt_data *s = adap->smt;
+
+ if (unlikely(rpl->status != CPL_ERR_NONE)) {
+ struct smt_entry *e = &s->smtab[smtidx];
+
+ dev_err(adap->pdev_dev,
+ "Unexpected SMT_WRITE_RPL status %u for entry %u\n",
+ rpl->status, smtidx);
+ spin_lock(&e->lock);
+ e->state = SMT_STATE_ERROR;
+ spin_unlock(&e->lock);
+ return;
+ }
+}
+
+static int write_smt_entry(struct adapter *adapter, struct smt_entry *e)
+{
+ struct cpl_t6_smt_write_req *t6req;
+ struct smt_data *s = adapter->smt;
+ struct cpl_smt_write_req *req;
+ struct sk_buff *skb;
+ int size;
+ u8 row;
+
+ if (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5) {
+ size = sizeof(*req);
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+ /* Source MAC Table (SMT) contains 256 SMAC entries
+ * organized in 128 rows of 2 entries each.
+ */
+ req = (struct cpl_smt_write_req *)__skb_put(skb, size);
+ INIT_TP_WR(req, 0);
+
+ /* Each row contains an SMAC pair.
+ * LSB selects the SMAC entry within a row
+ */
+ row = (e->idx >> 1);
+ if (e->idx & 1) {
+ req->pfvf1 = 0x0;
+ memcpy(req->src_mac1, e->src_mac, ETH_ALEN);
+
+ /* fill pfvf0/src_mac0 with entry
+ * at prev index from smt-tab.
+ */
+ req->pfvf0 = 0x0;
+ memcpy(req->src_mac0, s->smtab[e->idx - 1].src_mac,
+ ETH_ALEN);
+ } else {
+ req->pfvf0 = 0x0;
+ memcpy(req->src_mac0, e->src_mac, ETH_ALEN);
+
+ /* fill pfvf1/src_mac1 with entry
+ * at next index from smt-tab
+ */
+ req->pfvf1 = 0x0;
+ memcpy(req->src_mac1, s->smtab[e->idx + 1].src_mac,
+ ETH_ALEN);
+ }
+ } else {
+ size = sizeof(*t6req);
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ return -ENOMEM;
+ /* Source MAC Table (SMT) contains 256 SMAC entries */
+ t6req = (struct cpl_t6_smt_write_req *)__skb_put(skb, size);
+ INIT_TP_WR(t6req, 0);
+ req = (struct cpl_smt_write_req *)t6req;
+
+ /* fill pfvf0/src_mac0 from smt-tab */
+ req->pfvf0 = 0x0;
+ memcpy(req->src_mac0, s->smtab[e->idx].src_mac, ETH_ALEN);
+ row = e->idx;
+ }
+
+ OPCODE_TID(req) =
+ htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, e->idx |
+ TID_QID_V(adapter->sge.fw_evtq.abs_id)));
+ req->params = htonl(SMTW_NORPL_V(0) |
+ SMTW_IDX_V(row) |
+ SMTW_OVLAN_IDX_V(0));
+ t4_mgmt_tx(adapter, skb);
+ return 0;
+}
+
+static struct smt_entry *t4_smt_alloc_switching(struct adapter *adap, u16 pfvf,
+ u8 *smac)
+{
+ struct smt_data *s = adap->smt;
+ struct smt_entry *e;
+
+ write_lock_bh(&s->lock);
+ e = find_or_alloc_smte(s, smac);
+ if (e) {
+ spin_lock(&e->lock);
+ if (!atomic_read(&e->refcnt)) {
+ atomic_set(&e->refcnt, 1);
+ e->state = SMT_STATE_SWITCHING;
+ e->pfvf = pfvf;
+ memcpy(e->src_mac, smac, ETH_ALEN);
+ write_smt_entry(adap, e);
+ } else {
+ atomic_inc(&e->refcnt);
+ }
+ spin_unlock(&e->lock);
+ }
+ write_unlock_bh(&s->lock);
+ return e;
+}
+
+/**
+ * @dev: net_device pointer
+ * @smac: MAC address to add to SMT
+ * Returns pointer to the SMT entry created
+ *
+ * Allocates an SMT entry to be used by switching rule of a filter.
+ */
+struct smt_entry *cxgb4_smt_alloc_switching(struct net_device *dev, u8 *smac)
+{
+ struct adapter *adap = netdev2adap(dev);
+
+ return t4_smt_alloc_switching(adap, 0x0, smac);
+}
+EXPORT_SYMBOL(cxgb4_smt_alloc_switching);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/smt.h b/drivers/net/ethernet/chelsio/cxgb4/smt.h
new file mode 100644
index 000000000000..d6c2cc271398
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/smt.h
@@ -0,0 +1,76 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CXGB4_SMT_H
+#define __CXGB4_SMT_H
+
+#include <linux/spinlock.h>
+#include <linux/if_ether.h>
+#include <linux/atomic.h>
+
+struct adapter;
+struct cpl_smt_write_rpl;
+
+/* SMT related handling. Heavily adapted based on l2t ops in l2t.h/l2t.c
+ */
+enum {
+ SMT_STATE_SWITCHING,
+ SMT_STATE_UNUSED,
+ SMT_STATE_ERROR
+};
+
+enum {
+ SMT_SIZE = 256
+};
+
+struct smt_entry {
+ u16 state;
+ u16 idx;
+ u16 pfvf;
+ u8 src_mac[ETH_ALEN];
+ atomic_t refcnt;
+ spinlock_t lock; /* protect smt entry add,removal */
+};
+
+struct smt_data {
+ unsigned int smt_size;
+ rwlock_t lock;
+ struct smt_entry smtab[0];
+};
+
+struct smt_data *t4_init_smt(void);
+struct smt_entry *cxgb4_smt_alloc_switching(struct net_device *dev, u8 *smac);
+void cxgb4_smt_release(struct smt_entry *e);
+void do_smt_write_rpl(struct adapter *p, const struct cpl_smt_write_rpl *rpl);
+#endif /* __CXGB4_SMT_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index b65ce26ff72f..006414758f65 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -5052,23 +5052,26 @@ static unsigned int t4_use_ldst(struct adapter *adap)
}
/**
- * t4_fw_tp_pio_rw - Access TP PIO through LDST
- * @adap: the adapter
- * @vals: where the indirect register values are stored/written
- * @nregs: how many indirect registers to read/write
- * @start_idx: index of first indirect register to read/write
- * @rw: Read (1) or Write (0)
+ * t4_tp_fw_ldst_rw - Access TP indirect register through LDST
+ * @adap: the adapter
+ * @cmd: TP fw ldst address space type
+ * @vals: where the indirect register values are stored/written
+ * @nregs: how many indirect registers to read/write
+ * @start_idx: index of first indirect register to read/write
+ * @rw: Read (1) or Write (0)
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
- * Access TP PIO registers through LDST
+ * Access TP indirect registers through LDST
*/
-static void t4_fw_tp_pio_rw(struct adapter *adap, u32 *vals, unsigned int nregs,
- unsigned int start_index, unsigned int rw)
+static int t4_tp_fw_ldst_rw(struct adapter *adap, int cmd, u32 *vals,
+ unsigned int nregs, unsigned int start_index,
+ unsigned int rw, bool sleep_ok)
{
- int ret, i;
- int cmd = FW_LDST_ADDRSPC_TP_PIO;
+ int ret = 0;
+ unsigned int i;
struct fw_ldst_cmd c;
- for (i = 0 ; i < nregs; i++) {
+ for (i = 0; i < nregs; i++) {
memset(&c, 0, sizeof(c));
c.op_to_addrspace = cpu_to_be32(FW_CMD_OP_V(FW_LDST_CMD) |
FW_CMD_REQUEST_F |
@@ -5079,26 +5082,147 @@ static void t4_fw_tp_pio_rw(struct adapter *adap, u32 *vals, unsigned int nregs,
c.u.addrval.addr = cpu_to_be32(start_index + i);
c.u.addrval.val = rw ? 0 : cpu_to_be32(vals[i]);
- ret = t4_wr_mbox(adap, adap->mbox, &c, sizeof(c), &c);
- if (!ret && rw)
+ ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c,
+ sleep_ok);
+ if (ret)
+ return ret;
+
+ if (rw)
vals[i] = be32_to_cpu(c.u.addrval.val);
}
+ return 0;
+}
+
+/**
+ * t4_tp_indirect_rw - Read/Write TP indirect register through LDST or backdoor
+ * @adap: the adapter
+ * @reg_addr: Address Register
+ * @reg_data: Data register
+ * @buff: where the indirect register values are stored/written
+ * @nregs: how many indirect registers to read/write
+ * @start_index: index of first indirect register to read/write
+ * @rw: READ(1) or WRITE(0)
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Read/Write TP indirect registers through LDST if possible.
+ * Else, use backdoor access
+ **/
+static void t4_tp_indirect_rw(struct adapter *adap, u32 reg_addr, u32 reg_data,
+ u32 *buff, u32 nregs, u32 start_index, int rw,
+ bool sleep_ok)
+{
+ int rc = -EINVAL;
+ int cmd;
+
+ switch (reg_addr) {
+ case TP_PIO_ADDR_A:
+ cmd = FW_LDST_ADDRSPC_TP_PIO;
+ break;
+ case TP_TM_PIO_ADDR_A:
+ cmd = FW_LDST_ADDRSPC_TP_TM_PIO;
+ break;
+ case TP_MIB_INDEX_A:
+ cmd = FW_LDST_ADDRSPC_TP_MIB;
+ break;
+ default:
+ goto indirect_access;
+ }
+
+ if (t4_use_ldst(adap))
+ rc = t4_tp_fw_ldst_rw(adap, cmd, buff, nregs, start_index, rw,
+ sleep_ok);
+
+indirect_access:
+
+ if (rc) {
+ if (rw)
+ t4_read_indirect(adap, reg_addr, reg_data, buff, nregs,
+ start_index);
+ else
+ t4_write_indirect(adap, reg_addr, reg_data, buff, nregs,
+ start_index);
+ }
+}
+
+/**
+ * t4_tp_pio_read - Read TP PIO registers
+ * @adap: the adapter
+ * @buff: where the indirect register values are written
+ * @nregs: how many indirect registers to read
+ * @start_index: index of first indirect register to read
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Read TP PIO Registers
+ **/
+void t4_tp_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
+ u32 start_index, bool sleep_ok)
+{
+ t4_tp_indirect_rw(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, buff, nregs,
+ start_index, 1, sleep_ok);
+}
+
+/**
+ * t4_tp_pio_write - Write TP PIO registers
+ * @adap: the adapter
+ * @buff: where the indirect register values are stored
+ * @nregs: how many indirect registers to write
+ * @start_index: index of first indirect register to write
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Write TP PIO Registers
+ **/
+static void t4_tp_pio_write(struct adapter *adap, u32 *buff, u32 nregs,
+ u32 start_index, bool sleep_ok)
+{
+ t4_tp_indirect_rw(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, buff, nregs,
+ start_index, 0, sleep_ok);
+}
+
+/**
+ * t4_tp_tm_pio_read - Read TP TM PIO registers
+ * @adap: the adapter
+ * @buff: where the indirect register values are written
+ * @nregs: how many indirect registers to read
+ * @start_index: index of first indirect register to read
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Read TP TM PIO Registers
+ **/
+void t4_tp_tm_pio_read(struct adapter *adap, u32 *buff, u32 nregs,
+ u32 start_index, bool sleep_ok)
+{
+ t4_tp_indirect_rw(adap, TP_TM_PIO_ADDR_A, TP_TM_PIO_DATA_A, buff,
+ nregs, start_index, 1, sleep_ok);
+}
+
+/**
+ * t4_tp_mib_read - Read TP MIB registers
+ * @adap: the adapter
+ * @buff: where the indirect register values are written
+ * @nregs: how many indirect registers to read
+ * @start_index: index of first indirect register to read
+ * @sleep_ok: if true we may sleep while awaiting command completion
+ *
+ * Read TP MIB Registers
+ **/
+void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs, u32 start_index,
+ bool sleep_ok)
+{
+ t4_tp_indirect_rw(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, buff, nregs,
+ start_index, 1, sleep_ok);
}
/**
* t4_read_rss_key - read the global RSS key
* @adap: the adapter
* @key: 10-entry array holding the 320-bit RSS key
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Reads the global 320-bit RSS key.
*/
-void t4_read_rss_key(struct adapter *adap, u32 *key)
+void t4_read_rss_key(struct adapter *adap, u32 *key, bool sleep_ok)
{
- if (t4_use_ldst(adap))
- t4_fw_tp_pio_rw(adap, key, 10, TP_RSS_SECRET_KEY0_A, 1);
- else
- t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10,
- TP_RSS_SECRET_KEY0_A);
+ t4_tp_pio_read(adap, key, 10, TP_RSS_SECRET_KEY0_A, sleep_ok);
}
/**
@@ -5106,12 +5230,14 @@ void t4_read_rss_key(struct adapter *adap, u32 *key)
* @adap: the adapter
* @key: 10-entry array holding the 320-bit RSS key
* @idx: which RSS key to write
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Writes one of the RSS keys with the given 320-bit value. If @idx is
* 0..15 the corresponding entry in the RSS key table is written,
* otherwise the global RSS key is written.
*/
-void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx)
+void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx,
+ bool sleep_ok)
{
u8 rss_key_addr_cnt = 16;
u32 vrt = t4_read_reg(adap, TP_RSS_CONFIG_VRT_A);
@@ -5124,11 +5250,7 @@ void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx)
(vrt & KEYEXTEND_F) && (KEYMODE_G(vrt) == 3))
rss_key_addr_cnt = 32;
- if (t4_use_ldst(adap))
- t4_fw_tp_pio_rw(adap, (void *)key, 10, TP_RSS_SECRET_KEY0_A, 0);
- else
- t4_write_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A, key, 10,
- TP_RSS_SECRET_KEY0_A);
+ t4_tp_pio_write(adap, (void *)key, 10, TP_RSS_SECRET_KEY0_A, sleep_ok);
if (idx >= 0 && idx < rss_key_addr_cnt) {
if (rss_key_addr_cnt > 16)
@@ -5146,19 +5268,15 @@ void t4_write_rss_key(struct adapter *adap, const u32 *key, int idx)
* @adapter: the adapter
* @index: the entry in the PF RSS table to read
* @valp: where to store the returned value
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Reads the PF RSS Configuration Table at the specified index and returns
* the value found there.
*/
void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index,
- u32 *valp)
+ u32 *valp, bool sleep_ok)
{
- if (t4_use_ldst(adapter))
- t4_fw_tp_pio_rw(adapter, valp, 1,
- TP_RSS_PF0_CONFIG_A + index, 1);
- else
- t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
- valp, 1, TP_RSS_PF0_CONFIG_A + index);
+ t4_tp_pio_read(adapter, valp, 1, TP_RSS_PF0_CONFIG_A + index, sleep_ok);
}
/**
@@ -5167,12 +5285,13 @@ void t4_read_rss_pf_config(struct adapter *adapter, unsigned int index,
* @index: the entry in the VF RSS table to read
* @vfl: where to store the returned VFL
* @vfh: where to store the returned VFH
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Reads the VF RSS Configuration Table at the specified index and returns
* the (VFL, VFH) values found there.
*/
void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index,
- u32 *vfl, u32 *vfh)
+ u32 *vfl, u32 *vfh, bool sleep_ok)
{
u32 vrt, mask, data;
@@ -5193,50 +5312,37 @@ void t4_read_rss_vf_config(struct adapter *adapter, unsigned int index,
/* Grab the VFL/VFH values ...
*/
- if (t4_use_ldst(adapter)) {
- t4_fw_tp_pio_rw(adapter, vfl, 1, TP_RSS_VFL_CONFIG_A, 1);
- t4_fw_tp_pio_rw(adapter, vfh, 1, TP_RSS_VFH_CONFIG_A, 1);
- } else {
- t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
- vfl, 1, TP_RSS_VFL_CONFIG_A);
- t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
- vfh, 1, TP_RSS_VFH_CONFIG_A);
- }
+ t4_tp_pio_read(adapter, vfl, 1, TP_RSS_VFL_CONFIG_A, sleep_ok);
+ t4_tp_pio_read(adapter, vfh, 1, TP_RSS_VFH_CONFIG_A, sleep_ok);
}
/**
* t4_read_rss_pf_map - read PF RSS Map
* @adapter: the adapter
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Reads the PF RSS Map register and returns its value.
*/
-u32 t4_read_rss_pf_map(struct adapter *adapter)
+u32 t4_read_rss_pf_map(struct adapter *adapter, bool sleep_ok)
{
u32 pfmap;
- if (t4_use_ldst(adapter))
- t4_fw_tp_pio_rw(adapter, &pfmap, 1, TP_RSS_PF_MAP_A, 1);
- else
- t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
- &pfmap, 1, TP_RSS_PF_MAP_A);
+ t4_tp_pio_read(adapter, &pfmap, 1, TP_RSS_PF_MAP_A, sleep_ok);
return pfmap;
}
/**
* t4_read_rss_pf_mask - read PF RSS Mask
* @adapter: the adapter
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Reads the PF RSS Mask register and returns its value.
*/
-u32 t4_read_rss_pf_mask(struct adapter *adapter)
+u32 t4_read_rss_pf_mask(struct adapter *adapter, bool sleep_ok)
{
u32 pfmask;
- if (t4_use_ldst(adapter))
- t4_fw_tp_pio_rw(adapter, &pfmask, 1, TP_RSS_PF_MSK_A, 1);
- else
- t4_read_indirect(adapter, TP_PIO_ADDR_A, TP_PIO_DATA_A,
- &pfmask, 1, TP_RSS_PF_MSK_A);
+ t4_tp_pio_read(adapter, &pfmask, 1, TP_RSS_PF_MSK_A, sleep_ok);
return pfmask;
}
@@ -5245,12 +5351,13 @@ u32 t4_read_rss_pf_mask(struct adapter *adapter)
* @adap: the adapter
* @v4: holds the TCP/IP counter values
* @v6: holds the TCP/IPv6 counter values
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Returns the values of TP's TCP/IP and TCP/IPv6 MIB counters.
* Either @v4 or @v6 may be %NULL to skip the corresponding stats.
*/
void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
- struct tp_tcp_stats *v6)
+ struct tp_tcp_stats *v6, bool sleep_ok)
{
u32 val[TP_MIB_TCP_RXT_SEG_LO_A - TP_MIB_TCP_OUT_RST_A + 1];
@@ -5259,16 +5366,16 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
#define STAT64(x) (((u64)STAT(x##_HI) << 32) | STAT(x##_LO))
if (v4) {
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val,
- ARRAY_SIZE(val), TP_MIB_TCP_OUT_RST_A);
+ t4_tp_mib_read(adap, val, ARRAY_SIZE(val),
+ TP_MIB_TCP_OUT_RST_A, sleep_ok);
v4->tcp_out_rsts = STAT(OUT_RST);
v4->tcp_in_segs = STAT64(IN_SEG);
v4->tcp_out_segs = STAT64(OUT_SEG);
v4->tcp_retrans_segs = STAT64(RXT_SEG);
}
if (v6) {
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val,
- ARRAY_SIZE(val), TP_MIB_TCP_V6OUT_RST_A);
+ t4_tp_mib_read(adap, val, ARRAY_SIZE(val),
+ TP_MIB_TCP_V6OUT_RST_A, sleep_ok);
v6->tcp_out_rsts = STAT(OUT_RST);
v6->tcp_in_segs = STAT64(IN_SEG);
v6->tcp_out_segs = STAT64(OUT_SEG);
@@ -5283,63 +5390,66 @@ void t4_tp_get_tcp_stats(struct adapter *adap, struct tp_tcp_stats *v4,
* t4_tp_get_err_stats - read TP's error MIB counters
* @adap: the adapter
* @st: holds the counter values
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Returns the values of TP's error counters.
*/
-void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st)
+void t4_tp_get_err_stats(struct adapter *adap, struct tp_err_stats *st,
+ bool sleep_ok)
{
int nchan = adap->params.arch.nchan;
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tnl_cong_drops, nchan, TP_MIB_TNL_CNG_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->ofld_chan_drops, nchan, TP_MIB_OFD_CHN_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->ofld_vlan_drops, nchan, TP_MIB_OFD_VLN_DROP_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- st->tcp6_in_errs, nchan, TP_MIB_TCP_V6IN_ERR_0_A);
-
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A,
- &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A);
+ t4_tp_mib_read(adap, st->mac_in_errs, nchan, TP_MIB_MAC_IN_ERR_0_A,
+ sleep_ok);
+ t4_tp_mib_read(adap, st->hdr_in_errs, nchan, TP_MIB_HDR_IN_ERR_0_A,
+ sleep_ok);
+ t4_tp_mib_read(adap, st->tcp_in_errs, nchan, TP_MIB_TCP_IN_ERR_0_A,
+ sleep_ok);
+ t4_tp_mib_read(adap, st->tnl_cong_drops, nchan,
+ TP_MIB_TNL_CNG_DROP_0_A, sleep_ok);
+ t4_tp_mib_read(adap, st->ofld_chan_drops, nchan,
+ TP_MIB_OFD_CHN_DROP_0_A, sleep_ok);
+ t4_tp_mib_read(adap, st->tnl_tx_drops, nchan, TP_MIB_TNL_DROP_0_A,
+ sleep_ok);
+ t4_tp_mib_read(adap, st->ofld_vlan_drops, nchan,
+ TP_MIB_OFD_VLN_DROP_0_A, sleep_ok);
+ t4_tp_mib_read(adap, st->tcp6_in_errs, nchan,
+ TP_MIB_TCP_V6IN_ERR_0_A, sleep_ok);
+ t4_tp_mib_read(adap, &st->ofld_no_neigh, 2, TP_MIB_OFD_ARP_DROP_A,
+ sleep_ok);
}
/**
* t4_tp_get_cpl_stats - read TP's CPL MIB counters
* @adap: the adapter
* @st: holds the counter values
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Returns the values of TP's CPL counters.
*/
-void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st)
+void t4_tp_get_cpl_stats(struct adapter *adap, struct tp_cpl_stats *st,
+ bool sleep_ok)
{
int nchan = adap->params.arch.nchan;
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->req,
- nchan, TP_MIB_CPL_IN_REQ_0_A);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, st->rsp,
- nchan, TP_MIB_CPL_OUT_RSP_0_A);
+ t4_tp_mib_read(adap, st->req, nchan, TP_MIB_CPL_IN_REQ_0_A, sleep_ok);
+ t4_tp_mib_read(adap, st->rsp, nchan, TP_MIB_CPL_OUT_RSP_0_A, sleep_ok);
}
/**
* t4_tp_get_rdma_stats - read TP's RDMA MIB counters
* @adap: the adapter
* @st: holds the counter values
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Returns the values of TP's RDMA counters.
*/
-void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st)
+void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st,
+ bool sleep_ok)
{
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->rqe_dfr_pkt,
- 2, TP_MIB_RQE_DFR_PKT_A);
+ t4_tp_mib_read(adap, &st->rqe_dfr_pkt, 2, TP_MIB_RQE_DFR_PKT_A,
+ sleep_ok);
}
/**
@@ -5347,20 +5457,24 @@ void t4_tp_get_rdma_stats(struct adapter *adap, struct tp_rdma_stats *st)
* @adap: the adapter
* @idx: the port index
* @st: holds the counter values
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Returns the values of TP's FCoE counters for the selected port.
*/
void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx,
- struct tp_fcoe_stats *st)
+ struct tp_fcoe_stats *st, bool sleep_ok)
{
u32 val[2];
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->frames_ddp,
- 1, TP_MIB_FCOE_DDP_0_A + idx);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, &st->frames_drop,
- 1, TP_MIB_FCOE_DROP_0_A + idx);
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val,
- 2, TP_MIB_FCOE_BYTE_0_HI_A + 2 * idx);
+ t4_tp_mib_read(adap, &st->frames_ddp, 1, TP_MIB_FCOE_DDP_0_A + idx,
+ sleep_ok);
+
+ t4_tp_mib_read(adap, &st->frames_drop, 1,
+ TP_MIB_FCOE_DROP_0_A + idx, sleep_ok);
+
+ t4_tp_mib_read(adap, val, 2, TP_MIB_FCOE_BYTE_0_HI_A + 2 * idx,
+ sleep_ok);
+
st->octets_ddp = ((u64)val[0] << 32) | val[1];
}
@@ -5368,15 +5482,16 @@ void t4_get_fcoe_stats(struct adapter *adap, unsigned int idx,
* t4_get_usm_stats - read TP's non-TCP DDP MIB counters
* @adap: the adapter
* @st: holds the counter values
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Returns the values of TP's counters for non-TCP directly-placed packets.
*/
-void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st)
+void t4_get_usm_stats(struct adapter *adap, struct tp_usm_stats *st,
+ bool sleep_ok)
{
u32 val[4];
- t4_read_indirect(adap, TP_MIB_INDEX_A, TP_MIB_DATA_A, val, 4,
- TP_MIB_USM_PKTS_A);
+ t4_tp_mib_read(adap, val, 4, TP_MIB_USM_PKTS_A, sleep_ok);
st->frames = val[0];
st->drops = val[1];
st->octets = ((u64)val[2] << 32) | val[3];
@@ -8205,7 +8320,7 @@ struct flash_desc {
u32 size_mb;
};
-static int get_flash_params(struct adapter *adap)
+static int t4_get_flash_params(struct adapter *adap)
{
/* Table for non-Numonix supported flash parts. Numonix parts are left
* to the preexisting code. All flash parts have 64KB sectors.
@@ -8214,40 +8329,137 @@ static int get_flash_params(struct adapter *adap)
{ 0x150201, 4 << 20 }, /* Spansion 4MB S25FL032P */
};
+ unsigned int part, manufacturer;
+ unsigned int density, size;
+ u32 flashid = 0;
int ret;
- u32 info;
+
+ /* Issue a Read ID Command to the Flash part. We decode supported
+ * Flash parts and their sizes from this. There's a newer Query
+ * Command which can retrieve detailed geometry information but many
+ * Flash parts don't support it.
+ */
ret = sf1_write(adap, 1, 1, 0, SF_RD_ID);
if (!ret)
- ret = sf1_read(adap, 3, 0, 1, &info);
+ ret = sf1_read(adap, 3, 0, 1, &flashid);
t4_write_reg(adap, SF_OP_A, 0); /* unlock SF */
if (ret)
return ret;
- for (ret = 0; ret < ARRAY_SIZE(supported_flash); ++ret)
- if (supported_flash[ret].vendor_and_model_id == info) {
- adap->params.sf_size = supported_flash[ret].size_mb;
+ /* Check to see if it's one of our non-standard supported Flash parts.
+ */
+ for (part = 0; part < ARRAY_SIZE(supported_flash); part++)
+ if (supported_flash[part].vendor_and_model_id == flashid) {
+ adap->params.sf_size = supported_flash[part].size_mb;
adap->params.sf_nsec =
adap->params.sf_size / SF_SEC_SIZE;
- return 0;
+ goto found;
}
- if ((info & 0xff) != 0x20) /* not a Numonix flash */
- return -EINVAL;
- info >>= 16; /* log2 of size */
- if (info >= 0x14 && info < 0x18)
- adap->params.sf_nsec = 1 << (info - 16);
- else if (info == 0x18)
- adap->params.sf_nsec = 64;
- else
+ /* Decode Flash part size. The code below looks repetative with
+ * common encodings, but that's not guaranteed in the JEDEC
+ * specification for the Read JADEC ID command. The only thing that
+ * we're guaranteed by the JADEC specification is where the
+ * Manufacturer ID is in the returned result. After that each
+ * Manufacturer ~could~ encode things completely differently.
+ * Note, all Flash parts must have 64KB sectors.
+ */
+ manufacturer = flashid & 0xff;
+ switch (manufacturer) {
+ case 0x20: { /* Micron/Numonix */
+ /* This Density -> Size decoding table is taken from Micron
+ * Data Sheets.
+ */
+ density = (flashid >> 16) & 0xff;
+ switch (density) {
+ case 0x14: /* 1MB */
+ size = 1 << 20;
+ break;
+ case 0x15: /* 2MB */
+ size = 1 << 21;
+ break;
+ case 0x16: /* 4MB */
+ size = 1 << 22;
+ break;
+ case 0x17: /* 8MB */
+ size = 1 << 23;
+ break;
+ case 0x18: /* 16MB */
+ size = 1 << 24;
+ break;
+ case 0x19: /* 32MB */
+ size = 1 << 25;
+ break;
+ case 0x20: /* 64MB */
+ size = 1 << 26;
+ break;
+ case 0x21: /* 128MB */
+ size = 1 << 27;
+ break;
+ case 0x22: /* 256MB */
+ size = 1 << 28;
+ break;
+
+ default:
+ dev_err(adap->pdev_dev, "Micron Flash Part has bad size, ID = %#x, Density code = %#x\n",
+ flashid, density);
+ return -EINVAL;
+ }
+ break;
+ }
+ case 0xc2: { /* Macronix */
+ /* This Density -> Size decoding table is taken from Macronix
+ * Data Sheets.
+ */
+ density = (flashid >> 16) & 0xff;
+ switch (density) {
+ case 0x17: /* 8MB */
+ size = 1 << 23;
+ break;
+ case 0x18: /* 16MB */
+ size = 1 << 24;
+ break;
+ default:
+ dev_err(adap->pdev_dev, "Macronix Flash Part has bad size, ID = %#x, Density code = %#x\n",
+ flashid, density);
+ return -EINVAL;
+ }
+ break;
+ }
+ case 0xef: { /* Winbond */
+ /* This Density -> Size decoding table is taken from Winbond
+ * Data Sheets.
+ */
+ density = (flashid >> 16) & 0xff;
+ switch (density) {
+ case 0x17: /* 8MB */
+ size = 1 << 23;
+ break;
+ case 0x18: /* 16MB */
+ size = 1 << 24;
+ break;
+ default:
+ dev_err(adap->pdev_dev, "Winbond Flash Part has bad size, ID = %#x, Density code = %#x\n",
+ flashid, density);
+ return -EINVAL;
+ }
+ break;
+ }
+ default:
+ dev_err(adap->pdev_dev, "Unsupported Flash Part, ID = %#x\n",
+ flashid);
return -EINVAL;
- adap->params.sf_size = 1 << info;
- adap->params.sf_fw_start =
- t4_read_reg(adap, CIM_BOOT_CFG_A) & BOOTADDR_M;
+ }
+
+ /* Store decoded Flash size and fall through into vetting code. */
+ adap->params.sf_size = size;
+ adap->params.sf_nsec = size / SF_SEC_SIZE;
+found:
if (adap->params.sf_size < FLASH_MIN_SIZE)
- dev_warn(adap->pdev_dev, "WARNING!!! FLASH size %#x < %#x!!!\n",
- adap->params.sf_size, FLASH_MIN_SIZE);
+ dev_warn(adap->pdev_dev, "WARNING: Flash Part ID %#x, size %#x < %#x\n",
+ flashid, adap->params.sf_size, FLASH_MIN_SIZE);
return 0;
}
@@ -8285,7 +8497,7 @@ int t4_prep_adapter(struct adapter *adapter)
get_pci_mode(adapter, &adapter->params.pci);
pl_rev = REV_G(t4_read_reg(adapter, PL_REV_A));
- ret = get_flash_params(adapter);
+ ret = t4_get_flash_params(adapter);
if (ret < 0) {
dev_err(adapter->pdev_dev, "error %d identifying flash\n", ret);
return ret;
@@ -8567,10 +8779,11 @@ int t4_init_sge_params(struct adapter *adapter)
/**
* t4_init_tp_params - initialize adap->params.tp
* @adap: the adapter
+ * @sleep_ok: if true we may sleep while awaiting command completion
*
* Initialize various fields of the adapter's TP Parameters structure.
*/
-int t4_init_tp_params(struct adapter *adap)
+int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
{
int chan;
u32 v;
@@ -8586,19 +8799,11 @@ int t4_init_tp_params(struct adapter *adap)
/* Cache the adapter's Compressed Filter Mode and global Incress
* Configuration.
*/
- if (t4_use_ldst(adap)) {
- t4_fw_tp_pio_rw(adap, &adap->params.tp.vlan_pri_map, 1,
- TP_VLAN_PRI_MAP_A, 1);
- t4_fw_tp_pio_rw(adap, &adap->params.tp.ingress_config, 1,
- TP_INGRESS_CONFIG_A, 1);
- } else {
- t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
- &adap->params.tp.vlan_pri_map, 1,
- TP_VLAN_PRI_MAP_A);
- t4_read_indirect(adap, TP_PIO_ADDR_A, TP_PIO_DATA_A,
- &adap->params.tp.ingress_config, 1,
- TP_INGRESS_CONFIG_A);
- }
+ t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1,
+ TP_VLAN_PRI_MAP_A, sleep_ok);
+ t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1,
+ TP_INGRESS_CONFIG_A, sleep_ok);
+
/* For T6, cache the adapter's compressed error vector
* and passing outer header info for encapsulated packets.
*/
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index b0ff78da8aa2..ce4838d907da 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -50,6 +50,7 @@ enum {
CPL_RX_DATA_ACK = 0xD,
CPL_TX_PKT = 0xE,
CPL_L2T_WRITE_REQ = 0x12,
+ CPL_SMT_WRITE_REQ = 0x14,
CPL_TID_RELEASE = 0x1A,
CPL_TX_DATA_ISO = 0x1F,
@@ -60,6 +61,7 @@ enum {
CPL_PEER_CLOSE = 0x26,
CPL_ABORT_REQ_RSS = 0x2B,
CPL_ABORT_RPL_RSS = 0x2D,
+ CPL_SMT_WRITE_RPL = 0x2E,
CPL_RX_PHYS_ADDR = 0x30,
CPL_CLOSE_CON_RPL = 0x32,
@@ -681,8 +683,8 @@ struct cpl_set_tcb_field {
};
/* cpl_set_tcb_field.word_cookie fields */
-#define TCB_WORD_S 0
-#define TCB_WORD(x) ((x) << TCB_WORD_S)
+#define TCB_WORD_S 0
+#define TCB_WORD_V(x) ((x) << TCB_WORD_S)
#define TCB_COOKIE_S 5
#define TCB_COOKIE_M 0x7
@@ -1266,6 +1268,44 @@ struct cpl_l2t_write_rpl {
u8 rsvd[3];
};
+struct cpl_smt_write_req {
+ WR_HDR;
+ union opcode_tid ot;
+ __be32 params;
+ __be16 pfvf1;
+ u8 src_mac1[6];
+ __be16 pfvf0;
+ u8 src_mac0[6];
+};
+
+struct cpl_t6_smt_write_req {
+ WR_HDR;
+ union opcode_tid ot;
+ __be32 params;
+ __be64 tag;
+ __be16 pfvf0;
+ u8 src_mac0[6];
+ __be32 local_ip;
+ __be32 rsvd;
+};
+
+struct cpl_smt_write_rpl {
+ union opcode_tid ot;
+ u8 status;
+ u8 rsvd[3];
+};
+
+/* cpl_smt_{read,write}_req.params fields */
+#define SMTW_OVLAN_IDX_S 16
+#define SMTW_OVLAN_IDX_V(x) ((x) << SMTW_OVLAN_IDX_S)
+
+#define SMTW_IDX_S 20
+#define SMTW_IDX_V(x) ((x) << SMTW_IDX_S)
+
+#define SMTW_NORPL_S 31
+#define SMTW_NORPL_V(x) ((x) << SMTW_NORPL_S)
+#define SMTW_NORPL_F SMTW_NORPL_V(1U)
+
struct cpl_rdma_terminate {
union opcode_tid ot;
__be16 rsvd;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index aa28299aef5f..8c22bb8c9fbf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -176,6 +176,13 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
CH_PCI_ID_TABLE_FENTRY(0x50a2), /* Custom T540-KR4 */
CH_PCI_ID_TABLE_FENTRY(0x50a3), /* Custom T580-KR4 */
CH_PCI_ID_TABLE_FENTRY(0x50a4), /* Custom 2x T540-CR */
+ CH_PCI_ID_TABLE_FENTRY(0x50a5), /* Custom T522-BT */
+ CH_PCI_ID_TABLE_FENTRY(0x50a6), /* Custom T522-BT-SO */
+ CH_PCI_ID_TABLE_FENTRY(0x50a7), /* Custom T580-CR */
+ CH_PCI_ID_TABLE_FENTRY(0x50a8), /* Custom T580-KR */
+ CH_PCI_ID_TABLE_FENTRY(0x50a9), /* Custom T580-KR */
+ CH_PCI_ID_TABLE_FENTRY(0x50aa), /* Custom T580-CR */
+ CH_PCI_ID_TABLE_FENTRY(0x50ab), /* Custom T520-CR */
/* T6 adapters:
*/
@@ -197,6 +204,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
CH_PCI_ID_TABLE_FENTRY(0x6082), /* Custom T6225-CR SFP28 */
CH_PCI_ID_TABLE_FENTRY(0x6083), /* Custom T62100-CR QSFP28 */
CH_PCI_ID_TABLE_FENTRY(0x6084), /* Custom T64100-CR QSFP28 */
+ CH_PCI_ID_TABLE_FENTRY(0x6085), /* Custom T6240-SO */
CH_PCI_DEVICE_ID_TABLE_DEFINE_END;
#endif /* __T4_PCI_ID_TBL_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index dac90837842b..82614e078f50 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -1447,6 +1447,8 @@
#define LKPTBLQUEUE0_M 0x3ffU
#define LKPTBLQUEUE0_G(x) (((x) >> LKPTBLQUEUE0_S) & LKPTBLQUEUE0_M)
+#define TP_TM_PIO_ADDR_A 0x7e18
+#define TP_TM_PIO_DATA_A 0x7e1c
#define TP_PIO_ADDR_A 0x7e40
#define TP_PIO_DATA_A 0x7e44
#define TP_MIB_INDEX_A 0x7e50
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
new file mode 100644
index 000000000000..c1c76663034d
--- /dev/null
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_tcb.h
@@ -0,0 +1,47 @@
+/*
+ * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2017 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __T4_TCB_H
+#define __T4_TCB_H
+
+#define TCB_SMAC_SEL_W 0
+#define TCB_SMAC_SEL_S 24
+#define TCB_SMAC_SEL_M 0xffULL
+#define TCB_SMAC_SEL_V(x) ((x) << TCB_SMAC_SEL_S)
+
+#define TCB_T_FLAGS_W 1
+
+#define TF_CCTRL_CWR_S 61
+
+#endif /* __T4_TCB_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ca2756dcefc5..875d4a72b3ef 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -105,7 +105,8 @@ enum fw_wr_opcodes {
FW_ISCSI_TX_DATA_WR = 0x45,
FW_PTP_TX_PKT_WR = 0x46,
FW_CRYPTO_LOOKASIDE_WR = 0X6d,
- FW_LASTC2E_WR = 0x70
+ FW_LASTC2E_WR = 0x70,
+ FW_FILTER2_WR = 0x77
};
struct fw_wr_hdr {
@@ -201,6 +202,51 @@ struct fw_filter_wr {
__u8 sma[6];
};
+struct fw_filter2_wr {
+ __be32 op_pkd;
+ __be32 len16_pkd;
+ __be64 r3;
+ __be32 tid_to_iq;
+ __be32 del_filter_to_l2tix;
+ __be16 ethtype;
+ __be16 ethtypem;
+ __u8 frag_to_ovlan_vldm;
+ __u8 smac_sel;
+ __be16 rx_chan_rx_rpl_iq;
+ __be32 maci_to_matchtypem;
+ __u8 ptcl;
+ __u8 ptclm;
+ __u8 ttyp;
+ __u8 ttypm;
+ __be16 ivlan;
+ __be16 ivlanm;
+ __be16 ovlan;
+ __be16 ovlanm;
+ __u8 lip[16];
+ __u8 lipm[16];
+ __u8 fip[16];
+ __u8 fipm[16];
+ __be16 lp;
+ __be16 lpm;
+ __be16 fp;
+ __be16 fpm;
+ __be16 r7;
+ __u8 sma[6];
+ __be16 r8;
+ __u8 filter_type_swapmac;
+ __u8 natmode_to_ulp_type;
+ __be16 newlport;
+ __be16 newfport;
+ __u8 newlip[16];
+ __u8 newfip[16];
+ __be32 natseqcheck;
+ __be32 r9;
+ __be64 r10;
+ __be64 r11;
+ __be64 r12;
+ __be64 r13;
+};
+
#define FW_FILTER_WR_TID_S 12
#define FW_FILTER_WR_TID_M 0xfffff
#define FW_FILTER_WR_TID_V(x) ((x) << FW_FILTER_WR_TID_S)
@@ -385,6 +431,32 @@ struct fw_filter_wr {
#define FW_FILTER_WR_RX_RPL_IQ_G(x) \
(((x) >> FW_FILTER_WR_RX_RPL_IQ_S) & FW_FILTER_WR_RX_RPL_IQ_M)
+#define FW_FILTER2_WR_FILTER_TYPE_S 1
+#define FW_FILTER2_WR_FILTER_TYPE_M 0x1
+#define FW_FILTER2_WR_FILTER_TYPE_V(x) ((x) << FW_FILTER2_WR_FILTER_TYPE_S)
+#define FW_FILTER2_WR_FILTER_TYPE_G(x) \
+ (((x) >> FW_FILTER2_WR_FILTER_TYPE_S) & FW_FILTER2_WR_FILTER_TYPE_M)
+#define FW_FILTER2_WR_FILTER_TYPE_F FW_FILTER2_WR_FILTER_TYPE_V(1U)
+
+#define FW_FILTER2_WR_NATMODE_S 5
+#define FW_FILTER2_WR_NATMODE_M 0x7
+#define FW_FILTER2_WR_NATMODE_V(x) ((x) << FW_FILTER2_WR_NATMODE_S)
+#define FW_FILTER2_WR_NATMODE_G(x) \
+ (((x) >> FW_FILTER2_WR_NATMODE_S) & FW_FILTER2_WR_NATMODE_M)
+
+#define FW_FILTER2_WR_NATFLAGCHECK_S 4
+#define FW_FILTER2_WR_NATFLAGCHECK_M 0x1
+#define FW_FILTER2_WR_NATFLAGCHECK_V(x) ((x) << FW_FILTER2_WR_NATFLAGCHECK_S)
+#define FW_FILTER2_WR_NATFLAGCHECK_G(x) \
+ (((x) >> FW_FILTER2_WR_NATFLAGCHECK_S) & FW_FILTER2_WR_NATFLAGCHECK_M)
+#define FW_FILTER2_WR_NATFLAGCHECK_F FW_FILTER2_WR_NATFLAGCHECK_V(1U)
+
+#define FW_FILTER2_WR_ULP_TYPE_S 0
+#define FW_FILTER2_WR_ULP_TYPE_M 0xf
+#define FW_FILTER2_WR_ULP_TYPE_V(x) ((x) << FW_FILTER2_WR_ULP_TYPE_S)
+#define FW_FILTER2_WR_ULP_TYPE_G(x) \
+ (((x) >> FW_FILTER2_WR_ULP_TYPE_S) & FW_FILTER2_WR_ULP_TYPE_M)
+
#define FW_FILTER_WR_MACI_S 23
#define FW_FILTER_WR_MACI_M 0x1ff
#define FW_FILTER_WR_MACI_V(x) ((x) << FW_FILTER_WR_MACI_S)
@@ -1127,6 +1199,7 @@ enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_SCFGREV = 0x1A,
FW_PARAMS_PARAM_DEV_VPDREV = 0x1B,
FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C,
+ FW_PARAMS_PARAM_DEV_FILTER2_WR = 0x1D,
FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E,
};
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index a8d94963b4d0..67aec59a14e6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -1812,7 +1812,7 @@ int t4vf_eth_eq_free(struct adapter *adapter, unsigned int eqid)
*
* Returns a string representation of the Link Down Reason Code.
*/
-const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
+static const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
{
static const char * const reason[] = {
"Link Down",
@@ -1838,8 +1838,8 @@ const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
*
* Processes a GET_PORT_INFO FW reply message.
*/
-void t4vf_handle_get_port_info(struct port_info *pi,
- const struct fw_port_cmd *cmd)
+static void t4vf_handle_get_port_info(struct port_info *pi,
+ const struct fw_port_cmd *cmd)
{
int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
struct adapter *adapter = pi->adapter;
diff --git a/drivers/net/ethernet/cisco/enic/enic_clsf.h b/drivers/net/ethernet/cisco/enic/enic_clsf.h
index 6aa9f89d073b..4bfbf25f9ddc 100644
--- a/drivers/net/ethernet/cisco/enic/enic_clsf.h
+++ b/drivers/net/ethernet/cisco/enic/enic_clsf.h
@@ -19,9 +19,8 @@ void enic_flow_may_expire(unsigned long data);
static inline void enic_rfs_timer_start(struct enic *enic)
{
- init_timer(&enic->rfs_h.rfs_may_expire);
- enic->rfs_h.rfs_may_expire.function = enic_flow_may_expire;
- enic->rfs_h.rfs_may_expire.data = (unsigned long)enic;
+ setup_timer(&enic->rfs_h.rfs_may_expire, enic_flow_may_expire,
+ (unsigned long)enic);
mod_timer(&enic->rfs_h.rfs_may_expire, jiffies + HZ/4);
}
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index d24ee1ad3be1..4a11baffe02d 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -2846,9 +2846,8 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Setup notification timer, HW reset task, and wq locks
*/
- init_timer(&enic->notify_timer);
- enic->notify_timer.function = enic_notify_timer;
- enic->notify_timer.data = (unsigned long)enic;
+ setup_timer(&enic->notify_timer, enic_notify_timer,
+ (unsigned long)enic);
enic_set_rx_coal_setting(enic);
INIT_WORK(&enic->reset, enic_reset);
diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c
index c87b8cc42963..13430f75496c 100644
--- a/drivers/net/ethernet/dec/tulip/de2104x.c
+++ b/drivers/net/ethernet/dec/tulip/de2104x.c
@@ -333,8 +333,8 @@ static void de_set_rx_mode (struct net_device *dev);
static void de_tx (struct de_private *de);
static void de_clean_rings (struct de_private *de);
static void de_media_interrupt (struct de_private *de, u32 status);
-static void de21040_media_timer (unsigned long data);
-static void de21041_media_timer (unsigned long data);
+static void de21040_media_timer (struct timer_list *t);
+static void de21041_media_timer (struct timer_list *t);
static unsigned int de_ok_to_advertise (struct de_private *de, u32 new_media);
@@ -959,9 +959,9 @@ static void de_next_media (struct de_private *de, const u32 *media,
}
}
-static void de21040_media_timer (unsigned long data)
+static void de21040_media_timer (struct timer_list *t)
{
- struct de_private *de = (struct de_private *) data;
+ struct de_private *de = from_timer(de, t, media_timer);
struct net_device *dev = de->dev;
u32 status = dr32(SIAStatus);
unsigned int carrier;
@@ -1040,9 +1040,9 @@ static unsigned int de_ok_to_advertise (struct de_private *de, u32 new_media)
return 1;
}
-static void de21041_media_timer (unsigned long data)
+static void de21041_media_timer (struct timer_list *t)
{
- struct de_private *de = (struct de_private *) data;
+ struct de_private *de = from_timer(de, t, media_timer);
struct net_device *dev = de->dev;
u32 status = dr32(SIAStatus);
unsigned int carrier;
@@ -1999,12 +1999,9 @@ static int de_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
de->msg_enable = (debug < 0 ? DE_DEF_MSG_ENABLE : debug);
de->board_idx = board_idx;
spin_lock_init (&de->lock);
- init_timer(&de->media_timer);
- if (de->de21040)
- de->media_timer.function = de21040_media_timer;
- else
- de->media_timer.function = de21041_media_timer;
- de->media_timer.data = (unsigned long) de;
+ timer_setup(&de->media_timer,
+ de->de21040 ? de21040_media_timer : de21041_media_timer,
+ 0);
netif_carrier_off(dev);
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 0affee9c8aa2..a31b4df3e7ff 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -912,7 +912,7 @@ static int de4x5_init(struct net_device *dev);
static int de4x5_sw_reset(struct net_device *dev);
static int de4x5_rx(struct net_device *dev);
static int de4x5_tx(struct net_device *dev);
-static void de4x5_ast(struct net_device *dev);
+static void de4x5_ast(struct timer_list *t);
static int de4x5_txur(struct net_device *dev);
static int de4x5_rx_ovfc(struct net_device *dev);
@@ -1147,9 +1147,7 @@ de4x5_hw_init(struct net_device *dev, u_long iobase, struct device *gendev)
lp->timeout = -1;
lp->gendev = gendev;
spin_lock_init(&lp->lock);
- init_timer(&lp->timer);
- lp->timer.function = (void (*)(unsigned long))de4x5_ast;
- lp->timer.data = (unsigned long)dev;
+ timer_setup(&lp->timer, de4x5_ast, 0);
de4x5_parse_params(dev);
/*
@@ -1742,9 +1740,10 @@ de4x5_tx(struct net_device *dev)
}
static void
-de4x5_ast(struct net_device *dev)
+de4x5_ast(struct timer_list *t)
{
- struct de4x5_private *lp = netdev_priv(dev);
+ struct de4x5_private *lp = from_timer(lp, t, timer);
+ struct net_device *dev = dev_get_drvdata(lp->gendev);
int next_tick = DE4X5_AUTOSENSE_MS;
int dt;
@@ -2370,7 +2369,7 @@ autoconf_media(struct net_device *dev)
lp->media = INIT;
lp->tcount = 0;
- de4x5_ast(dev);
+ de4x5_ast(&lp->timer);
return lp->media;
}
diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c
index 07e10a45beaa..17ef7a28873d 100644
--- a/drivers/net/ethernet/dec/tulip/dmfe.c
+++ b/drivers/net/ethernet/dec/tulip/dmfe.c
@@ -331,7 +331,7 @@ static void dmfe_phy_write_1bit(void __iomem *, u32);
static u16 dmfe_phy_read_1bit(void __iomem *);
static u8 dmfe_sense_speed(struct dmfe_board_info *);
static void dmfe_process_mode(struct dmfe_board_info *);
-static void dmfe_timer(unsigned long);
+static void dmfe_timer(struct timer_list *);
static inline u32 cal_CRC(unsigned char *, unsigned int, u8);
static void dmfe_rx_packet(struct net_device *, struct dmfe_board_info *);
static void dmfe_free_tx_pkt(struct net_device *, struct dmfe_board_info *);
@@ -596,10 +596,8 @@ static int dmfe_open(struct net_device *dev)
netif_wake_queue(dev);
/* set and active a timer process */
- init_timer(&db->timer);
+ timer_setup(&db->timer, dmfe_timer, 0);
db->timer.expires = DMFE_TIMER_WUT + HZ * 2;
- db->timer.data = (unsigned long)dev;
- db->timer.function = dmfe_timer;
add_timer(&db->timer);
return 0;
@@ -1130,10 +1128,10 @@ static const struct ethtool_ops netdev_ethtool_ops = {
* Dynamic media sense, allocate Rx buffer...
*/
-static void dmfe_timer(unsigned long data)
+static void dmfe_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct dmfe_board_info *db = netdev_priv(dev);
+ struct dmfe_board_info *db = from_timer(db, t, timer);
+ struct net_device *dev = pci_get_drvdata(db->pdev);
void __iomem *ioaddr = db->ioaddr;
u32 tmp_cr8;
unsigned char tmp_cr12;
diff --git a/drivers/net/ethernet/dec/tulip/interrupt.c b/drivers/net/ethernet/dec/tulip/interrupt.c
index 8df80880ecaa..c1ca0765d56d 100644
--- a/drivers/net/ethernet/dec/tulip/interrupt.c
+++ b/drivers/net/ethernet/dec/tulip/interrupt.c
@@ -102,10 +102,10 @@ int tulip_refill_rx(struct net_device *dev)
#ifdef CONFIG_TULIP_NAPI
-void oom_timer(unsigned long data)
+void oom_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct tulip_private *tp = netdev_priv(dev);
+ struct tulip_private *tp = from_timer(tp, t, oom_timer);
+
napi_schedule(&tp->napi);
}
diff --git a/drivers/net/ethernet/dec/tulip/pnic.c b/drivers/net/ethernet/dec/tulip/pnic.c
index 7bcccf5cac7a..3fb39e32e1b4 100644
--- a/drivers/net/ethernet/dec/tulip/pnic.c
+++ b/drivers/net/ethernet/dec/tulip/pnic.c
@@ -84,10 +84,10 @@ void pnic_lnk_change(struct net_device *dev, int csr5)
}
}
-void pnic_timer(unsigned long data)
+void pnic_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct tulip_private *tp = netdev_priv(dev);
+ struct tulip_private *tp = from_timer(tp, t, timer);
+ struct net_device *dev = tp->dev;
void __iomem *ioaddr = tp->base_addr;
int next_tick = 60*HZ;
diff --git a/drivers/net/ethernet/dec/tulip/pnic2.c b/drivers/net/ethernet/dec/tulip/pnic2.c
index 5895fc43f6e0..412adaa7fdf8 100644
--- a/drivers/net/ethernet/dec/tulip/pnic2.c
+++ b/drivers/net/ethernet/dec/tulip/pnic2.c
@@ -76,10 +76,10 @@
#include <linux/delay.h>
-void pnic2_timer(unsigned long data)
+void pnic2_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct tulip_private *tp = netdev_priv(dev);
+ struct tulip_private *tp = from_timer(tp, t, timer);
+ struct net_device *dev = tp->dev;
void __iomem *ioaddr = tp->base_addr;
int next_tick = 60*HZ;
diff --git a/drivers/net/ethernet/dec/tulip/timer.c b/drivers/net/ethernet/dec/tulip/timer.c
index 523d9dde50a2..642e9dfc5451 100644
--- a/drivers/net/ethernet/dec/tulip/timer.c
+++ b/drivers/net/ethernet/dec/tulip/timer.c
@@ -137,10 +137,10 @@ void tulip_media_task(struct work_struct *work)
}
-void mxic_timer(unsigned long data)
+void mxic_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct tulip_private *tp = netdev_priv(dev);
+ struct tulip_private *tp = from_timer(tp, t, timer);
+ struct net_device *dev = tp->dev;
void __iomem *ioaddr = tp->base_addr;
int next_tick = 60*HZ;
@@ -154,10 +154,10 @@ void mxic_timer(unsigned long data)
}
-void comet_timer(unsigned long data)
+void comet_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct tulip_private *tp = netdev_priv(dev);
+ struct tulip_private *tp = from_timer(tp, t, timer);
+ struct net_device *dev = tp->dev;
int next_tick = 2*HZ;
if (tulip_debug > 1)
diff --git a/drivers/net/ethernet/dec/tulip/tulip.h b/drivers/net/ethernet/dec/tulip/tulip.h
index 06660dbc44b7..b458140aeaef 100644
--- a/drivers/net/ethernet/dec/tulip/tulip.h
+++ b/drivers/net/ethernet/dec/tulip/tulip.h
@@ -43,7 +43,7 @@ struct tulip_chip_table {
int io_size;
int valid_intrs; /* CSR7 interrupt enable settings */
int flags;
- void (*media_timer) (unsigned long);
+ void (*media_timer) (struct timer_list *);
work_func_t media_task;
};
@@ -476,7 +476,7 @@ void t21142_lnk_change(struct net_device *dev, int csr5);
/* PNIC2.c */
void pnic2_lnk_change(struct net_device *dev, int csr5);
-void pnic2_timer(unsigned long data);
+void pnic2_timer(struct timer_list *t);
void pnic2_start_nway(struct net_device *dev);
void pnic2_lnk_change(struct net_device *dev, int csr5);
@@ -504,19 +504,19 @@ void tulip_find_mii (struct net_device *dev, int board_idx);
/* pnic.c */
void pnic_do_nway(struct net_device *dev);
void pnic_lnk_change(struct net_device *dev, int csr5);
-void pnic_timer(unsigned long data);
+void pnic_timer(struct timer_list *t);
/* timer.c */
void tulip_media_task(struct work_struct *work);
-void mxic_timer(unsigned long data);
-void comet_timer(unsigned long data);
+void mxic_timer(struct timer_list *t);
+void comet_timer(struct timer_list *t);
/* tulip_core.c */
extern int tulip_debug;
extern const char * const medianame[];
extern const char tulip_media_cap[];
extern const struct tulip_chip_table tulip_tbl[];
-void oom_timer(unsigned long data);
+void oom_timer(struct timer_list *t);
extern u8 t21040_csr13[];
static inline void tulip_start_rxtx(struct tulip_private *tp)
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 851b6d1f5a42..00d02a0967d0 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -123,10 +123,10 @@ int tulip_debug = TULIP_DEBUG;
int tulip_debug = 1;
#endif
-static void tulip_timer(unsigned long data)
+static void tulip_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct tulip_private *tp = netdev_priv(dev);
+ struct tulip_private *tp = from_timer(tp, t, timer);
+ struct net_device *dev = tp->dev;
if (netif_running(dev))
schedule_work(&tp->media_work);
@@ -505,7 +505,7 @@ media_picked:
tp->timer.expires = RUN_AT(next_tick);
add_timer(&tp->timer);
#ifdef CONFIG_TULIP_NAPI
- setup_timer(&tp->oom_timer, oom_timer, (unsigned long)dev);
+ timer_setup(&tp->oom_timer, oom_timer, 0);
#endif
}
@@ -780,8 +780,7 @@ static void tulip_down (struct net_device *dev)
spin_unlock_irqrestore (&tp->lock, flags);
- setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer,
- (unsigned long)dev);
+ timer_setup(&tp->timer, tulip_tbl[tp->chip_id].media_timer, 0);
dev->if_port = tp->saved_if_port;
@@ -1470,8 +1469,7 @@ static int tulip_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
tp->csr0 = csr0;
spin_lock_init(&tp->lock);
spin_lock_init(&tp->mii_lock);
- setup_timer(&tp->timer, tulip_tbl[tp->chip_id].media_timer,
- (unsigned long)dev);
+ timer_setup(&tp->timer, tulip_tbl[tp->chip_id].media_timer, 0);
INIT_WORK(&tp->media_work, tulip_tbl[tp->chip_id].media_task);
diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index 7fc248efc4ba..488a744084c9 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -241,7 +241,7 @@ static void phy_write_1bit(struct uli526x_board_info *db, u32);
static u16 phy_read_1bit(struct uli526x_board_info *db);
static u8 uli526x_sense_speed(struct uli526x_board_info *);
static void uli526x_process_mode(struct uli526x_board_info *);
-static void uli526x_timer(unsigned long);
+static void uli526x_timer(struct timer_list *t);
static void uli526x_rx_packet(struct net_device *, struct uli526x_board_info *);
static void uli526x_free_tx_pkt(struct net_device *, struct uli526x_board_info *);
static void uli526x_reuse_skb(struct uli526x_board_info *, struct sk_buff *);
@@ -491,10 +491,8 @@ static int uli526x_open(struct net_device *dev)
netif_wake_queue(dev);
/* set and active a timer process */
- init_timer(&db->timer);
+ timer_setup(&db->timer, uli526x_timer, 0);
db->timer.expires = ULI526X_TIMER_WUT + HZ * 2;
- db->timer.data = (unsigned long)dev;
- db->timer.function = uli526x_timer;
add_timer(&db->timer);
return 0;
@@ -1023,10 +1021,10 @@ static const struct ethtool_ops netdev_ethtool_ops = {
* Dynamic media sense, allocate Rx buffer...
*/
-static void uli526x_timer(unsigned long data)
+static void uli526x_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) data;
- struct uli526x_board_info *db = netdev_priv(dev);
+ struct uli526x_board_info *db = from_timer(db, t, timer);
+ struct net_device *dev = pci_get_drvdata(db->pdev);
struct uli_phy_ops *phy = &db->phy;
void __iomem *ioaddr = db->ioaddr;
unsigned long flags;
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 32d7229544fa..70cb2d689c2c 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -327,7 +327,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location);
static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
static int netdev_open(struct net_device *dev);
static int update_link(struct net_device *dev);
-static void netdev_timer(unsigned long data);
+static void netdev_timer(struct timer_list *t);
static void init_rxtx_rings(struct net_device *dev);
static void free_rxtx_rings(struct netdev_private *np);
static void init_registers(struct net_device *dev);
@@ -655,10 +655,8 @@ static int netdev_open(struct net_device *dev)
netdev_dbg(dev, "Done netdev_open()\n");
/* Set the timer to check for link beat. */
- init_timer(&np->timer);
+ timer_setup(&np->timer, netdev_timer, 0);
np->timer.expires = jiffies + 1*HZ;
- np->timer.data = (unsigned long)dev;
- np->timer.function = netdev_timer; /* timer handler */
add_timer(&np->timer);
return 0;
out_err:
@@ -774,10 +772,10 @@ static inline void update_csr6(struct net_device *dev, int new)
np->mii_if.full_duplex = 1;
}
-static void netdev_timer(unsigned long data)
+static void netdev_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct netdev_private *np = netdev_priv(dev);
+ struct netdev_private *np = from_timer(np, t, timer);
+ struct net_device *dev = pci_get_drvdata(np->pci_dev);
void __iomem *ioaddr = np->base_addr;
if (debug > 2)
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 778f974e2928..a2f6758d38dd 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -68,7 +68,7 @@ static const int max_intrloop = 50;
static const int multicast_filter_limit = 0x40;
static int rio_open (struct net_device *dev);
-static void rio_timer (unsigned long data);
+static void rio_timer (struct timer_list *t);
static void rio_tx_timeout (struct net_device *dev);
static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev);
static irqreturn_t rio_interrupt (int irq, void *dev_instance);
@@ -644,7 +644,7 @@ static int rio_open(struct net_device *dev)
return i;
}
- setup_timer(&np->timer, rio_timer, (unsigned long)dev);
+ timer_setup(&np->timer, rio_timer, 0);
np->timer.expires = jiffies + 1 * HZ;
add_timer(&np->timer);
@@ -655,10 +655,10 @@ static int rio_open(struct net_device *dev)
}
static void
-rio_timer (unsigned long data)
+rio_timer (struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
- struct netdev_private *np = netdev_priv(dev);
+ struct netdev_private *np = from_timer(np, t, timer);
+ struct net_device *dev = pci_get_drvdata(np->pdev);
unsigned int entry;
int next_tick = 1*HZ;
unsigned long flags;
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index 2704bcf023be..6ca9e981ad57 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -913,10 +913,8 @@ static int netdev_open(struct net_device *dev)
ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0));
/* Set the timer to check for link beat. */
- init_timer(&np->timer);
+ setup_timer(&np->timer, netdev_timer, (unsigned long)dev);
np->timer.expires = jiffies + 3*HZ;
- np->timer.data = (unsigned long)dev;
- np->timer.function = netdev_timer; /* timer handler */
add_timer(&np->timer);
/* Enable interrupts by setting the interrupt mask. */
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 9ed8e4b81530..78db8e62a83f 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -21,6 +21,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/clk.h>
#include <linux/dma-mapping.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
@@ -59,6 +60,9 @@
/* Min number of tx ring entries before stopping queue */
#define TX_THRESHOLD (MAX_SKB_FRAGS + 1)
+#define FTGMAC_100MHZ 100000000
+#define FTGMAC_25MHZ 25000000
+
struct ftgmac100 {
/* Registers */
struct resource *res;
@@ -96,6 +100,7 @@ struct ftgmac100 {
struct napi_struct napi;
struct work_struct reset_task;
struct mii_bus *mii_bus;
+ struct clk *clk;
/* Link management */
int cur_speed;
@@ -1734,6 +1739,22 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd)
nd->link_up ? "up" : "down");
}
+static void ftgmac100_setup_clk(struct ftgmac100 *priv)
+{
+ priv->clk = devm_clk_get(priv->dev, NULL);
+ if (IS_ERR(priv->clk))
+ return;
+
+ clk_prepare_enable(priv->clk);
+
+ /* Aspeed specifies a 100MHz clock is required for up to
+ * 1000Mbit link speeds. As NCSI is limited to 100Mbit, 25MHz
+ * is sufficient
+ */
+ clk_set_rate(priv->clk, priv->use_ncsi ? FTGMAC_25MHZ :
+ FTGMAC_100MHZ);
+}
+
static int ftgmac100_probe(struct platform_device *pdev)
{
struct resource *res;
@@ -1830,6 +1851,9 @@ static int ftgmac100_probe(struct platform_device *pdev)
goto err_setup_mdio;
}
+ if (priv->is_aspeed)
+ ftgmac100_setup_clk(priv);
+
/* Default ring sizes */
priv->rx_q_entries = priv->new_rx_q_entries = DEF_RX_QUEUE_ENTRIES;
priv->tx_q_entries = priv->new_tx_q_entries = DEF_TX_QUEUE_ENTRIES;
@@ -1883,6 +1907,8 @@ static int ftgmac100_remove(struct platform_device *pdev)
unregister_netdev(netdev);
+ clk_disable_unprepare(priv->clk);
+
/* There's a small chance the reset task will have been re-queued,
* during stop, make sure it's gone before we free the structure.
*/
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index e92859dab7ae..c8982313d850 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -909,17 +909,13 @@ static int netdev_open(struct net_device *dev)
printk(KERN_DEBUG "%s: Done netdev_open().\n", dev->name);
/* Set the timer to check for link beat. */
- init_timer(&np->timer);
+ setup_timer(&np->timer, netdev_timer, (unsigned long)dev);
np->timer.expires = RUN_AT(3 * HZ);
- np->timer.data = (unsigned long) dev;
- np->timer.function = netdev_timer;
/* timer handler */
add_timer(&np->timer);
- init_timer(&np->reset_timer);
- np->reset_timer.data = (unsigned long) dev;
- np->reset_timer.function = reset_timer;
+ setup_timer(&np->reset_timer, reset_timer, (unsigned long)dev);
np->reset_timer_armed = 0;
return rc;
}
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 42258060f142..a8d0be824149 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -385,34 +385,19 @@ out:
static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
{
- struct platform_device *of_dev;
struct dpaa_eth_data *eth_data;
- struct device *dpaa_dev, *dev;
- struct device_node *mac_node;
+ struct device *dpaa_dev;
struct mac_device *mac_dev;
dpaa_dev = &pdev->dev;
eth_data = dpaa_dev->platform_data;
- if (!eth_data)
+ if (!eth_data) {
+ dev_err(dpaa_dev, "eth_data missing\n");
return ERR_PTR(-ENODEV);
-
- mac_node = eth_data->mac_node;
-
- of_dev = of_find_device_by_node(mac_node);
- if (!of_dev) {
- dev_err(dpaa_dev, "of_find_device_by_node(%pOF) failed\n",
- mac_node);
- of_node_put(mac_node);
- return ERR_PTR(-EINVAL);
}
- of_node_put(mac_node);
-
- dev = &of_dev->dev;
-
- mac_dev = dev_get_drvdata(dev);
+ mac_dev = eth_data->mac_dev;
if (!mac_dev) {
- dev_err(dpaa_dev, "dev_get_drvdata(%s) failed\n",
- dev_name(dev));
+ dev_err(dpaa_dev, "mac_dev missing\n");
return ERR_PTR(-EINVAL);
}
@@ -2435,6 +2420,44 @@ static void dpaa_eth_napi_disable(struct dpaa_priv *priv)
}
}
+static void dpaa_adjust_link(struct net_device *net_dev)
+{
+ struct mac_device *mac_dev;
+ struct dpaa_priv *priv;
+
+ priv = netdev_priv(net_dev);
+ mac_dev = priv->mac_dev;
+ mac_dev->adjust_link(mac_dev);
+}
+
+static int dpaa_phy_init(struct net_device *net_dev)
+{
+ struct mac_device *mac_dev;
+ struct phy_device *phy_dev;
+ struct dpaa_priv *priv;
+
+ priv = netdev_priv(net_dev);
+ mac_dev = priv->mac_dev;
+
+ phy_dev = of_phy_connect(net_dev, mac_dev->phy_node,
+ &dpaa_adjust_link, 0,
+ mac_dev->phy_if);
+ if (!phy_dev) {
+ netif_err(priv, ifup, net_dev, "init_phy() failed\n");
+ return -ENODEV;
+ }
+
+ /* Remove any features not supported by the controller */
+ phy_dev->supported &= mac_dev->if_support;
+ phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
+ phy_dev->advertising = phy_dev->supported;
+
+ mac_dev->phy_dev = phy_dev;
+ net_dev->phydev = phy_dev;
+
+ return 0;
+}
+
static int dpaa_open(struct net_device *net_dev)
{
struct mac_device *mac_dev;
@@ -2445,12 +2468,8 @@ static int dpaa_open(struct net_device *net_dev)
mac_dev = priv->mac_dev;
dpaa_eth_napi_enable(priv);
- net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev);
- if (!net_dev->phydev) {
- netif_err(priv, ifup, net_dev, "init_phy() failed\n");
- err = -ENODEV;
+ if (dpaa_phy_init(net_dev))
goto phy_init_failed;
- }
for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
err = fman_port_enable(mac_dev->port[i]);
@@ -2658,7 +2677,13 @@ static int dpaa_eth_probe(struct platform_device *pdev)
int err = 0, i, channel;
struct device *dev;
- dev = &pdev->dev;
+ /* device used for DMA mapping */
+ dev = pdev->dev.parent;
+ err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
+ if (err) {
+ dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
+ return err;
+ }
/* Allocate this early, so we can store relevant information in
* the private area
@@ -2666,7 +2691,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM);
if (!net_dev) {
dev_err(dev, "alloc_etherdev_mq() failed\n");
- goto alloc_etherdev_mq_failed;
+ return -ENOMEM;
}
/* Do this here, so we can be verbose early */
@@ -2682,7 +2707,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
if (IS_ERR(mac_dev)) {
dev_err(dev, "dpaa_mac_dev_get() failed\n");
err = PTR_ERR(mac_dev);
- goto mac_probe_failed;
+ goto free_netdev;
}
/* If fsl_fm_max_frm is set to a higher value than the all-common 1500,
@@ -2700,21 +2725,13 @@ static int dpaa_eth_probe(struct platform_device *pdev)
priv->buf_layout[RX].priv_data_size = DPAA_RX_PRIV_DATA_SIZE; /* Rx */
priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
- /* device used for DMA mapping */
- set_dma_ops(dev, get_dma_ops(&pdev->dev));
- err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
- if (err) {
- dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
- goto dev_mask_failed;
- }
-
/* bp init */
for (i = 0; i < DPAA_BPS_NUM; i++) {
int err;
dpaa_bps[i] = dpaa_bp_alloc(dev);
if (IS_ERR(dpaa_bps[i]))
- return PTR_ERR(dpaa_bps[i]);
+ goto free_dpaa_bps;
/* the raw size of the buffers used for reception */
dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM);
/* avoid runtime computations by keeping the usable size here */
@@ -2722,11 +2739,8 @@ static int dpaa_eth_probe(struct platform_device *pdev)
dpaa_bps[i]->dev = dev;
err = dpaa_bp_alloc_pool(dpaa_bps[i]);
- if (err < 0) {
- dpaa_bps_free(priv);
- priv->dpaa_bps[i] = NULL;
- goto bp_create_failed;
- }
+ if (err < 0)
+ goto free_dpaa_bps;
priv->dpaa_bps[i] = dpaa_bps[i];
}
@@ -2737,7 +2751,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
err = dpaa_alloc_all_fqs(dev, &priv->dpaa_fq_list, &port_fqs);
if (err < 0) {
dev_err(dev, "dpaa_alloc_all_fqs() failed\n");
- goto fq_probe_failed;
+ goto free_dpaa_bps;
}
priv->mac_dev = mac_dev;
@@ -2746,7 +2760,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
if (channel < 0) {
dev_err(dev, "dpaa_get_channel() failed\n");
err = channel;
- goto get_channel_failed;
+ goto free_dpaa_bps;
}
priv->channel = (u16)channel;
@@ -2766,20 +2780,20 @@ static int dpaa_eth_probe(struct platform_device *pdev)
err = dpaa_eth_cgr_init(priv);
if (err < 0) {
dev_err(dev, "Error initializing CGR\n");
- goto tx_cgr_init_failed;
+ goto free_dpaa_bps;
}
err = dpaa_ingress_cgr_init(priv);
if (err < 0) {
dev_err(dev, "Error initializing ingress CGR\n");
- goto rx_cgr_init_failed;
+ goto delete_egress_cgr;
}
/* Add the FQs to the interface, and make them active */
list_for_each_entry_safe(dpaa_fq, tmp, &priv->dpaa_fq_list, list) {
err = dpaa_fq_init(dpaa_fq, false);
if (err < 0)
- goto fq_alloc_failed;
+ goto free_dpaa_fqs;
}
priv->tx_headroom = dpaa_get_headroom(&priv->buf_layout[TX]);
@@ -2789,7 +2803,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
err = dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs,
&priv->buf_layout[0], dev);
if (err)
- goto init_ports_failed;
+ goto free_dpaa_fqs;
/* Rx traffic distribution based on keygen hashing defaults to on */
priv->keygen_in_use = true;
@@ -2798,7 +2812,7 @@ static int dpaa_eth_probe(struct platform_device *pdev)
if (!priv->percpu_priv) {
dev_err(dev, "devm_alloc_percpu() failed\n");
err = -ENOMEM;
- goto alloc_percpu_failed;
+ goto free_dpaa_fqs;
}
for_each_possible_cpu(i) {
percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
@@ -2811,11 +2825,11 @@ static int dpaa_eth_probe(struct platform_device *pdev)
/* Initialize NAPI */
err = dpaa_napi_add(net_dev);
if (err < 0)
- goto napi_add_failed;
+ goto delete_dpaa_napi;
err = dpaa_netdev_init(net_dev, &dpaa_ops, tx_timeout);
if (err < 0)
- goto netdev_init_failed;
+ goto delete_dpaa_napi;
dpaa_eth_sysfs_init(&net_dev->dev);
@@ -2824,32 +2838,21 @@ static int dpaa_eth_probe(struct platform_device *pdev)
return 0;
-netdev_init_failed:
-napi_add_failed:
+delete_dpaa_napi:
dpaa_napi_del(net_dev);
-alloc_percpu_failed:
-init_ports_failed:
+free_dpaa_fqs:
dpaa_fq_free(dev, &priv->dpaa_fq_list);
-fq_alloc_failed:
qman_delete_cgr_safe(&priv->ingress_cgr);
qman_release_cgrid(priv->ingress_cgr.cgrid);
-rx_cgr_init_failed:
+delete_egress_cgr:
qman_delete_cgr_safe(&priv->cgr_data.cgr);
qman_release_cgrid(priv->cgr_data.cgr.cgrid);
-tx_cgr_init_failed:
-get_channel_failed:
+free_dpaa_bps:
dpaa_bps_free(priv);
-bp_create_failed:
-fq_probe_failed:
-dev_mask_failed:
-mac_probe_failed:
+free_netdev:
dev_set_drvdata(dev, NULL);
free_netdev(net_dev);
-alloc_etherdev_mq_failed:
- for (i = 0; i < DPAA_BPS_NUM && dpaa_bps[i]; i++) {
- if (atomic_read(&dpaa_bps[i]->refs) == 0)
- devm_kfree(dev, dpaa_bps[i]);
- }
+
return err;
}
diff --git a/drivers/net/ethernet/freescale/fman/Makefile b/drivers/net/ethernet/freescale/fman/Makefile
index 2c38119b172c..4ae524a352a2 100644
--- a/drivers/net/ethernet/freescale/fman/Makefile
+++ b/drivers/net/ethernet/freescale/fman/Makefile
@@ -1,9 +1,9 @@
subdir-ccflags-y += -I$(srctree)/drivers/net/ethernet/freescale/fman
-obj-$(CONFIG_FSL_FMAN) += fsl_fman.o
-obj-$(CONFIG_FSL_FMAN) += fsl_fman_port.o
-obj-$(CONFIG_FSL_FMAN) += fsl_mac.o
+obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_fman.o
+obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_fman_port.o
+obj-$(CONFIG_FSL_FMAN) += fsl_dpaa_mac.o
-fsl_fman-objs := fman_muram.o fman.o fman_sp.o fman_keygen.o
-fsl_fman_port-objs := fman_port.o
-fsl_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o
+fsl_dpaa_fman-objs := fman_muram.o fman.o fman_sp.o fman_keygen.o
+fsl_dpaa_fman_port-objs := fman_port.o
+fsl_dpaa_mac-objs:= mac.o fman_dtsec.o fman_memac.o fman_tgec.o
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 387eb4a88b72..1d6da1ea7bfb 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -57,9 +57,7 @@ struct mac_priv_s {
struct device *dev;
void __iomem *vaddr;
u8 cell_index;
- phy_interface_t phy_if;
struct fman *fman;
- struct device_node *phy_node;
struct device_node *internal_phy_node;
/* List of multicast addresses */
struct list_head mc_addr_list;
@@ -106,7 +104,7 @@ static void set_fman_mac_params(struct mac_device *mac_dev,
resource_size(mac_dev->res));
memcpy(&params->addr, mac_dev->addr, sizeof(mac_dev->addr));
params->max_speed = priv->max_speed;
- params->phy_if = priv->phy_if;
+ params->phy_if = mac_dev->phy_if;
params->basex_if = false;
params->mac_id = priv->cell_index;
params->fm = (void *)priv->fman;
@@ -419,15 +417,12 @@ void fman_get_pause_cfg(struct mac_device *mac_dev, bool *rx_pause,
}
EXPORT_SYMBOL(fman_get_pause_cfg);
-static void adjust_link_void(struct net_device *net_dev)
+static void adjust_link_void(struct mac_device *mac_dev)
{
}
-static void adjust_link_dtsec(struct net_device *net_dev)
+static void adjust_link_dtsec(struct mac_device *mac_dev)
{
- struct device *dev = net_dev->dev.parent;
- struct dpaa_eth_data *eth_data = dev->platform_data;
- struct mac_device *mac_dev = eth_data->mac_dev;
struct phy_device *phy_dev = mac_dev->phy_dev;
struct fman_mac *fman_mac;
bool rx_pause, tx_pause;
@@ -444,14 +439,12 @@ static void adjust_link_dtsec(struct net_device *net_dev)
fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
if (err < 0)
- netdev_err(net_dev, "fman_set_mac_active_pause() = %d\n", err);
+ dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n",
+ err);
}
-static void adjust_link_memac(struct net_device *net_dev)
+static void adjust_link_memac(struct mac_device *mac_dev)
{
- struct device *dev = net_dev->dev.parent;
- struct dpaa_eth_data *eth_data = dev->platform_data;
- struct mac_device *mac_dev = eth_data->mac_dev;
struct phy_device *phy_dev = mac_dev->phy_dev;
struct fman_mac *fman_mac;
bool rx_pause, tx_pause;
@@ -463,60 +456,12 @@ static void adjust_link_memac(struct net_device *net_dev)
fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
if (err < 0)
- netdev_err(net_dev, "fman_set_mac_active_pause() = %d\n", err);
-}
-
-/* Initializes driver's PHY state, and attaches to the PHY.
- * Returns 0 on success.
- */
-static struct phy_device *init_phy(struct net_device *net_dev,
- struct mac_device *mac_dev,
- void (*adj_lnk)(struct net_device *))
-{
- struct phy_device *phy_dev;
- struct mac_priv_s *priv = mac_dev->priv;
-
- phy_dev = of_phy_connect(net_dev, priv->phy_node, adj_lnk, 0,
- priv->phy_if);
- if (!phy_dev) {
- netdev_err(net_dev, "Could not connect to PHY\n");
- return NULL;
- }
-
- /* Remove any features not supported by the controller */
- phy_dev->supported &= mac_dev->if_support;
- /* Enable the symmetric and asymmetric PAUSE frame advertisements,
- * as most of the PHY drivers do not enable them by default.
- */
- phy_dev->supported |= (SUPPORTED_Pause | SUPPORTED_Asym_Pause);
- phy_dev->advertising = phy_dev->supported;
-
- mac_dev->phy_dev = phy_dev;
-
- return phy_dev;
-}
-
-static struct phy_device *dtsec_init_phy(struct net_device *net_dev,
- struct mac_device *mac_dev)
-{
- return init_phy(net_dev, mac_dev, &adjust_link_dtsec);
-}
-
-static struct phy_device *tgec_init_phy(struct net_device *net_dev,
- struct mac_device *mac_dev)
-{
- return init_phy(net_dev, mac_dev, adjust_link_void);
-}
-
-static struct phy_device *memac_init_phy(struct net_device *net_dev,
- struct mac_device *mac_dev)
-{
- return init_phy(net_dev, mac_dev, &adjust_link_memac);
+ dev_err(mac_dev->priv->dev, "fman_set_mac_active_pause() = %d\n",
+ err);
}
static void setup_dtsec(struct mac_device *mac_dev)
{
- mac_dev->init_phy = dtsec_init_phy;
mac_dev->init = dtsec_initialization;
mac_dev->set_promisc = dtsec_set_promiscuous;
mac_dev->change_addr = dtsec_modify_mac_address;
@@ -528,14 +473,13 @@ static void setup_dtsec(struct mac_device *mac_dev)
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
-
+ mac_dev->adjust_link = adjust_link_dtsec;
mac_dev->priv->enable = dtsec_enable;
mac_dev->priv->disable = dtsec_disable;
}
static void setup_tgec(struct mac_device *mac_dev)
{
- mac_dev->init_phy = tgec_init_phy;
mac_dev->init = tgec_initialization;
mac_dev->set_promisc = tgec_set_promiscuous;
mac_dev->change_addr = tgec_modify_mac_address;
@@ -547,14 +491,13 @@ static void setup_tgec(struct mac_device *mac_dev)
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
-
+ mac_dev->adjust_link = adjust_link_void;
mac_dev->priv->enable = tgec_enable;
mac_dev->priv->disable = tgec_disable;
}
static void setup_memac(struct mac_device *mac_dev)
{
- mac_dev->init_phy = memac_init_phy;
mac_dev->init = memac_initialization;
mac_dev->set_promisc = memac_set_promiscuous;
mac_dev->change_addr = memac_modify_mac_address;
@@ -566,7 +509,7 @@ static void setup_memac(struct mac_device *mac_dev)
mac_dev->set_multi = set_multi;
mac_dev->start = start;
mac_dev->stop = stop;
-
+ mac_dev->adjust_link = adjust_link_memac;
mac_dev->priv->enable = memac_enable;
mac_dev->priv->disable = memac_disable;
}
@@ -599,8 +542,7 @@ static const u16 phy2speed[] = {
};
static struct platform_device *dpaa_eth_add_device(int fman_id,
- struct mac_device *mac_dev,
- struct device_node *node)
+ struct mac_device *mac_dev)
{
struct platform_device *pdev;
struct dpaa_eth_data data;
@@ -613,17 +555,14 @@ static struct platform_device *dpaa_eth_add_device(int fman_id,
data.mac_dev = mac_dev;
data.mac_hw_id = priv->cell_index;
data.fman_hw_id = fman_id;
- data.mac_node = node;
mutex_lock(&eth_lock);
-
pdev = platform_device_alloc("dpaa-ethernet", dpaa_eth_dev_cnt);
if (!pdev) {
ret = -ENOMEM;
goto no_mem;
}
- pdev->dev.of_node = node;
pdev->dev.parent = priv->dev;
set_dma_ops(&pdev->dev, get_dma_ops(priv->dev));
@@ -706,9 +645,6 @@ static int mac_probe(struct platform_device *_of_dev)
goto _return;
}
- /* Register mac_dev */
- dev_set_drvdata(dev, mac_dev);
-
INIT_LIST_HEAD(&priv->mc_addr_list);
/* Get the FM node */
@@ -717,7 +653,7 @@ static int mac_probe(struct platform_device *_of_dev)
dev_err(dev, "of_get_parent(%pOF) failed\n",
mac_node);
err = -EINVAL;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
of_dev = of_find_device_by_node(dev_node);
@@ -751,7 +687,7 @@ static int mac_probe(struct platform_device *_of_dev)
if (err < 0) {
dev_err(dev, "of_address_to_resource(%pOF) = %d\n",
mac_node, err);
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
mac_dev->res = __devm_request_region(dev,
@@ -761,7 +697,7 @@ static int mac_probe(struct platform_device *_of_dev)
if (!mac_dev->res) {
dev_err(dev, "__devm_request_mem_region(mac) failed\n");
err = -EBUSY;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
priv->vaddr = devm_ioremap(dev, mac_dev->res->start,
@@ -769,7 +705,7 @@ static int mac_probe(struct platform_device *_of_dev)
if (!priv->vaddr) {
dev_err(dev, "devm_ioremap() failed\n");
err = -EIO;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
if (!of_device_is_available(mac_node)) {
@@ -786,7 +722,7 @@ static int mac_probe(struct platform_device *_of_dev)
if (err) {
dev_err(dev, "failed to read cell-index for %pOF\n", mac_node);
err = -EINVAL;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
priv->cell_index = (u8)val;
@@ -795,7 +731,7 @@ static int mac_probe(struct platform_device *_of_dev)
if (!mac_addr) {
dev_err(dev, "of_get_mac_address(%pOF) failed\n", mac_node);
err = -EINVAL;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
memcpy(mac_dev->addr, mac_addr, sizeof(mac_dev->addr));
@@ -805,14 +741,14 @@ static int mac_probe(struct platform_device *_of_dev)
dev_err(dev, "of_count_phandle_with_args(%pOF, fsl,fman-ports) failed\n",
mac_node);
err = nph;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
if (nph != ARRAY_SIZE(mac_dev->port)) {
dev_err(dev, "Not supported number of fman-ports handles of mac node %pOF from device tree\n",
mac_node);
err = -EINVAL;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
@@ -851,13 +787,13 @@ static int mac_probe(struct platform_device *_of_dev)
mac_node);
phy_if = PHY_INTERFACE_MODE_SGMII;
}
- priv->phy_if = phy_if;
+ mac_dev->phy_if = phy_if;
- priv->speed = phy2speed[priv->phy_if];
+ priv->speed = phy2speed[mac_dev->phy_if];
priv->max_speed = priv->speed;
mac_dev->if_support = DTSEC_SUPPORTED;
/* We don't support half-duplex in SGMII mode */
- if (priv->phy_if == PHY_INTERFACE_MODE_SGMII)
+ if (mac_dev->phy_if == PHY_INTERFACE_MODE_SGMII)
mac_dev->if_support &= ~(SUPPORTED_10baseT_Half |
SUPPORTED_100baseT_Half);
@@ -866,30 +802,30 @@ static int mac_probe(struct platform_device *_of_dev)
mac_dev->if_support |= SUPPORTED_1000baseT_Full;
/* The 10G interface only supports one mode */
- if (priv->phy_if == PHY_INTERFACE_MODE_XGMII)
+ if (mac_dev->phy_if == PHY_INTERFACE_MODE_XGMII)
mac_dev->if_support = SUPPORTED_10000baseT_Full;
/* Get the rest of the PHY information */
- priv->phy_node = of_parse_phandle(mac_node, "phy-handle", 0);
- if (!priv->phy_node && of_phy_is_fixed_link(mac_node)) {
+ mac_dev->phy_node = of_parse_phandle(mac_node, "phy-handle", 0);
+ if (!mac_dev->phy_node && of_phy_is_fixed_link(mac_node)) {
struct phy_device *phy;
err = of_phy_register_fixed_link(mac_node);
if (err)
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
priv->fixed_link = kzalloc(sizeof(*priv->fixed_link),
GFP_KERNEL);
if (!priv->fixed_link) {
err = -ENOMEM;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
- priv->phy_node = of_node_get(mac_node);
- phy = of_phy_find_device(priv->phy_node);
+ mac_dev->phy_node = of_node_get(mac_node);
+ phy = of_phy_find_device(mac_dev->phy_node);
if (!phy) {
err = -EINVAL;
- goto _return_dev_set_drvdata;
+ goto _return_of_get_parent;
}
priv->fixed_link->link = phy->link;
@@ -904,8 +840,8 @@ static int mac_probe(struct platform_device *_of_dev)
err = mac_dev->init(mac_dev);
if (err < 0) {
dev_err(dev, "mac_dev->init() = %d\n", err);
- of_node_put(priv->phy_node);
- goto _return_dev_set_drvdata;
+ of_node_put(mac_dev->phy_node);
+ goto _return_of_get_parent;
}
/* pause frame autonegotiation enabled */
@@ -926,7 +862,7 @@ static int mac_probe(struct platform_device *_of_dev)
mac_dev->addr[0], mac_dev->addr[1], mac_dev->addr[2],
mac_dev->addr[3], mac_dev->addr[4], mac_dev->addr[5]);
- priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev, mac_node);
+ priv->eth_dev = dpaa_eth_add_device(fman_id, mac_dev);
if (IS_ERR(priv->eth_dev)) {
dev_err(dev, "failed to add Ethernet platform device for MAC %d\n",
priv->cell_index);
@@ -937,9 +873,8 @@ static int mac_probe(struct platform_device *_of_dev)
_return_of_node_put:
of_node_put(dev_node);
-_return_dev_set_drvdata:
+_return_of_get_parent:
kfree(priv->fixed_link);
- dev_set_drvdata(dev, NULL);
_return:
return err;
}
diff --git a/drivers/net/ethernet/freescale/fman/mac.h b/drivers/net/ethernet/freescale/fman/mac.h
index d7313f0c5135..eefb3357e304 100644
--- a/drivers/net/ethernet/freescale/fman/mac.h
+++ b/drivers/net/ethernet/freescale/fman/mac.h
@@ -50,6 +50,8 @@ struct mac_device {
struct fman_port *port[2];
u32 if_support;
struct phy_device *phy_dev;
+ phy_interface_t phy_if;
+ struct device_node *phy_node;
bool autoneg_pause;
bool rx_pause_req;
@@ -58,11 +60,10 @@ struct mac_device {
bool tx_pause_active;
bool promisc;
- struct phy_device *(*init_phy)(struct net_device *net_dev,
- struct mac_device *mac_dev);
int (*init)(struct mac_device *mac_dev);
int (*start)(struct mac_device *mac_dev);
int (*stop)(struct mac_device *mac_dev);
+ void (*adjust_link)(struct mac_device *mac_dev);
int (*set_promisc)(struct fman_mac *mac_dev, bool enable);
int (*change_addr)(struct fman_mac *mac_dev, enet_addr_t *enet_addr);
int (*set_multi)(struct net_device *net_dev,
@@ -82,7 +83,6 @@ struct mac_device {
};
struct dpaa_eth_data {
- struct device_node *mac_node;
struct mac_device *mac_dev;
int mac_hw_id;
int fman_hw_id;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 753259091b22..7892f2f0c6b5 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -1023,8 +1023,6 @@ static int fs_enet_probe(struct platform_device *ofdev)
ndev->ethtool_ops = &fs_ethtool_ops;
- init_timer(&fep->phy_timer_list);
-
netif_carrier_off(ndev);
ndev->features |= NETIF_F_SG;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
index 5ce516c8a62a..dd306deb7cf1 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
@@ -137,7 +137,6 @@ struct fs_enet_private {
cbd_t __iomem *cur_rx;
cbd_t __iomem *cur_tx;
int tx_free;
- struct timer_list phy_timer_list;
const struct phy_info *phy;
u32 msg_enable;
struct mii_if_info mii_if;
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index f77ba9fa257b..a96b838cffce 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3857,8 +3857,9 @@ static int ucc_geth_probe(struct platform_device* ofdev)
}
if (netif_msg_probe(&debug))
- pr_info("UCC%1d at 0x%8x (irq = %d)\n",
- ug_info->uf_info.ucc_num + 1, ug_info->uf_info.regs,
+ pr_info("UCC%1d at 0x%8llx (irq = %d)\n",
+ ug_info->uf_info.ucc_num + 1,
+ (u64)ug_info->uf_info.regs,
ug_info->uf_info.irq);
/* Create an ethernet device instance */
diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig
index 91c7bdb9b43c..30000b6aa7b8 100644
--- a/drivers/net/ethernet/hisilicon/Kconfig
+++ b/drivers/net/ethernet/hisilicon/Kconfig
@@ -78,7 +78,7 @@ config HNS_ENET
config HNS3
tristate "Hisilicon Network Subsystem Support HNS3 (Framework)"
- depends on PCI
+ depends on PCI
---help---
This selects the framework support for Hisilicon Network Subsystem 3.
This layer facilitates clients like ENET, RoCE and user-space ethernet
@@ -87,7 +87,7 @@ config HNS3
config HNS3_HCLGE
tristate "Hisilicon HNS3 HCLGE Acceleration Engine & Compatibility Layer Support"
- depends on PCI_MSI
+ depends on PCI_MSI
depends on HNS3
---help---
This selects the HNS3_HCLGE network acceleration engine & its hardware
@@ -96,11 +96,20 @@ config HNS3_HCLGE
config HNS3_ENET
tristate "Hisilicon HNS3 Ethernet Device Support"
- depends on 64BIT && PCI
+ depends on 64BIT && PCI
depends on HNS3 && HNS3_HCLGE
---help---
This selects the Ethernet Driver for Hisilicon Network Subsystem 3 for hip08
family of SoCs. This module depends upon HNAE3 driver to access the HNAE3
devices and their associated operations.
+config HNS3_DCB
+ bool "Hisilicon HNS3 Data Center Bridge Support"
+ default n
+ depends on HNS3 && HNS3_HCLGE && DCB
+ ---help---
+ Say Y here if you want to use Data Center Bridging (DCB) in the HNS3 driver.
+
+ If unsure, say N.
+
endif # NET_VENDOR_HISILICON
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 36520634c96a..91565c8fee08 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -2159,9 +2159,9 @@ static void hns_nic_task_schedule(struct hns_nic_priv *priv)
(void)schedule_work(&priv->service_task);
}
-static void hns_nic_service_timer(unsigned long data)
+static void hns_nic_service_timer(struct timer_list *t)
{
- struct hns_nic_priv *priv = (struct hns_nic_priv *)data;
+ struct hns_nic_priv *priv = from_timer(priv, t, service_timer);
(void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ);
@@ -2451,8 +2451,7 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(ndev);
- setup_timer(&priv->service_timer, hns_nic_service_timer,
- (unsigned long)priv);
+ timer_setup(&priv->service_timer, hns_nic_service_timer, 0);
INIT_WORK(&priv->service_task, hns_nic_service_task);
set_bit(NIC_STATE_SERVICE_INITED, &priv->state);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 1a01cadfe5f3..3acd8db0a794 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -28,6 +28,7 @@
*/
#include <linux/acpi.h>
+#include <linux/dcbnl.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/module.h>
@@ -131,6 +132,7 @@ struct hnae3_client_ops {
int (*init_instance)(struct hnae3_handle *handle);
void (*uninit_instance)(struct hnae3_handle *handle, bool reset);
void (*link_status_change)(struct hnae3_handle *handle, bool state);
+ int (*setup_tc)(struct hnae3_handle *handle, u8 tc);
};
#define HNAE3_CLIENT_NAME_LENGTH 16
@@ -337,6 +339,10 @@ struct hnae3_ae_ops {
u8 *hfunc);
int (*set_rss)(struct hnae3_handle *handle, const u32 *indir,
const u8 *key, const u8 hfunc);
+ int (*set_rss_tuple)(struct hnae3_handle *handle,
+ struct ethtool_rxnfc *cmd);
+ int (*get_rss_tuple)(struct hnae3_handle *handle,
+ struct ethtool_rxnfc *cmd);
int (*get_tc_size)(struct hnae3_handle *handle);
@@ -363,6 +369,21 @@ struct hnae3_ae_ops {
u16 vlan, u8 qos, __be16 proto);
};
+struct hnae3_dcb_ops {
+ /* IEEE 802.1Qaz std */
+ int (*ieee_getets)(struct hnae3_handle *, struct ieee_ets *);
+ int (*ieee_setets)(struct hnae3_handle *, struct ieee_ets *);
+ int (*ieee_getpfc)(struct hnae3_handle *, struct ieee_pfc *);
+ int (*ieee_setpfc)(struct hnae3_handle *, struct ieee_pfc *);
+
+ /* DCBX configuration */
+ u8 (*getdcbx)(struct hnae3_handle *);
+ u8 (*setdcbx)(struct hnae3_handle *, u8);
+
+ int (*map_update)(struct hnae3_handle *);
+ int (*setup_tc)(struct hnae3_handle *, u8, u8 *);
+};
+
struct hnae3_ae_algo {
const struct hnae3_ae_ops *ops;
struct list_head node;
@@ -394,6 +415,7 @@ struct hnae3_knic_private_info {
u16 num_tqps; /* total number of TQPs in this handle */
struct hnae3_queue **tqp; /* array base of all TQPs in this instance */
+ const struct hnae3_dcb_ops *dcb_ops;
};
struct hnae3_roce_private_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index 162e8a42acd0..d2b20d01a58c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -7,5 +7,9 @@ ccflags-y := -Idrivers/net/ethernet/hisilicon/hns3
obj-$(CONFIG_HNS3_HCLGE) += hclge.o
hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o
+hclge-$(CONFIG_HNS3_DCB) += hclge_dcb.o
+
obj-$(CONFIG_HNS3_ENET) += hns3.o
hns3-objs = hns3_enet.o hns3_ethtool.o
+
+hns3-$(CONFIG_HNS3_DCB) += hns3_dcbnl.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 8b511e6e0ce9..60960e588b5f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -85,6 +85,15 @@ static int hclge_init_cmd_queue(struct hclge_dev *hdev, int ring_type)
return 0;
}
+void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read)
+{
+ desc->flag = cpu_to_le16(HCLGE_CMD_FLAG_NO_INTR | HCLGE_CMD_FLAG_IN);
+ if (is_read)
+ desc->flag |= cpu_to_le16(HCLGE_CMD_FLAG_WR);
+ else
+ desc->flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
+}
+
void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
enum hclge_opcode_type opcode, bool is_read)
{
@@ -208,7 +217,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
* which will be use for hardware to write back
*/
ntc = hw->cmq.csq.next_to_use;
- opcode = desc[0].opcode;
+ opcode = le16_to_cpu(desc[0].opcode);
while (handle < num) {
desc_to_use = &hw->cmq.csq.desc[hw->cmq.csq.next_to_use];
*desc_to_use = desc[handle];
@@ -225,7 +234,7 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
* If the command is sync, wait for the firmware to write back,
* if multi descriptors to be sent, use the first one to check
*/
- if (HCLGE_SEND_SYNC(desc->flag)) {
+ if (HCLGE_SEND_SYNC(le16_to_cpu(desc->flag))) {
do {
if (hclge_cmd_csq_done(hw))
break;
@@ -244,9 +253,9 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
pr_debug("Get cmd desc:\n");
if (likely(!hclge_is_special_opcode(opcode)))
- desc_ret = desc[handle].retval;
+ desc_ret = le16_to_cpu(desc[handle].retval);
else
- desc_ret = desc[0].retval;
+ desc_ret = le16_to_cpu(desc[0].retval);
if ((enum hclge_cmd_return_status)desc_ret ==
HCLGE_CMD_EXEC_SUCCESS)
@@ -276,15 +285,15 @@ int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num)
return retval;
}
-enum hclge_cmd_status hclge_cmd_query_firmware_version(struct hclge_hw *hw,
- u32 *version)
+static enum hclge_cmd_status hclge_cmd_query_firmware_version(
+ struct hclge_hw *hw, u32 *version)
{
- struct hclge_query_version *resp;
+ struct hclge_query_version_cmd *resp;
struct hclge_desc desc;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FW_VER, 1);
- resp = (struct hclge_query_version *)desc.data;
+ resp = (struct hclge_query_version_cmd *)desc.data;
ret = hclge_cmd_send(hw, &desc, 1);
if (!ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 758cf3948131..b4373345c2b4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -221,12 +221,12 @@ enum hclge_opcode_type {
#define HCLGE_RCB_INIT_QUERY_TIMEOUT 10
#define HCLGE_RCB_INIT_FLAG_EN_B 0
#define HCLGE_RCB_INIT_FLAG_FINI_B 8
-struct hclge_config_rcb_init {
+struct hclge_config_rcb_init_cmd {
__le16 rcb_init_flag;
u8 rsv[22];
};
-struct hclge_tqp_map {
+struct hclge_tqp_map_cmd {
__le16 tqp_id; /* Absolute tqp id for in this pf */
u8 tqp_vf; /* VF id */
#define HCLGE_TQP_MAP_TYPE_PF 0
@@ -246,15 +246,15 @@ enum hclge_int_type {
HCLGE_INT_EVENT,
};
-struct hclge_ctrl_vector_chain {
+struct hclge_ctrl_vector_chain_cmd {
u8 int_vector_id;
u8 int_cause_num;
#define HCLGE_INT_TYPE_S 0
-#define HCLGE_INT_TYPE_M 0x3
+#define HCLGE_INT_TYPE_M GENMASK(1, 0)
#define HCLGE_TQP_ID_S 2
-#define HCLGE_TQP_ID_M (0x7ff << HCLGE_TQP_ID_S)
+#define HCLGE_TQP_ID_M GENMASK(12, 2)
#define HCLGE_INT_GL_IDX_S 13
-#define HCLGE_INT_GL_IDX_M (0x3 << HCLGE_INT_GL_IDX_S)
+#define HCLGE_INT_GL_IDX_M GENMASK(14, 13)
__le16 tqp_type_and_id[HCLGE_VECTOR_ELEMENTS_PER_CMD];
u8 vfid;
u8 rsv;
@@ -263,18 +263,18 @@ struct hclge_ctrl_vector_chain {
#define HCLGE_TC_NUM 8
#define HCLGE_TC0_PRI_BUF_EN_B 15 /* Bit 15 indicate enable or not */
#define HCLGE_BUF_UNIT_S 7 /* Buf size is united by 128 bytes */
-struct hclge_tx_buff_alloc {
+struct hclge_tx_buff_alloc_cmd {
__le16 tx_pkt_buff[HCLGE_TC_NUM];
u8 tx_buff_rsv[8];
};
-struct hclge_rx_priv_buff {
+struct hclge_rx_priv_buff_cmd {
__le16 buf_num[HCLGE_TC_NUM];
__le16 shared_buf;
u8 rsv[6];
};
-struct hclge_query_version {
+struct hclge_query_version_cmd {
__le32 firmware;
__le32 firmware_rsv[5];
};
@@ -311,6 +311,7 @@ struct hclge_tc_thrd {
struct hclge_priv_buf {
struct hclge_waterline wl; /* Waterline for low and high*/
u32 buf_size; /* TC private buffer size */
+ u32 tx_buf_size;
u32 enable; /* Enable TC private buffer or not */
};
@@ -321,15 +322,20 @@ struct hclge_shared_buf {
u32 buf_size;
};
+struct hclge_pkt_buf_alloc {
+ struct hclge_priv_buf priv_buf[HCLGE_MAX_TC_NUM];
+ struct hclge_shared_buf s_buf;
+};
+
#define HCLGE_RX_COM_WL_EN_B 15
-struct hclge_rx_com_wl_buf {
+struct hclge_rx_com_wl_buf_cmd {
__le16 high_wl;
__le16 low_wl;
u8 rsv[20];
};
#define HCLGE_RX_PKT_EN_B 15
-struct hclge_rx_pkt_buf {
+struct hclge_rx_pkt_buf_cmd {
__le16 high_pkt;
__le16 low_pkt;
u8 rsv[20];
@@ -342,7 +348,7 @@ struct hclge_rx_pkt_buf {
#define HCLGE_PF_MAC_NUM_MASK 0x3
#define HCLGE_PF_STATE_MAIN BIT(HCLGE_PF_STATE_MAIN_B)
#define HCLGE_PF_STATE_DONE BIT(HCLGE_PF_STATE_DONE_B)
-struct hclge_func_status {
+struct hclge_func_status_cmd {
__le32 vf_rst_state[4];
u8 pf_state;
u8 mac_id;
@@ -353,7 +359,7 @@ struct hclge_func_status {
u8 rsv[2];
};
-struct hclge_pf_res {
+struct hclge_pf_res_cmd {
__le16 tqp_num;
__le16 buf_size;
__le16 msixcap_localid_ba_nic;
@@ -366,30 +372,30 @@ struct hclge_pf_res {
};
#define HCLGE_CFG_OFFSET_S 0
-#define HCLGE_CFG_OFFSET_M 0xfffff /* Byte (8-10.3) */
+#define HCLGE_CFG_OFFSET_M GENMASK(19, 0)
#define HCLGE_CFG_RD_LEN_S 24
-#define HCLGE_CFG_RD_LEN_M (0xf << HCLGE_CFG_RD_LEN_S)
+#define HCLGE_CFG_RD_LEN_M GENMASK(27, 24)
#define HCLGE_CFG_RD_LEN_BYTES 16
#define HCLGE_CFG_RD_LEN_UNIT 4
#define HCLGE_CFG_VMDQ_S 0
-#define HCLGE_CFG_VMDQ_M (0xff << HCLGE_CFG_VMDQ_S)
+#define HCLGE_CFG_VMDQ_M GENMASK(7, 0)
#define HCLGE_CFG_TC_NUM_S 8
-#define HCLGE_CFG_TC_NUM_M (0xff << HCLGE_CFG_TC_NUM_S)
+#define HCLGE_CFG_TC_NUM_M GENMASK(15, 8)
#define HCLGE_CFG_TQP_DESC_N_S 16
-#define HCLGE_CFG_TQP_DESC_N_M (0xffff << HCLGE_CFG_TQP_DESC_N_S)
+#define HCLGE_CFG_TQP_DESC_N_M GENMASK(31, 16)
#define HCLGE_CFG_PHY_ADDR_S 0
-#define HCLGE_CFG_PHY_ADDR_M (0x1f << HCLGE_CFG_PHY_ADDR_S)
+#define HCLGE_CFG_PHY_ADDR_M GENMASK(4, 0)
#define HCLGE_CFG_MEDIA_TP_S 8
-#define HCLGE_CFG_MEDIA_TP_M (0xff << HCLGE_CFG_MEDIA_TP_S)
+#define HCLGE_CFG_MEDIA_TP_M GENMASK(15, 8)
#define HCLGE_CFG_RX_BUF_LEN_S 16
-#define HCLGE_CFG_RX_BUF_LEN_M (0xffff << HCLGE_CFG_RX_BUF_LEN_S)
+#define HCLGE_CFG_RX_BUF_LEN_M GENMASK(31, 16)
#define HCLGE_CFG_MAC_ADDR_H_S 0
-#define HCLGE_CFG_MAC_ADDR_H_M (0xffff << HCLGE_CFG_MAC_ADDR_H_S)
+#define HCLGE_CFG_MAC_ADDR_H_M GENMASK(15, 0)
#define HCLGE_CFG_DEFAULT_SPEED_S 16
-#define HCLGE_CFG_DEFAULT_SPEED_M (0xff << HCLGE_CFG_DEFAULT_SPEED_S)
+#define HCLGE_CFG_DEFAULT_SPEED_M GENMASK(23, 16)
-struct hclge_cfg_param {
+struct hclge_cfg_param_cmd {
__le32 offset;
__le32 rsv;
__le32 param[4];
@@ -399,7 +405,7 @@ struct hclge_cfg_param {
#define HCLGE_DESC_NUM 0x40
#define HCLGE_ALLOC_VALID_B 0
-struct hclge_vf_num {
+struct hclge_vf_num_cmd {
u8 alloc_valid;
u8 rsv[23];
};
@@ -407,13 +413,13 @@ struct hclge_vf_num {
#define HCLGE_RSS_DEFAULT_OUTPORT_B 4
#define HCLGE_RSS_HASH_KEY_OFFSET_B 4
#define HCLGE_RSS_HASH_KEY_NUM 16
-struct hclge_rss_config {
+struct hclge_rss_config_cmd {
u8 hash_config;
u8 rsv[7];
u8 hash_key[HCLGE_RSS_HASH_KEY_NUM];
};
-struct hclge_rss_input_tuple {
+struct hclge_rss_input_tuple_cmd {
u8 ipv4_tcp_en;
u8 ipv4_udp_en;
u8 ipv4_sctp_en;
@@ -427,26 +433,26 @@ struct hclge_rss_input_tuple {
#define HCLGE_RSS_CFG_TBL_SIZE 16
-struct hclge_rss_indirection_table {
- u16 start_table_index;
- u16 rss_set_bitmap;
+struct hclge_rss_indirection_table_cmd {
+ __le16 start_table_index;
+ __le16 rss_set_bitmap;
u8 rsv[4];
u8 rss_result[HCLGE_RSS_CFG_TBL_SIZE];
};
#define HCLGE_RSS_TC_OFFSET_S 0
-#define HCLGE_RSS_TC_OFFSET_M (0x3ff << HCLGE_RSS_TC_OFFSET_S)
+#define HCLGE_RSS_TC_OFFSET_M GENMASK(9, 0)
#define HCLGE_RSS_TC_SIZE_S 12
-#define HCLGE_RSS_TC_SIZE_M (0x7 << HCLGE_RSS_TC_SIZE_S)
+#define HCLGE_RSS_TC_SIZE_M GENMASK(14, 12)
#define HCLGE_RSS_TC_VALID_B 15
-struct hclge_rss_tc_mode {
- u16 rss_tc_mode[HCLGE_MAX_TC_NUM];
+struct hclge_rss_tc_mode_cmd {
+ __le16 rss_tc_mode[HCLGE_MAX_TC_NUM];
u8 rsv[8];
};
#define HCLGE_LINK_STS_B 0
#define HCLGE_LINK_STATUS BIT(HCLGE_LINK_STS_B)
-struct hclge_link_status {
+struct hclge_link_status_cmd {
u8 status;
u8 rsv[23];
};
@@ -461,7 +467,7 @@ struct hclge_promisc_param {
#define HCLGE_PROMISC_EN_UC 0x1
#define HCLGE_PROMISC_EN_MC 0x2
#define HCLGE_PROMISC_EN_BC 0x4
-struct hclge_promisc_cfg {
+struct hclge_promisc_cfg_cmd {
u8 flag;
u8 vf_id;
__le16 rsv0;
@@ -489,18 +495,18 @@ enum hclge_promisc_type {
#define HCLGE_MAC_TX_UNDER_MIN_ERR_B 21
#define HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B 22
-struct hclge_config_mac_mode {
+struct hclge_config_mac_mode_cmd {
__le32 txrx_pad_fcs_loop_en;
u8 rsv[20];
};
#define HCLGE_CFG_SPEED_S 0
-#define HCLGE_CFG_SPEED_M (0x3f << HCLGE_CFG_SPEED_S)
+#define HCLGE_CFG_SPEED_M GENMASK(5, 0)
#define HCLGE_CFG_DUPLEX_B 7
#define HCLGE_CFG_DUPLEX_M BIT(HCLGE_CFG_DUPLEX_B)
-struct hclge_config_mac_speed_dup {
+struct hclge_config_mac_speed_dup_cmd {
u8 speed_dup;
#define HCLGE_CFG_MAC_SPEED_CHANGE_EN_B 0
@@ -512,17 +518,17 @@ struct hclge_config_mac_speed_dup {
#define HCLGE_QUERY_AN_B 0
#define HCLGE_QUERY_DUPLEX_B 2
-#define HCLGE_QUERY_SPEED_M (0x1f << HCLGE_QUERY_SPEED_S)
+#define HCLGE_QUERY_SPEED_M GENMASK(4, 0)
#define HCLGE_QUERY_AN_M BIT(HCLGE_QUERY_AN_B)
#define HCLGE_QUERY_DUPLEX_M BIT(HCLGE_QUERY_DUPLEX_B)
-struct hclge_query_an_speed_dup {
+struct hclge_query_an_speed_dup_cmd {
u8 an_syn_dup_speed;
u8 pause;
u8 rsv[23];
};
-#define HCLGE_RING_ID_MASK 0x3ff
+#define HCLGE_RING_ID_MASK GENMASK(9, 0)
#define HCLGE_TQP_ENABLE_B 0
#define HCLGE_MAC_CFG_AN_EN_B 0
@@ -533,7 +539,7 @@ struct hclge_query_an_speed_dup {
#define HCLGE_MAC_CFG_AN_EN BIT(HCLGE_MAC_CFG_AN_EN_B)
-struct hclge_config_auto_neg {
+struct hclge_config_auto_neg_cmd {
__le32 cfg_an_cmd_flag;
u8 rsv[20];
};
@@ -542,7 +548,7 @@ struct hclge_config_auto_neg {
#define HCLGE_MAC_MAX_MTU 9728
#define HCLGE_MAC_UPLINK_PORT 0x100
-struct hclge_config_max_frm_size {
+struct hclge_config_max_frm_size_cmd {
__le16 max_frm_size;
u8 rsv[22];
};
@@ -559,10 +565,10 @@ enum hclge_mac_vlan_tbl_opcode {
#define HCLGE_MAC_EPORT_SW_EN_B 0xc
#define HCLGE_MAC_EPORT_TYPE_B 0xb
#define HCLGE_MAC_EPORT_VFID_S 0x3
-#define HCLGE_MAC_EPORT_VFID_M (0xff << HCLGE_MAC_EPORT_VFID_S)
+#define HCLGE_MAC_EPORT_VFID_M GENMASK(10, 3)
#define HCLGE_MAC_EPORT_PFID_S 0x0
-#define HCLGE_MAC_EPORT_PFID_M (0x7 << HCLGE_MAC_EPORT_PFID_S)
-struct hclge_mac_vlan_tbl_entry {
+#define HCLGE_MAC_EPORT_PFID_M GENMASK(2, 0)
+struct hclge_mac_vlan_tbl_entry_cmd {
u8 flags;
u8 resp_code;
__le16 vlan_tag;
@@ -577,15 +583,15 @@ struct hclge_mac_vlan_tbl_entry {
};
#define HCLGE_CFG_MTA_MAC_SEL_S 0x0
-#define HCLGE_CFG_MTA_MAC_SEL_M (0x3 << HCLGE_CFG_MTA_MAC_SEL_S)
+#define HCLGE_CFG_MTA_MAC_SEL_M GENMASK(1, 0)
#define HCLGE_CFG_MTA_MAC_EN_B 0x7
-struct hclge_mta_filter_mode {
+struct hclge_mta_filter_mode_cmd {
u8 dmac_sel_en; /* Use lowest 2 bit as sel_mode, bit 7 as enable */
u8 rsv[23];
};
#define HCLGE_CFG_FUNC_MTA_ACCEPT_B 0x0
-struct hclge_cfg_func_mta_filter {
+struct hclge_cfg_func_mta_filter_cmd {
u8 accept; /* Only used lowest 1 bit */
u8 function_id;
u8 rsv[22];
@@ -593,14 +599,14 @@ struct hclge_cfg_func_mta_filter {
#define HCLGE_CFG_MTA_ITEM_ACCEPT_B 0x0
#define HCLGE_CFG_MTA_ITEM_IDX_S 0x0
-#define HCLGE_CFG_MTA_ITEM_IDX_M (0xfff << HCLGE_CFG_MTA_ITEM_IDX_S)
-struct hclge_cfg_func_mta_item {
- u16 item_idx; /* Only used lowest 12 bit */
+#define HCLGE_CFG_MTA_ITEM_IDX_M GENMASK(11, 0)
+struct hclge_cfg_func_mta_item_cmd {
+ __le16 item_idx; /* Only used lowest 12 bit */
u8 accept; /* Only used lowest 1 bit */
u8 rsv[21];
};
-struct hclge_mac_vlan_add {
+struct hclge_mac_vlan_add_cmd {
__le16 flags;
__le16 mac_addr_hi16;
__le32 mac_addr_lo32;
@@ -613,7 +619,7 @@ struct hclge_mac_vlan_add {
};
#define HNS3_MAC_VLAN_CFG_FLAG_BIT 0
-struct hclge_mac_vlan_remove {
+struct hclge_mac_vlan_remove_cmd {
__le16 flags;
__le16 mac_addr_hi16;
__le32 mac_addr_lo32;
@@ -625,21 +631,21 @@ struct hclge_mac_vlan_remove {
u8 rsv[4];
};
-struct hclge_vlan_filter_ctrl {
+struct hclge_vlan_filter_ctrl_cmd {
u8 vlan_type;
u8 vlan_fe;
u8 rsv[22];
};
-struct hclge_vlan_filter_pf_cfg {
+struct hclge_vlan_filter_pf_cfg_cmd {
u8 vlan_offset;
u8 vlan_cfg;
u8 rsv[2];
u8 vlan_offset_bitmap[20];
};
-struct hclge_vlan_filter_vf_cfg {
- u16 vlan_id;
+struct hclge_vlan_filter_vf_cfg_cmd {
+ __le16 vlan_id;
u8 resp_code;
u8 rsv;
u8 vlan_cfg;
@@ -647,14 +653,14 @@ struct hclge_vlan_filter_vf_cfg {
u8 vf_bitmap[16];
};
-struct hclge_cfg_com_tqp_queue {
+struct hclge_cfg_com_tqp_queue_cmd {
__le16 tqp_id;
__le16 stream_id;
u8 enable;
u8 rsv[19];
};
-struct hclge_cfg_tx_queue_pointer {
+struct hclge_cfg_tx_queue_pointer_cmd {
__le16 tqp_id;
__le16 tx_tail;
__le16 tx_head;
@@ -664,12 +670,12 @@ struct hclge_cfg_tx_queue_pointer {
};
#define HCLGE_TSO_MSS_MIN_S 0
-#define HCLGE_TSO_MSS_MIN_M (0x3FFF << HCLGE_TSO_MSS_MIN_S)
+#define HCLGE_TSO_MSS_MIN_M GENMASK(13, 0)
#define HCLGE_TSO_MSS_MAX_S 16
-#define HCLGE_TSO_MSS_MAX_M (0x3FFF << HCLGE_TSO_MSS_MAX_S)
+#define HCLGE_TSO_MSS_MAX_M GENMASK(29, 16)
-struct hclge_cfg_tso_status {
+struct hclge_cfg_tso_status_cmd {
__le16 tso_mss_min;
__le16 tso_mss_max;
u8 rsv[20];
@@ -679,7 +685,7 @@ struct hclge_cfg_tso_status {
#define HCLGE_TSO_MSS_MAX 9668
#define HCLGE_TQP_RESET_B 0
-struct hclge_reset_tqp_queue {
+struct hclge_reset_tqp_queue_cmd {
__le16 tqp_id;
u8 reset_req;
u8 ready_to_reset;
@@ -733,6 +739,7 @@ struct hclge_hw;
int hclge_cmd_send(struct hclge_hw *hw, struct hclge_desc *desc, int num);
void hclge_cmd_setup_basic_desc(struct hclge_desc *desc,
enum hclge_opcode_type opcode, bool is_read);
+void hclge_cmd_reuse_desc(struct hclge_desc *desc, bool is_read);
int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
struct hclge_promisc_param *param);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
new file mode 100644
index 000000000000..5018d6633133
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "hclge_main.h"
+#include "hclge_tm.h"
+#include "hnae3.h"
+
+#define BW_PERCENT 100
+
+static int hclge_ieee_ets_to_tm_info(struct hclge_dev *hdev,
+ struct ieee_ets *ets)
+{
+ u8 i;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ hdev->tm_info.tc_info[i].tc_sch_mode =
+ HCLGE_SCH_MODE_SP;
+ hdev->tm_info.pg_info[0].tc_dwrr[i] = 0;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ hdev->tm_info.tc_info[i].tc_sch_mode =
+ HCLGE_SCH_MODE_DWRR;
+ hdev->tm_info.pg_info[0].tc_dwrr[i] =
+ ets->tc_tx_bw[i];
+ break;
+ default:
+ /* Hardware only supports SP (strict priority)
+ * or ETS (enhanced transmission selection)
+ * algorithms, if we receive some other value
+ * from dcbnl, then throw an error.
+ */
+ return -EINVAL;
+ }
+ }
+
+ return hclge_tm_prio_tc_info_update(hdev, ets->prio_tc);
+}
+
+static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev,
+ struct ieee_ets *ets)
+{
+ u32 i;
+
+ memset(ets, 0, sizeof(*ets));
+ ets->willing = 1;
+ ets->ets_cap = hdev->tc_max;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ ets->prio_tc[i] = hdev->tm_info.prio_tc[i];
+ ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
+
+ if (hdev->tm_info.tc_info[i].tc_sch_mode ==
+ HCLGE_SCH_MODE_SP)
+ ets->tc_tsa[i] = IEEE_8021QAZ_TSA_STRICT;
+ else
+ ets->tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+ }
+}
+
+/* IEEE std */
+static int hclge_ieee_getets(struct hnae3_handle *h, struct ieee_ets *ets)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ hclge_tm_info_to_ieee_ets(hdev, ets);
+
+ return 0;
+}
+
+static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
+ u8 *tc, bool *changed)
+{
+ u32 total_ets_bw = 0;
+ u8 max_tc = 0;
+ u8 i;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ if (ets->prio_tc[i] >= hdev->tc_max ||
+ i >= hdev->tc_max)
+ return -EINVAL;
+
+ if (ets->prio_tc[i] != hdev->tm_info.prio_tc[i])
+ *changed = true;
+
+ if (ets->prio_tc[i] > max_tc)
+ max_tc = ets->prio_tc[i];
+
+ switch (ets->tc_tsa[i]) {
+ case IEEE_8021QAZ_TSA_STRICT:
+ if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+ HCLGE_SCH_MODE_SP)
+ *changed = true;
+ break;
+ case IEEE_8021QAZ_TSA_ETS:
+ if (hdev->tm_info.tc_info[i].tc_sch_mode !=
+ HCLGE_SCH_MODE_DWRR)
+ *changed = true;
+
+ total_ets_bw += ets->tc_tx_bw[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (total_ets_bw != BW_PERCENT)
+ return -EINVAL;
+
+ *tc = max_tc + 1;
+ if (*tc != hdev->tm_info.num_tc)
+ *changed = true;
+
+ return 0;
+}
+
+static int hclge_map_update(struct hnae3_handle *h)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ ret = hclge_tm_map_cfg(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_schd_mode_hw(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_pause_setup_hw(hdev);
+ if (ret)
+ return ret;
+
+ ret = hclge_buffer_alloc(hdev);
+ if (ret)
+ return ret;
+
+ return hclge_rss_init_hw(hdev);
+}
+
+static int hclge_client_setup_tc(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_client *client;
+ struct hnae3_handle *handle;
+ int ret;
+ u32 i;
+
+ for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
+ handle = &vport[i].nic;
+ client = handle->client;
+
+ if (!client || !client->ops || !client->ops->setup_tc)
+ continue;
+
+ ret = client->ops->setup_tc(handle, hdev->tm_info.num_tc);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ bool map_changed = false;
+ u8 num_tc = 0;
+ int ret;
+
+ if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+ hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+ return -EINVAL;
+
+ ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
+ if (ret)
+ return ret;
+
+ hclge_tm_schd_info_update(hdev, num_tc);
+
+ ret = hclge_ieee_ets_to_tm_info(hdev, ets);
+ if (ret)
+ return ret;
+
+ if (map_changed) {
+ ret = hclge_client_setup_tc(hdev);
+ if (ret)
+ return ret;
+ }
+
+ return hclge_tm_dwrr_cfg(hdev);
+}
+
+static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ u8 i, j, pfc_map, *prio_tc;
+
+ memset(pfc, 0, sizeof(*pfc));
+ pfc->pfc_cap = hdev->pfc_max;
+ prio_tc = hdev->tm_info.prio_tc;
+ pfc_map = hdev->tm_info.hw_pfc_map;
+
+ /* Pfc setting is based on TC */
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
+ if ((prio_tc[j] == i) && (pfc_map & BIT(i)))
+ pfc->pfc_en |= BIT(j);
+ }
+ }
+
+ return 0;
+}
+
+static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ u8 i, j, pfc_map, *prio_tc;
+
+ if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+ hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+ return -EINVAL;
+
+ prio_tc = hdev->tm_info.prio_tc;
+ pfc_map = 0;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) {
+ if ((prio_tc[j] == i) && (pfc->pfc_en & BIT(j))) {
+ pfc_map |= BIT(i);
+ break;
+ }
+ }
+ }
+
+ if (pfc_map == hdev->tm_info.hw_pfc_map)
+ return 0;
+
+ hdev->tm_info.hw_pfc_map = pfc_map;
+
+ return hclge_pause_setup_hw(hdev);
+}
+
+/* DCBX configuration */
+static u8 hclge_getdcbx(struct hnae3_handle *h)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+ return 0;
+
+ return hdev->dcbx_cap;
+}
+
+static u8 hclge_setdcbx(struct hnae3_handle *h, u8 mode)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+
+ /* No support for LLD_MANAGED modes or CEE */
+ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
+ (mode & DCB_CAP_DCBX_VER_CEE) ||
+ !(mode & DCB_CAP_DCBX_HOST))
+ return 1;
+
+ hdev->dcbx_cap = mode;
+
+ return 0;
+}
+
+/* Set up TC for hardware offloaded mqprio in channel mode */
+static int hclge_setup_tc(struct hnae3_handle *h, u8 tc, u8 *prio_tc)
+{
+ struct hclge_vport *vport = hclge_get_vport(h);
+ struct hclge_dev *hdev = vport->back;
+ int ret;
+
+ if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
+ return -EINVAL;
+
+ if (tc > hdev->tc_max) {
+ dev_err(&hdev->pdev->dev,
+ "setup tc failed, tc(%u) > tc_max(%u)\n",
+ tc, hdev->tc_max);
+ return -EINVAL;
+ }
+
+ hclge_tm_schd_info_update(hdev, tc);
+
+ ret = hclge_tm_prio_tc_info_update(hdev, prio_tc);
+ if (ret)
+ return ret;
+
+ ret = hclge_tm_init_hw(hdev);
+ if (ret)
+ return ret;
+
+ hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+
+ if (tc > 1)
+ hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE;
+ else
+ hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
+
+ return 0;
+}
+
+static const struct hnae3_dcb_ops hns3_dcb_ops = {
+ .ieee_getets = hclge_ieee_getets,
+ .ieee_setets = hclge_ieee_setets,
+ .ieee_getpfc = hclge_ieee_getpfc,
+ .ieee_setpfc = hclge_ieee_setpfc,
+ .getdcbx = hclge_getdcbx,
+ .setdcbx = hclge_setdcbx,
+ .map_update = hclge_map_update,
+ .setup_tc = hclge_setup_tc,
+};
+
+void hclge_dcb_ops_set(struct hclge_dev *hdev)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_knic_private_info *kinfo;
+
+ /* Hdev does not support DCB or vport is
+ * not a pf, then dcb_ops is not set.
+ */
+ if (!hnae3_dev_dcb_supported(hdev) ||
+ vport->vport_id != 0)
+ return;
+
+ kinfo = &vport->nic.kinfo;
+ kinfo->dcb_ops = &hns3_dcb_ops;
+ hdev->dcbx_cap = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_HOST;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
new file mode 100644
index 000000000000..7d808ee96694
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2016~2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __HCLGE_DCB_H__
+#define __HCLGE_DCB_H__
+
+#include "hclge_main.h"
+
+#ifdef CONFIG_HNS3_DCB
+void hclge_dcb_ops_set(struct hclge_dev *hdev);
+#else
+static inline void hclge_dcb_ops_set(struct hclge_dev *hdev) {}
+#endif
+
+#endif /* __HCLGE_DCB_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c1cdbfd83bdb..8508521c26e8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -19,6 +19,7 @@
#include <linux/platform_device.h>
#include "hclge_cmd.h"
+#include "hclge_dcb.h"
#include "hclge_main.h"
#include "hclge_mdio.h"
#include "hclge_tm.h"
@@ -30,7 +31,6 @@
#define HCLGE_64BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_64_bit_stats, f))
#define HCLGE_32BIT_STATS_FIELD_OFF(f) (offsetof(struct hclge_32_bit_stats, f))
-static int hclge_rss_init_hw(struct hclge_dev *hdev);
static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
enum hclge_mta_dmac_sel_type mta_mac_sel,
bool enable);
@@ -362,7 +362,7 @@ static int hclge_64_bit_update_stats(struct hclge_dev *hdev)
#define HCLGE_64_BIT_RTN_DATANUM 4
u64 *data = (u64 *)(&hdev->hw_stats.all_64_bit_stats);
struct hclge_desc desc[HCLGE_64_BIT_CMD_NUM];
- u64 *desc_data;
+ __le64 *desc_data;
int i, k, n;
int ret;
@@ -376,14 +376,14 @@ static int hclge_64_bit_update_stats(struct hclge_dev *hdev)
for (i = 0; i < HCLGE_64_BIT_CMD_NUM; i++) {
if (unlikely(i == 0)) {
- desc_data = (u64 *)(&desc[i].data[0]);
+ desc_data = (__le64 *)(&desc[i].data[0]);
n = HCLGE_64_BIT_RTN_DATANUM - 1;
} else {
- desc_data = (u64 *)(&desc[i]);
+ desc_data = (__le64 *)(&desc[i]);
n = HCLGE_64_BIT_RTN_DATANUM;
}
for (k = 0; k < n; k++) {
- *data++ += cpu_to_le64(*desc_data);
+ *data++ += le64_to_cpu(*desc_data);
desc_data++;
}
}
@@ -411,7 +411,7 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev)
struct hclge_desc desc[HCLGE_32_BIT_CMD_NUM];
struct hclge_32_bit_stats *all_32_bit_stats;
- u32 *desc_data;
+ __le32 *desc_data;
int i, k, n;
u64 *data;
int ret;
@@ -431,21 +431,27 @@ static int hclge_32_bit_update_stats(struct hclge_dev *hdev)
hclge_reset_partial_32bit_counter(all_32_bit_stats);
for (i = 0; i < HCLGE_32_BIT_CMD_NUM; i++) {
if (unlikely(i == 0)) {
+ __le16 *desc_data_16bit;
+
all_32_bit_stats->igu_rx_err_pkt +=
- cpu_to_le32(desc[i].data[0]);
+ le32_to_cpu(desc[i].data[0]);
+
+ desc_data_16bit = (__le16 *)&desc[i].data[1];
all_32_bit_stats->igu_rx_no_eof_pkt +=
- cpu_to_le32(desc[i].data[1] & 0xffff);
+ le16_to_cpu(*desc_data_16bit);
+
+ desc_data_16bit++;
all_32_bit_stats->igu_rx_no_sof_pkt +=
- cpu_to_le32((desc[i].data[1] >> 16) & 0xffff);
+ le16_to_cpu(*desc_data_16bit);
- desc_data = (u32 *)(&desc[i].data[2]);
+ desc_data = &desc[i].data[2];
n = HCLGE_32_BIT_RTN_DATANUM - 4;
} else {
- desc_data = (u32 *)(&desc[i]);
+ desc_data = (__le32 *)&desc[i];
n = HCLGE_32_BIT_RTN_DATANUM;
}
for (k = 0; k < n; k++) {
- *data++ += cpu_to_le32(*desc_data);
+ *data++ += le32_to_cpu(*desc_data);
desc_data++;
}
}
@@ -460,7 +466,7 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev)
u64 *data = (u64 *)(&hdev->hw_stats.mac_stats);
struct hclge_desc desc[HCLGE_MAC_CMD_NUM];
- u64 *desc_data;
+ __le64 *desc_data;
int i, k, n;
int ret;
@@ -475,14 +481,14 @@ static int hclge_mac_update_stats(struct hclge_dev *hdev)
for (i = 0; i < HCLGE_MAC_CMD_NUM; i++) {
if (unlikely(i == 0)) {
- desc_data = (u64 *)(&desc[i].data[0]);
+ desc_data = (__le64 *)(&desc[i].data[0]);
n = HCLGE_RTN_DATA_NUM - 2;
} else {
- desc_data = (u64 *)(&desc[i]);
+ desc_data = (__le64 *)(&desc[i]);
n = HCLGE_RTN_DATA_NUM;
}
for (k = 0; k < n; k++) {
- *data++ += cpu_to_le64(*desc_data);
+ *data++ += le64_to_cpu(*desc_data);
desc_data++;
}
}
@@ -508,7 +514,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
HCLGE_OPC_QUERY_RX_STATUS,
true);
- desc[0].data[0] = (tqp->index & 0x1ff);
+ desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff));
ret = hclge_cmd_send(&hdev->hw, desc, 1);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -517,7 +523,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
return ret;
}
tqp->tqp_stats.rcb_rx_ring_pktnum_rcd +=
- cpu_to_le32(desc[0].data[4]);
+ le32_to_cpu(desc[0].data[4]);
}
for (i = 0; i < kinfo->num_tqps; i++) {
@@ -528,7 +534,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
HCLGE_OPC_QUERY_TX_STATUS,
true);
- desc[0].data[0] = (tqp->index & 0x1ff);
+ desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff));
ret = hclge_cmd_send(&hdev->hw, desc, 1);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -537,7 +543,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
return ret;
}
tqp->tqp_stats.rcb_tx_ring_pktnum_rcd +=
- cpu_to_le32(desc[0].data[4]);
+ le32_to_cpu(desc[0].data[4]);
}
return 0;
@@ -552,12 +558,12 @@ static u64 *hclge_tqps_get_stats(struct hnae3_handle *handle, u64 *data)
for (i = 0; i < kinfo->num_tqps; i++) {
tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q);
- *buff++ = cpu_to_le64(tqp->tqp_stats.rcb_tx_ring_pktnum_rcd);
+ *buff++ = tqp->tqp_stats.rcb_tx_ring_pktnum_rcd;
}
for (i = 0; i < kinfo->num_tqps; i++) {
tqp = container_of(kinfo->tqp[i], struct hclge_tqp, q);
- *buff++ = cpu_to_le64(tqp->tqp_stats.rcb_rx_ring_pktnum_rcd);
+ *buff++ = tqp->tqp_stats.rcb_rx_ring_pktnum_rcd;
}
return buff;
@@ -820,7 +826,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data)
}
static int hclge_parse_func_status(struct hclge_dev *hdev,
- struct hclge_func_status *status)
+ struct hclge_func_status_cmd *status)
{
if (!(status->pf_state & HCLGE_PF_STATE_DONE))
return -EINVAL;
@@ -837,13 +843,13 @@ static int hclge_parse_func_status(struct hclge_dev *hdev,
static int hclge_query_function_status(struct hclge_dev *hdev)
{
- struct hclge_func_status *req;
+ struct hclge_func_status_cmd *req;
struct hclge_desc desc;
int timeout = 0;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_FUNC_STATUS, true);
- req = (struct hclge_func_status *)desc.data;
+ req = (struct hclge_func_status_cmd *)desc.data;
do {
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -868,7 +874,7 @@ static int hclge_query_function_status(struct hclge_dev *hdev)
static int hclge_query_pf_resource(struct hclge_dev *hdev)
{
- struct hclge_pf_res *req;
+ struct hclge_pf_res_cmd *req;
struct hclge_desc desc;
int ret;
@@ -880,7 +886,7 @@ static int hclge_query_pf_resource(struct hclge_dev *hdev)
return ret;
}
- req = (struct hclge_pf_res *)desc.data;
+ req = (struct hclge_pf_res_cmd *)desc.data;
hdev->num_tqps = __le16_to_cpu(req->tqp_num);
hdev->pkt_buf_size = __le16_to_cpu(req->buf_size) << HCLGE_BUF_UNIT_S;
@@ -938,12 +944,12 @@ static int hclge_parse_speed(int speed_cmd, int *speed)
static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
{
- struct hclge_cfg_param *req;
+ struct hclge_cfg_param_cmd *req;
u64 mac_addr_tmp_high;
u64 mac_addr_tmp;
int i;
- req = (struct hclge_cfg_param *)desc[0].data;
+ req = (struct hclge_cfg_param_cmd *)desc[0].data;
/* get the configuration */
cfg->vmdq_vport_num = hnae_get_field(__le32_to_cpu(req->param[0]),
@@ -978,7 +984,7 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
for (i = 0; i < ETH_ALEN; i++)
cfg->mac_addr[i] = (mac_addr_tmp >> (8 * i)) & 0xff;
- req = (struct hclge_cfg_param *)desc[1].data;
+ req = (struct hclge_cfg_param_cmd *)desc[1].data;
cfg->numa_node_map = __le32_to_cpu(req->param[0]);
}
@@ -989,20 +995,21 @@ static void hclge_parse_cfg(struct hclge_cfg *cfg, struct hclge_desc *desc)
static int hclge_get_cfg(struct hclge_dev *hdev, struct hclge_cfg *hcfg)
{
struct hclge_desc desc[HCLGE_PF_CFG_DESC_NUM];
- struct hclge_cfg_param *req;
+ struct hclge_cfg_param_cmd *req;
int i, ret;
for (i = 0; i < HCLGE_PF_CFG_DESC_NUM; i++) {
- req = (struct hclge_cfg_param *)desc[i].data;
+ u32 offset = 0;
+
+ req = (struct hclge_cfg_param_cmd *)desc[i].data;
hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_CFG_PARAM,
true);
- hnae_set_field(req->offset, HCLGE_CFG_OFFSET_M,
+ hnae_set_field(offset, HCLGE_CFG_OFFSET_M,
HCLGE_CFG_OFFSET_S, i * HCLGE_CFG_RD_LEN_BYTES);
/* Len should be united by 4 bytes when send to hardware */
- hnae_set_field(req->offset, HCLGE_CFG_RD_LEN_M,
- HCLGE_CFG_RD_LEN_S,
+ hnae_set_field(offset, HCLGE_CFG_RD_LEN_M, HCLGE_CFG_RD_LEN_S,
HCLGE_CFG_RD_LEN_BYTES / HCLGE_CFG_RD_LEN_UNIT);
- req->offset = cpu_to_le32(req->offset);
+ req->offset = cpu_to_le32(offset);
}
ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PF_CFG_DESC_NUM);
@@ -1058,7 +1065,7 @@ static int hclge_configure(struct hclge_dev *hdev)
hdev->hw.mac.phy_addr = cfg.phy_addr;
hdev->num_desc = cfg.tqp_desc_num;
hdev->tm_info.num_pg = 1;
- hdev->tm_info.num_tc = cfg.tc_num;
+ hdev->tc_max = cfg.tc_num;
hdev->tm_info.hw_pfc_map = 0;
ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
@@ -1067,15 +1074,25 @@ static int hclge_configure(struct hclge_dev *hdev)
return ret;
}
- if ((hdev->tm_info.num_tc > HNAE3_MAX_TC) ||
- (hdev->tm_info.num_tc < 1)) {
+ if ((hdev->tc_max > HNAE3_MAX_TC) ||
+ (hdev->tc_max < 1)) {
dev_warn(&hdev->pdev->dev, "TC num = %d.\n",
- hdev->tm_info.num_tc);
- hdev->tm_info.num_tc = 1;
+ hdev->tc_max);
+ hdev->tc_max = 1;
}
+ /* Dev does not support DCB */
+ if (!hnae3_dev_dcb_supported(hdev)) {
+ hdev->tc_max = 1;
+ hdev->pfc_max = 0;
+ } else {
+ hdev->pfc_max = hdev->tc_max;
+ }
+
+ hdev->tm_info.num_tc = hdev->tc_max;
+
/* Currently not support uncontiuous tc */
- for (i = 0; i < cfg.tc_num; i++)
+ for (i = 0; i < hdev->tm_info.num_tc; i++)
hnae_set_bit(hdev->hw_tc_map, i, 1);
if (!hdev->num_vmdq_vport && !hdev->num_req_vfs)
@@ -1089,16 +1106,23 @@ static int hclge_configure(struct hclge_dev *hdev)
static int hclge_config_tso(struct hclge_dev *hdev, int tso_mss_min,
int tso_mss_max)
{
- struct hclge_cfg_tso_status *req;
+ struct hclge_cfg_tso_status_cmd *req;
struct hclge_desc desc;
+ u16 tso_mss;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false);
- req = (struct hclge_cfg_tso_status *)desc.data;
- hnae_set_field(req->tso_mss_min, HCLGE_TSO_MSS_MIN_M,
+ req = (struct hclge_cfg_tso_status_cmd *)desc.data;
+
+ tso_mss = 0;
+ hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
HCLGE_TSO_MSS_MIN_S, tso_mss_min);
- hnae_set_field(req->tso_mss_max, HCLGE_TSO_MSS_MIN_M,
+ req->tso_mss_min = cpu_to_le16(tso_mss);
+
+ tso_mss = 0;
+ hnae_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
HCLGE_TSO_MSS_MIN_S, tso_mss_max);
+ req->tso_mss_max = cpu_to_le16(tso_mss);
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
@@ -1134,15 +1158,15 @@ static int hclge_alloc_tqps(struct hclge_dev *hdev)
static int hclge_map_tqps_to_func(struct hclge_dev *hdev, u16 func_id,
u16 tqp_pid, u16 tqp_vid, bool is_pf)
{
- struct hclge_tqp_map *req;
+ struct hclge_tqp_map_cmd *req;
struct hclge_desc desc;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SET_TQP_MAP, false);
- req = (struct hclge_tqp_map *)desc.data;
+ req = (struct hclge_tqp_map_cmd *)desc.data;
req->tqp_id = cpu_to_le16(tqp_pid);
- req->tqp_vf = cpu_to_le16(func_id);
+ req->tqp_vf = func_id;
req->tqp_flag = !is_pf << HCLGE_TQP_MAP_TYPE_B |
1 << HCLGE_TQP_MAP_EN_B;
req->tqp_vid = cpu_to_le16(tqp_vid);
@@ -1324,23 +1348,27 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
return 0;
}
-static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
+static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
/* TX buffer size is unit by 128 byte */
#define HCLGE_BUF_SIZE_UNIT_SHIFT 7
#define HCLGE_BUF_SIZE_UPDATE_EN_MSK BIT(15)
- struct hclge_tx_buff_alloc *req;
+ struct hclge_tx_buff_alloc_cmd *req;
struct hclge_desc desc;
int ret;
u8 i;
- req = (struct hclge_tx_buff_alloc *)desc.data;
+ req = (struct hclge_tx_buff_alloc_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TX_BUFF_ALLOC, 0);
- for (i = 0; i < HCLGE_TC_NUM; i++)
+ for (i = 0; i < HCLGE_TC_NUM; i++) {
+ u32 buf_size = buf_alloc->priv_buf[i].tx_buf_size;
+
req->tx_pkt_buff[i] =
cpu_to_le16((buf_size >> HCLGE_BUF_SIZE_UNIT_SHIFT) |
HCLGE_BUF_SIZE_UPDATE_EN_MSK);
+ }
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
@@ -1352,9 +1380,10 @@ static int hclge_cmd_alloc_tx_buff(struct hclge_dev *hdev, u16 buf_size)
return 0;
}
-static int hclge_tx_buffer_alloc(struct hclge_dev *hdev, u32 buf_size)
+static int hclge_tx_buffer_alloc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- int ret = hclge_cmd_alloc_tx_buff(hdev, buf_size);
+ int ret = hclge_cmd_alloc_tx_buff(hdev, buf_alloc);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -1387,13 +1416,14 @@ static int hclge_get_pfc_enalbe_num(struct hclge_dev *hdev)
}
/* Get the number of pfc enabled TCs, which have private buffer */
-static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
+static int hclge_get_pfc_priv_num(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
int i, cnt = 0;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if ((hdev->tm_info.hw_pfc_map & BIT(i)) &&
priv->enable)
cnt++;
@@ -1403,13 +1433,14 @@ static int hclge_get_pfc_priv_num(struct hclge_dev *hdev)
}
/* Get the number of pfc disabled TCs, which have private buffer */
-static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
+static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
int i, cnt = 0;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
!(hdev->tm_info.hw_pfc_map & BIT(i)) &&
priv->enable)
@@ -1419,21 +1450,33 @@ static int hclge_get_no_pfc_priv_num(struct hclge_dev *hdev)
return cnt;
}
-static u32 hclge_get_rx_priv_buff_alloced(struct hclge_dev *hdev)
+static u32 hclge_get_rx_priv_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_priv_buf *priv;
u32 rx_priv = 0;
int i;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (priv->enable)
rx_priv += priv->buf_size;
}
return rx_priv;
}
-static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
+static u32 hclge_get_tx_buff_alloced(struct hclge_pkt_buf_alloc *buf_alloc)
+{
+ u32 i, total_tx_size = 0;
+
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++)
+ total_tx_size += buf_alloc->priv_buf[i].tx_buf_size;
+
+ return total_tx_size;
+}
+
+static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc,
+ u32 rx_all)
{
u32 shared_buf_min, shared_buf_tc, shared_std;
int tc_num, pfc_enable_num;
@@ -1454,46 +1497,74 @@ static bool hclge_is_rx_buf_ok(struct hclge_dev *hdev, u32 rx_all)
hdev->mps;
shared_std = max_t(u32, shared_buf_min, shared_buf_tc);
- rx_priv = hclge_get_rx_priv_buff_alloced(hdev);
+ rx_priv = hclge_get_rx_priv_buff_alloced(buf_alloc);
if (rx_all <= rx_priv + shared_std)
return false;
shared_buf = rx_all - rx_priv;
- hdev->s_buf.buf_size = shared_buf;
- hdev->s_buf.self.high = shared_buf;
- hdev->s_buf.self.low = 2 * hdev->mps;
+ buf_alloc->s_buf.buf_size = shared_buf;
+ buf_alloc->s_buf.self.high = shared_buf;
+ buf_alloc->s_buf.self.low = 2 * hdev->mps;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
if ((hdev->hw_tc_map & BIT(i)) &&
(hdev->tm_info.hw_pfc_map & BIT(i))) {
- hdev->s_buf.tc_thrd[i].low = hdev->mps;
- hdev->s_buf.tc_thrd[i].high = 2 * hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].low = hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].high = 2 * hdev->mps;
} else {
- hdev->s_buf.tc_thrd[i].low = 0;
- hdev->s_buf.tc_thrd[i].high = hdev->mps;
+ buf_alloc->s_buf.tc_thrd[i].low = 0;
+ buf_alloc->s_buf.tc_thrd[i].high = hdev->mps;
}
}
return true;
}
+static int hclge_tx_buffer_calc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
+{
+ u32 i, total_size;
+
+ total_size = hdev->pkt_buf_size;
+
+ /* alloc tx buffer for all enabled tc */
+ for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
+ struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
+
+ if (total_size < HCLGE_DEFAULT_TX_BUF)
+ return -ENOMEM;
+
+ if (hdev->hw_tc_map & BIT(i))
+ priv->tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+ else
+ priv->tx_buf_size = 0;
+
+ total_size -= priv->tx_buf_size;
+ }
+
+ return 0;
+}
+
/* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
* @hdev: pointer to struct hclge_dev
- * @tx_size: the allocated tx buffer for all TCs
+ * @buf_alloc: pointer to buffer calculation data
* @return: 0: calculate sucessful, negative: fail
*/
-int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
+static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- u32 rx_all = hdev->pkt_buf_size - tx_size;
+ u32 rx_all = hdev->pkt_buf_size;
int no_pfc_priv_num, pfc_priv_num;
struct hclge_priv_buf *priv;
int i;
+ rx_all -= hclge_get_tx_buff_alloced(buf_alloc);
+
/* When DCB is not supported, rx private
* buffer is not allocated.
*/
if (!hnae3_dev_dcb_supported(hdev)) {
- if (!hclge_is_rx_buf_ok(hdev, rx_all))
+ if (!hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return -ENOMEM;
return 0;
@@ -1501,7 +1572,7 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
/* step 1, try to alloc private buffer for all enabled tc */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i)) {
priv->enable = 1;
if (hdev->tm_info.hw_pfc_map & BIT(i)) {
@@ -1522,14 +1593,14 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
}
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 2, try to decrease the buffer size of
* no pfc TC's private buffer
*/
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
priv->enable = 0;
priv->wl.low = 0;
@@ -1552,18 +1623,18 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
}
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 3, try to reduce the number of pfc disabled TCs,
* which have private buffer
*/
/* get the total no pfc enable TC number, which have private buffer */
- no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev);
+ no_pfc_priv_num = hclge_get_no_pfc_priv_num(hdev, buf_alloc);
/* let the last to be cleared first */
for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
!(hdev->tm_info.hw_pfc_map & BIT(i))) {
@@ -1575,22 +1646,22 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
no_pfc_priv_num--;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) ||
no_pfc_priv_num == 0)
break;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
/* step 4, try to reduce the number of pfc enabled TCs
* which have private buffer.
*/
- pfc_priv_num = hclge_get_pfc_priv_num(hdev);
+ pfc_priv_num = hclge_get_pfc_priv_num(hdev, buf_alloc);
/* let the last to be cleared first */
for (i = HCLGE_MAX_TC_NUM - 1; i >= 0; i--) {
- priv = &hdev->priv_buf[i];
+ priv = &buf_alloc->priv_buf[i];
if (hdev->hw_tc_map & BIT(i) &&
hdev->tm_info.hw_pfc_map & BIT(i)) {
@@ -1602,38 +1673,39 @@ int hclge_rx_buffer_calc(struct hclge_dev *hdev, u32 tx_size)
pfc_priv_num--;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all) ||
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all) ||
pfc_priv_num == 0)
break;
}
- if (hclge_is_rx_buf_ok(hdev, rx_all))
+ if (hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all))
return 0;
return -ENOMEM;
}
-static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
+static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- struct hclge_rx_priv_buff *req;
+ struct hclge_rx_priv_buff_cmd *req;
struct hclge_desc desc;
int ret;
int i;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RX_PRIV_BUFF_ALLOC, false);
- req = (struct hclge_rx_priv_buff *)desc.data;
+ req = (struct hclge_rx_priv_buff_cmd *)desc.data;
/* Alloc private buffer TCs */
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- struct hclge_priv_buf *priv = &hdev->priv_buf[i];
+ struct hclge_priv_buf *priv = &buf_alloc->priv_buf[i];
req->buf_num[i] =
cpu_to_le16(priv->buf_size >> HCLGE_BUF_UNIT_S);
req->buf_num[i] |=
- cpu_to_le16(true << HCLGE_TC0_PRI_BUF_EN_B);
+ cpu_to_le16(1 << HCLGE_TC0_PRI_BUF_EN_B);
}
req->shared_buf =
- cpu_to_le16((hdev->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
+ cpu_to_le16((buf_alloc->s_buf.buf_size >> HCLGE_BUF_UNIT_S) |
(1 << HCLGE_TC0_PRI_BUF_EN_B));
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -1648,7 +1720,8 @@ static int hclge_rx_priv_buf_alloc(struct hclge_dev *hdev)
#define HCLGE_PRIV_ENABLE(a) ((a) > 0 ? 1 : 0)
-static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
+static int hclge_rx_priv_wl_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
struct hclge_rx_priv_wl_buf *req;
struct hclge_priv_buf *priv;
@@ -1668,7 +1741,9 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
desc[i].flag &= ~cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
for (j = 0; j < HCLGE_TC_NUM_ONE_DESC; j++) {
- priv = &hdev->priv_buf[i * HCLGE_TC_NUM_ONE_DESC + j];
+ u32 idx = i * HCLGE_TC_NUM_ONE_DESC + j;
+
+ priv = &buf_alloc->priv_buf[idx];
req->tc_wl[j].high =
cpu_to_le16(priv->wl.high >> HCLGE_BUF_UNIT_S);
req->tc_wl[j].high |=
@@ -1693,9 +1768,10 @@ static int hclge_rx_priv_wl_config(struct hclge_dev *hdev)
return 0;
}
-static int hclge_common_thrd_config(struct hclge_dev *hdev)
+static int hclge_common_thrd_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- struct hclge_shared_buf *s_buf = &hdev->s_buf;
+ struct hclge_shared_buf *s_buf = &buf_alloc->s_buf;
struct hclge_rx_com_thrd *req;
struct hclge_desc desc[2];
struct hclge_tc_thrd *tc;
@@ -1739,9 +1815,10 @@ static int hclge_common_thrd_config(struct hclge_dev *hdev)
return 0;
}
-static int hclge_common_wl_config(struct hclge_dev *hdev)
+static int hclge_common_wl_config(struct hclge_dev *hdev,
+ struct hclge_pkt_buf_alloc *buf_alloc)
{
- struct hclge_shared_buf *buf = &hdev->s_buf;
+ struct hclge_shared_buf *buf = &buf_alloc->s_buf;
struct hclge_rx_com_wl *req;
struct hclge_desc desc;
int ret;
@@ -1771,63 +1848,68 @@ static int hclge_common_wl_config(struct hclge_dev *hdev)
int hclge_buffer_alloc(struct hclge_dev *hdev)
{
- u32 tx_buf_size = HCLGE_DEFAULT_TX_BUF;
+ struct hclge_pkt_buf_alloc *pkt_buf;
int ret;
- hdev->priv_buf = devm_kmalloc_array(&hdev->pdev->dev, HCLGE_MAX_TC_NUM,
- sizeof(struct hclge_priv_buf),
- GFP_KERNEL | __GFP_ZERO);
- if (!hdev->priv_buf)
+ pkt_buf = kzalloc(sizeof(*pkt_buf), GFP_KERNEL);
+ if (!pkt_buf)
return -ENOMEM;
- ret = hclge_tx_buffer_alloc(hdev, tx_buf_size);
+ ret = hclge_tx_buffer_calc(hdev, pkt_buf);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "could not calc tx buffer size for all TCs %d\n", ret);
+ goto out;
+ }
+
+ ret = hclge_tx_buffer_alloc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not alloc tx buffers %d\n", ret);
- return ret;
+ goto out;
}
- ret = hclge_rx_buffer_calc(hdev, tx_buf_size);
+ ret = hclge_rx_buffer_calc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not calc rx priv buffer size for all TCs %d\n",
ret);
- return ret;
+ goto out;
}
- ret = hclge_rx_priv_buf_alloc(hdev);
+ ret = hclge_rx_priv_buf_alloc(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev, "could not alloc rx priv buffer %d\n",
ret);
- return ret;
+ goto out;
}
if (hnae3_dev_dcb_supported(hdev)) {
- ret = hclge_rx_priv_wl_config(hdev);
+ ret = hclge_rx_priv_wl_config(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not configure rx private waterline %d\n",
ret);
- return ret;
+ goto out;
}
- ret = hclge_common_thrd_config(hdev);
+ ret = hclge_common_thrd_config(hdev, pkt_buf);
if (ret) {
dev_err(&hdev->pdev->dev,
"could not configure common threshold %d\n",
ret);
- return ret;
+ goto out;
}
}
- ret = hclge_common_wl_config(hdev);
- if (ret) {
+ ret = hclge_common_wl_config(hdev, pkt_buf);
+ if (ret)
dev_err(&hdev->pdev->dev,
"could not configure common waterline %d\n", ret);
- return ret;
- }
- return 0;
+out:
+ kfree(pkt_buf);
+ return ret;
}
static int hclge_init_roce_base_info(struct hclge_vport *vport)
@@ -1932,11 +2014,11 @@ static void hclge_check_speed_dup(struct hclge_dev *hdev, int duplex, int speed)
int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex)
{
- struct hclge_config_mac_speed_dup *req;
+ struct hclge_config_mac_speed_dup_cmd *req;
struct hclge_desc desc;
int ret;
- req = (struct hclge_config_mac_speed_dup *)desc.data;
+ req = (struct hclge_config_mac_speed_dup_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, false);
@@ -2007,12 +2089,12 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed,
static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
u8 *duplex)
{
- struct hclge_query_an_speed_dup *req;
+ struct hclge_query_an_speed_dup_cmd *req;
struct hclge_desc desc;
int speed_tmp;
int ret;
- req = (struct hclge_query_an_speed_dup *)desc.data;
+ req = (struct hclge_query_an_speed_dup_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2040,11 +2122,11 @@ static int hclge_query_mac_an_speed_dup(struct hclge_dev *hdev, int *speed,
static int hclge_query_autoneg_result(struct hclge_dev *hdev)
{
struct hclge_mac *mac = &hdev->hw.mac;
- struct hclge_query_an_speed_dup *req;
+ struct hclge_query_an_speed_dup_cmd *req;
struct hclge_desc desc;
int ret;
- req = (struct hclge_query_an_speed_dup *)desc.data;
+ req = (struct hclge_query_an_speed_dup_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_AN_RESULT, true);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2061,14 +2143,16 @@ static int hclge_query_autoneg_result(struct hclge_dev *hdev)
static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable)
{
- struct hclge_config_auto_neg *req;
+ struct hclge_config_auto_neg_cmd *req;
struct hclge_desc desc;
+ u32 flag = 0;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_AN_MODE, false);
- req = (struct hclge_config_auto_neg *)desc.data;
- hnae_set_bit(req->cfg_an_cmd_flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+ req = (struct hclge_config_auto_neg_cmd *)desc.data;
+ hnae_set_bit(flag, HCLGE_MAC_CFG_AN_EN_B, !!enable);
+ req->cfg_an_cmd_flag = cpu_to_le32(flag);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
@@ -2146,7 +2230,7 @@ static void hclge_task_schedule(struct hclge_dev *hdev)
static int hclge_get_mac_link_status(struct hclge_dev *hdev)
{
- struct hclge_link_status *req;
+ struct hclge_link_status_cmd *req;
struct hclge_desc desc;
int link_status;
int ret;
@@ -2159,7 +2243,7 @@ static int hclge_get_mac_link_status(struct hclge_dev *hdev)
return ret;
}
- req = (struct hclge_link_status *)desc.data;
+ req = (struct hclge_link_status_cmd *)desc.data;
link_status = req->status & HCLGE_LINK_STATUS;
return !!link_status;
@@ -2266,11 +2350,11 @@ static int hclge_get_status(struct hnae3_handle *handle)
return hdev->hw.mac.link;
}
-static void hclge_service_timer(unsigned long data)
+static void hclge_service_timer(struct timer_list *t)
{
- struct hclge_dev *hdev = (struct hclge_dev *)data;
- (void)mod_timer(&hdev->service_timer, jiffies + HZ);
+ struct hclge_dev *hdev = from_timer(hdev, t, service_timer);
+ mod_timer(&hdev->service_timer, jiffies + HZ);
hclge_task_schedule(hdev);
}
@@ -2383,7 +2467,7 @@ static u32 hclge_get_rss_indir_size(struct hnae3_handle *handle)
static int hclge_get_rss_algo(struct hclge_dev *hdev)
{
- struct hclge_rss_config *req;
+ struct hclge_rss_config_cmd *req;
struct hclge_desc desc;
int rss_hash_algo;
int ret;
@@ -2397,7 +2481,7 @@ static int hclge_get_rss_algo(struct hclge_dev *hdev)
return ret;
}
- req = (struct hclge_rss_config *)desc.data;
+ req = (struct hclge_rss_config_cmd *)desc.data;
rss_hash_algo = (req->hash_config & HCLGE_RSS_HASH_ALGO_MASK);
if (rss_hash_algo == HCLGE_RSS_HASH_ALGO_TOEPLITZ)
@@ -2409,13 +2493,13 @@ static int hclge_get_rss_algo(struct hclge_dev *hdev)
static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
const u8 hfunc, const u8 *key)
{
- struct hclge_rss_config *req;
+ struct hclge_rss_config_cmd *req;
struct hclge_desc desc;
int key_offset;
int key_size;
int ret;
- req = (struct hclge_rss_config *)desc.data;
+ req = (struct hclge_rss_config_cmd *)desc.data;
for (key_offset = 0; key_offset < 3; key_offset++) {
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_GENERIC_CONFIG,
@@ -2446,19 +2530,20 @@ static int hclge_set_rss_algo_key(struct hclge_dev *hdev,
static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
{
- struct hclge_rss_indirection_table *req;
+ struct hclge_rss_indirection_table_cmd *req;
struct hclge_desc desc;
int i, j;
int ret;
- req = (struct hclge_rss_indirection_table *)desc.data;
+ req = (struct hclge_rss_indirection_table_cmd *)desc.data;
for (i = 0; i < HCLGE_RSS_CFG_TBL_NUM; i++) {
hclge_cmd_setup_basic_desc
(&desc, HCLGE_OPC_RSS_INDIR_TABLE, false);
- req->start_table_index = i * HCLGE_RSS_CFG_TBL_SIZE;
- req->rss_set_bitmap = HCLGE_RSS_SET_BITMAP_MSK;
+ req->start_table_index =
+ cpu_to_le16(i * HCLGE_RSS_CFG_TBL_SIZE);
+ req->rss_set_bitmap = cpu_to_le16(HCLGE_RSS_SET_BITMAP_MSK);
for (j = 0; j < HCLGE_RSS_CFG_TBL_SIZE; j++)
req->rss_result[j] =
@@ -2478,21 +2563,24 @@ static int hclge_set_rss_indir_table(struct hclge_dev *hdev, const u32 *indir)
static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid,
u16 *tc_size, u16 *tc_offset)
{
- struct hclge_rss_tc_mode *req;
+ struct hclge_rss_tc_mode_cmd *req;
struct hclge_desc desc;
int ret;
int i;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_TC_MODE, false);
- req = (struct hclge_rss_tc_mode *)desc.data;
+ req = (struct hclge_rss_tc_mode_cmd *)desc.data;
for (i = 0; i < HCLGE_MAX_TC_NUM; i++) {
- hnae_set_bit(req->rss_tc_mode[i], HCLGE_RSS_TC_VALID_B,
- (tc_valid[i] & 0x1));
- hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_SIZE_M,
+ u16 mode = 0;
+
+ hnae_set_bit(mode, HCLGE_RSS_TC_VALID_B, (tc_valid[i] & 0x1));
+ hnae_set_field(mode, HCLGE_RSS_TC_SIZE_M,
HCLGE_RSS_TC_SIZE_S, tc_size[i]);
- hnae_set_field(req->rss_tc_mode[i], HCLGE_RSS_TC_OFFSET_M,
+ hnae_set_field(mode, HCLGE_RSS_TC_OFFSET_M,
HCLGE_RSS_TC_OFFSET_S, tc_offset[i]);
+
+ req->rss_tc_mode[i] = cpu_to_le16(mode);
}
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -2507,15 +2595,13 @@ static int hclge_set_rss_tc_mode(struct hclge_dev *hdev, u16 *tc_valid,
static int hclge_set_rss_input_tuple(struct hclge_dev *hdev)
{
-#define HCLGE_RSS_INPUT_TUPLE_OTHER 0xf
-#define HCLGE_RSS_INPUT_TUPLE_SCTP 0x1f
- struct hclge_rss_input_tuple *req;
+ struct hclge_rss_input_tuple_cmd *req;
struct hclge_desc desc;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, false);
- req = (struct hclge_rss_input_tuple *)desc.data;
+ req = (struct hclge_rss_input_tuple_cmd *)desc.data;
req->ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
req->ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
req->ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
@@ -2589,6 +2675,161 @@ static int hclge_set_rss(struct hnae3_handle *handle, const u32 *indir,
return ret;
}
+static u8 hclge_get_rss_hash_bits(struct ethtool_rxnfc *nfc)
+{
+ u8 hash_sets = nfc->data & RXH_L4_B_0_1 ? HCLGE_S_PORT_BIT : 0;
+
+ if (nfc->data & RXH_L4_B_2_3)
+ hash_sets |= HCLGE_D_PORT_BIT;
+ else
+ hash_sets &= ~HCLGE_D_PORT_BIT;
+
+ if (nfc->data & RXH_IP_SRC)
+ hash_sets |= HCLGE_S_IP_BIT;
+ else
+ hash_sets &= ~HCLGE_S_IP_BIT;
+
+ if (nfc->data & RXH_IP_DST)
+ hash_sets |= HCLGE_D_IP_BIT;
+ else
+ hash_sets &= ~HCLGE_D_IP_BIT;
+
+ if (nfc->flow_type == SCTP_V4_FLOW || nfc->flow_type == SCTP_V6_FLOW)
+ hash_sets |= HCLGE_V_TAG_BIT;
+
+ return hash_sets;
+}
+
+static int hclge_set_rss_tuple(struct hnae3_handle *handle,
+ struct ethtool_rxnfc *nfc)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_rss_input_tuple_cmd *req;
+ struct hclge_desc desc;
+ u8 tuple_sets;
+ int ret;
+
+ if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3))
+ return -EINVAL;
+
+ req = (struct hclge_rss_input_tuple_cmd *)desc.data;
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Read rss tuple fail, status = %d\n", ret);
+ return ret;
+ }
+
+ hclge_cmd_reuse_desc(&desc, false);
+
+ tuple_sets = hclge_get_rss_hash_bits(nfc);
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ req->ipv4_tcp_en = tuple_sets;
+ break;
+ case TCP_V6_FLOW:
+ req->ipv6_tcp_en = tuple_sets;
+ break;
+ case UDP_V4_FLOW:
+ req->ipv4_udp_en = tuple_sets;
+ break;
+ case UDP_V6_FLOW:
+ req->ipv6_udp_en = tuple_sets;
+ break;
+ case SCTP_V4_FLOW:
+ req->ipv4_sctp_en = tuple_sets;
+ break;
+ case SCTP_V6_FLOW:
+ if ((nfc->data & RXH_L4_B_0_1) ||
+ (nfc->data & RXH_L4_B_2_3))
+ return -EINVAL;
+
+ req->ipv6_sctp_en = tuple_sets;
+ break;
+ case IPV4_FLOW:
+ req->ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+ break;
+ case IPV6_FLOW:
+ req->ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "Set rss tuple fail, status = %d\n", ret);
+
+ return ret;
+}
+
+static int hclge_get_rss_tuple(struct hnae3_handle *handle,
+ struct ethtool_rxnfc *nfc)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_rss_input_tuple_cmd *req;
+ struct hclge_desc desc;
+ u8 tuple_sets;
+ int ret;
+
+ nfc->data = 0;
+
+ req = (struct hclge_rss_input_tuple_cmd *)desc.data;
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RSS_INPUT_TUPLE, true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "Read rss tuple fail, status = %d\n", ret);
+ return ret;
+ }
+
+ switch (nfc->flow_type) {
+ case TCP_V4_FLOW:
+ tuple_sets = req->ipv4_tcp_en;
+ break;
+ case UDP_V4_FLOW:
+ tuple_sets = req->ipv4_udp_en;
+ break;
+ case TCP_V6_FLOW:
+ tuple_sets = req->ipv6_tcp_en;
+ break;
+ case UDP_V6_FLOW:
+ tuple_sets = req->ipv6_udp_en;
+ break;
+ case SCTP_V4_FLOW:
+ tuple_sets = req->ipv4_sctp_en;
+ break;
+ case SCTP_V6_FLOW:
+ tuple_sets = req->ipv6_sctp_en;
+ break;
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ tuple_sets = HCLGE_S_IP_BIT | HCLGE_D_IP_BIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (!tuple_sets)
+ return 0;
+
+ if (tuple_sets & HCLGE_D_PORT_BIT)
+ nfc->data |= RXH_L4_B_2_3;
+ if (tuple_sets & HCLGE_S_PORT_BIT)
+ nfc->data |= RXH_L4_B_0_1;
+ if (tuple_sets & HCLGE_D_IP_BIT)
+ nfc->data |= RXH_IP_DST;
+ if (tuple_sets & HCLGE_S_IP_BIT)
+ nfc->data |= RXH_IP_SRC;
+
+ return 0;
+}
+
static int hclge_get_tc_size(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -2597,7 +2838,7 @@ static int hclge_get_tc_size(struct hnae3_handle *handle)
return hdev->rss_size_max;
}
-static int hclge_rss_init_hw(struct hclge_dev *hdev)
+int hclge_rss_init_hw(struct hclge_dev *hdev)
{
const u8 hfunc = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
struct hclge_vport *vport = hdev->vport;
@@ -2682,7 +2923,7 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
struct hnae3_ring_chain_node *ring_chain)
{
struct hclge_dev *hdev = vport->back;
- struct hclge_ctrl_vector_chain *req;
+ struct hclge_ctrl_vector_chain_cmd *req;
struct hnae3_ring_chain_node *node;
struct hclge_desc desc;
int ret;
@@ -2690,20 +2931,21 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_ADD_RING_TO_VECTOR, false);
- req = (struct hclge_ctrl_vector_chain *)desc.data;
+ req = (struct hclge_ctrl_vector_chain_cmd *)desc.data;
req->int_vector_id = vector_id;
i = 0;
for (node = ring_chain; node; node = node->next) {
- hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M,
- HCLGE_INT_TYPE_S,
+ u16 type_and_id = 0;
+
+ hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S,
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
- hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
- HCLGE_TQP_ID_S, node->tqp_index);
- hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+ hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S,
+ node->tqp_index);
+ hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M,
HCLGE_INT_GL_IDX_S,
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
- req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+ req->tqp_type_and_id[i] = cpu_to_le16(type_and_id);
req->vfid = vport->vport_id;
if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
@@ -2739,9 +2981,9 @@ int hclge_map_vport_ring_to_vector(struct hclge_vport *vport, int vector_id,
return 0;
}
-int hclge_map_handle_ring_to_vector(struct hnae3_handle *handle,
- int vector,
- struct hnae3_ring_chain_node *ring_chain)
+static int hclge_map_handle_ring_to_vector(
+ struct hnae3_handle *handle, int vector,
+ struct hnae3_ring_chain_node *ring_chain)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
@@ -2763,7 +3005,7 @@ static int hclge_unmap_ring_from_vector(
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- struct hclge_ctrl_vector_chain *req;
+ struct hclge_ctrl_vector_chain_cmd *req;
struct hnae3_ring_chain_node *node;
struct hclge_desc desc;
int i, vector_id;
@@ -2778,21 +3020,22 @@ static int hclge_unmap_ring_from_vector(
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_DEL_RING_TO_VECTOR, false);
- req = (struct hclge_ctrl_vector_chain *)desc.data;
+ req = (struct hclge_ctrl_vector_chain_cmd *)desc.data;
req->int_vector_id = vector_id;
i = 0;
for (node = ring_chain; node; node = node->next) {
- hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_TYPE_M,
- HCLGE_INT_TYPE_S,
+ u16 type_and_id = 0;
+
+ hnae_set_field(type_and_id, HCLGE_INT_TYPE_M, HCLGE_INT_TYPE_S,
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
- hnae_set_field(req->tqp_type_and_id[i], HCLGE_TQP_ID_M,
- HCLGE_TQP_ID_S, node->tqp_index);
- hnae_set_field(req->tqp_type_and_id[i], HCLGE_INT_GL_IDX_M,
+ hnae_set_field(type_and_id, HCLGE_TQP_ID_M, HCLGE_TQP_ID_S,
+ node->tqp_index);
+ hnae_set_field(type_and_id, HCLGE_INT_GL_IDX_M,
HCLGE_INT_GL_IDX_S,
hnae_get_bit(node->flag, HNAE3_RING_TYPE_B));
- req->tqp_type_and_id[i] = cpu_to_le16(req->tqp_type_and_id[i]);
+ req->tqp_type_and_id[i] = cpu_to_le16(type_and_id);
req->vfid = vport->vport_id;
if (++i >= HCLGE_VECTOR_ELEMENTS_PER_CMD) {
@@ -2830,13 +3073,13 @@ static int hclge_unmap_ring_from_vector(
int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
struct hclge_promisc_param *param)
{
- struct hclge_promisc_cfg *req;
+ struct hclge_promisc_cfg_cmd *req;
struct hclge_desc desc;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PROMISC_MODE, false);
- req = (struct hclge_promisc_cfg *)desc.data;
+ req = (struct hclge_promisc_cfg_cmd *)desc.data;
req->vf_id = param->vf_id;
req->flag = (param->enable << HCLGE_PROMISC_EN_B);
@@ -2878,29 +3121,27 @@ static void hclge_set_promisc_mode(struct hnae3_handle *handle, u32 en)
static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
{
struct hclge_desc desc;
- struct hclge_config_mac_mode *req =
- (struct hclge_config_mac_mode *)desc.data;
+ struct hclge_config_mac_mode_cmd *req =
+ (struct hclge_config_mac_mode_cmd *)desc.data;
+ u32 loop_en = 0;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, false);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_TX_EN_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_EN_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_TX_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_PAD_RX_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_TX_B, 0);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_1588_RX_B, 0);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_APP_LP_B, 0);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_LINE_LP_B, 0);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_FCS_TX_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en, HCLGE_MAC_RX_FCS_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en,
- HCLGE_MAC_RX_FCS_STRIP_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en,
- HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en,
- HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
- hnae_set_bit(req->txrx_pad_fcs_loop_en,
- HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_TX_EN_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_RX_EN_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_PAD_TX_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_PAD_RX_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_1588_TX_B, 0);
+ hnae_set_bit(loop_en, HCLGE_MAC_1588_RX_B, 0);
+ hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
+ hnae_set_bit(loop_en, HCLGE_MAC_LINE_LP_B, 0);
+ hnae_set_bit(loop_en, HCLGE_MAC_FCS_TX_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B, enable);
+ hnae_set_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B, enable);
+ req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret)
@@ -2908,12 +3149,65 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
"mac enable fail, ret =%d.\n", ret);
}
+static int hclge_set_loopback(struct hnae3_handle *handle,
+ enum hnae3_loop loop_mode, bool en)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_config_mac_mode_cmd *req;
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_desc desc;
+ u32 loop_en;
+ int ret;
+
+ switch (loop_mode) {
+ case HNAE3_MAC_INTER_LOOP_MAC:
+ req = (struct hclge_config_mac_mode_cmd *)&desc.data[0];
+ /* 1 Read out the MAC mode config at first */
+ hclge_cmd_setup_basic_desc(&desc,
+ HCLGE_OPC_CONFIG_MAC_MODE,
+ true);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "mac loopback get fail, ret =%d.\n",
+ ret);
+ return ret;
+ }
+
+ /* 2 Then setup the loopback flag */
+ loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
+ if (en)
+ hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 1);
+ else
+ hnae_set_bit(loop_en, HCLGE_MAC_APP_LP_B, 0);
+
+ req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
+
+ /* 3 Config mac work mode with loopback flag
+ * and its original configure parameters
+ */
+ hclge_cmd_reuse_desc(&desc, false);
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret)
+ dev_err(&hdev->pdev->dev,
+ "mac loopback set fail, ret =%d.\n", ret);
+ break;
+ default:
+ ret = -ENOTSUPP;
+ dev_err(&hdev->pdev->dev,
+ "loop_mode %d is not supported\n", loop_mode);
+ break;
+ }
+
+ return ret;
+}
+
static int hclge_tqp_enable(struct hclge_dev *hdev, int tqp_id,
int stream_id, bool enable)
{
struct hclge_desc desc;
- struct hclge_cfg_com_tqp_queue *req =
- (struct hclge_cfg_com_tqp_queue *)desc.data;
+ struct hclge_cfg_com_tqp_queue_cmd *req =
+ (struct hclge_cfg_com_tqp_queue_cmd *)desc.data;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false);
@@ -2963,7 +3257,7 @@ static int hclge_ae_start(struct hnae3_handle *handle)
/* mac enable */
hclge_cfg_mac_mode(hdev, true);
clear_bit(HCLGE_STATE_DOWN, &hdev->state);
- (void)mod_timer(&hdev->service_timer, jiffies + HZ);
+ mod_timer(&hdev->service_timer, jiffies + HZ);
ret = hclge_mac_start_phy(hdev);
if (ret)
@@ -3077,16 +3371,16 @@ static int hclge_update_desc_vfid(struct hclge_desc *desc, int vfid, bool clr)
word_num = vfid / 32;
bit_num = vfid % 32;
if (clr)
- desc[1].data[word_num] &= ~(1 << bit_num);
+ desc[1].data[word_num] &= cpu_to_le32(~(1 << bit_num));
else
- desc[1].data[word_num] |= (1 << bit_num);
+ desc[1].data[word_num] |= cpu_to_le32(1 << bit_num);
} else {
word_num = (vfid - 192) / 32;
bit_num = vfid % 32;
if (clr)
- desc[2].data[word_num] &= ~(1 << bit_num);
+ desc[2].data[word_num] &= cpu_to_le32(~(1 << bit_num));
else
- desc[2].data[word_num] |= (1 << bit_num);
+ desc[2].data[word_num] |= cpu_to_le32(1 << bit_num);
}
return 0;
@@ -3106,7 +3400,7 @@ static bool hclge_is_all_function_id_zero(struct hclge_desc *desc)
return true;
}
-static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req,
+static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry_cmd *new_req,
const u8 *addr)
{
const unsigned char *mac_addr = addr;
@@ -3118,8 +3412,8 @@ static void hclge_prepare_mac_addr(struct hclge_mac_vlan_tbl_entry *new_req,
new_req->mac_addr_lo16 = cpu_to_le16(low_val & 0xffff);
}
-u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport,
- const u8 *addr)
+static u16 hclge_get_mac_addr_to_mta_index(struct hclge_vport *vport,
+ const u8 *addr)
{
u16 high_val = addr[1] | (addr[0] << 8);
struct hclge_dev *hdev = vport->back;
@@ -3133,11 +3427,11 @@ static int hclge_set_mta_filter_mode(struct hclge_dev *hdev,
enum hclge_mta_dmac_sel_type mta_mac_sel,
bool enable)
{
- struct hclge_mta_filter_mode *req;
+ struct hclge_mta_filter_mode_cmd *req;
struct hclge_desc desc;
int ret;
- req = (struct hclge_mta_filter_mode *)desc.data;
+ req = (struct hclge_mta_filter_mode_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_MODE_CFG, false);
hnae_set_bit(req->dmac_sel_en, HCLGE_CFG_MTA_MAC_EN_B,
@@ -3160,11 +3454,11 @@ int hclge_cfg_func_mta_filter(struct hclge_dev *hdev,
u8 func_id,
bool enable)
{
- struct hclge_cfg_func_mta_filter *req;
+ struct hclge_cfg_func_mta_filter_cmd *req;
struct hclge_desc desc;
int ret;
- req = (struct hclge_cfg_func_mta_filter *)desc.data;
+ req = (struct hclge_cfg_func_mta_filter_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_MAC_FUNC_CFG, false);
hnae_set_bit(req->accept, HCLGE_CFG_FUNC_MTA_ACCEPT_B,
@@ -3187,17 +3481,18 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport,
bool enable)
{
struct hclge_dev *hdev = vport->back;
- struct hclge_cfg_func_mta_item *req;
+ struct hclge_cfg_func_mta_item_cmd *req;
struct hclge_desc desc;
+ u16 item_idx = 0;
int ret;
- req = (struct hclge_cfg_func_mta_item *)desc.data;
+ req = (struct hclge_cfg_func_mta_item_cmd *)desc.data;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MTA_TBL_ITEM_CFG, false);
hnae_set_bit(req->accept, HCLGE_CFG_MTA_ITEM_ACCEPT_B, enable);
- hnae_set_field(req->item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
+ hnae_set_field(item_idx, HCLGE_CFG_MTA_ITEM_IDX_M,
HCLGE_CFG_MTA_ITEM_IDX_S, idx);
- req->item_idx = cpu_to_le16(req->item_idx);
+ req->item_idx = cpu_to_le16(item_idx);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
@@ -3211,16 +3506,17 @@ static int hclge_set_mta_table_item(struct hclge_vport *vport,
}
static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport,
- struct hclge_mac_vlan_tbl_entry *req)
+ struct hclge_mac_vlan_tbl_entry_cmd *req)
{
struct hclge_dev *hdev = vport->back;
struct hclge_desc desc;
u8 resp_code;
+ u16 retval;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_REMOVE, false);
- memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry));
+ memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
if (ret) {
@@ -3229,19 +3525,21 @@ static int hclge_remove_mac_vlan_tbl(struct hclge_vport *vport,
ret);
return ret;
}
- resp_code = (desc.data[0] >> 8) & 0xff;
+ resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff;
+ retval = le16_to_cpu(desc.retval);
- return hclge_get_mac_vlan_cmd_status(vport, desc.retval, resp_code,
+ return hclge_get_mac_vlan_cmd_status(vport, retval, resp_code,
HCLGE_MAC_VLAN_REMOVE);
}
static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
- struct hclge_mac_vlan_tbl_entry *req,
+ struct hclge_mac_vlan_tbl_entry_cmd *req,
struct hclge_desc *desc,
bool is_mc)
{
struct hclge_dev *hdev = vport->back;
u8 resp_code;
+ u16 retval;
int ret;
hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_MAC_VLAN_ADD, true);
@@ -3249,7 +3547,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
memcpy(desc[0].data,
req,
- sizeof(struct hclge_mac_vlan_tbl_entry));
+ sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
hclge_cmd_setup_basic_desc(&desc[1],
HCLGE_OPC_MAC_VLAN_ADD,
true);
@@ -3261,7 +3559,7 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
} else {
memcpy(desc[0].data,
req,
- sizeof(struct hclge_mac_vlan_tbl_entry));
+ sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
ret = hclge_cmd_send(&hdev->hw, desc, 1);
}
if (ret) {
@@ -3270,19 +3568,21 @@ static int hclge_lookup_mac_vlan_tbl(struct hclge_vport *vport,
ret);
return ret;
}
- resp_code = (desc[0].data[0] >> 8) & 0xff;
+ resp_code = (le32_to_cpu(desc[0].data[0]) >> 8) & 0xff;
+ retval = le16_to_cpu(desc[0].retval);
- return hclge_get_mac_vlan_cmd_status(vport, desc[0].retval, resp_code,
+ return hclge_get_mac_vlan_cmd_status(vport, retval, resp_code,
HCLGE_MAC_VLAN_LKUP);
}
static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
- struct hclge_mac_vlan_tbl_entry *req,
+ struct hclge_mac_vlan_tbl_entry_cmd *req,
struct hclge_desc *mc_desc)
{
struct hclge_dev *hdev = vport->back;
int cfg_status;
u8 resp_code;
+ u16 retval;
int ret;
if (!mc_desc) {
@@ -3291,10 +3591,13 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
hclge_cmd_setup_basic_desc(&desc,
HCLGE_OPC_MAC_VLAN_ADD,
false);
- memcpy(desc.data, req, sizeof(struct hclge_mac_vlan_tbl_entry));
+ memcpy(desc.data, req,
+ sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- resp_code = (desc.data[0] >> 8) & 0xff;
- cfg_status = hclge_get_mac_vlan_cmd_status(vport, desc.retval,
+ resp_code = (le32_to_cpu(desc.data[0]) >> 8) & 0xff;
+ retval = le16_to_cpu(desc.retval);
+
+ cfg_status = hclge_get_mac_vlan_cmd_status(vport, retval,
resp_code,
HCLGE_MAC_VLAN_ADD);
} else {
@@ -3305,11 +3608,12 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_WR);
mc_desc[2].flag &= cpu_to_le16(~HCLGE_CMD_FLAG_NEXT);
memcpy(mc_desc[0].data, req,
- sizeof(struct hclge_mac_vlan_tbl_entry));
+ sizeof(struct hclge_mac_vlan_tbl_entry_cmd));
ret = hclge_cmd_send(&hdev->hw, mc_desc, 3);
- resp_code = (mc_desc[0].data[0] >> 8) & 0xff;
- cfg_status = hclge_get_mac_vlan_cmd_status(vport,
- mc_desc[0].retval,
+ resp_code = (le32_to_cpu(mc_desc[0].data[0]) >> 8) & 0xff;
+ retval = le16_to_cpu(mc_desc[0].retval);
+
+ cfg_status = hclge_get_mac_vlan_cmd_status(vport, retval,
resp_code,
HCLGE_MAC_VLAN_ADD);
}
@@ -3336,8 +3640,9 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
const unsigned char *addr)
{
struct hclge_dev *hdev = vport->back;
- struct hclge_mac_vlan_tbl_entry req;
+ struct hclge_mac_vlan_tbl_entry_cmd req;
enum hclge_cmd_status status;
+ u16 egress_port = 0;
/* mac addr check */
if (is_zero_ether_addr(addr) ||
@@ -3357,15 +3662,15 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
hnae_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT1_EN_B, 0);
hnae_set_bit(req.mc_mac_en, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
- hnae_set_bit(req.egress_port,
- HCLGE_MAC_EPORT_SW_EN_B, 0);
- hnae_set_bit(req.egress_port,
- HCLGE_MAC_EPORT_TYPE_B, 0);
- hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_VFID_M,
+
+ hnae_set_bit(egress_port, HCLGE_MAC_EPORT_SW_EN_B, 0);
+ hnae_set_bit(egress_port, HCLGE_MAC_EPORT_TYPE_B, 0);
+ hnae_set_field(egress_port, HCLGE_MAC_EPORT_VFID_M,
HCLGE_MAC_EPORT_VFID_S, vport->vport_id);
- hnae_set_field(req.egress_port, HCLGE_MAC_EPORT_PFID_M,
+ hnae_set_field(egress_port, HCLGE_MAC_EPORT_PFID_M,
HCLGE_MAC_EPORT_PFID_S, 0);
- req.egress_port = cpu_to_le16(req.egress_port);
+
+ req.egress_port = cpu_to_le16(egress_port);
hclge_prepare_mac_addr(&req, addr);
@@ -3386,7 +3691,7 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
const unsigned char *addr)
{
struct hclge_dev *hdev = vport->back;
- struct hclge_mac_vlan_tbl_entry req;
+ struct hclge_mac_vlan_tbl_entry_cmd req;
enum hclge_cmd_status status;
/* mac addr check */
@@ -3420,7 +3725,7 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport,
const unsigned char *addr)
{
struct hclge_dev *hdev = vport->back;
- struct hclge_mac_vlan_tbl_entry req;
+ struct hclge_mac_vlan_tbl_entry_cmd req;
struct hclge_desc desc[3];
u16 tbl_idx;
int status;
@@ -3471,7 +3776,7 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport,
const unsigned char *addr)
{
struct hclge_dev *hdev = vport->back;
- struct hclge_mac_vlan_tbl_entry req;
+ struct hclge_mac_vlan_tbl_entry_cmd req;
enum hclge_cmd_status status;
struct hclge_desc desc[3];
u16 tbl_idx;
@@ -3554,13 +3859,13 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p)
static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type,
bool filter_en)
{
- struct hclge_vlan_filter_ctrl *req;
+ struct hclge_vlan_filter_ctrl_cmd *req;
struct hclge_desc desc;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false);
- req = (struct hclge_vlan_filter_ctrl *)desc.data;
+ req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data;
req->vlan_type = vlan_type;
req->vlan_fe = filter_en;
@@ -3578,8 +3883,8 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
bool is_kill, u16 vlan, u8 qos, __be16 proto)
{
#define HCLGE_MAX_VF_BYTES 16
- struct hclge_vlan_filter_vf_cfg *req0;
- struct hclge_vlan_filter_vf_cfg *req1;
+ struct hclge_vlan_filter_vf_cfg_cmd *req0;
+ struct hclge_vlan_filter_vf_cfg_cmd *req1;
struct hclge_desc desc[2];
u8 vf_byte_val;
u8 vf_byte_off;
@@ -3595,10 +3900,10 @@ int hclge_set_vf_vlan_common(struct hclge_dev *hdev, int vfid,
vf_byte_off = vfid / 8;
vf_byte_val = 1 << (vfid % 8);
- req0 = (struct hclge_vlan_filter_vf_cfg *)desc[0].data;
- req1 = (struct hclge_vlan_filter_vf_cfg *)desc[1].data;
+ req0 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[0].data;
+ req1 = (struct hclge_vlan_filter_vf_cfg_cmd *)desc[1].data;
- req0->vlan_id = vlan;
+ req0->vlan_id = cpu_to_le16(vlan);
req0->vlan_cfg = is_kill;
if (vf_byte_off < HCLGE_MAX_VF_BYTES)
@@ -3639,7 +3944,7 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
- struct hclge_vlan_filter_pf_cfg *req;
+ struct hclge_vlan_filter_pf_cfg_cmd *req;
struct hclge_desc desc;
u8 vlan_offset_byte_val;
u8 vlan_offset_byte;
@@ -3652,7 +3957,7 @@ static int hclge_set_port_vlan_filter(struct hnae3_handle *handle,
vlan_offset_byte = (vlan_id % 160) / 8;
vlan_offset_byte_val = 1 << (vlan_id % 8);
- req = (struct hclge_vlan_filter_pf_cfg *)desc.data;
+ req = (struct hclge_vlan_filter_pf_cfg_cmd *)desc.data;
req->vlan_offset = vlan_offset_160;
req->vlan_cfg = is_kill;
req->vlan_offset_bitmap[vlan_offset_byte] = vlan_offset_byte_val;
@@ -3714,7 +4019,7 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev)
static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
{
struct hclge_vport *vport = hclge_get_vport(handle);
- struct hclge_config_max_frm_size *req;
+ struct hclge_config_max_frm_size_cmd *req;
struct hclge_dev *hdev = vport->back;
struct hclge_desc desc;
int ret;
@@ -3725,7 +4030,7 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
hdev->mps = new_mtu;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAX_FRM_SIZE, false);
- req = (struct hclge_config_max_frm_size *)desc.data;
+ req = (struct hclge_config_max_frm_size_cmd *)desc.data;
req->max_frm_size = cpu_to_le16(new_mtu);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -3740,13 +4045,13 @@ static int hclge_set_mtu(struct hnae3_handle *handle, int new_mtu)
static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
bool enable)
{
- struct hclge_reset_tqp_queue *req;
+ struct hclge_reset_tqp_queue_cmd *req;
struct hclge_desc desc;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, false);
- req = (struct hclge_reset_tqp_queue *)desc.data;
+ req = (struct hclge_reset_tqp_queue_cmd *)desc.data;
req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
hnae_set_bit(req->reset_req, HCLGE_TQP_RESET_B, enable);
@@ -3762,13 +4067,13 @@ static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
static int hclge_get_reset_status(struct hclge_dev *hdev, u16 queue_id)
{
- struct hclge_reset_tqp_queue *req;
+ struct hclge_reset_tqp_queue_cmd *req;
struct hclge_desc desc;
int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_RESET_TQP_QUEUE, true);
- req = (struct hclge_reset_tqp_queue *)desc.data;
+ req = (struct hclge_reset_tqp_queue_cmd *)desc.data;
req->tqp_id = cpu_to_le16(queue_id & HCLGE_RING_ID_MASK);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
@@ -4182,8 +4487,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
- setup_timer(&hdev->service_timer, hclge_service_timer,
- (unsigned long)hdev);
+ hclge_dcb_ops_set(hdev);
+
+ timer_setup(&hdev->service_timer, hclge_service_timer, 0);
INIT_WORK(&hdev->service_task, hclge_service_task);
set_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state);
@@ -4210,7 +4516,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
if (IS_ENABLED(CONFIG_PCI_IOV))
hclge_disable_sriov(hdev);
- if (hdev->service_timer.data)
+ if (hdev->service_timer.function)
del_timer_sync(&hdev->service_timer);
if (hdev->service_task.func)
cancel_work_sync(&hdev->service_task);
@@ -4232,6 +4538,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.unmap_ring_from_vector = hclge_unmap_ring_from_vector,
.get_vector = hclge_get_vector,
.set_promisc_mode = hclge_set_promisc_mode,
+ .set_loopback = hclge_set_loopback,
.start = hclge_ae_start,
.stop = hclge_ae_stop,
.get_status = hclge_get_status,
@@ -4243,6 +4550,8 @@ static const struct hnae3_ae_ops hclge_ops = {
.get_rss_indir_size = hclge_get_rss_indir_size,
.get_rss = hclge_get_rss,
.set_rss = hclge_set_rss,
+ .set_rss_tuple = hclge_set_rss_tuple,
+ .get_rss_tuple = hclge_get_rss_tuple,
.get_tc_size = hclge_get_tc_size,
.get_mac_addr = hclge_get_mac_addr,
.set_mac_addr = hclge_set_mac_addr,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 9fcfd9395424..bca4430bb7e7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -32,7 +32,7 @@
#define HCLGE_VECTOR_VF_OFFSET 0x100000
#define HCLGE_RSS_IND_TBL_SIZE 512
-#define HCLGE_RSS_SET_BITMAP_MSK 0xffff
+#define HCLGE_RSS_SET_BITMAP_MSK GENMASK(15, 0)
#define HCLGE_RSS_KEY_SIZE 40
#define HCLGE_RSS_HASH_ALGO_TOEPLITZ 0
#define HCLGE_RSS_HASH_ALGO_SIMPLE 1
@@ -41,6 +41,14 @@
#define HCLGE_RSS_CFG_TBL_NUM \
(HCLGE_RSS_IND_TBL_SIZE / HCLGE_RSS_CFG_TBL_SIZE)
+#define HCLGE_RSS_INPUT_TUPLE_OTHER GENMASK(3, 0)
+#define HCLGE_RSS_INPUT_TUPLE_SCTP GENMASK(4, 0)
+#define HCLGE_D_PORT_BIT BIT(0)
+#define HCLGE_S_PORT_BIT BIT(1)
+#define HCLGE_D_IP_BIT BIT(2)
+#define HCLGE_S_IP_BIT BIT(3)
+#define HCLGE_V_TAG_BIT BIT(4)
+
#define HCLGE_RSS_TC_SIZE_0 1
#define HCLGE_RSS_TC_SIZE_1 2
#define HCLGE_RSS_TC_SIZE_2 4
@@ -65,7 +73,7 @@
#define HCLGE_PHY_CSS_REG 17
#define HCLGE_PHY_MDIX_CTRL_S (5)
-#define HCLGE_PHY_MDIX_CTRL_M (3 << HCLGE_PHY_MDIX_CTRL_S)
+#define HCLGE_PHY_MDIX_CTRL_M GENMASK(6, 5)
#define HCLGE_PHY_MDIX_STATUS_B (6)
#define HCLGE_PHY_SPEED_DUP_RESOLVE_B (11)
@@ -421,8 +429,11 @@ struct hclge_dev {
#define HCLGE_FLAG_TC_BASE_SCH_MODE 1
#define HCLGE_FLAG_VNET_BASE_SCH_MODE 2
u8 tx_sch_mode;
+ u8 tc_max;
+ u8 pfc_max;
u8 default_up;
+ u8 dcbx_cap;
struct hclge_tm_info tm_info;
u16 num_msi;
@@ -459,12 +470,11 @@ struct hclge_dev {
#define HCLGE_FLAG_MAIN 0x00000004
#define HCLGE_FLAG_DCB_CAPABLE 0x00000008
#define HCLGE_FLAG_DCB_ENABLE 0x00000010
+#define HCLGE_FLAG_MQPRIO_ENABLE 0x00000020
u32 flag;
u32 pkt_buf_size; /* Total pf buf size for tx/rx */
u32 mps; /* Max packet size */
- struct hclge_priv_buf *priv_buf;
- struct hclge_shared_buf s_buf;
enum hclge_mta_dmac_sel_type mta_mac_sel_type;
bool enable_mta; /* Mutilcast filter enable */
@@ -517,4 +527,7 @@ static inline int hclge_get_queue_id(struct hnae3_queue *queue)
int hclge_cfg_mac_speed_dup(struct hclge_dev *hdev, int speed, u8 duplex);
int hclge_set_vf_vlan_common(struct hclge_dev *vport, int vfid,
bool is_kill, u16 vlan, u8 qos, __be16 proto);
+
+int hclge_buffer_alloc(struct hclge_dev *hdev);
+int hclge_rss_init_hw(struct hclge_dev *hdev);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 73a75d7cc551..1ae6eae82eb3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -124,6 +124,20 @@ static int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx)
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
+static int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
+ u8 pfc_bitmap)
+{
+ struct hclge_desc desc;
+ struct hclge_pfc_en_cmd *pfc = (struct hclge_pfc_en_cmd *)&desc.data;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_PFC_PAUSE_EN, false);
+
+ pfc->tx_rx_en_bitmap = tx_rx_bitmap;
+ pfc->pri_en_bitmap = pfc_bitmap;
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
static int hclge_fill_pri_array(struct hclge_dev *hdev, u8 *pri, u8 pri_id)
{
u8 tc;
@@ -269,6 +283,7 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
struct hclge_pg_shapping_cmd *shap_cfg_cmd;
enum hclge_opcode_type opcode;
struct hclge_desc desc;
+ u32 shapping_para = 0;
opcode = bucket ? HCLGE_OPC_TM_PG_P_SHAPPING :
HCLGE_OPC_TM_PG_C_SHAPPING;
@@ -278,11 +293,41 @@ static int hclge_tm_pg_shapping_cfg(struct hclge_dev *hdev,
shap_cfg_cmd->pg_id = pg_id;
- hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_B, ir_b);
- hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_U, ir_u);
- hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, IR_S, ir_s);
- hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_B, bs_b);
- hclge_tm_set_field(shap_cfg_cmd->pg_shapping_para, BS_S, bs_s);
+ hclge_tm_set_field(shapping_para, IR_B, ir_b);
+ hclge_tm_set_field(shapping_para, IR_U, ir_u);
+ hclge_tm_set_field(shapping_para, IR_S, ir_s);
+ hclge_tm_set_field(shapping_para, BS_B, bs_b);
+ hclge_tm_set_field(shapping_para, BS_S, bs_s);
+
+ shap_cfg_cmd->pg_shapping_para = cpu_to_le32(shapping_para);
+
+ return hclge_cmd_send(&hdev->hw, &desc, 1);
+}
+
+static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
+{
+ struct hclge_port_shapping_cmd *shap_cfg_cmd;
+ struct hclge_desc desc;
+ u32 shapping_para = 0;
+ u8 ir_u, ir_b, ir_s;
+ int ret;
+
+ ret = hclge_shaper_para_calc(HCLGE_ETHER_MAX_RATE,
+ HCLGE_SHAPER_LVL_PORT,
+ &ir_b, &ir_u, &ir_s);
+ if (ret)
+ return ret;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_PORT_SHAPPING, false);
+ shap_cfg_cmd = (struct hclge_port_shapping_cmd *)desc.data;
+
+ hclge_tm_set_field(shapping_para, IR_B, ir_b);
+ hclge_tm_set_field(shapping_para, IR_U, ir_u);
+ hclge_tm_set_field(shapping_para, IR_S, ir_s);
+ hclge_tm_set_field(shapping_para, BS_B, HCLGE_SHAPER_BS_U_DEF);
+ hclge_tm_set_field(shapping_para, BS_S, HCLGE_SHAPER_BS_S_DEF);
+
+ shap_cfg_cmd->port_shapping_para = cpu_to_le32(shapping_para);
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
@@ -295,6 +340,7 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
struct hclge_pri_shapping_cmd *shap_cfg_cmd;
enum hclge_opcode_type opcode;
struct hclge_desc desc;
+ u32 shapping_para = 0;
opcode = bucket ? HCLGE_OPC_TM_PRI_P_SHAPPING :
HCLGE_OPC_TM_PRI_C_SHAPPING;
@@ -305,11 +351,13 @@ static int hclge_tm_pri_shapping_cfg(struct hclge_dev *hdev,
shap_cfg_cmd->pri_id = pri_id;
- hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_B, ir_b);
- hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_U, ir_u);
- hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, IR_S, ir_s);
- hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_B, bs_b);
- hclge_tm_set_field(shap_cfg_cmd->pri_shapping_para, BS_S, bs_s);
+ hclge_tm_set_field(shapping_para, IR_B, ir_b);
+ hclge_tm_set_field(shapping_para, IR_U, ir_u);
+ hclge_tm_set_field(shapping_para, IR_S, ir_s);
+ hclge_tm_set_field(shapping_para, BS_B, bs_b);
+ hclge_tm_set_field(shapping_para, BS_S, bs_s);
+
+ shap_cfg_cmd->pri_shapping_para = cpu_to_le32(shapping_para);
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
@@ -346,13 +394,13 @@ static int hclge_tm_pri_schd_mode_cfg(struct hclge_dev *hdev, u8 pri_id)
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id)
+static int hclge_tm_qs_schd_mode_cfg(struct hclge_dev *hdev, u16 qs_id, u8 mode)
{
struct hclge_desc desc;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_QS_SCH_MODE_CFG, false);
- if (hdev->tm_info.tc_info[qs_id].tc_sch_mode == HCLGE_SCH_MODE_DWRR)
+ if (mode == HCLGE_SCH_MODE_DWRR)
desc.data[1] = cpu_to_le32(HCLGE_TM_TX_SCHD_DWRR_MSK);
else
desc.data[1] = 0;
@@ -444,7 +492,11 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
hdev->tm_info.prio_tc[i] =
(i >= hdev->tm_info.num_tc) ? 0 : i;
- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+ /* DCB is enabled if we have more than 1 TC */
+ if (hdev->tm_info.num_tc > 1)
+ hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
+ else
+ hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
}
static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
@@ -470,6 +522,24 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
}
}
+static void hclge_pfc_info_init(struct hclge_dev *hdev)
+{
+ if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) {
+ if (hdev->fc_mode_last_time == HCLGE_FC_PFC)
+ dev_warn(&hdev->pdev->dev,
+ "DCB is disable, but last mode is FC_PFC\n");
+
+ hdev->tm_info.fc_mode = hdev->fc_mode_last_time;
+ } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) {
+ /* fc_mode_last_time record the last fc_mode when
+ * DCB is enabled, so that fc_mode can be set to
+ * the correct value when DCB is disabled.
+ */
+ hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ hdev->tm_info.fc_mode = HCLGE_FC_PFC;
+ }
+}
+
static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
{
if ((hdev->tx_sch_mode != HCLGE_FLAG_TC_BASE_SCH_MODE) &&
@@ -482,8 +552,7 @@ static int hclge_tm_schd_info_init(struct hclge_dev *hdev)
hclge_tm_vport_info_update(hdev);
- hdev->tm_info.fc_mode = HCLGE_FC_NONE;
- hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ hclge_pfc_info_init(hdev);
return 0;
}
@@ -596,17 +665,18 @@ static int hclge_tm_pri_q_qs_cfg(struct hclge_dev *hdev)
{
struct hclge_vport *vport = hdev->vport;
int ret;
- u32 i;
+ u32 i, k;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
/* Cfg qs -> pri mapping, one by one mapping */
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
- ret = hclge_tm_qs_to_pri_map_cfg(hdev, i, i);
- if (ret)
- return ret;
- }
+ for (k = 0; k < hdev->num_alloc_vport; k++)
+ for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ ret = hclge_tm_qs_to_pri_map_cfg(
+ hdev, vport[k].qs_offset + i, i);
+ if (ret)
+ return ret;
+ }
} else if (hdev->tx_sch_mode == HCLGE_FLAG_VNET_BASE_SCH_MODE) {
- int k;
/* Cfg qs -> pri mapping, qs = tc, pri = vf, 8 qs -> 1 pri */
for (k = 0; k < hdev->num_alloc_vport; k++)
for (i = 0; i < HNAE3_MAX_TC; i++) {
@@ -755,10 +825,11 @@ static int hclge_tm_pri_shaper_cfg(struct hclge_dev *hdev)
static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
{
+ struct hclge_vport *vport = hdev->vport;
struct hclge_pg_info *pg_info;
u8 dwrr;
int ret;
- u32 i;
+ u32 i, k;
for (i = 0; i < hdev->tm_info.num_tc; i++) {
pg_info =
@@ -769,9 +840,13 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev)
if (ret)
return ret;
- ret = hclge_tm_qs_weight_cfg(hdev, i, dwrr);
- if (ret)
- return ret;
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ ret = hclge_tm_qs_weight_cfg(
+ hdev, vport[k].qs_offset + i,
+ vport[k].dwrr);
+ if (ret)
+ return ret;
+ }
}
return 0;
@@ -835,10 +910,14 @@ static int hclge_tm_pri_dwrr_cfg(struct hclge_dev *hdev)
return 0;
}
-static int hclge_tm_map_cfg(struct hclge_dev *hdev)
+int hclge_tm_map_cfg(struct hclge_dev *hdev)
{
int ret;
+ ret = hclge_up_to_tc_map(hdev);
+ if (ret)
+ return ret;
+
ret = hclge_tm_pg_to_pri_map(hdev);
if (ret)
return ret;
@@ -850,6 +929,10 @@ static int hclge_tm_shaper_cfg(struct hclge_dev *hdev)
{
int ret;
+ ret = hclge_tm_port_shaper_cfg(hdev);
+ if (ret)
+ return ret;
+
ret = hclge_tm_pg_shaper_cfg(hdev);
if (ret)
return ret;
@@ -898,7 +981,10 @@ static int hclge_tm_schd_mode_vnet_base_cfg(struct hclge_vport *vport)
return ret;
for (i = 0; i < kinfo->num_tc; i++) {
- ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i);
+ u8 sch_mode = hdev->tm_info.tc_info[i].tc_sch_mode;
+
+ ret = hclge_tm_qs_schd_mode_cfg(hdev, vport->qs_offset + i,
+ sch_mode);
if (ret)
return ret;
}
@@ -910,7 +996,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
{
struct hclge_vport *vport = hdev->vport;
int ret;
- u8 i;
+ u8 i, k;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
for (i = 0; i < hdev->tm_info.num_tc; i++) {
@@ -918,9 +1004,13 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
if (ret)
return ret;
- ret = hclge_tm_qs_schd_mode_cfg(hdev, i);
- if (ret)
- return ret;
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ ret = hclge_tm_qs_schd_mode_cfg(
+ hdev, vport[k].qs_offset + i,
+ HCLGE_SCH_MODE_DWRR);
+ if (ret)
+ return ret;
+ }
}
} else {
for (i = 0; i < hdev->num_alloc_vport; i++) {
@@ -935,7 +1025,7 @@ static int hclge_tm_lvl34_schd_mode_cfg(struct hclge_dev *hdev)
return 0;
}
-static int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
+int hclge_tm_schd_mode_hw(struct hclge_dev *hdev)
{
int ret;
@@ -969,27 +1059,109 @@ static int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
return hclge_tm_schd_mode_hw(hdev);
}
+static int hclge_pfc_setup_hw(struct hclge_dev *hdev)
+{
+ u8 enable_bitmap = 0;
+
+ if (hdev->tm_info.fc_mode == HCLGE_FC_PFC)
+ enable_bitmap = HCLGE_TX_MAC_PAUSE_EN_MSK |
+ HCLGE_RX_MAC_PAUSE_EN_MSK;
+
+ return hclge_pfc_pause_en_cfg(hdev, enable_bitmap,
+ hdev->tm_info.hw_pfc_map);
+}
+
+static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+{
+ bool tx_en, rx_en;
+
+ switch (hdev->tm_info.fc_mode) {
+ case HCLGE_FC_NONE:
+ tx_en = false;
+ rx_en = false;
+ break;
+ case HCLGE_FC_RX_PAUSE:
+ tx_en = false;
+ rx_en = true;
+ break;
+ case HCLGE_FC_TX_PAUSE:
+ tx_en = true;
+ rx_en = false;
+ break;
+ case HCLGE_FC_FULL:
+ tx_en = true;
+ rx_en = true;
+ break;
+ default:
+ tx_en = true;
+ rx_en = true;
+ }
+
+ return hclge_mac_pause_en_cfg(hdev, tx_en, rx_en);
+}
+
int hclge_pause_setup_hw(struct hclge_dev *hdev)
{
- bool en = hdev->tm_info.fc_mode != HCLGE_FC_PFC;
int ret;
u8 i;
- ret = hclge_mac_pause_en_cfg(hdev, en, en);
- if (ret)
- return ret;
+ if (hdev->tm_info.fc_mode != HCLGE_FC_PFC)
+ return hclge_mac_pause_setup_hw(hdev);
- /* Only DCB-supported dev supports qset back pressure setting */
+ /* Only DCB-supported dev supports qset back pressure and pfc cmd */
if (!hnae3_dev_dcb_supported(hdev))
return 0;
+ /* When MAC is GE Mode, hdev does not support pfc setting */
+ ret = hclge_pfc_setup_hw(hdev);
+ if (ret)
+ dev_warn(&hdev->pdev->dev, "set pfc pause failed:%d\n", ret);
+
for (i = 0; i < hdev->tm_info.num_tc; i++) {
ret = hclge_tm_qs_bp_cfg(hdev, i);
if (ret)
return ret;
}
- return hclge_up_to_tc_map(hdev);
+ return 0;
+}
+
+int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc)
+{
+ struct hclge_vport *vport = hdev->vport;
+ struct hnae3_knic_private_info *kinfo;
+ u32 i, k;
+
+ for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+ if (prio_tc[i] >= hdev->tm_info.num_tc)
+ return -EINVAL;
+ hdev->tm_info.prio_tc[i] = prio_tc[i];
+
+ for (k = 0; k < hdev->num_alloc_vport; k++) {
+ kinfo = &vport[k].nic.kinfo;
+ kinfo->prio_tc[i] = prio_tc[i];
+ }
+ }
+ return 0;
+}
+
+void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc)
+{
+ u8 i, bit_map = 0;
+
+ hdev->tm_info.num_tc = num_tc;
+
+ for (i = 0; i < hdev->tm_info.num_tc; i++)
+ bit_map |= BIT(i);
+
+ if (!bit_map) {
+ bit_map = 1;
+ hdev->tm_info.num_tc = 1;
+ }
+
+ hdev->hw_tc_map = bit_map;
+
+ hclge_tm_schd_info_init(hdev);
}
int hclge_tm_init_hw(struct hclge_dev *hdev)
@@ -1013,8 +1185,13 @@ int hclge_tm_init_hw(struct hclge_dev *hdev)
int hclge_tm_schd_init(struct hclge_dev *hdev)
{
- int ret = hclge_tm_schd_info_init(hdev);
+ int ret;
+
+ /* fc_mode is HCLGE_FC_FULL on reset */
+ hdev->tm_info.fc_mode = HCLGE_FC_FULL;
+ hdev->fc_mode_last_time = hdev->tm_info.fc_mode;
+ ret = hclge_tm_schd_info_init(hdev);
if (ret)
return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index 85158b0d73fe..bf59961918ab 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -94,6 +94,15 @@ struct hclge_bp_to_qs_map_cmd {
u32 rsvd1;
};
+struct hclge_pfc_en_cmd {
+ u8 tx_rx_en_bitmap;
+ u8 pri_en_bitmap;
+};
+
+struct hclge_port_shapping_cmd {
+ __le32 port_shapping_para;
+};
+
#define hclge_tm_set_field(dest, string, val) \
hnae_set_field((dest), (HCLGE_TM_SHAP_##string##_MSK), \
(HCLGE_TM_SHAP_##string##_LSH), val)
@@ -103,4 +112,10 @@ struct hclge_bp_to_qs_map_cmd {
int hclge_tm_schd_init(struct hclge_dev *hdev);
int hclge_pause_setup_hw(struct hclge_dev *hdev);
+int hclge_tm_schd_mode_hw(struct hclge_dev *hdev);
+int hclge_tm_prio_tc_info_update(struct hclge_dev *hdev, u8 *prio_tc);
+void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc);
+int hclge_tm_dwrr_cfg(struct hclge_dev *hdev);
+int hclge_tm_map_cfg(struct hclge_dev *hdev);
+int hclge_tm_init_hw(struct hclge_dev *hdev);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
new file mode 100644
index 000000000000..925619a7c50a
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_dcbnl.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2016-2017 Hisilicon Limited.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "hnae3.h"
+#include "hns3_enet.h"
+
+static
+int hns3_dcbnl_ieee_getets(struct net_device *ndev, struct ieee_ets *ets)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+
+ if (h->kinfo.dcb_ops->ieee_getets)
+ return h->kinfo.dcb_ops->ieee_getets(h, ets);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_setets(struct net_device *ndev, struct ieee_ets *ets)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+
+ if (h->kinfo.dcb_ops->ieee_setets)
+ return h->kinfo.dcb_ops->ieee_setets(h, ets);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_getpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+
+ if (h->kinfo.dcb_ops->ieee_getpfc)
+ return h->kinfo.dcb_ops->ieee_getpfc(h, pfc);
+
+ return -EOPNOTSUPP;
+}
+
+static
+int hns3_dcbnl_ieee_setpfc(struct net_device *ndev, struct ieee_pfc *pfc)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+
+ if (h->kinfo.dcb_ops->ieee_setpfc)
+ return h->kinfo.dcb_ops->ieee_setpfc(h, pfc);
+
+ return -EOPNOTSUPP;
+}
+
+/* DCBX configuration */
+static u8 hns3_dcbnl_getdcbx(struct net_device *ndev)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+
+ if (h->kinfo.dcb_ops->getdcbx)
+ return h->kinfo.dcb_ops->getdcbx(h);
+
+ return 0;
+}
+
+/* return 0 if successful, otherwise fail */
+static u8 hns3_dcbnl_setdcbx(struct net_device *ndev, u8 mode)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+
+ if (h->kinfo.dcb_ops->setdcbx)
+ return h->kinfo.dcb_ops->setdcbx(h, mode);
+
+ return 1;
+}
+
+static const struct dcbnl_rtnl_ops hns3_dcbnl_ops = {
+ .ieee_getets = hns3_dcbnl_ieee_getets,
+ .ieee_setets = hns3_dcbnl_ieee_setets,
+ .ieee_getpfc = hns3_dcbnl_ieee_getpfc,
+ .ieee_setpfc = hns3_dcbnl_ieee_setpfc,
+ .getdcbx = hns3_dcbnl_getdcbx,
+ .setdcbx = hns3_dcbnl_setdcbx,
+};
+
+/* hclge_dcbnl_setup - DCBNL setup
+ * @handle: the corresponding vport handle
+ * Set up DCBNL
+ */
+void hns3_dcbnl_setup(struct hnae3_handle *handle)
+{
+ struct net_device *dev = handle->kinfo.netdev;
+
+ if (!handle->kinfo.dcb_ops)
+ return;
+
+ dev->dcbnl_ops = &hns3_dcbnl_ops;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
index 35369e1c8036..8383d6726ae4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.c
@@ -19,12 +19,13 @@
#include <linux/sctp.h>
#include <linux/vermagic.h>
#include <net/gre.h>
+#include <net/pkt_cls.h>
#include <net/vxlan.h>
#include "hnae3.h"
#include "hns3_enet.h"
-const char hns3_driver_name[] = "hns3";
+static const char hns3_driver_name[] = "hns3";
const char hns3_driver_version[] = VERMAGIC_STRING;
static const char hns3_driver_string[] =
"Hisilicon Ethernet Network Driver for Hip08 Family";
@@ -196,6 +197,31 @@ static void hns3_vector_gl_rl_init(struct hns3_enet_tqp_vector *tqp_vector)
tqp_vector->tx_group.flow_level = HNS3_FLOW_LOW;
}
+static int hns3_nic_set_real_num_queue(struct net_device *netdev)
+{
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+ struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ unsigned int queue_size = kinfo->rss_size * kinfo->num_tc;
+ int ret;
+
+ ret = netif_set_real_num_tx_queues(netdev, queue_size);
+ if (ret) {
+ netdev_err(netdev,
+ "netif_set_real_num_tx_queues fail, ret=%d!\n",
+ ret);
+ return ret;
+ }
+
+ ret = netif_set_real_num_rx_queues(netdev, queue_size);
+ if (ret) {
+ netdev_err(netdev,
+ "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+ return ret;
+ }
+
+ return 0;
+}
+
static int hns3_nic_net_up(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -232,26 +258,13 @@ out_start_err:
static int hns3_nic_net_open(struct net_device *netdev)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
int ret;
netif_carrier_off(netdev);
- ret = netif_set_real_num_tx_queues(netdev, h->kinfo.num_tqps);
- if (ret) {
- netdev_err(netdev,
- "netif_set_real_num_tx_queues fail, ret=%d!\n",
- ret);
- return ret;
- }
-
- ret = netif_set_real_num_rx_queues(netdev, h->kinfo.num_tqps);
- if (ret) {
- netdev_err(netdev,
- "netif_set_real_num_rx_queues fail, ret=%d!\n", ret);
+ ret = hns3_nic_set_real_num_queue(netdev);
+ if (ret)
return ret;
- }
ret = hns3_nic_net_up(netdev);
if (ret) {
@@ -292,24 +305,10 @@ static int hns3_nic_net_stop(struct net_device *netdev)
return 0;
}
-void hns3_set_multicast_list(struct net_device *netdev)
-{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
- struct netdev_hw_addr *ha = NULL;
-
- if (h->ae_algo->ops->set_mc_addr) {
- netdev_for_each_mc_addr(ha, netdev)
- if (h->ae_algo->ops->set_mc_addr(h, ha->addr))
- netdev_err(netdev, "set multicast fail\n");
- }
-}
-
static int hns3_nic_uc_sync(struct net_device *netdev,
const unsigned char *addr)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (h->ae_algo->ops->add_uc_addr)
return h->ae_algo->ops->add_uc_addr(h, addr);
@@ -320,8 +319,7 @@ static int hns3_nic_uc_sync(struct net_device *netdev,
static int hns3_nic_uc_unsync(struct net_device *netdev,
const unsigned char *addr)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (h->ae_algo->ops->rm_uc_addr)
return h->ae_algo->ops->rm_uc_addr(h, addr);
@@ -332,8 +330,7 @@ static int hns3_nic_uc_unsync(struct net_device *netdev,
static int hns3_nic_mc_sync(struct net_device *netdev,
const unsigned char *addr)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (h->ae_algo->ops->add_mc_addr)
return h->ae_algo->ops->add_mc_addr(h, addr);
@@ -344,8 +341,7 @@ static int hns3_nic_mc_sync(struct net_device *netdev,
static int hns3_nic_mc_unsync(struct net_device *netdev,
const unsigned char *addr)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (h->ae_algo->ops->rm_mc_addr)
return h->ae_algo->ops->rm_mc_addr(h, addr);
@@ -353,10 +349,9 @@ static int hns3_nic_mc_unsync(struct net_device *netdev,
return 0;
}
-void hns3_nic_set_rx_mode(struct net_device *netdev)
+static void hns3_nic_set_rx_mode(struct net_device *netdev)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (h->ae_algo->ops->set_promisc_mode) {
if (netdev->flags & IFF_PROMISC)
@@ -755,7 +750,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
if (type == DESC_TYPE_SKB) {
skb = (struct sk_buff *)priv;
- paylen = cpu_to_le16(skb->len);
+ paylen = skb->len;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
skb_reset_mac_len(skb);
@@ -789,7 +784,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
cpu_to_le32(ol_type_vlan_len_msec);
desc->tx.type_cs_vlan_tso_len =
cpu_to_le32(type_cs_vlan_tso);
- desc->tx.paylen = cpu_to_le16(paylen);
+ desc->tx.paylen = cpu_to_le32(paylen);
desc->tx.mss = cpu_to_le16(mss);
}
@@ -905,8 +900,7 @@ static void hns_nic_dma_unmap(struct hns3_enet_ring *ring, int next_to_use_orig)
}
}
-static netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb,
- struct net_device *netdev)
+netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
struct hns3_nic_ring_data *ring_data =
@@ -1012,8 +1006,7 @@ out_net_tx_busy:
static int hns3_nic_net_set_mac_address(struct net_device *netdev, void *p)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
struct sockaddr *mac_addr = p;
int ret;
@@ -1193,61 +1186,80 @@ static void hns3_nic_udp_tunnel_del(struct net_device *netdev,
}
}
-static int hns3_setup_tc(struct net_device *netdev, u8 tc)
+static int hns3_setup_tc(struct net_device *netdev, void *type_data)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ u8 *prio_tc = mqprio_qopt->qopt.prio_tc_map;
+ u8 tc = mqprio_qopt->qopt.num_tc;
+ u16 mode = mqprio_qopt->mode;
+ u8 hw = mqprio_qopt->qopt.hw;
+ bool if_running;
unsigned int i;
int ret;
+ if (!((hw == TC_MQPRIO_HW_OFFLOAD_TCS &&
+ mode == TC_MQPRIO_MODE_CHANNEL) || (!hw && tc == 0)))
+ return -EOPNOTSUPP;
+
if (tc > HNAE3_MAX_TC)
return -EINVAL;
- if (kinfo->num_tc == tc)
- return 0;
-
if (!netdev)
return -EINVAL;
- if (!tc) {
- netdev_reset_tc(netdev);
- return 0;
+ if_running = netif_running(netdev);
+ if (if_running) {
+ hns3_nic_net_stop(netdev);
+ msleep(100);
}
- /* Set num_tc for netdev */
- ret = netdev_set_num_tc(netdev, tc);
+ ret = (kinfo->dcb_ops && kinfo->dcb_ops->setup_tc) ?
+ kinfo->dcb_ops->setup_tc(h, tc, prio_tc) : -EOPNOTSUPP;
if (ret)
- return ret;
+ goto out;
+
+ if (tc <= 1) {
+ netdev_reset_tc(netdev);
+ } else {
+ ret = netdev_set_num_tc(netdev, tc);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ if (!kinfo->tc_info[i].enable)
+ continue;
- /* Set per TC queues for the VSI */
- for (i = 0; i < HNAE3_MAX_TC; i++) {
- if (kinfo->tc_info[i].enable)
netdev_set_tc_queue(netdev,
kinfo->tc_info[i].tc,
kinfo->tc_info[i].tqp_count,
kinfo->tc_info[i].tqp_offset);
+ }
}
- return 0;
+ ret = hns3_nic_set_real_num_queue(netdev);
+
+out:
+ if (if_running)
+ hns3_nic_net_open(netdev);
+
+ return ret;
}
static int hns3_nic_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
- struct tc_mqprio_qopt *mqprio = type_data;
-
if (type != TC_SETUP_MQPRIO)
return -EOPNOTSUPP;
- return hns3_setup_tc(dev, mqprio->num_tc);
+ return hns3_setup_tc(dev, type_data);
}
static int hns3_vlan_rx_add_vid(struct net_device *netdev,
__be16 proto, u16 vid)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vlan_filter)
@@ -1259,8 +1271,7 @@ static int hns3_vlan_rx_add_vid(struct net_device *netdev,
static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
__be16 proto, u16 vid)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vlan_filter)
@@ -1272,8 +1283,7 @@ static int hns3_vlan_rx_kill_vid(struct net_device *netdev,
static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
u8 qos, __be16 vlan_proto)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
int ret = -EIO;
if (h->ae_algo->ops->set_vf_vlan_filter)
@@ -1285,8 +1295,7 @@ static int hns3_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
static int hns3_nic_change_mtu(struct net_device *netdev, int new_mtu)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
bool if_running = netif_running(netdev);
int ret;
@@ -1933,6 +1942,11 @@ static void hns3_rx_checksum(struct hns3_enet_ring *ring, struct sk_buff *skb,
}
}
+static void hns3_rx_skb(struct hns3_enet_ring *ring, struct sk_buff *skb)
+{
+ napi_gro_receive(&ring->tqp_vector->napi, skb);
+}
+
static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
struct sk_buff **out_skb, int *out_bnum)
{
@@ -2067,7 +2081,9 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
return 0;
}
-static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget)
+int hns3_clean_rx_ring(
+ struct hns3_enet_ring *ring, int budget,
+ void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *))
{
#define RCB_NOF_ALLOC_RX_BUFF_ONCE 16
struct net_device *netdev = ring->tqp->handle->kinfo.netdev;
@@ -2105,7 +2121,7 @@ static int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget)
/* Do update ip stack process */
skb->protocol = eth_type_trans(skb, netdev);
- (void)napi_gro_receive(&ring->tqp_vector->napi, skb);
+ rx_fn(ring, skb);
recv_pkts++;
}
@@ -2248,7 +2264,8 @@ static int hns3_nic_common_poll(struct napi_struct *napi, int budget)
rx_budget = max(budget / tqp_vector->num_tqps, 1);
hns3_for_each_ring(ring, tqp_vector->rx_group) {
- int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget);
+ int rx_cleaned = hns3_clean_rx_ring(ring, rx_budget,
+ hns3_rx_skb);
if (rx_cleaned >= rx_budget)
clean_complete = false;
@@ -2596,7 +2613,7 @@ static void hns3_fini_ring(struct hns3_enet_ring *ring)
ring->next_to_use = 0;
}
-int hns3_buf_size2type(u32 buf_size)
+static int hns3_buf_size2type(u32 buf_size)
{
int bd_size_type;
@@ -2649,7 +2666,7 @@ static void hns3_init_ring_hw(struct hns3_enet_ring *ring)
}
}
-static int hns3_init_all_ring(struct hns3_nic_priv *priv)
+int hns3_init_all_ring(struct hns3_nic_priv *priv)
{
struct hnae3_handle *h = priv->ae_handle;
int ring_num = h->kinfo.num_tqps * 2;
@@ -2673,12 +2690,12 @@ static int hns3_init_all_ring(struct hns3_nic_priv *priv)
out_when_alloc_ring_memory:
for (j = i - 1; j >= 0; j--)
- hns3_fini_ring(priv->ring_data[i].ring);
+ hns3_fini_ring(priv->ring_data[j].ring);
return -ENOMEM;
}
-static int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
+int hns3_uninit_all_ring(struct hns3_nic_priv *priv)
{
struct hnae3_handle *h = priv->ae_handle;
int i;
@@ -2790,6 +2807,8 @@ static int hns3_client_init(struct hnae3_handle *handle)
goto out_reg_netdev_fail;
}
+ hns3_dcbnl_setup(handle);
+
/* MTU range: (ETH_MIN_MTU(kernel default) - 9706) */
netdev->max_mtu = HNS3_MAX_MTU - (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
@@ -2846,10 +2865,71 @@ static void hns3_link_status_change(struct hnae3_handle *handle, bool linkup)
}
}
-const struct hnae3_client_ops client_ops = {
+static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
+{
+ struct hnae3_knic_private_info *kinfo = &handle->kinfo;
+ struct net_device *ndev = kinfo->netdev;
+ bool if_running;
+ int ret;
+ u8 i;
+
+ if (tc > HNAE3_MAX_TC)
+ return -EINVAL;
+
+ if (!ndev)
+ return -ENODEV;
+
+ if_running = netif_running(ndev);
+
+ ret = netdev_set_num_tc(ndev, tc);
+ if (ret)
+ return ret;
+
+ if (if_running) {
+ (void)hns3_nic_net_stop(ndev);
+ msleep(100);
+ }
+
+ ret = (kinfo->dcb_ops && kinfo->dcb_ops->map_update) ?
+ kinfo->dcb_ops->map_update(handle) : -EOPNOTSUPP;
+ if (ret)
+ goto err_out;
+
+ if (tc <= 1) {
+ netdev_reset_tc(ndev);
+ goto out;
+ }
+
+ for (i = 0; i < HNAE3_MAX_TC; i++) {
+ struct hnae3_tc_info *tc_info = &kinfo->tc_info[i];
+
+ if (tc_info->enable)
+ netdev_set_tc_queue(ndev,
+ tc_info->tc,
+ tc_info->tqp_count,
+ tc_info->tqp_offset);
+ }
+
+ for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) {
+ netdev_set_prio_tc_map(ndev, i,
+ kinfo->prio_tc[i]);
+ }
+
+out:
+ ret = hns3_nic_set_real_num_queue(ndev);
+
+err_out:
+ if (if_running)
+ (void)hns3_nic_net_open(ndev);
+
+ return ret;
+}
+
+static const struct hnae3_client_ops client_ops = {
.init_instance = hns3_client_init,
.uninit_instance = hns3_client_uninit,
.link_status_change = hns3_link_status_change,
+ .setup_tc = hns3_client_setup_tc,
};
/* hns3_init_module - Driver registration routine
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
index 7e8746189747..6228b2603d93 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_enet.h
@@ -76,6 +76,8 @@ enum hns3_nic_state {
#define HNS3_RING_NAME_LEN 16
#define HNS3_BUFFER_SIZE_2048 2048
#define HNS3_RING_MAX_PENDING 32768
+#define HNS3_RING_MIN_PENDING 8
+#define HNS3_RING_BD_MULTIPLE 8
#define HNS3_MAX_MTU 9728
#define HNS3_BD_SIZE_512_TYPE 0
@@ -587,7 +589,23 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
#define hns3_for_each_ring(pos, head) \
for (pos = (head).ring; pos; pos = pos->next)
+#define hns3_get_handle(ndev) \
+ (((struct hns3_nic_priv *)netdev_priv(ndev))->ae_handle)
+
void hns3_ethtool_set_ops(struct net_device *netdev);
int hns3_clean_tx_ring(struct hns3_enet_ring *ring, int budget);
+int hns3_init_all_ring(struct hns3_nic_priv *priv);
+int hns3_uninit_all_ring(struct hns3_nic_priv *priv);
+netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
+int hns3_clean_rx_ring(
+ struct hns3_enet_ring *ring, int budget,
+ void (*rx_fn)(struct hns3_enet_ring *, struct sk_buff *));
+
+#ifdef CONFIG_HNS3_DCB
+void hns3_dcbnl_setup(struct hnae3_handle *handle);
+#else
+static inline void hns3_dcbnl_setup(struct hnae3_handle *handle) {}
+#endif
+
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
index d636399232fb..6c469e49a04f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hns3_ethtool.c
@@ -59,6 +59,16 @@ static const struct hns3_stats hns3_rxq_stats[] = {
#define HNS3_TQP_STATS_COUNT (HNS3_TXQ_STATS_COUNT + HNS3_RXQ_STATS_COUNT)
+#define HNS3_SELF_TEST_TPYE_NUM 1
+#define HNS3_NIC_LB_TEST_PKT_NUM 1
+#define HNS3_NIC_LB_TEST_RING_ID 0
+#define HNS3_NIC_LB_TEST_PACKET_SIZE 128
+
+/* Nic loopback test err */
+#define HNS3_NIC_LB_TEST_NO_MEM_ERR 1
+#define HNS3_NIC_LB_TEST_TX_CNT_ERR 2
+#define HNS3_NIC_LB_TEST_RX_CNT_ERR 3
+
struct hns3_link_mode_mapping {
u32 hns3_link_mode;
u32 ethtool_link_mode;
@@ -77,6 +87,268 @@ static const struct hns3_link_mode_mapping hns3_lm_map[] = {
{HNS3_LM_1000BASET_FULL_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT},
};
+static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+ int ret;
+
+ if (!h->ae_algo->ops->set_loopback ||
+ !h->ae_algo->ops->set_promisc_mode)
+ return -EOPNOTSUPP;
+
+ switch (loop) {
+ case HNAE3_MAC_INTER_LOOP_MAC:
+ ret = h->ae_algo->ops->set_loopback(h, loop, true);
+ break;
+ case HNAE3_MAC_LOOP_NONE:
+ ret = h->ae_algo->ops->set_loopback(h,
+ HNAE3_MAC_INTER_LOOP_MAC, false);
+ break;
+ default:
+ ret = -ENOTSUPP;
+ break;
+ }
+
+ if (ret)
+ return ret;
+
+ if (loop == HNAE3_MAC_LOOP_NONE)
+ h->ae_algo->ops->set_promisc_mode(h, ndev->flags & IFF_PROMISC);
+ else
+ h->ae_algo->ops->set_promisc_mode(h, 1);
+
+ return ret;
+}
+
+static int hns3_lp_up(struct net_device *ndev, enum hnae3_loop loop_mode)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+ int ret;
+
+ if (!h->ae_algo->ops->start)
+ return -EOPNOTSUPP;
+
+ ret = h->ae_algo->ops->start(h);
+ if (ret) {
+ netdev_err(ndev,
+ "hns3_lb_up ae start return error: %d\n", ret);
+ return ret;
+ }
+
+ ret = hns3_lp_setup(ndev, loop_mode);
+ usleep_range(10000, 20000);
+
+ return ret;
+}
+
+static int hns3_lp_down(struct net_device *ndev)
+{
+ struct hnae3_handle *h = hns3_get_handle(ndev);
+ int ret;
+
+ if (!h->ae_algo->ops->stop)
+ return -EOPNOTSUPP;
+
+ ret = hns3_lp_setup(ndev, HNAE3_MAC_LOOP_NONE);
+ if (ret) {
+ netdev_err(ndev, "lb_setup return error: %d\n", ret);
+ return ret;
+ }
+
+ h->ae_algo->ops->stop(h);
+ usleep_range(10000, 20000);
+
+ return 0;
+}
+
+static void hns3_lp_setup_skb(struct sk_buff *skb)
+{
+ struct net_device *ndev = skb->dev;
+ unsigned char *packet;
+ struct ethhdr *ethh;
+ unsigned int i;
+
+ skb_reserve(skb, NET_IP_ALIGN);
+ ethh = skb_put(skb, sizeof(struct ethhdr));
+ packet = skb_put(skb, HNS3_NIC_LB_TEST_PACKET_SIZE);
+
+ memcpy(ethh->h_dest, ndev->dev_addr, ETH_ALEN);
+ eth_zero_addr(ethh->h_source);
+ ethh->h_proto = htons(ETH_P_ARP);
+ skb_reset_mac_header(skb);
+
+ for (i = 0; i < HNS3_NIC_LB_TEST_PACKET_SIZE; i++)
+ packet[i] = (unsigned char)(i & 0xff);
+}
+
+static void hns3_lb_check_skb_data(struct hns3_enet_ring *ring,
+ struct sk_buff *skb)
+{
+ struct hns3_enet_tqp_vector *tqp_vector = ring->tqp_vector;
+ unsigned char *packet = skb->data;
+ u32 i;
+
+ for (i = 0; i < skb->len; i++)
+ if (packet[i] != (unsigned char)(i & 0xff))
+ break;
+
+ /* The packet is correctly received */
+ if (i == skb->len)
+ tqp_vector->rx_group.total_packets++;
+ else
+ print_hex_dump(KERN_ERR, "selftest:", DUMP_PREFIX_OFFSET, 16, 1,
+ skb->data, skb->len, true);
+
+ dev_kfree_skb_any(skb);
+}
+
+static u32 hns3_lb_check_rx_ring(struct hns3_nic_priv *priv, u32 budget)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_knic_private_info *kinfo;
+ u32 i, rcv_good_pkt_total = 0;
+
+ kinfo = &h->kinfo;
+ for (i = kinfo->num_tqps; i < kinfo->num_tqps * 2; i++) {
+ struct hns3_enet_ring *ring = priv->ring_data[i].ring;
+ struct hns3_enet_ring_group *rx_group;
+ u64 pre_rx_pkt;
+
+ rx_group = &ring->tqp_vector->rx_group;
+ pre_rx_pkt = rx_group->total_packets;
+
+ hns3_clean_rx_ring(ring, budget, hns3_lb_check_skb_data);
+
+ rcv_good_pkt_total += (rx_group->total_packets - pre_rx_pkt);
+ rx_group->total_packets = pre_rx_pkt;
+ }
+ return rcv_good_pkt_total;
+}
+
+static void hns3_lb_clear_tx_ring(struct hns3_nic_priv *priv, u32 start_ringid,
+ u32 end_ringid, u32 budget)
+{
+ u32 i;
+
+ for (i = start_ringid; i <= end_ringid; i++) {
+ struct hns3_enet_ring *ring = priv->ring_data[i].ring;
+
+ hns3_clean_tx_ring(ring, budget);
+ }
+}
+
+/**
+ * hns3_lp_run_test - run loopback test
+ * @ndev: net device
+ * @mode: loopback type
+ */
+static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct sk_buff *skb;
+ u32 i, good_cnt;
+ int ret_val = 0;
+
+ skb = alloc_skb(HNS3_NIC_LB_TEST_PACKET_SIZE + ETH_HLEN + NET_IP_ALIGN,
+ GFP_KERNEL);
+ if (!skb)
+ return HNS3_NIC_LB_TEST_NO_MEM_ERR;
+
+ skb->dev = ndev;
+ hns3_lp_setup_skb(skb);
+ skb->queue_mapping = HNS3_NIC_LB_TEST_RING_ID;
+
+ good_cnt = 0;
+ for (i = 0; i < HNS3_NIC_LB_TEST_PKT_NUM; i++) {
+ netdev_tx_t tx_ret;
+
+ skb_get(skb);
+ tx_ret = hns3_nic_net_xmit(skb, ndev);
+ if (tx_ret == NETDEV_TX_OK)
+ good_cnt++;
+ else
+ netdev_err(ndev, "hns3_lb_run_test xmit failed: %d\n",
+ tx_ret);
+ }
+ if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) {
+ ret_val = HNS3_NIC_LB_TEST_TX_CNT_ERR;
+ netdev_err(ndev, "mode %d sent fail, cnt=0x%x, budget=0x%x\n",
+ mode, good_cnt, HNS3_NIC_LB_TEST_PKT_NUM);
+ goto out;
+ }
+
+ /* Allow 200 milliseconds for packets to go from Tx to Rx */
+ msleep(200);
+
+ good_cnt = hns3_lb_check_rx_ring(priv, HNS3_NIC_LB_TEST_PKT_NUM);
+ if (good_cnt != HNS3_NIC_LB_TEST_PKT_NUM) {
+ ret_val = HNS3_NIC_LB_TEST_RX_CNT_ERR;
+ netdev_err(ndev, "mode %d recv fail, cnt=0x%x, budget=0x%x\n",
+ mode, good_cnt, HNS3_NIC_LB_TEST_PKT_NUM);
+ }
+
+out:
+ hns3_lb_clear_tx_ring(priv, HNS3_NIC_LB_TEST_RING_ID,
+ HNS3_NIC_LB_TEST_RING_ID,
+ HNS3_NIC_LB_TEST_PKT_NUM);
+
+ kfree_skb(skb);
+ return ret_val;
+}
+
+/**
+ * hns3_nic_self_test - self test
+ * @ndev: net device
+ * @eth_test: test cmd
+ * @data: test result
+ */
+static void hns3_self_test(struct net_device *ndev,
+ struct ethtool_test *eth_test, u64 *data)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+ int st_param[HNS3_SELF_TEST_TPYE_NUM][2];
+ bool if_running = netif_running(ndev);
+ int test_index = 0;
+ u32 i;
+
+ /* Only do offline selftest, or pass by default */
+ if (eth_test->flags != ETH_TEST_FL_OFFLINE)
+ return;
+
+ st_param[HNAE3_MAC_INTER_LOOP_MAC][0] = HNAE3_MAC_INTER_LOOP_MAC;
+ st_param[HNAE3_MAC_INTER_LOOP_MAC][1] =
+ h->flags & HNAE3_SUPPORT_MAC_LOOPBACK;
+
+ if (if_running)
+ dev_close(ndev);
+
+ set_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+
+ for (i = 0; i < HNS3_SELF_TEST_TPYE_NUM; i++) {
+ enum hnae3_loop loop_type = (enum hnae3_loop)st_param[i][0];
+
+ if (!st_param[i][1])
+ continue;
+
+ data[test_index] = hns3_lp_up(ndev, loop_type);
+ if (!data[test_index]) {
+ data[test_index] = hns3_lp_run_test(ndev, loop_type);
+ hns3_lp_down(ndev);
+ }
+
+ if (data[test_index])
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ test_index++;
+ }
+
+ clear_bit(HNS3_NIC_STATE_TESTING, &priv->state);
+
+ if (if_running)
+ dev_open(ndev);
+}
+
static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
bool is_advertised)
{
@@ -102,8 +374,7 @@ static void hns3_driv_to_eth_caps(u32 caps, struct ethtool_link_ksettings *cmd,
static int hns3_get_sset_count(struct net_device *netdev, int stringset)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
const struct hnae3_ae_ops *ops = h->ae_algo->ops;
if (!ops->get_sset_count)
@@ -164,8 +435,7 @@ static u8 *hns3_get_strings_tqps(struct hnae3_handle *handle, u8 *data)
static void hns3_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
const struct hnae3_ae_ops *ops = h->ae_algo->ops;
char *buff = (char *)data;
@@ -217,11 +487,10 @@ static u64 *hns3_get_stats_tqps(struct hnae3_handle *handle, u64 *data)
* @stats: statistics info.
* @data: statistics data.
*/
-void hns3_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
- u64 *data)
+static void hns3_get_stats(struct net_device *netdev,
+ struct ethtool_stats *stats, u64 *data)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
u64 *p = data;
if (!h->ae_algo->ops->get_stats || !h->ae_algo->ops->update_stats) {
@@ -262,10 +531,7 @@ static void hns3_get_drvinfo(struct net_device *netdev,
static u32 hns3_get_link(struct net_device *netdev)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h;
-
- h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_status)
return h->ae_algo->ops->get_status(h);
@@ -277,7 +543,8 @@ static void hns3_get_ringparam(struct net_device *netdev,
struct ethtool_ringparam *param)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
- int queue_num = priv->ae_handle->kinfo.num_tqps;
+ struct hnae3_handle *h = priv->ae_handle;
+ int queue_num = h->kinfo.num_tqps;
param->tx_max_pending = HNS3_RING_MAX_PENDING;
param->rx_max_pending = HNS3_RING_MAX_PENDING;
@@ -289,8 +556,7 @@ static void hns3_get_ringparam(struct net_device *netdev,
static void hns3_get_pauseparam(struct net_device *netdev,
struct ethtool_pauseparam *param)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (h->ae_algo && h->ae_algo->ops && h->ae_algo->ops->get_pauseparam)
h->ae_algo->ops->get_pauseparam(h, &param->autoneg,
@@ -300,8 +566,7 @@ static void hns3_get_pauseparam(struct net_device *netdev,
static int hns3_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *cmd)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
u32 supported_caps;
u32 advertised_caps;
u8 media_type = HNAE3_MEDIA_TYPE_UNKNOWN;
@@ -392,8 +657,7 @@ static int hns3_get_link_ksettings(struct net_device *netdev,
static u32 hns3_get_rss_key_size(struct net_device *netdev)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (!h->ae_algo || !h->ae_algo->ops ||
!h->ae_algo->ops->get_rss_key_size)
@@ -404,8 +668,7 @@ static u32 hns3_get_rss_key_size(struct net_device *netdev)
static u32 hns3_get_rss_indir_size(struct net_device *netdev)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (!h->ae_algo || !h->ae_algo->ops ||
!h->ae_algo->ops->get_rss_indir_size)
@@ -417,8 +680,7 @@ static u32 hns3_get_rss_indir_size(struct net_device *netdev)
static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_rss)
return -EOPNOTSUPP;
@@ -429,8 +691,7 @@ static int hns3_get_rss(struct net_device *netdev, u32 *indir, u8 *key,
static int hns3_set_rss(struct net_device *netdev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss)
return -EOPNOTSUPP;
@@ -454,16 +715,17 @@ static int hns3_get_rxnfc(struct net_device *netdev,
struct ethtool_rxnfc *cmd,
u32 *rule_locs)
{
- struct hns3_nic_priv *priv = netdev_priv(netdev);
- struct hnae3_handle *h = priv->ae_handle;
+ struct hnae3_handle *h = hns3_get_handle(netdev);
if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->get_tc_size)
return -EOPNOTSUPP;
switch (cmd->cmd) {
case ETHTOOL_GRXRINGS:
- cmd->data = h->ae_algo->ops->get_tc_size(h);
+ cmd->data = h->kinfo.num_tc * h->kinfo.rss_size;
break;
+ case ETHTOOL_GRXFH:
+ return h->ae_algo->ops->get_rss_tuple(h, cmd);
default:
return -EOPNOTSUPP;
}
@@ -471,15 +733,109 @@ static int hns3_get_rxnfc(struct net_device *netdev,
return 0;
}
+static int hns3_change_all_ring_bd_num(struct hns3_nic_priv *priv,
+ u32 new_desc_num)
+{
+ struct hnae3_handle *h = priv->ae_handle;
+ int i;
+
+ h->kinfo.num_desc = new_desc_num;
+
+ for (i = 0; i < h->kinfo.num_tqps * 2; i++)
+ priv->ring_data[i].ring->desc_num = new_desc_num;
+
+ return hns3_init_all_ring(priv);
+}
+
+static int hns3_set_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *param)
+{
+ struct hns3_nic_priv *priv = netdev_priv(ndev);
+ struct hnae3_handle *h = priv->ae_handle;
+ bool if_running = netif_running(ndev);
+ u32 old_desc_num, new_desc_num;
+ int ret;
+
+ if (param->rx_mini_pending || param->rx_jumbo_pending)
+ return -EINVAL;
+
+ if (param->tx_pending != param->rx_pending) {
+ netdev_err(ndev,
+ "Descriptors of tx and rx must be equal");
+ return -EINVAL;
+ }
+
+ if (param->tx_pending > HNS3_RING_MAX_PENDING ||
+ param->tx_pending < HNS3_RING_MIN_PENDING) {
+ netdev_err(ndev,
+ "Descriptors requested (Tx/Rx: %d) out of range [%d-%d]\n",
+ param->tx_pending, HNS3_RING_MIN_PENDING,
+ HNS3_RING_MAX_PENDING);
+ return -EINVAL;
+ }
+
+ new_desc_num = param->tx_pending;
+
+ /* Hardware requires that its descriptors must be multiple of eight */
+ new_desc_num = ALIGN(new_desc_num, HNS3_RING_BD_MULTIPLE);
+ old_desc_num = h->kinfo.num_desc;
+ if (old_desc_num == new_desc_num)
+ return 0;
+
+ netdev_info(ndev,
+ "Changing descriptor count from %d to %d.\n",
+ old_desc_num, new_desc_num);
+
+ if (if_running)
+ dev_close(ndev);
+
+ ret = hns3_uninit_all_ring(priv);
+ if (ret)
+ return ret;
+
+ ret = hns3_change_all_ring_bd_num(priv, new_desc_num);
+ if (ret) {
+ ret = hns3_change_all_ring_bd_num(priv, old_desc_num);
+ if (ret) {
+ netdev_err(ndev,
+ "Revert to old bd num fail, ret=%d.\n", ret);
+ return ret;
+ }
+ }
+
+ if (if_running)
+ ret = dev_open(ndev);
+
+ return ret;
+}
+
+static int hns3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+ struct hnae3_handle *h = hns3_get_handle(netdev);
+
+ if (!h->ae_algo || !h->ae_algo->ops || !h->ae_algo->ops->set_rss_tuple)
+ return -EOPNOTSUPP;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXFH:
+ return h->ae_algo->ops->set_rss_tuple(h, cmd);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct ethtool_ops hns3_ethtool_ops = {
+ .self_test = hns3_self_test,
.get_drvinfo = hns3_get_drvinfo,
.get_link = hns3_get_link,
.get_ringparam = hns3_get_ringparam,
+ .set_ringparam = hns3_set_ringparam,
.get_pauseparam = hns3_get_pauseparam,
.get_strings = hns3_get_strings,
.get_ethtool_stats = hns3_get_stats,
.get_sset_count = hns3_get_sset_count,
.get_rxnfc = hns3_get_rxnfc,
+ .set_rxnfc = hns3_set_rxnfc,
.get_rxfh_key_size = hns3_get_rss_key_size,
.get_rxfh_indir_size = hns3_get_rss_indir_size,
.get_rxfh = hns3_get_rss,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 1d4f712b15a8..e2e5cdc7119c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -26,6 +26,7 @@
#include <linux/skbuff.h>
#include <linux/dma-mapping.h>
#include <linux/prefetch.h>
+#include <linux/cpumask.h>
#include <asm/barrier.h>
#include "hinic_common.h"
@@ -171,11 +172,10 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
struct hinic_sge sge;
dma_addr_t dma_addr;
struct sk_buff *skb;
- int i, alloc_more;
u16 prod_idx;
+ int i;
free_wqebbs = hinic_get_rq_free_wqebbs(rxq->rq);
- alloc_more = 0;
/* Limit the allocation chunks */
if (free_wqebbs > nic_dev->rx_weight)
@@ -185,7 +185,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
skb = rx_alloc_skb(rxq, &dma_addr);
if (!skb) {
netdev_err(rxq->netdev, "Failed to alloc Rx skb\n");
- alloc_more = 1;
goto skb_out;
}
@@ -195,7 +194,6 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
&prod_idx);
if (!rq_wqe) {
rx_free_skb(rxq, skb, dma_addr);
- alloc_more = 1;
goto skb_out;
}
@@ -211,9 +209,7 @@ skb_out:
hinic_rq_update(rxq->rq, prod_idx);
}
- if (alloc_more)
- tasklet_schedule(&rxq->rx_task);
-
+ tasklet_schedule(&rxq->rx_task);
return i;
}
@@ -357,7 +353,7 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
}
if (pkts)
- tasklet_schedule(&rxq->rx_task); /* hinic_rx_alloc_pkts */
+ tasklet_schedule(&rxq->rx_task); /* rx_alloc_pkts */
u64_stats_update_begin(&rxq->rxq_stats.syncp);
rxq->rxq_stats.pkts += pkts;
@@ -417,6 +413,8 @@ static int rx_request_irq(struct hinic_rxq *rxq)
struct hinic_dev *nic_dev = netdev_priv(rxq->netdev);
struct hinic_hwdev *hwdev = nic_dev->hwdev;
struct hinic_rq *rq = rxq->rq;
+ struct hinic_qp *qp;
+ struct cpumask mask;
int err;
rx_add_napi(rxq);
@@ -432,7 +430,9 @@ static int rx_request_irq(struct hinic_rxq *rxq)
return err;
}
- return 0;
+ qp = container_of(rq, struct hinic_qp, rq);
+ cpumask_set_cpu(qp->q_id % num_online_cpus(), &mask);
+ return irq_set_affinity_hint(rq->irq, &mask);
}
static void rx_free_irq(struct hinic_rxq *rxq)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index abe3e38cd342..9128858479c4 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -212,10 +212,19 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx);
if (!sq_wqe) {
- tx_unmap_skb(nic_dev, skb, txq->sges);
-
netif_stop_subqueue(netdev, qp->q_id);
+ /* Check for the case free_tx_poll is called in another cpu
+ * and we stopped the subqueue after free_tx_poll check.
+ */
+ sq_wqe = hinic_sq_get_wqe(txq->sq, wqe_size, &prod_idx);
+ if (sq_wqe) {
+ netif_wake_subqueue(nic_dev->netdev, qp->q_id);
+ goto process_sq_wqe;
+ }
+
+ tx_unmap_skb(nic_dev, skb, txq->sges);
+
u64_stats_update_begin(&txq->txq_stats.syncp);
txq->txq_stats.tx_busy++;
u64_stats_update_end(&txq->txq_stats.syncp);
@@ -223,6 +232,7 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
goto flush_skbs;
}
+process_sq_wqe:
hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c66abd476023..11eba8277132 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -553,6 +553,10 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
if (rc)
return rc;
+ rc = reset_long_term_buff(adapter, &tx_pool->tso_ltb);
+ if (rc)
+ return rc;
+
memset(tx_pool->tx_buff, 0,
adapter->req_tx_entries_per_subcrq *
sizeof(struct ibmvnic_tx_buff));
@@ -562,6 +566,7 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter)
tx_pool->consumer_index = 0;
tx_pool->producer_index = 0;
+ tx_pool->tso_index = 0;
}
return 0;
@@ -581,6 +586,7 @@ static void release_tx_pools(struct ibmvnic_adapter *adapter)
tx_pool = &adapter->tx_pool[i];
kfree(tx_pool->tx_buff);
free_long_term_buff(adapter, &tx_pool->long_term_buff);
+ free_long_term_buff(adapter, &tx_pool->tso_ltb);
kfree(tx_pool->free_map);
}
@@ -625,6 +631,16 @@ static int init_tx_pools(struct net_device *netdev)
return -1;
}
+ /* alloc TSO ltb */
+ if (alloc_long_term_buff(adapter, &tx_pool->tso_ltb,
+ IBMVNIC_TSO_BUFS *
+ IBMVNIC_TSO_BUF_SZ)) {
+ release_tx_pools(adapter);
+ return -1;
+ }
+
+ tx_pool->tso_index = 0;
+
tx_pool->free_map = kcalloc(adapter->req_tx_entries_per_subcrq,
sizeof(int), GFP_KERNEL);
if (!tx_pool->free_map) {
@@ -927,6 +943,7 @@ static int ibmvnic_open(struct net_device *netdev)
}
rc = __ibmvnic_open(netdev);
+ netif_carrier_on(netdev);
mutex_unlock(&adapter->reset_lock);
return rc;
@@ -1200,11 +1217,41 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
be32_to_cpu(adapter->login_rsp_buf->off_txsubm_subcrqs));
index = tx_pool->free_map[tx_pool->consumer_index];
- offset = index * adapter->req_mtu;
- dst = tx_pool->long_term_buff.buff + offset;
- memset(dst, 0, adapter->req_mtu);
- skb_copy_from_linear_data(skb, dst, skb->len);
- data_dma_addr = tx_pool->long_term_buff.addr + offset;
+
+ if (skb_is_gso(skb)) {
+ offset = tx_pool->tso_index * IBMVNIC_TSO_BUF_SZ;
+ dst = tx_pool->tso_ltb.buff + offset;
+ memset(dst, 0, IBMVNIC_TSO_BUF_SZ);
+ data_dma_addr = tx_pool->tso_ltb.addr + offset;
+ tx_pool->tso_index++;
+ if (tx_pool->tso_index == IBMVNIC_TSO_BUFS)
+ tx_pool->tso_index = 0;
+ } else {
+ offset = index * adapter->req_mtu;
+ dst = tx_pool->long_term_buff.buff + offset;
+ memset(dst, 0, adapter->req_mtu);
+ data_dma_addr = tx_pool->long_term_buff.addr + offset;
+ }
+
+ if (skb_shinfo(skb)->nr_frags) {
+ int cur, i;
+
+ /* Copy the head */
+ skb_copy_from_linear_data(skb, dst, skb_headlen(skb));
+ cur = skb_headlen(skb);
+
+ /* Copy the frags */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ memcpy(dst + cur,
+ page_address(skb_frag_page(frag)) +
+ frag->page_offset, skb_frag_size(frag));
+ cur += skb_frag_size(frag);
+ }
+ } else {
+ skb_copy_from_linear_data(skb, dst, skb->len);
+ }
tx_pool->consumer_index =
(tx_pool->consumer_index + 1) %
@@ -1225,7 +1272,10 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.n_sge = 1;
tx_crq.v1.flags1 = IBMVNIC_TX_COMP_NEEDED;
tx_crq.v1.correlator = cpu_to_be32(index);
- tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
+ if (skb_is_gso(skb))
+ tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->tso_ltb.map_id);
+ else
+ tx_crq.v1.dma_reg = cpu_to_be16(tx_pool->long_term_buff.map_id);
tx_crq.v1.sge_len = cpu_to_be32(skb->len);
tx_crq.v1.ioba = cpu_to_be64(data_dma_addr);
@@ -1250,6 +1300,11 @@ static int ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
tx_crq.v1.flags1 |= IBMVNIC_TX_CHKSUM_OFFLOAD;
hdrs += 2;
}
+ if (skb_is_gso(skb)) {
+ tx_crq.v1.flags1 |= IBMVNIC_TX_LSO;
+ tx_crq.v1.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+ hdrs += 2;
+ }
/* determine if l2/3/4 headers are sent to firmware */
if ((*hdrs >> 7) & 1 &&
(skb->protocol == htons(ETH_P_IP) ||
@@ -2940,14 +2995,14 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
adapter->ip_offload_ctrl.udp_ipv4_chksum = buf->udp_ipv4_chksum;
adapter->ip_offload_ctrl.tcp_ipv6_chksum = buf->tcp_ipv6_chksum;
adapter->ip_offload_ctrl.udp_ipv6_chksum = buf->udp_ipv6_chksum;
+ adapter->ip_offload_ctrl.large_tx_ipv4 = buf->large_tx_ipv4;
+ adapter->ip_offload_ctrl.large_tx_ipv6 = buf->large_tx_ipv6;
- /* large_tx/rx disabled for now, additional features needed */
- adapter->ip_offload_ctrl.large_tx_ipv4 = 0;
- adapter->ip_offload_ctrl.large_tx_ipv6 = 0;
+ /* large_rx disabled for now, additional features needed */
adapter->ip_offload_ctrl.large_rx_ipv4 = 0;
adapter->ip_offload_ctrl.large_rx_ipv6 = 0;
- adapter->netdev->features = NETIF_F_GSO;
+ adapter->netdev->features = NETIF_F_SG | NETIF_F_GSO;
if (buf->tcp_ipv4_chksum || buf->udp_ipv4_chksum)
adapter->netdev->features |= NETIF_F_IP_CSUM;
@@ -2959,6 +3014,13 @@ static void handle_query_ip_offload_rsp(struct ibmvnic_adapter *adapter)
(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)))
adapter->netdev->features |= NETIF_F_RXCSUM;
+ if (buf->large_tx_ipv4)
+ adapter->netdev->features |= NETIF_F_TSO;
+ if (buf->large_tx_ipv6)
+ adapter->netdev->features |= NETIF_F_TSO6;
+
+ adapter->netdev->hw_features |= adapter->netdev->features;
+
memset(&crq, 0, sizeof(crq));
crq.control_ip_offload.first = IBMVNIC_CRQ_CMD;
crq.control_ip_offload.cmd = CONTROL_IP_OFFLOAD;
@@ -3899,6 +3961,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id)
if (rc)
goto ibmvnic_init_fail;
+ netif_carrier_off(netdev);
rc = register_netdev(netdev);
if (rc) {
dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index d02257ccc377..7aa347a21e78 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -39,6 +39,9 @@
#define IBMVNIC_BUFFS_PER_POOL 100
#define IBMVNIC_MAX_TX_QUEUES 5
+#define IBMVNIC_TSO_BUF_SZ 65536
+#define IBMVNIC_TSO_BUFS 64
+
struct ibmvnic_login_buffer {
__be32 len;
__be32 version;
@@ -896,6 +899,8 @@ struct ibmvnic_tx_pool {
wait_queue_head_t ibmvnic_tx_comp_q;
struct task_struct *work_thread;
struct ibmvnic_long_term_buff long_term_buff;
+ struct ibmvnic_long_term_buff tso_ltb;
+ int tso_index;
};
struct ibmvnic_rx_buff {
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 4d10270ddf8f..44b3937f7e81 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -1710,9 +1710,9 @@ static void e100_adjust_adaptive_ifs(struct nic *nic, int speed, int duplex)
}
}
-static void e100_watchdog(unsigned long data)
+static void e100_watchdog(struct timer_list *t)
{
- struct nic *nic = (struct nic *)data;
+ struct nic *nic = from_timer(nic, t, watchdog);
struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET };
u32 speed;
@@ -1910,11 +1910,10 @@ static int e100_alloc_cbs(struct nic *nic)
nic->cb_to_use = nic->cb_to_send = nic->cb_to_clean = NULL;
nic->cbs_avail = 0;
- nic->cbs = pci_pool_alloc(nic->cbs_pool, GFP_KERNEL,
- &nic->cbs_dma_addr);
+ nic->cbs = pci_pool_zalloc(nic->cbs_pool, GFP_KERNEL,
+ &nic->cbs_dma_addr);
if (!nic->cbs)
return -ENOMEM;
- memset(nic->cbs, 0, count * sizeof(struct cb));
for (cb = nic->cbs, i = 0; i < count; cb++, i++) {
cb->next = (i + 1 < count) ? cb + 1 : nic->cbs;
@@ -2921,7 +2920,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
pci_set_master(pdev);
- setup_timer(&nic->watchdog, e100_watchdog, (unsigned long)nic);
+ timer_setup(&nic->watchdog, e100_watchdog, 0);
INIT_WORK(&nic->tx_timeout_task, e100_tx_timeout_task);
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index 0641c0098738..afb7ebe20b24 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -398,6 +398,7 @@
#define E1000_ICR_LSC 0x00000004 /* Link Status Change */
#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */
#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */
+#define E1000_ICR_RXO 0x00000040 /* Receiver Overrun */
#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */
#define E1000_ICR_ECCER 0x00400000 /* Uncorrectable ECC Error */
/* If this bit asserted, the driver should claim the interrupt */
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 98e68888abb1..2311b31bdcac 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -94,10 +94,6 @@ struct e1000_info;
*/
#define E1000_CHECK_RESET_COUNT 25
-#define DEFAULT_RDTR 0
-#define DEFAULT_RADV 8
-#define BURST_RDTR 0x20
-#define BURST_RADV 0x20
#define PCICFG_DESC_RING_STATUS 0xe4
#define FLUSH_DESC_REQUIRED 0x100
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index b322011ec282..f457c5703d0c 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -410,6 +410,9 @@ void e1000e_clear_hw_cntrs_base(struct e1000_hw *hw)
* Checks to see of the link status of the hardware has changed. If a
* change in link status has been detected, then we read the PHY registers
* to get the current speed/duplex if link exists.
+ *
+ * Returns a negative error code (-E1000_ERR_*) or 0 (link down) or 1 (link
+ * up).
**/
s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
{
@@ -423,7 +426,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
* Change or Rx Sequence Error interrupt.
*/
if (!mac->get_link_status)
- return 0;
+ return 1;
/* First we want to see if the MII Status Register reports
* link. If so, then we want to get the current speed/duplex
@@ -461,10 +464,12 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
* different link partner.
*/
ret_val = e1000e_config_fc_after_link_up(hw);
- if (ret_val)
+ if (ret_val) {
e_dbg("Error configuring flow control\n");
+ return ret_val;
+ }
- return ret_val;
+ return 1;
}
/**
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 327dfe5bedc0..f2f49239b015 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1071,7 +1071,8 @@ next_desc:
}
static void e1000_put_txbuf(struct e1000_ring *tx_ring,
- struct e1000_buffer *buffer_info)
+ struct e1000_buffer *buffer_info,
+ bool drop)
{
struct e1000_adapter *adapter = tx_ring->adapter;
@@ -1085,7 +1086,10 @@ static void e1000_put_txbuf(struct e1000_ring *tx_ring,
buffer_info->dma = 0;
}
if (buffer_info->skb) {
- dev_kfree_skb_any(buffer_info->skb);
+ if (drop)
+ dev_kfree_skb_any(buffer_info->skb);
+ else
+ dev_consume_skb_any(buffer_info->skb);
buffer_info->skb = NULL;
}
buffer_info->time_stamp = 0;
@@ -1199,7 +1203,7 @@ static void e1000e_tx_hwtstamp_work(struct work_struct *work)
wmb(); /* force write prior to skb_tstamp_tx */
skb_tstamp_tx(skb, &shhwtstamps);
- dev_kfree_skb_any(skb);
+ dev_consume_skb_any(skb);
} else if (time_after(jiffies, adapter->tx_hwtstamp_start
+ adapter->tx_timeout_factor * HZ)) {
dev_kfree_skb_any(adapter->tx_hwtstamp_skb);
@@ -1254,7 +1258,7 @@ static bool e1000_clean_tx_irq(struct e1000_ring *tx_ring)
}
}
- e1000_put_txbuf(tx_ring, buffer_info);
+ e1000_put_txbuf(tx_ring, buffer_info, false);
tx_desc->upper.data = 0;
i++;
@@ -1910,14 +1914,30 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
+ u32 icr;
+ bool enable = true;
+
+ icr = er32(ICR);
+ if (icr & E1000_ICR_RXO) {
+ ew32(ICR, E1000_ICR_RXO);
+ enable = false;
+ /* napi poll will re-enable Other, make sure it runs */
+ if (napi_schedule_prep(&adapter->napi)) {
+ adapter->total_rx_bytes = 0;
+ adapter->total_rx_packets = 0;
+ __napi_schedule(&adapter->napi);
+ }
+ }
+ if (icr & E1000_ICR_LSC) {
+ ew32(ICR, E1000_ICR_LSC);
+ hw->mac.get_link_status = true;
+ /* guard against interrupt when we're going down */
+ if (!test_bit(__E1000_DOWN, &adapter->state))
+ mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ }
- hw->mac.get_link_status = true;
-
- /* guard against interrupt when we're going down */
- if (!test_bit(__E1000_DOWN, &adapter->state)) {
- mod_timer(&adapter->watchdog_timer, jiffies + 1);
+ if (enable && !test_bit(__E1000_DOWN, &adapter->state))
ew32(IMS, E1000_IMS_OTHER);
- }
return IRQ_HANDLED;
}
@@ -2421,7 +2441,7 @@ static void e1000_clean_tx_ring(struct e1000_ring *tx_ring)
for (i = 0; i < tx_ring->count; i++) {
buffer_info = &tx_ring->buffer_info[i];
- e1000_put_txbuf(tx_ring, buffer_info);
+ e1000_put_txbuf(tx_ring, buffer_info, false);
}
netdev_reset_queue(adapter->netdev);
@@ -2687,7 +2707,8 @@ static int e1000e_poll(struct napi_struct *napi, int weight)
napi_complete_done(napi, work_done);
if (!test_bit(__E1000_DOWN, &adapter->state)) {
if (adapter->msix_entries)
- ew32(IMS, adapter->rx_ring->ims_val);
+ ew32(IMS, adapter->rx_ring->ims_val |
+ E1000_IMS_OTHER);
else
e1000_irq_enable(adapter);
}
@@ -3004,8 +3025,8 @@ static void e1000_configure_tx(struct e1000_adapter *adapter)
hw->mac.ops.config_collision_dist(hw);
- /* SPT and CNP Si errata workaround to avoid data corruption */
- if (hw->mac.type >= e1000_pch_spt) {
+ /* SPT and KBL Si errata workaround to avoid data corruption */
+ if (hw->mac.type == e1000_pch_spt) {
u32 reg_val;
reg_val = er32(IOSFPC);
@@ -3013,7 +3034,9 @@ static void e1000_configure_tx(struct e1000_adapter *adapter)
ew32(IOSFPC, reg_val);
reg_val = er32(TARC(0));
- reg_val |= E1000_TARC0_CB_MULTIQ_3_REQ;
+ /* SPT and KBL Si errata workaround to avoid Tx hang */
+ reg_val &= ~BIT(28);
+ reg_val |= BIT(29);
ew32(TARC(0), reg_val);
}
}
@@ -3223,14 +3246,6 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
*/
ew32(RXDCTL(0), E1000_RXDCTL_DMA_BURST_ENABLE);
ew32(RXDCTL(1), E1000_RXDCTL_DMA_BURST_ENABLE);
-
- /* override the delay timers for enabling bursting, only if
- * the value was not set by the user via module options
- */
- if (adapter->rx_int_delay == DEFAULT_RDTR)
- adapter->rx_int_delay = BURST_RDTR;
- if (adapter->rx_abs_int_delay == DEFAULT_RADV)
- adapter->rx_abs_int_delay = BURST_RADV;
}
/* set the Receive Delay Timer Register */
@@ -4204,7 +4219,7 @@ static void e1000e_trigger_lsc(struct e1000_adapter *adapter)
struct e1000_hw *hw = &adapter->hw;
if (adapter->msix_entries)
- ew32(ICS, E1000_ICS_OTHER);
+ ew32(ICS, E1000_ICS_LSC | E1000_ICS_OTHER);
else
ew32(ICS, E1000_ICS_LSC);
}
@@ -4808,9 +4823,9 @@ static void e1000e_update_phy_task(struct work_struct *work)
* Need to wait a few seconds after link up to get diagnostic information from
* the phy
**/
-static void e1000_update_phy_info(unsigned long data)
+static void e1000_update_phy_info(struct timer_list *t)
{
- struct e1000_adapter *adapter = (struct e1000_adapter *)data;
+ struct e1000_adapter *adapter = from_timer(adapter, t, phy_info_timer);
if (test_bit(__E1000_DOWN, &adapter->state))
return;
@@ -5074,14 +5089,14 @@ static bool e1000e_has_link(struct e1000_adapter *adapter)
/* get_link_status is set on LSC (link status) interrupt or
* Rx sequence error interrupt. get_link_status will stay
- * false until the check_for_link establishes link
+ * true until the check_for_link establishes link
* for copper adapters ONLY
*/
switch (hw->phy.media_type) {
case e1000_media_type_copper:
if (hw->mac.get_link_status) {
ret_val = hw->mac.ops.check_for_link(hw);
- link_active = !hw->mac.get_link_status;
+ link_active = ret_val > 0;
} else {
link_active = true;
}
@@ -5092,14 +5107,14 @@ static bool e1000e_has_link(struct e1000_adapter *adapter)
break;
case e1000_media_type_internal_serdes:
ret_val = hw->mac.ops.check_for_link(hw);
- link_active = adapter->hw.mac.serdes_has_link;
+ link_active = hw->mac.serdes_has_link;
break;
default:
case e1000_media_type_unknown:
break;
}
- if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) &&
+ if ((ret_val == -E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) &&
(er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) {
/* See e1000_kmrn_lock_loss_workaround_ich8lan() */
e_info("Gigabit has been disabled, downgrading speed\n");
@@ -5144,9 +5159,9 @@ static void e1000e_check_82574_phy_workaround(struct e1000_adapter *adapter)
* e1000_watchdog - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
-static void e1000_watchdog(unsigned long data)
+static void e1000_watchdog(struct timer_list *t)
{
- struct e1000_adapter *adapter = (struct e1000_adapter *)data;
+ struct e1000_adapter *adapter = from_timer(adapter, t, watchdog_timer);
/* Do the rest outside of interrupt context */
schedule_work(&adapter->watchdog_task);
@@ -5614,7 +5629,7 @@ dma_error:
i += tx_ring->count;
i--;
buffer_info = &tx_ring->buffer_info[i];
- e1000_put_txbuf(tx_ring, buffer_info);
+ e1000_put_txbuf(tx_ring, buffer_info, true);
}
return 0;
@@ -7252,13 +7267,8 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_eeprom;
}
- init_timer(&adapter->watchdog_timer);
- adapter->watchdog_timer.function = e1000_watchdog;
- adapter->watchdog_timer.data = (unsigned long)adapter;
-
- init_timer(&adapter->phy_info_timer);
- adapter->phy_info_timer.function = e1000_update_phy_info;
- adapter->phy_info_timer.data = (unsigned long)adapter;
+ timer_setup(&adapter->watchdog_timer, e1000_watchdog, 0);
+ timer_setup(&adapter->phy_info_timer, e1000_update_phy_info, 0);
INIT_WORK(&adapter->reset_task, e1000_reset_task);
INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
@@ -7411,7 +7421,7 @@ static void e1000_remove(struct pci_dev *pdev)
if (adapter->flags & FLAG_HAS_HW_TIMESTAMP) {
cancel_work_sync(&adapter->tx_hwtstamp_work);
if (adapter->tx_hwtstamp_skb) {
- dev_kfree_skb_any(adapter->tx_hwtstamp_skb);
+ dev_consume_skb_any(adapter->tx_hwtstamp_skb);
adapter->tx_hwtstamp_skb = NULL;
}
}
diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c
index 6d8c39abee16..47da51864543 100644
--- a/drivers/net/ethernet/intel/e1000e/param.c
+++ b/drivers/net/ethernet/intel/e1000e/param.c
@@ -73,17 +73,25 @@ E1000_PARAM(TxAbsIntDelay, "Transmit Absolute Interrupt Delay");
/* Receive Interrupt Delay in units of 1.024 microseconds
* hardware will likely hang if you set this to anything but zero.
*
+ * Burst variant is used as default if device has FLAG2_DMA_BURST.
+ *
* Valid Range: 0-65535
*/
E1000_PARAM(RxIntDelay, "Receive Interrupt Delay");
+#define DEFAULT_RDTR 0
+#define BURST_RDTR 0x20
#define MAX_RXDELAY 0xFFFF
#define MIN_RXDELAY 0
/* Receive Absolute Interrupt Delay in units of 1.024 microseconds
*
+ * Burst variant is used as default if device has FLAG2_DMA_BURST.
+ *
* Valid Range: 0-65535
*/
E1000_PARAM(RxAbsIntDelay, "Receive Absolute Interrupt Delay");
+#define DEFAULT_RADV 8
+#define BURST_RADV 0x20
#define MAX_RXABSDELAY 0xFFFF
#define MIN_RXABSDELAY 0
@@ -297,6 +305,9 @@ void e1000e_check_options(struct e1000_adapter *adapter)
.max = MAX_RXDELAY } }
};
+ if (adapter->flags2 & FLAG2_DMA_BURST)
+ opt.def = BURST_RDTR;
+
if (num_RxIntDelay > bd) {
adapter->rx_int_delay = RxIntDelay[bd];
e1000_validate_option(&adapter->rx_int_delay, &opt,
@@ -307,7 +318,7 @@ void e1000e_check_options(struct e1000_adapter *adapter)
}
/* Receive Absolute Interrupt Delay */
{
- static const struct e1000_option opt = {
+ static struct e1000_option opt = {
.type = range_option,
.name = "Receive Absolute Interrupt Delay",
.err = "using default of "
@@ -317,6 +328,9 @@ void e1000e_check_options(struct e1000_adapter *adapter)
.max = MAX_RXABSDELAY } }
};
+ if (adapter->flags2 & FLAG2_DMA_BURST)
+ opt.def = BURST_RADV;
+
if (num_RxAbsIntDelay > bd) {
adapter->rx_abs_int_delay = RxAbsIntDelay[bd];
e1000_validate_option(&adapter->rx_abs_int_delay, &opt,
diff --git a/drivers/net/ethernet/intel/e1000e/phy.c b/drivers/net/ethernet/intel/e1000e/phy.c
index d78d47b41a71..86ff0969efb6 100644
--- a/drivers/net/ethernet/intel/e1000e/phy.c
+++ b/drivers/net/ethernet/intel/e1000e/phy.c
@@ -1744,6 +1744,7 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
s32 ret_val = 0;
u16 i, phy_status;
+ *success = false;
for (i = 0; i < iterations; i++) {
/* Some PHYs require the MII_BMSR register to be read
* twice due to the link bit being sticky. No harm doing
@@ -1763,16 +1764,16 @@ s32 e1000e_phy_has_link_generic(struct e1000_hw *hw, u32 iterations,
ret_val = e1e_rphy(hw, MII_BMSR, &phy_status);
if (ret_val)
break;
- if (phy_status & BMSR_LSTATUS)
+ if (phy_status & BMSR_LSTATUS) {
+ *success = true;
break;
+ }
if (usec_interval >= 1000)
msleep(usec_interval / 1000);
else
udelay(usec_interval);
}
- *success = (i < iterations);
-
return ret_val;
}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h
index 689c413b7782..46973fb234c5 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k.h
@@ -248,6 +248,29 @@ struct fm10k_udp_port {
__be16 port;
};
+enum fm10k_macvlan_request_type {
+ FM10K_UC_MAC_REQUEST,
+ FM10K_MC_MAC_REQUEST,
+ FM10K_VLAN_REQUEST
+};
+
+struct fm10k_macvlan_request {
+ enum fm10k_macvlan_request_type type;
+ struct list_head list;
+ union {
+ struct fm10k_mac_request {
+ u8 addr[ETH_ALEN];
+ u16 glort;
+ u16 vid;
+ } mac;
+ struct fm10k_vlan_request {
+ u32 vid;
+ u8 vsi;
+ } vlan;
+ };
+ bool set;
+};
+
/* one work queue for entire driver */
extern struct workqueue_struct *fm10k_workqueue;
@@ -270,11 +293,15 @@ enum fm10k_flags_t {
enum fm10k_state_t {
__FM10K_RESETTING,
+ __FM10K_RESET_DETACHED,
+ __FM10K_RESET_SUSPENDED,
__FM10K_DOWN,
__FM10K_SERVICE_SCHED,
__FM10K_SERVICE_REQUEST,
__FM10K_SERVICE_DISABLE,
- __FM10K_MBX_LOCK,
+ __FM10K_MACVLAN_SCHED,
+ __FM10K_MACVLAN_REQUEST,
+ __FM10K_MACVLAN_DISABLE,
__FM10K_LINK_DOWN,
__FM10K_UPDATING_STATS,
/* This value must be last and determines the BITMAP size */
@@ -344,6 +371,8 @@ struct fm10k_intfc {
struct fm10k_hw_stats stats;
struct fm10k_hw hw;
+ /* Mailbox lock */
+ spinlock_t mbx_lock;
u32 __iomem *uc_addr;
u32 __iomem *sw_addr;
u16 msg_enable;
@@ -365,6 +394,12 @@ struct fm10k_intfc {
struct list_head vxlan_port;
struct list_head geneve_port;
+ /* MAC/VLAN update queue */
+ struct list_head macvlan_requests;
+ struct delayed_work macvlan_task;
+ /* MAC/VLAN update queue lock */
+ spinlock_t macvlan_lock;
+
#ifdef CONFIG_DEBUG_FS
struct dentry *dbg_intfc;
#endif /* CONFIG_DEBUG_FS */
@@ -384,23 +419,17 @@ struct fm10k_intfc {
static inline void fm10k_mbx_lock(struct fm10k_intfc *interface)
{
- /* busy loop if we cannot obtain the lock as some calls
- * such as ndo_set_rx_mode may be made in atomic context
- */
- while (test_and_set_bit(__FM10K_MBX_LOCK, interface->state))
- udelay(20);
+ spin_lock(&interface->mbx_lock);
}
static inline void fm10k_mbx_unlock(struct fm10k_intfc *interface)
{
- /* flush memory to make sure state is correct */
- smp_mb__before_atomic();
- clear_bit(__FM10K_MBX_LOCK, interface->state);
+ spin_unlock(&interface->mbx_lock);
}
static inline int fm10k_mbx_trylock(struct fm10k_intfc *interface)
{
- return !test_and_set_bit(__FM10K_MBX_LOCK, interface->state);
+ return spin_trylock(&interface->mbx_lock);
}
/* fm10k_test_staterr - test bits in Rx descriptor status and error fields */
@@ -490,6 +519,7 @@ void fm10k_up(struct fm10k_intfc *interface);
void fm10k_down(struct fm10k_intfc *interface);
void fm10k_update_stats(struct fm10k_intfc *interface);
void fm10k_service_event_schedule(struct fm10k_intfc *interface);
+void fm10k_macvlan_schedule(struct fm10k_intfc *interface);
void fm10k_update_rx_drop_en(struct fm10k_intfc *interface);
#ifdef CONFIG_NET_POLL_CONTROLLER
void fm10k_netpoll(struct net_device *netdev);
@@ -510,6 +540,12 @@ void fm10k_reset_rx_state(struct fm10k_intfc *);
int fm10k_setup_tc(struct net_device *dev, u8 tc);
int fm10k_open(struct net_device *netdev);
int fm10k_close(struct net_device *netdev);
+int fm10k_queue_vlan_request(struct fm10k_intfc *interface, u32 vid,
+ u8 vsi, bool set);
+int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort,
+ const unsigned char *addr, u16 vid, bool set);
+void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface,
+ u16 glort, bool vlans);
/* Ethtool */
void fm10k_set_ethtool_ops(struct net_device *dev);
@@ -526,8 +562,8 @@ s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid);
int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac);
int fm10k_ndo_set_vf_vlan(struct net_device *netdev,
int vf_idx, u16 vid, u8 qos, __be16 vlan_proto);
-int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate,
- int unused);
+int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
+ int __always_unused min_rate, int max_rate);
int fm10k_ndo_get_vf_config(struct net_device *netdev,
int vf_idx, struct ifla_vf_info *ivi);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
index 62a6ad9b3eed..736a9f087bc9 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -517,8 +517,8 @@ s32 fm10k_get_host_state_generic(struct fm10k_hw *hw, bool *host_ready)
goto out;
}
- /* verify Mailbox is still valid */
- if (!mbx->ops.tx_ready(mbx, FM10K_VFMBX_MSG_MTU))
+ /* verify Mailbox is still open */
+ if (mbx->state != FM10K_STATE_OPEN)
goto out;
/* interface cannot receive traffic without logical ports */
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
index 5116fd043630..14df09e2d964 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_debugfs.c
@@ -52,9 +52,9 @@ static void fm10k_dbg_desc_seq_stop(struct seq_file __always_unused *s,
static void fm10k_dbg_desc_break(struct seq_file *s, int i)
{
while (i--)
- seq_puts(s, "-");
+ seq_putc(s, '-');
- seq_puts(s, "\n");
+ seq_putc(s, '\n');
}
static int fm10k_dbg_tx_desc_seq_show(struct seq_file *s, void *v)
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
index 5f4dac0d36ef..ea3ab24265ee 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -35,10 +35,133 @@ static s32 fm10k_iov_msg_error(struct fm10k_hw *hw, u32 **results,
return fm10k_tlv_msg_error(hw, results, mbx);
}
+/**
+ * fm10k_iov_msg_queue_mac_vlan - Message handler for MAC/VLAN request from VF
+ * @hw: Pointer to hardware structure
+ * @results: Pointer array to message, results[0] is pointer to message
+ * @mbx: Pointer to mailbox information structure
+ *
+ * This function is a custom handler for MAC/VLAN requests from the VF. The
+ * assumption is that it is acceptable to directly hand off the message from
+ * the VF to the PF's switch manager. However, we use a MAC/VLAN message
+ * queue to avoid overloading the mailbox when a large number of requests
+ * come in.
+ **/
+static s32 fm10k_iov_msg_queue_mac_vlan(struct fm10k_hw *hw, u32 **results,
+ struct fm10k_mbx_info *mbx)
+{
+ struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
+ struct fm10k_intfc *interface = hw->back;
+ u8 mac[ETH_ALEN];
+ u32 *result;
+ int err = 0;
+ bool set;
+ u16 vlan;
+ u32 vid;
+
+ /* we shouldn't be updating rules on a disabled interface */
+ if (!FM10K_VF_FLAG_ENABLED(vf_info))
+ err = FM10K_ERR_PARAM;
+
+ if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) {
+ result = results[FM10K_MAC_VLAN_MSG_VLAN];
+
+ /* record VLAN id requested */
+ err = fm10k_tlv_attr_get_u32(result, &vid);
+ if (err)
+ return err;
+
+ set = !(vid & FM10K_VLAN_CLEAR);
+ vid &= ~FM10K_VLAN_CLEAR;
+
+ /* if the length field has been set, this is a multi-bit
+ * update request. For multi-bit requests, simply disallow
+ * them when the pf_vid has been set. In this case, the PF
+ * should have already cleared the VLAN_TABLE, and if we
+ * allowed them, it could allow a rogue VF to receive traffic
+ * on a VLAN it was not assigned. In the single-bit case, we
+ * need to modify requests for VLAN 0 to use the default PF or
+ * SW vid when assigned.
+ */
+
+ if (vid >> 16) {
+ /* prevent multi-bit requests when PF has
+ * administratively set the VLAN for this VF
+ */
+ if (vf_info->pf_vid)
+ return FM10K_ERR_PARAM;
+ } else {
+ err = fm10k_iov_select_vid(vf_info, (u16)vid);
+ if (err < 0)
+ return err;
+
+ vid = err;
+ }
+
+ /* update VSI info for VF in regards to VLAN table */
+ err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
+ }
+
+ if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) {
+ result = results[FM10K_MAC_VLAN_MSG_MAC];
+
+ /* record unicast MAC address requested */
+ err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
+ if (err)
+ return err;
+
+ /* block attempts to set MAC for a locked device */
+ if (is_valid_ether_addr(vf_info->mac) &&
+ !ether_addr_equal(mac, vf_info->mac))
+ return FM10K_ERR_PARAM;
+
+ set = !(vlan & FM10K_VLAN_CLEAR);
+ vlan &= ~FM10K_VLAN_CLEAR;
+
+ err = fm10k_iov_select_vid(vf_info, vlan);
+ if (err < 0)
+ return err;
+
+ vlan = (u16)err;
+
+ /* Add this request to the MAC/VLAN queue */
+ err = fm10k_queue_mac_request(interface, vf_info->glort,
+ mac, vlan, set);
+ }
+
+ if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) {
+ result = results[FM10K_MAC_VLAN_MSG_MULTICAST];
+
+ /* record multicast MAC address requested */
+ err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
+ if (err)
+ return err;
+
+ /* verify that the VF is allowed to request multicast */
+ if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED))
+ return FM10K_ERR_PARAM;
+
+ set = !(vlan & FM10K_VLAN_CLEAR);
+ vlan &= ~FM10K_VLAN_CLEAR;
+
+ err = fm10k_iov_select_vid(vf_info, vlan);
+ if (err < 0)
+ return err;
+
+ vlan = (u16)err;
+
+ /* Add this request to the MAC/VLAN queue */
+ err = fm10k_queue_mac_request(interface, vf_info->glort,
+ mac, vlan, set);
+ }
+
+ return err;
+}
+
static const struct fm10k_msg_data iov_mbx_data[] = {
FM10K_TLV_MSG_TEST_HANDLER(fm10k_tlv_msg_test),
FM10K_VF_MSG_MSIX_HANDLER(fm10k_iov_msg_msix_pf),
- FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_mac_vlan_pf),
+ FM10K_VF_MSG_MAC_VLAN_HANDLER(fm10k_iov_msg_queue_mac_vlan),
FM10K_VF_MSG_LPORT_STATE_HANDLER(fm10k_iov_msg_lport_state_pf),
FM10K_TLV_MSG_ERROR_HANDLER(fm10k_iov_msg_error),
};
@@ -66,25 +189,21 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface)
goto read_unlock;
/* read VFLRE to determine if any VFs have been reset */
- do {
- vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(0));
- vflre <<= 32;
- vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(1));
- vflre = (vflre << 32) | (vflre >> 32);
- vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0));
+ vflre = fm10k_read_reg(hw, FM10K_PFVFLRE(1));
+ vflre <<= 32;
+ vflre |= fm10k_read_reg(hw, FM10K_PFVFLRE(0));
- i = iov_data->num_vfs;
+ i = iov_data->num_vfs;
- for (vflre <<= 64 - i; vflre && i--; vflre += vflre) {
- struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
+ for (vflre <<= 64 - i; vflre && i--; vflre += vflre) {
+ struct fm10k_vf_info *vf_info = &iov_data->vf_info[i];
- if (vflre >= 0)
- continue;
+ if (vflre >= 0)
+ continue;
- hw->iov.ops.reset_resources(hw, vf_info);
- vf_info->mbx.ops.connect(hw, &vf_info->mbx);
- }
- } while (i != iov_data->num_vfs);
+ hw->iov.ops.reset_resources(hw, vf_info);
+ vf_info->mbx.ops.connect(hw, &vf_info->mbx);
+ }
read_unlock:
rcu_read_unlock();
@@ -126,9 +245,14 @@ process_mbx:
struct fm10k_mbx_info *mbx = &vf_info->mbx;
u16 glort = vf_info->glort;
+ /* process the SM mailbox first to drain outgoing messages */
+ hw->mbx.ops.process(hw, &hw->mbx);
+
/* verify port mapping is valid, if not reset port */
- if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort))
+ if (vf_info->vf_flags && !fm10k_glort_valid_pf(hw, glort)) {
hw->iov.ops.reset_lport(hw, vf_info);
+ fm10k_clear_macvlan_queue(interface, glort, false);
+ }
/* reset VFs that have mailbox timed out */
if (!mbx->timeout) {
@@ -140,6 +264,10 @@ process_mbx:
if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU)) {
/* keep track of how many times this occurs */
interface->hw_sm_mbx_full++;
+
+ /* make sure we try again momentarily */
+ fm10k_service_event_schedule(interface);
+
break;
}
@@ -187,6 +315,7 @@ void fm10k_iov_suspend(struct pci_dev *pdev)
hw->iov.ops.reset_resources(hw, vf_info);
hw->iov.ops.reset_lport(hw, vf_info);
+ fm10k_clear_macvlan_queue(interface, vf_info->glort, false);
}
}
@@ -411,6 +540,8 @@ static inline void fm10k_reset_vf_info(struct fm10k_intfc *interface,
/* disable LPORT for this VF which clears switch rules */
hw->iov.ops.reset_lport(hw, vf_info);
+ fm10k_clear_macvlan_queue(interface, vf_info->glort, false);
+
/* assign new MAC+VLAN for this VF */
hw->iov.ops.assign_default_mac_vlan(hw, vf_info);
@@ -482,7 +613,7 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid,
}
int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
- int __always_unused unused, int rate)
+ int __always_unused min_rate, int max_rate)
{
struct fm10k_intfc *interface = netdev_priv(netdev);
struct fm10k_iov_data *iov_data = interface->iov_data;
@@ -493,14 +624,15 @@ int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx,
return -EINVAL;
/* rate limit cannot be less than 10Mbs or greater than link speed */
- if (rate && ((rate < FM10K_VF_TC_MIN) || rate > FM10K_VF_TC_MAX))
+ if (max_rate &&
+ (max_rate < FM10K_VF_TC_MIN || max_rate > FM10K_VF_TC_MAX))
return -EINVAL;
/* store values */
- iov_data->vf_info[vf_idx].rate = rate;
+ iov_data->vf_info[vf_idx].rate = max_rate;
/* update hardware configuration */
- hw->iov.ops.configure_tc(hw, vf_idx, rate);
+ hw->iov.ops.configure_tc(hw, vf_idx, max_rate);
return 0;
}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 9dffaba85ae6..dbd69310f263 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -28,7 +28,7 @@
#include "fm10k.h"
-#define DRV_VERSION "0.21.7-k"
+#define DRV_VERSION "0.22.1-k"
#define DRV_SUMMARY "Intel(R) Ethernet Switch Host Interface Driver"
const char fm10k_driver_version[] = DRV_VERSION;
char fm10k_driver_name[] = "fm10k";
@@ -806,9 +806,10 @@ static int fm10k_tso(struct fm10k_ring *tx_ring,
tx_desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size);
return 1;
+
err_vxlan:
tx_ring->netdev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
- if (!net_ratelimit())
+ if (net_ratelimit())
netdev_err(tx_ring->netdev,
"TSO requested for unsupported tunnel, disabling offload\n");
return -1;
@@ -876,6 +877,7 @@ static void fm10k_tx_csum(struct fm10k_ring *tx_ring,
case IPPROTO_GRE:
if (skb->encapsulation)
break;
+ /* fall through */
default:
if (unlikely(net_ratelimit())) {
dev_warn(tx_ring->dev,
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
index 334088a101c3..244d3ad58ca7 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_mbx.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -1586,7 +1586,7 @@ s32 fm10k_pfvf_mbx_init(struct fm10k_hw *hw, struct fm10k_mbx_info *mbx,
mbx->mbmem_reg = FM10K_MBMEM_VF(id, 0);
break;
}
- /* fallthough */
+ /* fall through */
default:
return FM10K_MBX_ERR_NO_MBX;
}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index e69d49d91d67..81e4425f0529 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -643,9 +643,13 @@ int fm10k_close(struct net_device *netdev)
static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
{
struct fm10k_intfc *interface = netdev_priv(dev);
+ int num_tx_queues = READ_ONCE(interface->num_tx_queues);
unsigned int r_idx = skb->queue_mapping;
int err;
+ if (!num_tx_queues)
+ return NETDEV_TX_BUSY;
+
if ((skb->protocol == htons(ETH_P_8021Q)) &&
!skb_vlan_tag_present(skb)) {
/* FM10K only supports hardware tagging, any tags in frame
@@ -698,8 +702,8 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
__skb_put(skb, pad_len);
}
- if (r_idx >= interface->num_tx_queues)
- r_idx %= interface->num_tx_queues;
+ if (r_idx >= num_tx_queues)
+ r_idx %= num_tx_queues;
err = fm10k_xmit_frame_ring(skb, interface->tx_ring[r_idx]);
@@ -754,11 +758,132 @@ static bool fm10k_host_mbx_ready(struct fm10k_intfc *interface)
return (hw->mac.type == fm10k_mac_vf || interface->host_ready);
}
+/**
+ * fm10k_queue_vlan_request - Queue a VLAN update request
+ * @interface: the fm10k interface structure
+ * @vid: the VLAN vid
+ * @vsi: VSI index number
+ * @set: whether to set or clear
+ *
+ * This function queues up a VLAN update. For VFs, this must be sent to the
+ * managing PF over the mailbox. For PFs, we'll use the same handling so that
+ * it's similar to the VF. This avoids storming the PF<->VF mailbox with too
+ * many VLAN updates during reset.
+ */
+int fm10k_queue_vlan_request(struct fm10k_intfc *interface,
+ u32 vid, u8 vsi, bool set)
+{
+ struct fm10k_macvlan_request *request;
+ unsigned long flags;
+
+ /* This must be atomic since we may be called while the netdev
+ * addr_list_lock is held
+ */
+ request = kzalloc(sizeof(*request), GFP_ATOMIC);
+ if (!request)
+ return -ENOMEM;
+
+ request->type = FM10K_VLAN_REQUEST;
+ request->vlan.vid = vid;
+ request->vlan.vsi = vsi;
+ request->set = set;
+
+ spin_lock_irqsave(&interface->macvlan_lock, flags);
+ list_add_tail(&request->list, &interface->macvlan_requests);
+ spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+ fm10k_macvlan_schedule(interface);
+
+ return 0;
+}
+
+/**
+ * fm10k_queue_mac_request - Queue a MAC update request
+ * @interface: the fm10k interface structure
+ * @glort: the target glort for this update
+ * @addr: the address to update
+ * @vid: the vid to update
+ * @sync: whether to add or remove
+ *
+ * This function queues up a MAC request for sending to the switch manager.
+ * A separate thread monitors the queue and sends updates to the switch
+ * manager. Return 0 on success, and negative error code on failure.
+ **/
+int fm10k_queue_mac_request(struct fm10k_intfc *interface, u16 glort,
+ const unsigned char *addr, u16 vid, bool set)
+{
+ struct fm10k_macvlan_request *request;
+ unsigned long flags;
+
+ /* This must be atomic since we may be called while the netdev
+ * addr_list_lock is held
+ */
+ request = kzalloc(sizeof(*request), GFP_ATOMIC);
+ if (!request)
+ return -ENOMEM;
+
+ if (is_multicast_ether_addr(addr))
+ request->type = FM10K_MC_MAC_REQUEST;
+ else
+ request->type = FM10K_UC_MAC_REQUEST;
+
+ ether_addr_copy(request->mac.addr, addr);
+ request->mac.glort = glort;
+ request->mac.vid = vid;
+ request->set = set;
+
+ spin_lock_irqsave(&interface->macvlan_lock, flags);
+ list_add_tail(&request->list, &interface->macvlan_requests);
+ spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+ fm10k_macvlan_schedule(interface);
+
+ return 0;
+}
+
+/**
+ * fm10k_clear_macvlan_queue - Cancel pending updates for a given glort
+ * @interface: the fm10k interface structure
+ * @glort: the target glort to clear
+ * @vlans: true to clear VLAN messages, false to ignore them
+ *
+ * Cancel any outstanding MAC/VLAN requests for a given glort. This is
+ * expected to be called when a logical port goes down.
+ **/
+void fm10k_clear_macvlan_queue(struct fm10k_intfc *interface,
+ u16 glort, bool vlans)
+
+{
+ struct fm10k_macvlan_request *r, *tmp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&interface->macvlan_lock, flags);
+
+ /* Free any outstanding MAC/VLAN requests for this interface */
+ list_for_each_entry_safe(r, tmp, &interface->macvlan_requests, list) {
+ switch (r->type) {
+ case FM10K_MC_MAC_REQUEST:
+ case FM10K_UC_MAC_REQUEST:
+ /* Don't free requests for other interfaces */
+ if (r->mac.glort != glort)
+ break;
+ /* fall through */
+ case FM10K_VLAN_REQUEST:
+ if (vlans) {
+ list_del(&r->list);
+ kfree(r);
+ }
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+}
+
static int fm10k_uc_vlan_unsync(struct net_device *netdev,
const unsigned char *uc_addr)
{
struct fm10k_intfc *interface = netdev_priv(netdev);
- struct fm10k_hw *hw = &interface->hw;
u16 glort = interface->glort;
u16 vid = interface->vid;
bool set = !!(vid / VLAN_N_VID);
@@ -767,10 +892,7 @@ static int fm10k_uc_vlan_unsync(struct net_device *netdev,
/* drop any leading bits on the VLAN ID */
vid &= VLAN_N_VID - 1;
- if (fm10k_host_mbx_ready(interface))
- err = hw->mac.ops.update_uc_addr(hw, glort, uc_addr,
- vid, set, 0);
-
+ err = fm10k_queue_mac_request(interface, glort, uc_addr, vid, set);
if (err)
return err;
@@ -782,7 +904,6 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev,
const unsigned char *mc_addr)
{
struct fm10k_intfc *interface = netdev_priv(netdev);
- struct fm10k_hw *hw = &interface->hw;
u16 glort = interface->glort;
u16 vid = interface->vid;
bool set = !!(vid / VLAN_N_VID);
@@ -791,9 +912,7 @@ static int fm10k_mc_vlan_unsync(struct net_device *netdev,
/* drop any leading bits on the VLAN ID */
vid &= VLAN_N_VID - 1;
- if (fm10k_host_mbx_ready(interface))
- err = hw->mac.ops.update_mc_addr(hw, glort, mc_addr, vid, set);
-
+ err = fm10k_queue_mac_request(interface, glort, mc_addr, vid, set);
if (err)
return err;
@@ -851,18 +970,14 @@ static int fm10k_update_vid(struct net_device *netdev, u16 vid, bool set)
/* only need to update the VLAN if not in promiscuous mode */
if (!(netdev->flags & IFF_PROMISC)) {
- err = hw->mac.ops.update_vlan(hw, vid, 0, set);
+ err = fm10k_queue_vlan_request(interface, vid, 0, set);
if (err)
goto err_out;
}
- /* update our base MAC address if host's mailbox is ready */
- if (fm10k_host_mbx_ready(interface))
- err = hw->mac.ops.update_uc_addr(hw, interface->glort,
- hw->mac.addr, vid, set, 0);
- else
- err = -EHOSTDOWN;
-
+ /* Update our base MAC address */
+ err = fm10k_queue_mac_request(interface, interface->glort,
+ hw->mac.addr, vid, set);
if (err)
goto err_out;
@@ -906,7 +1021,6 @@ static u16 fm10k_find_next_vlan(struct fm10k_intfc *interface, u16 vid)
static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface)
{
- struct fm10k_hw *hw = &interface->hw;
u32 vid, prev_vid;
/* loop through and find any gaps in the table */
@@ -918,7 +1032,7 @@ static void fm10k_clear_unused_vlans(struct fm10k_intfc *interface)
/* send request to clear multiple bits at a time */
prev_vid += (vid - prev_vid - 1) << FM10K_VLAN_LENGTH_SHIFT;
- hw->mac.ops.update_vlan(hw, prev_vid, 0, false);
+ fm10k_queue_vlan_request(interface, prev_vid, 0, false);
}
}
@@ -933,15 +1047,11 @@ static int __fm10k_uc_sync(struct net_device *dev,
if (!is_valid_ether_addr(addr))
return -EADDRNOTAVAIL;
- /* update table with current entries if host's mailbox is ready */
- if (!fm10k_host_mbx_ready(interface))
- return -EHOSTDOWN;
-
for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
- err = hw->mac.ops.update_uc_addr(hw, glort, addr,
- vid, sync, 0);
+ err = fm10k_queue_mac_request(interface, glort,
+ addr, vid, sync);
if (err)
return err;
}
@@ -998,15 +1108,18 @@ static int __fm10k_mc_sync(struct net_device *dev,
struct fm10k_intfc *interface = netdev_priv(dev);
struct fm10k_hw *hw = &interface->hw;
u16 vid, glort = interface->glort;
+ s32 err;
- /* update table with current entries if host's mailbox is ready */
- if (!fm10k_host_mbx_ready(interface))
- return 0;
+ if (!is_multicast_ether_addr(addr))
+ return -EADDRNOTAVAIL;
for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
- hw->mac.ops.update_mc_addr(hw, glort, addr, vid, sync);
+ err = fm10k_queue_mac_request(interface, glort,
+ addr, vid, sync);
+ if (err)
+ return err;
}
return 0;
@@ -1046,7 +1159,8 @@ static void fm10k_set_rx_mode(struct net_device *dev)
if (interface->xcast_mode != xcast_mode) {
/* update VLAN table */
if (xcast_mode == FM10K_XCAST_MODE_PROMISC)
- hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0, true);
+ fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL,
+ 0, true);
if (interface->xcast_mode == FM10K_XCAST_MODE_PROMISC)
fm10k_clear_unused_vlans(interface);
@@ -1094,22 +1208,20 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface)
interface->glort_count, true);
/* update VLAN table */
- hw->mac.ops.update_vlan(hw, FM10K_VLAN_ALL, 0,
- xcast_mode == FM10K_XCAST_MODE_PROMISC);
+ fm10k_queue_vlan_request(interface, FM10K_VLAN_ALL, 0,
+ xcast_mode == FM10K_XCAST_MODE_PROMISC);
/* Add filter for VLAN 0 */
- hw->mac.ops.update_vlan(hw, 0, 0, true);
+ fm10k_queue_vlan_request(interface, 0, 0, true);
/* update table with current entries */
for (vid = hw->mac.default_vid ? fm10k_find_next_vlan(interface, 0) : 1;
vid < VLAN_N_VID;
vid = fm10k_find_next_vlan(interface, vid)) {
- hw->mac.ops.update_vlan(hw, vid, 0, true);
+ fm10k_queue_vlan_request(interface, vid, 0, true);
- /* Update unicast entries if host's mailbox is ready */
- if (fm10k_host_mbx_ready(interface))
- hw->mac.ops.update_uc_addr(hw, glort, hw->mac.addr,
- vid, true, 0);
+ fm10k_queue_mac_request(interface, glort,
+ hw->mac.addr, vid, true);
}
/* update xcast mode before synchronizing addresses if host's mailbox
@@ -1136,6 +1248,13 @@ void fm10k_reset_rx_state(struct fm10k_intfc *interface)
struct net_device *netdev = interface->netdev;
struct fm10k_hw *hw = &interface->hw;
+ /* Wait for MAC/VLAN work to finish */
+ while (test_bit(__FM10K_MACVLAN_SCHED, interface->state))
+ usleep_range(1000, 2000);
+
+ /* Cancel pending MAC/VLAN requests */
+ fm10k_clear_macvlan_queue(interface, interface->glort, true);
+
fm10k_mbx_lock(interface);
/* clear the logical port state on lower device if host's mailbox is
@@ -1370,8 +1489,8 @@ static void *fm10k_dfwd_add_station(struct net_device *dev,
if (fm10k_host_mbx_ready(interface)) {
hw->mac.ops.update_xcast_mode(hw, glort,
FM10K_XCAST_MODE_MULTI);
- hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr,
- 0, true, 0);
+ fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+ 0, true);
}
fm10k_mbx_unlock(interface);
@@ -1410,8 +1529,8 @@ static void fm10k_dfwd_del_station(struct net_device *dev, void *priv)
if (fm10k_host_mbx_ready(interface)) {
hw->mac.ops.update_xcast_mode(hw, glort,
FM10K_XCAST_MODE_NONE);
- hw->mac.ops.update_uc_addr(hw, glort, sdev->dev_addr,
- 0, false, 0);
+ fm10k_queue_mac_request(interface, glort, sdev->dev_addr,
+ 0, false);
}
fm10k_mbx_unlock(interface);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
index 63784576ae8b..7f605221a686 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c
@@ -91,6 +91,76 @@ static int fm10k_hw_ready(struct fm10k_intfc *interface)
return FM10K_REMOVED(hw->hw_addr) ? -ENODEV : 0;
}
+/**
+ * fm10k_macvlan_schedule - Schedule MAC/VLAN queue task
+ * @interface: fm10k private interface structure
+ *
+ * Schedule the MAC/VLAN queue monitor task. If the MAC/VLAN task cannot be
+ * started immediately, request that it be restarted when possible.
+ */
+void fm10k_macvlan_schedule(struct fm10k_intfc *interface)
+{
+ /* Avoid processing the MAC/VLAN queue when the service task is
+ * disabled, or when we're resetting the device.
+ */
+ if (!test_bit(__FM10K_MACVLAN_DISABLE, interface->state) &&
+ !test_and_set_bit(__FM10K_MACVLAN_SCHED, interface->state)) {
+ clear_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+ /* We delay the actual start of execution in order to allow
+ * multiple MAC/VLAN updates to accumulate before handling
+ * them, and to allow some time to let the mailbox drain
+ * between runs.
+ */
+ queue_delayed_work(fm10k_workqueue,
+ &interface->macvlan_task, 10);
+ } else {
+ set_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+ }
+}
+
+/**
+ * fm10k_stop_macvlan_task - Stop the MAC/VLAN queue monitor
+ * @interface: fm10k private interface structure
+ *
+ * Wait until the MAC/VLAN queue task has stopped, and cancel any future
+ * requests.
+ */
+static void fm10k_stop_macvlan_task(struct fm10k_intfc *interface)
+{
+ /* Disable the MAC/VLAN work item */
+ set_bit(__FM10K_MACVLAN_DISABLE, interface->state);
+
+ /* Make sure we waited until any current invocations have stopped */
+ cancel_delayed_work_sync(&interface->macvlan_task);
+
+ /* We set the __FM10K_MACVLAN_SCHED bit when we schedule the task.
+ * However, it may not be unset of the MAC/VLAN task never actually
+ * got a chance to run. Since we've canceled the task here, and it
+ * cannot be rescheuled right now, we need to ensure the scheduled bit
+ * gets unset.
+ */
+ clear_bit(__FM10K_MACVLAN_SCHED, interface->state);
+}
+
+/**
+ * fm10k_resume_macvlan_task - Restart the MAC/VLAN queue monitor
+ * @interface: fm10k private interface structure
+ *
+ * Clear the __FM10K_MACVLAN_DISABLE bit and, if a request occurred, schedule
+ * the MAC/VLAN work monitor.
+ */
+static void fm10k_resume_macvlan_task(struct fm10k_intfc *interface)
+{
+ /* Re-enable the MAC/VLAN work item */
+ clear_bit(__FM10K_MACVLAN_DISABLE, interface->state);
+
+ /* We might have received a MAC/VLAN request while disabled. If so,
+ * kick off the queue now.
+ */
+ if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state))
+ fm10k_macvlan_schedule(interface);
+}
+
void fm10k_service_event_schedule(struct fm10k_intfc *interface)
{
if (!test_bit(__FM10K_SERVICE_DISABLE, interface->state) &&
@@ -118,13 +188,35 @@ static void fm10k_service_event_complete(struct fm10k_intfc *interface)
fm10k_service_event_schedule(interface);
}
+static void fm10k_stop_service_event(struct fm10k_intfc *interface)
+{
+ set_bit(__FM10K_SERVICE_DISABLE, interface->state);
+ cancel_work_sync(&interface->service_task);
+
+ /* It's possible that cancel_work_sync stopped the service task from
+ * running before it could actually start. In this case the
+ * __FM10K_SERVICE_SCHED bit will never be cleared. Since we know that
+ * the service task cannot be running at this point, we need to clear
+ * the scheduled bit, as otherwise the service task may never be
+ * restarted.
+ */
+ clear_bit(__FM10K_SERVICE_SCHED, interface->state);
+}
+
+static void fm10k_start_service_event(struct fm10k_intfc *interface)
+{
+ clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
+ fm10k_service_event_schedule(interface);
+}
+
/**
* fm10k_service_timer - Timer Call-back
* @data: pointer to interface cast into an unsigned long
**/
-static void fm10k_service_timer(unsigned long data)
+static void fm10k_service_timer(struct timer_list *t)
{
- struct fm10k_intfc *interface = (struct fm10k_intfc *)data;
+ struct fm10k_intfc *interface = from_timer(interface, t,
+ service_timer);
/* Reset the timer */
mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
@@ -132,36 +224,15 @@ static void fm10k_service_timer(unsigned long data)
fm10k_service_event_schedule(interface);
}
-static void fm10k_detach_subtask(struct fm10k_intfc *interface)
-{
- struct net_device *netdev = interface->netdev;
- u32 __iomem *hw_addr;
- u32 value;
-
- /* do nothing if device is still present or hw_addr is set */
- if (netif_device_present(netdev) || interface->hw.hw_addr)
- return;
-
- /* check the real address space to see if we've recovered */
- hw_addr = READ_ONCE(interface->uc_addr);
- value = readl(hw_addr);
- if (~value) {
- interface->hw.hw_addr = interface->uc_addr;
- netif_device_attach(netdev);
- set_bit(FM10K_FLAG_RESET_REQUESTED, interface->flags);
- netdev_warn(netdev, "PCIe link restored, device now attached\n");
- return;
- }
-
- rtnl_lock();
-
- if (netif_running(netdev))
- dev_close(netdev);
-
- rtnl_unlock();
-}
-
-static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)
+/**
+ * fm10k_prepare_for_reset - Prepare the driver and device for a pending reset
+ * @interface: fm10k private data structure
+ *
+ * This function prepares for a device reset by shutting as much down as we
+ * can. It does nothing and returns false if __FM10K_RESETTING was already set
+ * prior to calling this function. It returns true if it actually did work.
+ */
+static bool fm10k_prepare_for_reset(struct fm10k_intfc *interface)
{
struct net_device *netdev = interface->netdev;
@@ -170,8 +241,15 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)
/* put off any impending NetWatchDogTimeout */
netif_trans_update(netdev);
- while (test_and_set_bit(__FM10K_RESETTING, interface->state))
- usleep_range(1000, 2000);
+ /* Nothing to do if a reset is already in progress */
+ if (test_and_set_bit(__FM10K_RESETTING, interface->state))
+ return false;
+
+ /* As the MAC/VLAN task will be accessing registers it must not be
+ * running while we reset. Although the task will not be scheduled
+ * once we start resetting it may already be running
+ */
+ fm10k_stop_macvlan_task(interface);
rtnl_lock();
@@ -189,6 +267,8 @@ static void fm10k_prepare_for_reset(struct fm10k_intfc *interface)
interface->last_reset = jiffies + (10 * HZ);
rtnl_unlock();
+
+ return true;
}
static int fm10k_handle_reset(struct fm10k_intfc *interface)
@@ -197,6 +277,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface)
struct fm10k_hw *hw = &interface->hw;
int err;
+ WARN_ON(!test_bit(__FM10K_RESETTING, interface->state));
+
rtnl_lock();
pci_set_master(interface->pdev);
@@ -253,6 +335,8 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface)
rtnl_unlock();
+ fm10k_resume_macvlan_task(interface);
+
clear_bit(__FM10K_RESETTING, interface->state);
return err;
@@ -270,27 +354,80 @@ reinit_err:
return err;
}
-static void fm10k_reinit(struct fm10k_intfc *interface)
+static void fm10k_detach_subtask(struct fm10k_intfc *interface)
{
+ struct net_device *netdev = interface->netdev;
+ u32 __iomem *hw_addr;
+ u32 value;
int err;
- fm10k_prepare_for_reset(interface);
+ /* do nothing if netdev is still present or hw_addr is set */
+ if (netif_device_present(netdev) || interface->hw.hw_addr)
+ return;
- err = fm10k_handle_reset(interface);
- if (err)
- dev_err(&interface->pdev->dev,
- "fm10k_handle_reset failed: %d\n", err);
+ /* We've lost the PCIe register space, and can no longer access the
+ * device. Shut everything except the detach subtask down and prepare
+ * to reset the device in case we recover. If we actually prepare for
+ * reset, indicate that we're detached.
+ */
+ if (fm10k_prepare_for_reset(interface))
+ set_bit(__FM10K_RESET_DETACHED, interface->state);
+
+ /* check the real address space to see if we've recovered */
+ hw_addr = READ_ONCE(interface->uc_addr);
+ value = readl(hw_addr);
+ if (~value) {
+ /* Make sure the reset was initiated because we detached,
+ * otherwise we might race with a different reset flow.
+ */
+ if (!test_and_clear_bit(__FM10K_RESET_DETACHED,
+ interface->state))
+ return;
+
+ /* Restore the hardware address */
+ interface->hw.hw_addr = interface->uc_addr;
+
+ /* PCIe link has been restored, and the device is active
+ * again. Restore everything and reset the device.
+ */
+ err = fm10k_handle_reset(interface);
+ if (err) {
+ netdev_err(netdev, "Unable to reset device: %d\n", err);
+ interface->hw.hw_addr = NULL;
+ return;
+ }
+
+ /* Re-attach the netdev */
+ netif_device_attach(netdev);
+ netdev_warn(netdev, "PCIe link restored, device now attached\n");
+ return;
+ }
}
static void fm10k_reset_subtask(struct fm10k_intfc *interface)
{
+ int err;
+
if (!test_and_clear_bit(FM10K_FLAG_RESET_REQUESTED,
interface->flags))
return;
+ /* If another thread has already prepared to reset the device, we
+ * should not attempt to handle a reset here, since we'd race with
+ * that thread. This may happen if we suspend the device or if the
+ * PCIe link is lost. In this case, we'll just ignore the RESET
+ * request, as it will (eventually) be taken care of when the thread
+ * which actually started the reset is finished.
+ */
+ if (!fm10k_prepare_for_reset(interface))
+ return;
+
netdev_err(interface->netdev, "Reset interface\n");
- fm10k_reinit(interface);
+ err = fm10k_handle_reset(interface);
+ if (err)
+ dev_err(&interface->pdev->dev,
+ "fm10k_handle_reset failed: %d\n", err);
}
/**
@@ -360,6 +497,10 @@ static void fm10k_watchdog_update_host_state(struct fm10k_intfc *interface)
**/
static void fm10k_mbx_subtask(struct fm10k_intfc *interface)
{
+ /* If we're resetting, bail out */
+ if (test_bit(__FM10K_RESETTING, interface->state))
+ return;
+
/* process upstream mailbox and update device state */
fm10k_watchdog_update_host_state(interface);
@@ -609,9 +750,11 @@ static void fm10k_service_task(struct work_struct *work)
interface = container_of(work, struct fm10k_intfc, service_task);
+ /* Check whether we're detached first */
+ fm10k_detach_subtask(interface);
+
/* tasks run even when interface is down */
fm10k_mbx_subtask(interface);
- fm10k_detach_subtask(interface);
fm10k_reset_subtask(interface);
/* tasks only run when interface is up */
@@ -623,6 +766,112 @@ static void fm10k_service_task(struct work_struct *work)
}
/**
+ * fm10k_macvlan_task - send queued MAC/VLAN requests to switch manager
+ * @work: pointer to work_struct containing our data
+ *
+ * This work item handles sending MAC/VLAN updates to the switch manager. When
+ * the interface is up, it will attempt to queue mailbox messages to the
+ * switch manager requesting updates for MAC/VLAN pairs. If the Tx fifo of the
+ * mailbox is full, it will reschedule itself to try again in a short while.
+ * This ensures that the driver does not overload the switch mailbox with too
+ * many simultaneous requests, causing an unnecessary reset.
+ **/
+static void fm10k_macvlan_task(struct work_struct *work)
+{
+ struct fm10k_macvlan_request *item;
+ struct fm10k_intfc *interface;
+ struct delayed_work *dwork;
+ struct list_head *requests;
+ struct fm10k_hw *hw;
+ unsigned long flags;
+
+ dwork = to_delayed_work(work);
+ interface = container_of(dwork, struct fm10k_intfc, macvlan_task);
+ hw = &interface->hw;
+ requests = &interface->macvlan_requests;
+
+ do {
+ /* Pop the first item off the list */
+ spin_lock_irqsave(&interface->macvlan_lock, flags);
+ item = list_first_entry_or_null(requests,
+ struct fm10k_macvlan_request,
+ list);
+ if (item)
+ list_del_init(&item->list);
+
+ spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+
+ /* We have no more items to process */
+ if (!item)
+ goto done;
+
+ fm10k_mbx_lock(interface);
+
+ /* Check that we have plenty of space to send the message. We
+ * want to ensure that the mailbox stays low enough to avoid a
+ * change in the host state, otherwise we may see spurious
+ * link up / link down notifications.
+ */
+ if (!hw->mbx.ops.tx_ready(&hw->mbx, FM10K_VFMBX_MSG_MTU + 5)) {
+ hw->mbx.ops.process(hw, &hw->mbx);
+ set_bit(__FM10K_MACVLAN_REQUEST, interface->state);
+ fm10k_mbx_unlock(interface);
+
+ /* Put the request back on the list */
+ spin_lock_irqsave(&interface->macvlan_lock, flags);
+ list_add(&item->list, requests);
+ spin_unlock_irqrestore(&interface->macvlan_lock, flags);
+ break;
+ }
+
+ switch (item->type) {
+ case FM10K_MC_MAC_REQUEST:
+ hw->mac.ops.update_mc_addr(hw,
+ item->mac.glort,
+ item->mac.addr,
+ item->mac.vid,
+ item->set);
+ break;
+ case FM10K_UC_MAC_REQUEST:
+ hw->mac.ops.update_uc_addr(hw,
+ item->mac.glort,
+ item->mac.addr,
+ item->mac.vid,
+ item->set,
+ 0);
+ break;
+ case FM10K_VLAN_REQUEST:
+ hw->mac.ops.update_vlan(hw,
+ item->vlan.vid,
+ item->vlan.vsi,
+ item->set);
+ break;
+ default:
+ break;
+ }
+
+ fm10k_mbx_unlock(interface);
+
+ /* Free the item now that we've sent the update */
+ kfree(item);
+ } while (true);
+
+done:
+ WARN_ON(!test_bit(__FM10K_MACVLAN_SCHED, interface->state));
+
+ /* flush memory to make sure state is correct */
+ smp_mb__before_atomic();
+ clear_bit(__FM10K_MACVLAN_SCHED, interface->state);
+
+ /* If a MAC/VLAN request was scheduled since we started, we should
+ * re-schedule. However, there is no reason to re-schedule if there is
+ * no work to do.
+ */
+ if (test_bit(__FM10K_MACVLAN_REQUEST, interface->state))
+ fm10k_macvlan_schedule(interface);
+}
+
+/**
* fm10k_configure_tx_ring - Configure Tx ring after Reset
* @interface: board private structure
* @ring: structure containing ring specific data
@@ -1544,7 +1793,7 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface)
struct net_device *dev = interface->netdev;
struct fm10k_hw *hw = &interface->hw;
struct msix_entry *entry;
- int ri = 0, ti = 0;
+ unsigned int ri = 0, ti = 0;
int vector, err;
entry = &interface->msix_entries[NON_Q_VECTORS(hw)];
@@ -1554,15 +1803,15 @@ int fm10k_qv_request_irq(struct fm10k_intfc *interface)
/* name the vector */
if (q_vector->tx.count && q_vector->rx.count) {
- snprintf(q_vector->name, sizeof(q_vector->name) - 1,
- "%s-TxRx-%d", dev->name, ri++);
+ snprintf(q_vector->name, sizeof(q_vector->name),
+ "%s-TxRx-%u", dev->name, ri++);
ti++;
} else if (q_vector->rx.count) {
- snprintf(q_vector->name, sizeof(q_vector->name) - 1,
- "%s-rx-%d", dev->name, ri++);
+ snprintf(q_vector->name, sizeof(q_vector->name),
+ "%s-rx-%u", dev->name, ri++);
} else if (q_vector->tx.count) {
- snprintf(q_vector->name, sizeof(q_vector->name) - 1,
- "%s-tx-%d", dev->name, ti++);
+ snprintf(q_vector->name, sizeof(q_vector->name),
+ "%s-tx-%u", dev->name, ti++);
} else {
/* skip this unused q_vector */
continue;
@@ -1800,9 +2049,6 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
netdev->vlan_features |= NETIF_F_HIGHDMA;
}
- /* delay any future reset requests */
- interface->last_reset = jiffies + (10 * HZ);
-
/* reset and initialize the hardware so it is in a known state */
err = hw->mac.ops.reset_hw(hw);
if (err) {
@@ -1857,9 +2103,16 @@ static int fm10k_sw_init(struct fm10k_intfc *interface,
INIT_LIST_HEAD(&interface->vxlan_port);
INIT_LIST_HEAD(&interface->geneve_port);
+ /* Initialize the MAC/VLAN queue */
+ INIT_LIST_HEAD(&interface->macvlan_requests);
+
netdev_rss_key_fill(rss_key, sizeof(rss_key));
memcpy(interface->rssrk, rss_key, sizeof(rss_key));
+ /* Initialize the mailbox lock */
+ spin_lock_init(&interface->mbx_lock);
+ spin_lock_init(&interface->macvlan_lock);
+
/* Start off interface as being down */
set_bit(__FM10K_DOWN, interface->state);
set_bit(__FM10K_UPDATING_STATS, interface->state);
@@ -2063,10 +2316,12 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Initialize service timer and service task late in order to avoid
* cleanup issues.
*/
- setup_timer(&interface->service_timer, &fm10k_service_timer,
- (unsigned long)interface);
+ timer_setup(&interface->service_timer, fm10k_service_timer, 0);
INIT_WORK(&interface->service_task, fm10k_service_task);
+ /* Setup the MAC/VLAN queue */
+ INIT_DELAYED_WORK(&interface->macvlan_task, fm10k_macvlan_task);
+
/* kick off service timer now, even when interface is down */
mod_timer(&interface->service_timer, (HZ * 2) + jiffies);
@@ -2079,8 +2334,9 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* enable SR-IOV after registering netdev to enforce PF/VF ordering */
fm10k_iov_configure(pdev, 0);
- /* clear the service task disable bit to allow service task to start */
+ /* clear the service task disable bit and kick off service task */
clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
+ fm10k_service_event_schedule(interface);
return 0;
@@ -2118,8 +2374,11 @@ static void fm10k_remove(struct pci_dev *pdev)
del_timer_sync(&interface->service_timer);
- set_bit(__FM10K_SERVICE_DISABLE, interface->state);
- cancel_work_sync(&interface->service_task);
+ fm10k_stop_service_event(interface);
+ fm10k_stop_macvlan_task(interface);
+
+ /* Remove all pending MAC/VLAN requests */
+ fm10k_clear_macvlan_queue(interface, interface->glort, true);
/* free netdev, this may bounce the interrupts due to setup_tc */
if (netdev->reg_state == NETREG_REGISTERED)
@@ -2156,11 +2415,14 @@ static void fm10k_prepare_suspend(struct fm10k_intfc *interface)
* a surprise remove if the PCIe device is disabled while we're
* stopped. We stop the watchdog task until after we resume software
* activity.
+ *
+ * Note that the MAC/VLAN task will be stopped as part of preparing
+ * for reset so we don't need to handle it here.
*/
- set_bit(__FM10K_SERVICE_DISABLE, interface->state);
- cancel_work_sync(&interface->service_task);
+ fm10k_stop_service_event(interface);
- fm10k_prepare_for_reset(interface);
+ if (fm10k_prepare_for_reset(interface))
+ set_bit(__FM10K_RESET_SUSPENDED, interface->state);
}
static int fm10k_handle_resume(struct fm10k_intfc *interface)
@@ -2168,6 +2430,13 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
struct fm10k_hw *hw = &interface->hw;
int err;
+ /* Even if we didn't properly prepare for reset in
+ * fm10k_prepare_suspend, we'll attempt to resume anyways.
+ */
+ if (!test_and_clear_bit(__FM10K_RESET_SUSPENDED, interface->state))
+ dev_warn(&interface->pdev->dev,
+ "Device was shut down as part of suspend... Attempting to recover\n");
+
/* reset statistics starting values */
hw->mac.ops.rebind_hw_stats(hw, &interface->stats);
@@ -2185,45 +2454,30 @@ static int fm10k_handle_resume(struct fm10k_intfc *interface)
interface->link_down_event = jiffies + (HZ);
set_bit(__FM10K_LINK_DOWN, interface->state);
- /* clear the service task disable bit to allow service task to start */
- clear_bit(__FM10K_SERVICE_DISABLE, interface->state);
- fm10k_service_event_schedule(interface);
+ /* restart the service task */
+ fm10k_start_service_event(interface);
+
+ /* Restart the MAC/VLAN request queue in-case of outstanding events */
+ fm10k_macvlan_schedule(interface);
return err;
}
#ifdef CONFIG_PM
/**
- * fm10k_resume - Restore device to pre-sleep state
- * @pdev: PCI device information struct
+ * fm10k_resume - Generic PM resume hook
+ * @dev: generic device structure
*
- * fm10k_resume is called after the system has powered back up from a sleep
- * state and is ready to resume operation. This function is meant to restore
- * the device back to its pre-sleep state.
+ * Generic PM hook used when waking the device from a low power state after
+ * suspend or hibernation. This function does not need to handle lower PCIe
+ * device state as the stack takes care of that for us.
**/
-static int fm10k_resume(struct pci_dev *pdev)
+static int fm10k_resume(struct device *dev)
{
- struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+ struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
struct net_device *netdev = interface->netdev;
struct fm10k_hw *hw = &interface->hw;
- u32 err;
-
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
-
- /* pci_restore_state clears dev->state_saved so call
- * pci_save_state to restore it.
- */
- pci_save_state(pdev);
-
- err = pci_enable_device_mem(pdev);
- if (err) {
- dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
- return err;
- }
- pci_set_master(pdev);
-
- pci_wake_from_d3(pdev, false);
+ int err;
/* refresh hw_addr in case it was dropped */
hw->hw_addr = interface->uc_addr;
@@ -2238,36 +2492,27 @@ static int fm10k_resume(struct pci_dev *pdev)
}
/**
- * fm10k_suspend - Prepare the device for a system sleep state
- * @pdev: PCI device information struct
+ * fm10k_suspend - Generic PM suspend hook
+ * @dev: generic device structure
*
- * fm10k_suspend is meant to shutdown the device prior to the system entering
- * a sleep state. The fm10k hardware does not support wake on lan so the
- * driver simply needs to shut down the device so it is in a low power state.
+ * Generic PM hook used when setting the device into a low power state for
+ * system suspend or hibernation. This function does not need to handle lower
+ * PCIe device state as the stack takes care of that for us.
**/
-static int fm10k_suspend(struct pci_dev *pdev,
- pm_message_t __always_unused state)
+static int fm10k_suspend(struct device *dev)
{
- struct fm10k_intfc *interface = pci_get_drvdata(pdev);
+ struct fm10k_intfc *interface = pci_get_drvdata(to_pci_dev(dev));
struct net_device *netdev = interface->netdev;
- int err = 0;
netif_device_detach(netdev);
fm10k_prepare_suspend(interface);
- err = pci_save_state(pdev);
- if (err)
- return err;
-
- pci_disable_device(pdev);
- pci_wake_from_d3(pdev, false);
- pci_set_power_state(pdev, PCI_D3hot);
-
return 0;
}
#endif /* CONFIG_PM */
+
/**
* fm10k_io_error_detected - called when PCI error is detected
* @pdev: Pointer to PCI device
@@ -2343,11 +2588,18 @@ static void fm10k_io_resume(struct pci_dev *pdev)
if (err)
dev_warn(&pdev->dev,
- "fm10k_io_resume failed: %d\n", err);
+ "%s failed: %d\n", __func__, err);
else
netif_device_attach(netdev);
}
+/**
+ * fm10k_io_reset_prepare - called when PCI function is about to be reset
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the PCI function is about to be reset,
+ * allowing the device driver to prepare for it.
+ */
static void fm10k_io_reset_prepare(struct pci_dev *pdev)
{
/* warn incase we have any active VF devices */
@@ -2357,6 +2609,13 @@ static void fm10k_io_reset_prepare(struct pci_dev *pdev)
fm10k_prepare_suspend(pci_get_drvdata(pdev));
}
+/**
+ * fm10k_io_reset_done - called when PCI function has finished resetting
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called just after the PCI function is reset, such as via
+ * /sys/class/net/<enpX>/device/reset or similar.
+ */
static void fm10k_io_reset_done(struct pci_dev *pdev)
{
struct fm10k_intfc *interface = pci_get_drvdata(pdev);
@@ -2364,7 +2623,7 @@ static void fm10k_io_reset_done(struct pci_dev *pdev)
if (err) {
dev_warn(&pdev->dev,
- "fm10k_io_reset_notify failed: %d\n", err);
+ "%s failed: %d\n", __func__, err);
netif_device_detach(interface->netdev);
}
}
@@ -2377,15 +2636,18 @@ static const struct pci_error_handlers fm10k_err_handler = {
.reset_done = fm10k_io_reset_done,
};
+static SIMPLE_DEV_PM_OPS(fm10k_pm_ops, fm10k_suspend, fm10k_resume);
+
static struct pci_driver fm10k_driver = {
.name = fm10k_driver_name,
.id_table = fm10k_pci_tbl,
.probe = fm10k_probe,
.remove = fm10k_remove,
#ifdef CONFIG_PM
- .suspend = fm10k_suspend,
- .resume = fm10k_resume,
-#endif
+ .driver = {
+ .pm = &fm10k_pm_ops,
+ },
+#endif /* CONFIG_PM */
.sriov_configure = fm10k_iov_configure,
.err_handler = &fm10k_err_handler
};
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
index 40ee0242a80a..425d814aed4d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -1186,7 +1186,7 @@ s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *hw, u32 **results,
* Will report an error if the VLAN ID is out of range. For VID = 0, it will
* return either the pf_vid or sw_vid depending on which one is set.
*/
-static s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
+s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
{
if (!vid)
return vf_info->pf_vid ? vf_info->pf_vid : vf_info->sw_vid;
@@ -1334,19 +1334,19 @@ static u8 fm10k_iov_supported_xcast_mode_pf(struct fm10k_vf_info *vf_info,
case FM10K_XCAST_MODE_PROMISC:
if (vf_flags & FM10K_VF_FLAG_PROMISC_CAPABLE)
return FM10K_XCAST_MODE_PROMISC;
- /* fallthough */
+ /* fall through */
case FM10K_XCAST_MODE_ALLMULTI:
if (vf_flags & FM10K_VF_FLAG_ALLMULTI_CAPABLE)
return FM10K_XCAST_MODE_ALLMULTI;
- /* fallthough */
+ /* fall through */
case FM10K_XCAST_MODE_MULTI:
if (vf_flags & FM10K_VF_FLAG_MULTI_CAPABLE)
return FM10K_XCAST_MODE_MULTI;
- /* fallthough */
+ /* fall through */
case FM10K_XCAST_MODE_NONE:
if (vf_flags & FM10K_VF_FLAG_NONE_CAPABLE)
return FM10K_XCAST_MODE_NONE;
- /* fallthough */
+ /* fall through */
default:
break;
}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
index 3336d3c10760..e04d41f1a532 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.h
@@ -1,5 +1,5 @@
/* Intel(R) Ethernet Switch Host Interface Driver
- * Copyright(c) 2013 - 2016 Intel Corporation.
+ * Copyright(c) 2013 - 2017 Intel Corporation.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@@ -114,6 +114,7 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[];
#define FM10K_PF_MSG_ERR_HANDLER(msg, func) \
FM10K_MSG_HANDLER(FM10K_PF_MSG_ID_##msg, fm10k_err_msg_attr, func)
+s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid);
s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **,
struct fm10k_mbx_info *);
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index d0c1bf5441d8..eb017763646d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -54,6 +54,7 @@
#include <linux/clocksource.h>
#include <linux/net_tstamp.h>
#include <linux/ptp_clock_kernel.h>
+#include <net/pkt_cls.h>
#include "i40e_type.h"
#include "i40e_prototype.h"
#include "i40e_client.h"
@@ -77,6 +78,7 @@
#define i40e_default_queues_per_vmdq(pf) \
(((pf)->hw_features & I40E_HW_RSS_AQ_CAPABLE) ? 4 : 1)
#define I40E_DEFAULT_QUEUES_PER_VF 4
+#define I40E_MAX_VF_QUEUES 16
#define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */
#define i40e_pf_get_max_q_per_tc(pf) \
(((pf)->hw_features & I40E_HW_128_QP_RSS_CAPABLE) ? 128 : 64)
@@ -86,6 +88,7 @@
#define I40E_AQ_LEN 256
#define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */
#define I40E_MAX_USER_PRIORITY 8
+#define I40E_MAX_QUEUES_PER_CH 64
#define I40E_DEFAULT_TRAFFIC_CLASS BIT(0)
#define I40E_DEFAULT_MSG_ENABLE 4
#define I40E_QUEUE_WAIT_RETRY_LIMIT 10
@@ -125,6 +128,11 @@
/* default to trying for four seconds */
#define I40E_TRY_LINK_TIMEOUT (4 * HZ)
+/* BW rate limiting */
+#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */
+#define I40E_BW_MBPS_DIVISOR 125000 /* rate / (1000000 / 8) Mbps */
+#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* accumulate 4 credits max */
+
/* driver state flags */
enum i40e_state_t {
__I40E_TESTING,
@@ -136,6 +144,7 @@ enum i40e_state_t {
__I40E_MDD_EVENT_PENDING,
__I40E_VFLR_EVENT_PENDING,
__I40E_RESET_RECOVERY_PENDING,
+ __I40E_MISC_IRQ_REQUESTED,
__I40E_RESET_INTR_RECEIVED,
__I40E_REINIT_REQUESTED,
__I40E_PF_RESET_REQUESTED,
@@ -155,6 +164,8 @@ enum i40e_state_t {
__I40E_STATE_SIZE__,
};
+#define I40E_PF_RESET_FLAG BIT_ULL(__I40E_PF_RESET_REQUESTED)
+
/* VSI state flags */
enum i40e_vsi_state_t {
__I40E_VSI_DOWN,
@@ -336,6 +347,25 @@ struct i40e_flex_pit {
u8 pit_index;
};
+struct i40e_channel {
+ struct list_head list;
+ bool initialized;
+ u8 type;
+ u16 vsi_number; /* Assigned VSI number from AQ 'Add VSI' response */
+ u16 stat_counter_idx;
+ u16 base_queue;
+ u16 num_queue_pairs; /* Requested by user */
+ u16 seid;
+
+ u8 enabled_tc;
+ struct i40e_aqc_vsi_properties_data info;
+
+ u64 max_tx_rate;
+
+ /* track this channel belongs to which VSI */
+ struct i40e_vsi *parent_vsi;
+};
+
/* struct that defines the Ethernet device */
struct i40e_pf {
struct pci_dev *pdev;
@@ -348,7 +378,7 @@ struct i40e_pf {
u16 num_vmdq_vsis; /* num vmdq vsis this PF has set up */
u16 num_vmdq_qps; /* num queue pairs per vmdq pool */
u16 num_vmdq_msix; /* num queue vectors per vmdq pool */
- u16 num_req_vfs; /* num VFs requested for this VF */
+ u16 num_req_vfs; /* num VFs requested for this PF */
u16 num_vf_qps; /* num queue pairs per VF */
u16 num_lan_qps; /* num lan queues this PF has set up */
u16 num_lan_msix; /* num queue vectors for the base PF vsi */
@@ -401,55 +431,58 @@ struct i40e_pf {
struct timer_list service_timer;
struct work_struct service_task;
- u64 hw_features;
-#define I40E_HW_RSS_AQ_CAPABLE BIT_ULL(0)
-#define I40E_HW_128_QP_RSS_CAPABLE BIT_ULL(1)
-#define I40E_HW_ATR_EVICT_CAPABLE BIT_ULL(2)
-#define I40E_HW_WB_ON_ITR_CAPABLE BIT_ULL(3)
-#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT_ULL(4)
-#define I40E_HW_NO_PCI_LINK_CHECK BIT_ULL(5)
-#define I40E_HW_100M_SGMII_CAPABLE BIT_ULL(6)
-#define I40E_HW_NO_DCB_SUPPORT BIT_ULL(7)
-#define I40E_HW_USE_SET_LLDP_MIB BIT_ULL(8)
-#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT_ULL(9)
-#define I40E_HW_PTP_L4_CAPABLE BIT_ULL(10)
-#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT_ULL(11)
-#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE BIT_ULL(12)
-#define I40E_HW_HAVE_CRT_RETIMER BIT_ULL(13)
-#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT_ULL(14)
-#define I40E_HW_PHY_CONTROLS_LEDS BIT_ULL(15)
-#define I40E_HW_STOP_FW_LLDP BIT_ULL(16)
-#define I40E_HW_PORT_ID_VALID BIT_ULL(17)
-#define I40E_HW_RESTART_AUTONEG BIT_ULL(18)
-
- u64 flags;
-#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(1)
-#define I40E_FLAG_MSI_ENABLED BIT_ULL(2)
-#define I40E_FLAG_MSIX_ENABLED BIT_ULL(3)
-#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT_ULL(4)
-#define I40E_FLAG_RSS_ENABLED BIT_ULL(6)
-#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(7)
-#define I40E_FLAG_IWARP_ENABLED BIT_ULL(10)
-#define I40E_FLAG_FILTER_SYNC BIT_ULL(15)
-#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16)
-#define I40E_FLAG_SRIOV_ENABLED BIT_ULL(19)
-#define I40E_FLAG_DCB_ENABLED BIT_ULL(20)
-#define I40E_FLAG_FD_SB_ENABLED BIT_ULL(21)
-#define I40E_FLAG_FD_ATR_ENABLED BIT_ULL(22)
-#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT_ULL(23)
-#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT_ULL(24)
-#define I40E_FLAG_PTP BIT_ULL(25)
-#define I40E_FLAG_MFP_ENABLED BIT_ULL(26)
-#define I40E_FLAG_UDP_FILTER_SYNC BIT_ULL(27)
-#define I40E_FLAG_DCB_CAPABLE BIT_ULL(29)
-#define I40E_FLAG_VEB_STATS_ENABLED BIT_ULL(37)
-#define I40E_FLAG_LINK_POLLING_ENABLED BIT_ULL(39)
-#define I40E_FLAG_VEB_MODE_ENABLED BIT_ULL(40)
-#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT_ULL(51)
-#define I40E_FLAG_CLIENT_RESET BIT_ULL(54)
-#define I40E_FLAG_TEMP_LINK_POLLING BIT_ULL(55)
-#define I40E_FLAG_CLIENT_L2_CHANGE BIT_ULL(56)
-#define I40E_FLAG_LEGACY_RX BIT_ULL(58)
+ u32 hw_features;
+#define I40E_HW_RSS_AQ_CAPABLE BIT(0)
+#define I40E_HW_128_QP_RSS_CAPABLE BIT(1)
+#define I40E_HW_ATR_EVICT_CAPABLE BIT(2)
+#define I40E_HW_WB_ON_ITR_CAPABLE BIT(3)
+#define I40E_HW_MULTIPLE_TCP_UDP_RSS_PCTYPE BIT(4)
+#define I40E_HW_NO_PCI_LINK_CHECK BIT(5)
+#define I40E_HW_100M_SGMII_CAPABLE BIT(6)
+#define I40E_HW_NO_DCB_SUPPORT BIT(7)
+#define I40E_HW_USE_SET_LLDP_MIB BIT(8)
+#define I40E_HW_GENEVE_OFFLOAD_CAPABLE BIT(9)
+#define I40E_HW_PTP_L4_CAPABLE BIT(10)
+#define I40E_HW_WOL_MC_MAGIC_PKT_WAKE BIT(11)
+#define I40E_HW_MPLS_HDR_OFFLOAD_CAPABLE BIT(12)
+#define I40E_HW_HAVE_CRT_RETIMER BIT(13)
+#define I40E_HW_OUTER_UDP_CSUM_CAPABLE BIT(14)
+#define I40E_HW_PHY_CONTROLS_LEDS BIT(15)
+#define I40E_HW_STOP_FW_LLDP BIT(16)
+#define I40E_HW_PORT_ID_VALID BIT(17)
+#define I40E_HW_RESTART_AUTONEG BIT(18)
+
+ u32 flags;
+#define I40E_FLAG_RX_CSUM_ENABLED BIT(0)
+#define I40E_FLAG_MSI_ENABLED BIT(1)
+#define I40E_FLAG_MSIX_ENABLED BIT(2)
+#define I40E_FLAG_RSS_ENABLED BIT(3)
+#define I40E_FLAG_VMDQ_ENABLED BIT(4)
+#define I40E_FLAG_FILTER_SYNC BIT(5)
+#define I40E_FLAG_SRIOV_ENABLED BIT(6)
+#define I40E_FLAG_DCB_CAPABLE BIT(7)
+#define I40E_FLAG_DCB_ENABLED BIT(8)
+#define I40E_FLAG_FD_SB_ENABLED BIT(9)
+#define I40E_FLAG_FD_ATR_ENABLED BIT(10)
+#define I40E_FLAG_FD_SB_AUTO_DISABLED BIT(11)
+#define I40E_FLAG_FD_ATR_AUTO_DISABLED BIT(12)
+#define I40E_FLAG_MFP_ENABLED BIT(13)
+#define I40E_FLAG_UDP_FILTER_SYNC BIT(14)
+#define I40E_FLAG_HW_ATR_EVICT_ENABLED BIT(15)
+#define I40E_FLAG_VEB_MODE_ENABLED BIT(16)
+#define I40E_FLAG_VEB_STATS_ENABLED BIT(17)
+#define I40E_FLAG_LINK_POLLING_ENABLED BIT(18)
+#define I40E_FLAG_TRUE_PROMISC_SUPPORT BIT(19)
+#define I40E_FLAG_TEMP_LINK_POLLING BIT(20)
+#define I40E_FLAG_LEGACY_RX BIT(21)
+#define I40E_FLAG_PTP BIT(22)
+#define I40E_FLAG_IWARP_ENABLED BIT(23)
+#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT(24)
+#define I40E_FLAG_CLIENT_L2_CHANGE BIT(25)
+#define I40E_FLAG_CLIENT_RESET BIT(26)
+#define I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED BIT(27)
+#define I40E_FLAG_SOURCE_PRUNING_DISABLED BIT(28)
+#define I40E_FLAG_TC_MQPRIO BIT(29)
struct i40e_client_instance *cinst;
bool stat_offsets_loaded;
@@ -530,6 +563,8 @@ struct i40e_pf {
u32 ioremap_len;
u32 fd_inv;
u16 phy_led_val;
+
+ u16 override_q_count;
};
/**
@@ -673,6 +708,7 @@ struct i40e_vsi {
enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */
s16 vf_id; /* Virtual function ID for SRIOV VSIs */
+ struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */
struct i40e_tc_configuration tc_config;
struct i40e_aqc_vsi_properties_data info;
@@ -694,6 +730,16 @@ struct i40e_vsi {
bool current_isup; /* Sync 'link up' logging */
enum i40e_aq_link_speed current_speed; /* Sync link speed logging */
+ /* channel specific fields */
+ u16 cnt_q_avail; /* num of queues available for channel usage */
+ u16 orig_rss_size;
+ u16 current_rss_size;
+ bool reconfig_rss;
+
+ u16 next_base_queue; /* next queue to be used for channel setup */
+
+ struct list_head ch_list;
+
void *priv; /* client driver data reference. */
/* VSI specific handlers */
@@ -945,9 +991,6 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
struct i40e_hw *hw = &pf->hw;
u32 val;
- /* definitely clear the PBA here, as this function is meant to
- * clean out all previous interrupts AND enable the interrupt
- */
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
(I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
@@ -956,7 +999,7 @@ static inline void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
}
void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf);
-void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba);
+void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf);
int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
int i40e_open(struct net_device *netdev);
int i40e_close(struct net_device *netdev);
@@ -1001,4 +1044,7 @@ static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
{
return !!vsi->xdp_prog;
}
+
+int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
+int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate);
#endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index ba04988e0598..9dcb2a961197 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -607,6 +607,18 @@ i40e_status i40e_init_adminq(struct i40e_hw *hw)
&oem_lo);
hw->nvm.oem_ver = ((u32)oem_hi << 16) | oem_lo;
+ if (hw->mac.type == I40E_MAC_XL710 &&
+ hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+ hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
+ hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+ }
+
+ /* The ability to RX (not drop) 802.1ad frames was added in API 1.7 */
+ if (hw->aq.api_maj_ver > 1 ||
+ (hw->aq.api_maj_ver == 1 &&
+ hw->aq.api_min_ver >= 7))
+ hw->flags |= I40E_HW_FLAG_802_1AD_CAPABLE;
+
if (hw->aq.api_maj_ver > I40E_FW_API_VERSION_MAJOR) {
ret_code = I40E_ERR_FIRMWARE_API_VERSION;
goto init_adminq_free_arq;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 5d5f422cbae5..6a5db1b33fa2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -34,7 +34,15 @@
*/
#define I40E_FW_API_VERSION_MAJOR 0x0001
-#define I40E_FW_API_VERSION_MINOR 0x0005
+#define I40E_FW_API_VERSION_MINOR_X722 0x0005
+#define I40E_FW_API_VERSION_MINOR_X710 0x0007
+
+#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
+ I40E_FW_API_VERSION_MINOR_X710 : \
+ I40E_FW_API_VERSION_MINOR_X722)
+
+/* API version 1.7 implements additional link and PHY-specific APIs */
+#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
struct i40e_aq_desc {
__le16 flags;
@@ -236,6 +244,8 @@ enum i40e_admin_queue_opc {
i40e_aqc_opc_set_phy_debug = 0x0622,
i40e_aqc_opc_upload_ext_phy_fm = 0x0625,
i40e_aqc_opc_run_phy_activity = 0x0626,
+ i40e_aqc_opc_set_phy_register = 0x0628,
+ i40e_aqc_opc_get_phy_register = 0x0629,
/* NVM commands */
i40e_aqc_opc_nvm_read = 0x0701,
@@ -765,7 +775,22 @@ struct i40e_aqc_set_switch_config {
#define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001
#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002
__le16 valid_flags;
- u8 reserved[12];
+ /* The ethertype in switch_tag is dropped on ingress and used
+ * internally by the switch. Set this to zero for the default
+ * of 0x88a8 (802.1ad). Should be zero for firmware API
+ * versions lower than 1.7.
+ */
+ __le16 switch_tag;
+ /* The ethertypes in first_tag and second_tag are used to
+ * match the outer and inner VLAN tags (respectively) when HW
+ * double VLAN tagging is enabled via the set port parameters
+ * AQ command. Otherwise these are both ignored. Set them to
+ * zero for their defaults of 0x8100 (802.1Q). Should be zero
+ * for firmware API versions lower than 1.7.
+ */
+ __le16 first_tag;
+ __le16 second_tag;
+ u8 reserved[6];
};
I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config);
@@ -1726,6 +1751,8 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB,
I40E_PHY_TYPE_10GBASE_AOC = 0xC,
I40E_PHY_TYPE_40GBASE_AOC = 0xD,
+ I40E_PHY_TYPE_UNRECOGNIZED = 0xE,
+ I40E_PHY_TYPE_UNSUPPORTED = 0xF,
I40E_PHY_TYPE_100BASE_TX = 0x11,
I40E_PHY_TYPE_1000BASE_T = 0x12,
I40E_PHY_TYPE_10GBASE_T = 0x13,
@@ -1744,7 +1771,12 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_25GBASE_CR = 0x20,
I40E_PHY_TYPE_25GBASE_SR = 0x21,
I40E_PHY_TYPE_25GBASE_LR = 0x22,
- I40E_PHY_TYPE_MAX
+ I40E_PHY_TYPE_25GBASE_AOC = 0x23,
+ I40E_PHY_TYPE_25GBASE_ACC = 0x24,
+ I40E_PHY_TYPE_MAX,
+ I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD,
+ I40E_PHY_TYPE_EMPTY = 0xFE,
+ I40E_PHY_TYPE_DEFAULT = 0xFF,
};
#define I40E_LINK_SPEED_100MB_SHIFT 0x1
@@ -1801,6 +1833,8 @@ struct i40e_aq_get_phy_abilities_resp {
#define I40E_AQ_PHY_TYPE_EXT_25G_CR 0X02
#define I40E_AQ_PHY_TYPE_EXT_25G_SR 0x04
#define I40E_AQ_PHY_TYPE_EXT_25G_LR 0x08
+#define I40E_AQ_PHY_TYPE_EXT_25G_AOC 0x10
+#define I40E_AQ_PHY_TYPE_EXT_25G_ACC 0x20
u8 fec_cfg_curr_mod_ext_info;
#define I40E_AQ_ENABLE_FEC_KR 0x01
#define I40E_AQ_ENABLE_FEC_RS 0x02
@@ -1934,19 +1968,31 @@ struct i40e_aqc_get_link_status {
#define I40E_AQ_25G_SERDES_UCODE_ERR 0X04
#define I40E_AQ_25G_NIMB_UCODE_ERR 0X05
u8 loopback; /* use defines from i40e_aqc_set_lb_mode */
+/* Since firmware API 1.7 loopback field keeps power class info as well */
+#define I40E_AQ_LOOPBACK_MASK 0x07
+#define I40E_AQ_PWR_CLASS_SHIFT_LB 6
+#define I40E_AQ_PWR_CLASS_MASK_LB (0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB)
__le16 max_frame_size;
u8 config;
#define I40E_AQ_CONFIG_FEC_KR_ENA 0x01
#define I40E_AQ_CONFIG_FEC_RS_ENA 0x02
#define I40E_AQ_CONFIG_CRC_ENA 0x04
#define I40E_AQ_CONFIG_PACING_MASK 0x78
- u8 power_desc;
+ union {
+ struct {
+ u8 power_desc;
#define I40E_AQ_LINK_POWER_CLASS_1 0x00
#define I40E_AQ_LINK_POWER_CLASS_2 0x01
#define I40E_AQ_LINK_POWER_CLASS_3 0x02
#define I40E_AQ_LINK_POWER_CLASS_4 0x03
#define I40E_AQ_PWR_CLASS_MASK 0x03
- u8 reserved[4];
+ u8 reserved[4];
+ };
+ struct {
+ u8 link_type[4];
+ u8 link_type_ext;
+ };
+ };
};
I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status);
@@ -2029,6 +2075,22 @@ struct i40e_aqc_run_phy_activity {
I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity);
+/* Set PHY Register command (0x0628) */
+/* Get PHY Register command (0x0629) */
+struct i40e_aqc_phy_register_access {
+ u8 phy_interface;
+#define I40E_AQ_PHY_REG_ACCESS_INTERNAL 0
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2
+ u8 dev_address;
+ u8 reserved1[2];
+ __le32 reg_address;
+ __le32 reg_value;
+ u8 reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
+
/* NVM Read command (indirect 0x0701)
* NVM Erase commands (direct 0x0702)
* NVM Update commands (indirect 0x0703)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 111426ba5fbc..8d0ee006606b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -1180,6 +1180,8 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
case I40E_PHY_TYPE_40GBASE_AOC:
case I40E_PHY_TYPE_10GBASE_AOC:
case I40E_PHY_TYPE_25GBASE_CR:
+ case I40E_PHY_TYPE_25GBASE_AOC:
+ case I40E_PHY_TYPE_25GBASE_ACC:
media = I40E_MEDIA_TYPE_DA;
break;
case I40E_PHY_TYPE_1000BASE_KX:
@@ -1567,34 +1569,57 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
struct i40e_aq_desc desc;
i40e_status status;
u16 abilities_size = sizeof(struct i40e_aq_get_phy_abilities_resp);
+ u16 max_delay = I40E_MAX_PHY_TIMEOUT, total_delay = 0;
if (!abilities)
return I40E_ERR_PARAM;
- i40e_fill_default_direct_cmd_desc(&desc,
- i40e_aqc_opc_get_phy_abilities);
+ do {
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_phy_abilities);
- desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
- if (abilities_size > I40E_AQ_LARGE_BUF)
- desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+ desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+ if (abilities_size > I40E_AQ_LARGE_BUF)
+ desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
- if (qualified_modules)
- desc.params.external.param0 |=
+ if (qualified_modules)
+ desc.params.external.param0 |=
cpu_to_le32(I40E_AQ_PHY_REPORT_QUALIFIED_MODULES);
- if (report_init)
- desc.params.external.param0 |=
+ if (report_init)
+ desc.params.external.param0 |=
cpu_to_le32(I40E_AQ_PHY_REPORT_INITIAL_VALUES);
- status = i40e_asq_send_command(hw, &desc, abilities, abilities_size,
- cmd_details);
+ status = i40e_asq_send_command(hw, &desc, abilities,
+ abilities_size, cmd_details);
- if (hw->aq.asq_last_status == I40E_AQ_RC_EIO)
- status = I40E_ERR_UNKNOWN_PHY;
+ if (status)
+ break;
+
+ if (hw->aq.asq_last_status == I40E_AQ_RC_EIO) {
+ status = I40E_ERR_UNKNOWN_PHY;
+ break;
+ } else if (hw->aq.asq_last_status == I40E_AQ_RC_EAGAIN) {
+ usleep_range(1000, 2000);
+ total_delay++;
+ status = I40E_ERR_TIMEOUT;
+ }
+ } while ((hw->aq.asq_last_status != I40E_AQ_RC_OK) &&
+ (total_delay < max_delay));
+
+ if (status)
+ return status;
if (report_init) {
- hw->phy.phy_types = le32_to_cpu(abilities->phy_type);
- hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32);
+ if (hw->mac.type == I40E_MAC_XL710 &&
+ hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+ hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
+ status = i40e_aq_get_link_info(hw, true, NULL, NULL);
+ } else {
+ hw->phy.phy_types = le32_to_cpu(abilities->phy_type);
+ hw->phy.phy_types |=
+ ((u64)abilities->phy_type_ext << 32);
+ }
}
return status;
@@ -1819,7 +1844,7 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
hw_link_info->fec_info = resp->config & (I40E_AQ_CONFIG_FEC_KR_ENA |
I40E_AQ_CONFIG_FEC_RS_ENA);
hw_link_info->ext_info = resp->ext_info;
- hw_link_info->loopback = resp->loopback;
+ hw_link_info->loopback = resp->loopback & I40E_AQ_LOOPBACK_MASK;
hw_link_info->max_frame_size = le16_to_cpu(resp->max_frame_size);
hw_link_info->pacing = resp->config & I40E_AQ_CONFIG_PACING_MASK;
@@ -1850,6 +1875,15 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
+ if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+ hw->aq.api_min_ver >= 7) {
+ __le32 tmp;
+
+ memcpy(&tmp, resp->link_type, sizeof(tmp));
+ hw->phy.phy_types = le32_to_cpu(tmp);
+ hw->phy.phy_types |= ((u64)resp->link_type_ext << 32);
+ }
+
/* save link status information */
if (link)
*link = *hw_link_info;
@@ -2391,7 +2425,11 @@ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
i40e_aqc_opc_set_switch_config);
scfg->flags = cpu_to_le16(flags);
scfg->valid_flags = cpu_to_le16(valid_flags);
-
+ if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) {
+ scfg->switch_tag = cpu_to_le16(hw->switch_tag);
+ scfg->first_tag = cpu_to_le16(hw->first_tag);
+ scfg->second_tag = cpu_to_le16(hw->second_tag);
+ }
status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
return status;
@@ -4826,6 +4864,74 @@ phy_blinking_end:
}
/**
+ * i40e_led_get_reg - read LED register
+ * @hw: pointer to the HW structure
+ * @led_addr: LED register address
+ * @reg_val: read register value
+ **/
+static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
+ u32 *reg_val)
+{
+ enum i40e_status_code status;
+ u8 phy_addr = 0;
+ u8 port_num;
+ u32 i;
+
+ *reg_val = 0;
+ if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+ status =
+ i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+ I40E_PHY_COM_REG_PAGE,
+ I40E_PHY_LED_PROV_REG_1,
+ reg_val, NULL);
+ } else {
+ i = rd32(hw, I40E_PFGEN_PORTNUM);
+ port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+ phy_addr = i40e_get_phy_address(hw, port_num);
+ status = i40e_read_phy_register_clause45(hw,
+ I40E_PHY_COM_REG_PAGE,
+ led_addr, phy_addr,
+ (u16 *)reg_val);
+ }
+ return status;
+}
+
+/**
+ * i40e_led_set_reg - write LED register
+ * @hw: pointer to the HW structure
+ * @led_addr: LED register address
+ * @reg_val: register value to write
+ **/
+static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
+ u32 reg_val)
+{
+ enum i40e_status_code status;
+ u8 phy_addr = 0;
+ u8 port_num;
+ u32 i;
+
+ if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+ status =
+ i40e_aq_set_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+ I40E_PHY_COM_REG_PAGE,
+ I40E_PHY_LED_PROV_REG_1,
+ reg_val, NULL);
+ } else {
+ i = rd32(hw, I40E_PFGEN_PORTNUM);
+ port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
+ phy_addr = i40e_get_phy_address(hw, port_num);
+ status = i40e_write_phy_register_clause45(hw,
+ I40E_PHY_COM_REG_PAGE,
+ led_addr, phy_addr,
+ (u16)reg_val);
+ }
+
+ return status;
+}
+
+/**
* i40e_led_get_phy - return current on/off mode
* @hw: pointer to the hw struct
* @led_addr: address of led register to use
@@ -4842,7 +4948,19 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
u16 temp_addr;
u8 port_num;
u32 i;
-
+ u32 reg_val_aq;
+
+ if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
+ status =
+ i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
+ I40E_PHY_COM_REG_PAGE,
+ I40E_PHY_LED_PROV_REG_1,
+ &reg_val_aq, NULL);
+ if (status == I40E_SUCCESS)
+ *val = (u16)reg_val_aq;
+ return status;
+ }
temp_addr = I40E_PHY_LED_PROV_REG_1;
i = rd32(hw, I40E_PFGEN_PORTNUM);
port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
@@ -4877,51 +4995,38 @@ i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on,
u16 led_addr, u32 mode)
{
i40e_status status = 0;
- u16 led_ctl = 0;
- u16 led_reg = 0;
- u8 phy_addr = 0;
- u8 port_num;
- u32 i;
+ u32 led_ctl = 0;
+ u32 led_reg = 0;
- i = rd32(hw, I40E_PFGEN_PORTNUM);
- port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
- phy_addr = i40e_get_phy_address(hw, port_num);
- status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr, &led_reg);
+ status = i40e_led_get_reg(hw, led_addr, &led_reg);
if (status)
return status;
led_ctl = led_reg;
if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
led_reg = 0;
- status = i40e_write_phy_register_clause45(hw,
- I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr,
- led_reg);
+ status = i40e_led_set_reg(hw, led_addr, led_reg);
if (status)
return status;
}
- status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr, &led_reg);
+ status = i40e_led_get_reg(hw, led_addr, &led_reg);
if (status)
goto restore_config;
if (on)
led_reg = I40E_PHY_LED_MANUAL_ON;
else
led_reg = 0;
- status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr, led_reg);
+
+ status = i40e_led_set_reg(hw, led_addr, led_reg);
if (status)
goto restore_config;
if (mode & I40E_PHY_LED_MODE_ORIG) {
led_ctl = (mode & I40E_PHY_LED_MODE_MASK);
- status = i40e_write_phy_register_clause45(hw,
- I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr, led_ctl);
+ status = i40e_led_set_reg(hw, led_addr, led_ctl);
}
return status;
+
restore_config:
- status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
- led_addr, phy_addr, led_ctl);
+ status = i40e_led_set_reg(hw, led_addr, led_ctl);
return status;
}
@@ -5052,6 +5157,75 @@ do_retry:
}
/**
+ * i40e_aq_set_phy_register
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @reg_addr: PHY register address
+ * @reg_val: new register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Write the external PHY register.
+ **/
+i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr,
+ u32 reg_addr, u32 reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_phy_register_access *cmd =
+ (struct i40e_aqc_phy_register_access *)&desc.params.raw;
+ i40e_status status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_phy_register);
+
+ cmd->phy_interface = phy_select;
+ cmd->dev_address = dev_addr;
+ cmd->reg_address = cpu_to_le32(reg_addr);
+ cmd->reg_value = cpu_to_le32(reg_val);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40e_aq_get_phy_register
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @reg_addr: PHY register address
+ * @reg_val: read register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Read the external PHY register.
+ **/
+i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr,
+ u32 reg_addr, u32 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_phy_register_access *cmd =
+ (struct i40e_aqc_phy_register_access *)&desc.params.raw;
+ i40e_status status;
+
+ i40e_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_phy_register);
+
+ cmd->phy_interface = phy_select;
+ cmd->dev_address = dev_addr;
+ cmd->reg_address = cpu_to_le32(reg_addr);
+
+ status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+ if (!status)
+ *reg_val = le32_to_cpu(cmd->reg_value);
+
+ return status;
+}
+
+/**
* i40e_aq_write_ppp - Write pipeline personalization profile (ppp)
* @hw: pointer to the hw struct
* @buff: command buffer (size in bytes = buff_size)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 8f326f87a815..2b8bbc84e34f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -278,8 +278,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
rx_ring->netdev,
rx_ring->rx_bi);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
- i, rx_ring->state,
+ " rx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+ i, *rx_ring->state,
rx_ring->queue_index,
rx_ring->reg_idx);
dev_info(&pf->pdev->dev,
@@ -334,8 +334,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
tx_ring->netdev,
tx_ring->tx_bi);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
- i, tx_ring->state,
+ " tx_rings[%i]: state = %lu, queue_index = %d, reg_idx = %d\n",
+ i, *tx_ring->state,
tx_ring->queue_index,
tx_ring->reg_idx);
dev_info(&pf->pdev->dev,
@@ -798,8 +798,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
*/
if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
- i40e_do_reset_safe(pf,
- BIT_ULL(__I40E_PF_RESET_REQUESTED));
+ i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
}
vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
index f141e78d409e..76ed56641864 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_diag.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_diag.c
@@ -36,7 +36,9 @@
static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw,
u32 reg, u32 mask)
{
- const u32 patterns[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF};
+ static const u32 patterns[] = {
+ 0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+ };
u32 pat, val, orig_val;
int i;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 05e89864f781..9eb618799a30 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -227,6 +227,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("veb-stats", I40E_FLAG_VEB_STATS_ENABLED, 0),
I40E_PRIV_FLAG("hw-atr-eviction", I40E_FLAG_HW_ATR_EVICT_ENABLED, 0),
I40E_PRIV_FLAG("legacy-rx", I40E_FLAG_LEGACY_RX, 0),
+ I40E_PRIV_FLAG("disable-source-pruning",
+ I40E_FLAG_SOURCE_PRUNING_DISABLED, 0),
};
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)
@@ -251,428 +253,557 @@ static void i40e_partition_setting_complaint(struct i40e_pf *pf)
/**
* i40e_phy_type_to_ethtool - convert the phy_types to ethtool link modes
- * @phy_types: PHY types to convert
- * @supported: pointer to the ethtool supported variable to fill in
- * @advertising: pointer to the ethtool advertising variable to fill in
+ * @pf: PF struct with phy_types
+ * @ks: ethtool link ksettings struct to fill out
*
**/
-static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
- u32 *advertising)
+static void i40e_phy_type_to_ethtool(struct i40e_pf *pf,
+ struct ethtool_link_ksettings *ks)
{
struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info;
u64 phy_types = pf->hw.phy.phy_types;
- *supported = 0x0;
- *advertising = 0x0;
+ ethtool_link_ksettings_zero_link_mode(ks, supported);
+ ethtool_link_ksettings_zero_link_mode(ks, advertising);
if (phy_types & I40E_CAP_PHY_TYPE_SGMII) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- *advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
- *supported |= SUPPORTED_100baseT_Full;
- *advertising |= ADVERTISED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 100baseT_Full);
}
}
if (phy_types & I40E_CAP_PHY_TYPE_XAUI ||
phy_types & I40E_CAP_PHY_TYPE_XFI ||
phy_types & I40E_CAP_PHY_TYPE_SFI ||
phy_types & I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC)
- *supported |= SUPPORTED_10000baseT_Full;
- if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
- phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_10000baseT_Full;
- *advertising |= ADVERTISED_Autoneg;
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_AOC) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_T) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- *advertising |= ADVERTISED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_XLAUI ||
phy_types & I40E_CAP_PHY_TYPE_XLPPI ||
phy_types & I40E_CAP_PHY_TYPE_40GBASE_AOC)
- *supported |= SUPPORTED_40000baseCR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_40000baseCR4_Full;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_40GB)
- *advertising |= ADVERTISED_40000baseCR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseCR4_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_100baseT_Full;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
- *advertising |= ADVERTISED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 100baseT_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
- phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
- phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
- phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
- *supported |= SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full;
- *advertising |= ADVERTISED_Autoneg;
+ if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_T) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- *advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_SR4)
- *supported |= SUPPORTED_40000baseSR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseSR4_Full);
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_LR4)
- *supported |= SUPPORTED_40000baseLR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseLR4_Full);
if (phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4) {
- *supported |= SUPPORTED_40000baseKR4_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_40000baseKR4_Full |
- ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseLR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseLR4_Full);
}
if (phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2) {
- *supported |= SUPPORTED_20000baseKR2_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 20000baseKR2_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_20GB)
- *advertising |= ADVERTISED_20000baseKR2_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 20000baseKR2_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
- if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
- *supported |= SUPPORTED_10000baseKR_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKX4_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
- *advertising |= ADVERTISED_10000baseKR_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseKX4_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
- *supported |= SUPPORTED_10000baseKX4_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR &&
+ !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKR_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- *advertising |= ADVERTISED_10000baseKX4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseKR_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
- if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
- *supported |= SUPPORTED_1000baseKX_Full |
- SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX &&
+ !(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER)) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseKX_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- if (!(pf->hw_features & I40E_HW_HAVE_CRT_RETIMER))
- *advertising |= ADVERTISED_1000baseKX_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseKX_Full);
}
- if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
- phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
- phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+ /* need to add 25G PHY types */
+ if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseKR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseKR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseCR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) {
- *supported |= SUPPORTED_Autoneg;
- *advertising |= ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseSR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseSR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_AOC ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_ACC) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_25GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseCR_Full);
+ }
+ /* need to add new 10G PHY types */
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseCR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseCR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseSR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseSR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseLR_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseLR_Full);
+ }
+ if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseX_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseX_Full);
+ }
+ /* Autoneg PHY types */
+ if (phy_types & I40E_CAP_PHY_TYPE_SGMII ||
+ phy_types & I40E_CAP_PHY_TYPE_40GBASE_KR4 ||
+ phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4_CU ||
+ phy_types & I40E_CAP_PHY_TYPE_40GBASE_CR4 ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+ phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+ phy_types & I40E_CAP_PHY_TYPE_20GBASE_KR2 ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_T ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_SR ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_LR ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4 ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1_CU ||
+ phy_types & I40E_CAP_PHY_TYPE_10GBASE_CR1 ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_T ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_SX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_LX ||
+ phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX ||
+ phy_types & I40E_CAP_PHY_TYPE_100BASE_TX) {
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ Autoneg);
}
}
/**
* i40e_get_settings_link_up - Get the Link settings for when link is up
* @hw: hw structure
- * @ecmd: ethtool command to fill in
+ * @ks: ethtool ksettings to fill in
* @netdev: network interface device structure
- *
+ * @pf: pointer to physical function struct
**/
static void i40e_get_settings_link_up(struct i40e_hw *hw,
- struct ethtool_link_ksettings *cmd,
+ struct ethtool_link_ksettings *ks,
struct net_device *netdev,
struct i40e_pf *pf)
{
struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+ struct ethtool_link_ksettings cap_ksettings;
u32 link_speed = hw_link_info->link_speed;
- u32 e_advertising = 0x0;
- u32 e_supported = 0x0;
- u32 supported, advertising;
-
- ethtool_convert_link_mode_to_legacy_u32(&supported,
- cmd->link_modes.supported);
- ethtool_convert_link_mode_to_legacy_u32(&advertising,
- cmd->link_modes.advertising);
/* Initialize supported and advertised settings based on phy settings */
switch (hw_link_info->phy_type) {
case I40E_PHY_TYPE_40GBASE_CR4:
case I40E_PHY_TYPE_40GBASE_CR4_CU:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_40000baseCR4_Full;
- advertising = ADVERTISED_Autoneg |
- ADVERTISED_40000baseCR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseCR4_Full);
break;
case I40E_PHY_TYPE_XLAUI:
case I40E_PHY_TYPE_XLPPI:
case I40E_PHY_TYPE_40GBASE_AOC:
- supported = SUPPORTED_40000baseCR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseCR4_Full);
break;
case I40E_PHY_TYPE_40GBASE_SR4:
- supported = SUPPORTED_40000baseSR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseSR4_Full);
break;
case I40E_PHY_TYPE_40GBASE_LR4:
- supported = SUPPORTED_40000baseLR4_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseLR4_Full);
break;
+ case I40E_PHY_TYPE_25GBASE_SR:
+ case I40E_PHY_TYPE_25GBASE_LR:
case I40E_PHY_TYPE_10GBASE_SR:
case I40E_PHY_TYPE_10GBASE_LR:
case I40E_PHY_TYPE_1000BASE_SX:
case I40E_PHY_TYPE_1000BASE_LX:
- supported = SUPPORTED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseSR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseSR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseSR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseSR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseLR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseLR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseX_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseX_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
if (hw_link_info->module_type[2] &
I40E_MODULE_TYPE_1000BASE_SX ||
hw_link_info->module_type[2] &
I40E_MODULE_TYPE_1000BASE_LX) {
- supported |= SUPPORTED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds &
I40E_LINK_SPEED_1GB)
- advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(
+ ks, advertising, 1000baseT_Full);
}
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- advertising |= ADVERTISED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
break;
case I40E_PHY_TYPE_10GBASE_T:
case I40E_PHY_TYPE_1000BASE_T:
case I40E_PHY_TYPE_100BASE_TX:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_10000baseT_Full |
- SUPPORTED_1000baseT_Full |
- SUPPORTED_100baseT_Full;
- advertising = ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
- advertising |= ADVERTISED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_100MB)
- advertising |= ADVERTISED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 100baseT_Full);
break;
case I40E_PHY_TYPE_1000BASE_T_OPTICAL:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full;
- advertising = ADVERTISED_Autoneg |
- ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
break;
case I40E_PHY_TYPE_10GBASE_CR1_CU:
case I40E_PHY_TYPE_10GBASE_CR1:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_10000baseT_Full;
- advertising = ADVERTISED_Autoneg |
- ADVERTISED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
break;
case I40E_PHY_TYPE_XAUI:
case I40E_PHY_TYPE_XFI:
case I40E_PHY_TYPE_SFI:
case I40E_PHY_TYPE_10GBASE_SFPP_CU:
case I40E_PHY_TYPE_10GBASE_AOC:
- supported = SUPPORTED_10000baseT_Full;
- advertising = SUPPORTED_10000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseT_Full);
+ if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseT_Full);
break;
case I40E_PHY_TYPE_SGMII:
- supported = SUPPORTED_Autoneg |
- SUPPORTED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseT_Full);
if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
- advertising |= ADVERTISED_1000baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseT_Full);
if (pf->hw_features & I40E_HW_100M_SGMII_CAPABLE) {
- supported |= SUPPORTED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 100baseT_Full);
if (hw_link_info->requested_speeds &
I40E_LINK_SPEED_100MB)
- advertising |= ADVERTISED_100baseT_Full;
+ ethtool_link_ksettings_add_link_mode(
+ ks, advertising, 100baseT_Full);
}
break;
case I40E_PHY_TYPE_40GBASE_KR4:
+ case I40E_PHY_TYPE_25GBASE_KR:
case I40E_PHY_TYPE_20GBASE_KR2:
case I40E_PHY_TYPE_10GBASE_KR:
case I40E_PHY_TYPE_10GBASE_KX4:
case I40E_PHY_TYPE_1000BASE_KX:
- supported |= SUPPORTED_40000baseKR4_Full |
- SUPPORTED_20000baseKR2_Full |
- SUPPORTED_10000baseKR_Full |
- SUPPORTED_10000baseKX4_Full |
- SUPPORTED_1000baseKX_Full |
- SUPPORTED_Autoneg;
- advertising |= ADVERTISED_40000baseKR4_Full |
- ADVERTISED_20000baseKR2_Full |
- ADVERTISED_10000baseKR_Full |
- ADVERTISED_10000baseKX4_Full |
- ADVERTISED_1000baseKX_Full |
- ADVERTISED_Autoneg;
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 40000baseKR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseKR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 20000baseKR2_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseKX4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 1000baseKX_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 40000baseKR4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseKR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 20000baseKR2_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseKR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseKX4_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 1000baseKX_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
break;
- case I40E_PHY_TYPE_25GBASE_KR:
case I40E_PHY_TYPE_25GBASE_CR:
- case I40E_PHY_TYPE_25GBASE_SR:
- case I40E_PHY_TYPE_25GBASE_LR:
- supported = SUPPORTED_Autoneg;
- advertising = ADVERTISED_Autoneg;
- /* TODO: add speeds when ethtool is ready to support*/
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseCR_Full);
+ break;
+ case I40E_PHY_TYPE_25GBASE_AOC:
+ case I40E_PHY_TYPE_25GBASE_ACC:
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 25000baseCR_Full);
+
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 25000baseCR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, supported,
+ 10000baseCR_Full);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ 10000baseCR_Full);
break;
default:
/* if we got here and link is up something bad is afoot */
- netdev_info(netdev, "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
+ netdev_info(netdev,
+ "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
hw_link_info->phy_type);
}
/* Now that we've worked out everything that could be supported by the
- * current PHY type, get what is supported by the NVM and them to
- * get what is truly supported
+ * current PHY type, get what is supported by the NVM and intersect
+ * them to get what is truly supported
*/
- i40e_phy_type_to_ethtool(pf, &e_supported,
- &e_advertising);
-
- supported = supported & e_supported;
- advertising = advertising & e_advertising;
+ memset(&cap_ksettings, 0, sizeof(struct ethtool_link_ksettings));
+ i40e_phy_type_to_ethtool(pf, &cap_ksettings);
+ ethtool_intersect_link_masks(ks, &cap_ksettings);
/* Set speed and duplex */
switch (link_speed) {
case I40E_LINK_SPEED_40GB:
- cmd->base.speed = SPEED_40000;
+ ks->base.speed = SPEED_40000;
break;
case I40E_LINK_SPEED_25GB:
-#ifdef SPEED_25000
- cmd->base.speed = SPEED_25000;
-#else
- netdev_info(netdev,
- "Speed is 25G, display not supported by this version of ethtool.\n");
-#endif
+ ks->base.speed = SPEED_25000;
break;
case I40E_LINK_SPEED_20GB:
- cmd->base.speed = SPEED_20000;
+ ks->base.speed = SPEED_20000;
break;
case I40E_LINK_SPEED_10GB:
- cmd->base.speed = SPEED_10000;
+ ks->base.speed = SPEED_10000;
break;
case I40E_LINK_SPEED_1GB:
- cmd->base.speed = SPEED_1000;
+ ks->base.speed = SPEED_1000;
break;
case I40E_LINK_SPEED_100MB:
- cmd->base.speed = SPEED_100;
+ ks->base.speed = SPEED_100;
break;
default:
break;
}
- cmd->base.duplex = DUPLEX_FULL;
-
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
- supported);
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
- advertising);
+ ks->base.duplex = DUPLEX_FULL;
}
/**
* i40e_get_settings_link_down - Get the Link settings for when link is down
* @hw: hw structure
- * @ecmd: ethtool command to fill in
+ * @ks: ethtool ksettings to fill in
+ * @pf: pointer to physical function struct
*
* Reports link settings that can be determined when link is down
**/
static void i40e_get_settings_link_down(struct i40e_hw *hw,
- struct ethtool_link_ksettings *cmd,
+ struct ethtool_link_ksettings *ks,
struct i40e_pf *pf)
{
- u32 supported, advertising;
-
/* link is down and the driver needs to fall back on
* supported phy types to figure out what info to display
*/
- i40e_phy_type_to_ethtool(pf, &supported, &advertising);
-
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
- supported);
- ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
- advertising);
+ i40e_phy_type_to_ethtool(pf, ks);
/* With no link speed and duplex are unknown */
- cmd->base.speed = SPEED_UNKNOWN;
- cmd->base.duplex = DUPLEX_UNKNOWN;
+ ks->base.speed = SPEED_UNKNOWN;
+ ks->base.duplex = DUPLEX_UNKNOWN;
}
/**
- * i40e_get_settings - Get Link Speed and Duplex settings
+ * i40e_get_link_ksettings - Get Link Speed and Duplex settings
* @netdev: network interface device structure
- * @ecmd: ethtool command
+ * @ks: ethtool ksettings
*
* Reports speed/duplex settings based on media_type
**/
static int i40e_get_link_ksettings(struct net_device *netdev,
- struct ethtool_link_ksettings *cmd)
+ struct ethtool_link_ksettings *ks)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_pf *pf = np->vsi->back;
struct i40e_hw *hw = &pf->hw;
struct i40e_link_status *hw_link_info = &hw->phy.link_info;
bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
- u32 advertising;
+
+ ethtool_link_ksettings_zero_link_mode(ks, supported);
+ ethtool_link_ksettings_zero_link_mode(ks, advertising);
if (link_up)
- i40e_get_settings_link_up(hw, cmd, netdev, pf);
+ i40e_get_settings_link_up(hw, ks, netdev, pf);
else
- i40e_get_settings_link_down(hw, cmd, pf);
+ i40e_get_settings_link_down(hw, ks, pf);
/* Now set the settings that don't rely on link being up/down */
/* Set autoneg settings */
- cmd->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
- AUTONEG_ENABLE : AUTONEG_DISABLE);
+ ks->base.autoneg = ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+ AUTONEG_ENABLE : AUTONEG_DISABLE);
+ /* Set media type settings */
switch (hw->phy.media_type) {
case I40E_MEDIA_TYPE_BACKPLANE:
- ethtool_link_ksettings_add_link_mode(cmd, supported,
- Autoneg);
- ethtool_link_ksettings_add_link_mode(cmd, supported,
- Backplane);
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
- Autoneg);
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
+ ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, supported, Backplane);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Autoneg);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
Backplane);
- cmd->base.port = PORT_NONE;
+ ks->base.port = PORT_NONE;
break;
case I40E_MEDIA_TYPE_BASET:
- ethtool_link_ksettings_add_link_mode(cmd, supported, TP);
- ethtool_link_ksettings_add_link_mode(cmd, advertising, TP);
- cmd->base.port = PORT_TP;
+ ethtool_link_ksettings_add_link_mode(ks, supported, TP);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, TP);
+ ks->base.port = PORT_TP;
break;
case I40E_MEDIA_TYPE_DA:
case I40E_MEDIA_TYPE_CX4:
- ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
- ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
- cmd->base.port = PORT_DA;
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FIBRE);
+ ks->base.port = PORT_DA;
break;
case I40E_MEDIA_TYPE_FIBER:
- ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
- cmd->base.port = PORT_FIBRE;
+ ethtool_link_ksettings_add_link_mode(ks, supported, FIBRE);
+ ks->base.port = PORT_FIBRE;
break;
case I40E_MEDIA_TYPE_UNKNOWN:
default:
- cmd->base.port = PORT_OTHER;
+ ks->base.port = PORT_OTHER;
break;
}
/* Set flow control settings */
- ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+ ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
switch (hw->fc.requested_mode) {
case I40E_FC_FULL:
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
- Pause);
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
break;
case I40E_FC_TX_PAUSE:
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
Asym_Pause);
break;
case I40E_FC_RX_PAUSE:
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
- Pause);
- ethtool_link_ksettings_add_link_mode(cmd, advertising,
+ ethtool_link_ksettings_add_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
Asym_Pause);
break;
default:
- ethtool_convert_link_mode_to_legacy_u32(
- &advertising, cmd->link_modes.advertising);
-
- advertising &= ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
-
- ethtool_convert_legacy_u32_to_link_mode(
- cmd->link_modes.advertising, advertising);
+ ethtool_link_ksettings_del_link_mode(ks, advertising, Pause);
+ ethtool_link_ksettings_del_link_mode(ks, advertising,
+ Asym_Pause);
break;
}
@@ -680,30 +811,28 @@ static int i40e_get_link_ksettings(struct net_device *netdev,
}
/**
- * i40e_set_settings - Set Speed and Duplex
+ * i40e_set_link_ksettings - Set Speed and Duplex
* @netdev: network interface device structure
- * @ecmd: ethtool command
+ * @ks: ethtool ksettings
*
* Set speed/duplex per media_types advertised/forced
**/
static int i40e_set_link_ksettings(struct net_device *netdev,
- const struct ethtool_link_ksettings *cmd)
+ const struct ethtool_link_ksettings *ks)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_aq_get_phy_abilities_resp abilities;
+ struct ethtool_link_ksettings safe_ks;
+ struct ethtool_link_ksettings copy_ks;
struct i40e_aq_set_phy_config config;
struct i40e_pf *pf = np->vsi->back;
struct i40e_vsi *vsi = np->vsi;
struct i40e_hw *hw = &pf->hw;
- struct ethtool_link_ksettings safe_cmd;
- struct ethtool_link_ksettings copy_cmd;
+ bool autoneg_changed = false;
i40e_status status = 0;
- bool change = false;
int timeout = 50;
int err = 0;
- u32 autoneg;
- u32 advertise;
- u32 tmp;
+ u8 autoneg;
/* Changing port settings is not supported if this isn't the
* port's controlling PF
@@ -712,17 +841,14 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
i40e_partition_setting_complaint(pf);
return -EOPNOTSUPP;
}
-
if (vsi != pf->vsi[pf->lan_vsi])
return -EOPNOTSUPP;
-
if (hw->phy.media_type != I40E_MEDIA_TYPE_BASET &&
hw->phy.media_type != I40E_MEDIA_TYPE_FIBER &&
hw->phy.media_type != I40E_MEDIA_TYPE_BACKPLANE &&
hw->phy.media_type != I40E_MEDIA_TYPE_DA &&
hw->phy.link_info.link_info & I40E_AQ_LINK_UP)
return -EOPNOTSUPP;
-
if (hw->device_id == I40E_DEV_ID_KX_B ||
hw->device_id == I40E_DEV_ID_KX_C ||
hw->device_id == I40E_DEV_ID_20G_KR2 ||
@@ -731,31 +857,37 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
return -EOPNOTSUPP;
}
- /* copy the cmd to copy_cmd to avoid modifying the origin */
- memcpy(&copy_cmd, cmd, sizeof(struct ethtool_link_ksettings));
+ /* copy the ksettings to copy_ks to avoid modifying the origin */
+ memcpy(&copy_ks, ks, sizeof(struct ethtool_link_ksettings));
- /* get our own copy of the bits to check against */
- memset(&safe_cmd, 0, sizeof(struct ethtool_link_ksettings));
- i40e_get_link_ksettings(netdev, &safe_cmd);
+ /* save autoneg out of ksettings */
+ autoneg = copy_ks.base.autoneg;
- /* save autoneg and speed out of cmd */
- autoneg = cmd->base.autoneg;
- ethtool_convert_link_mode_to_legacy_u32(&advertise,
- cmd->link_modes.advertising);
+ memset(&safe_ks, 0, sizeof(safe_ks));
+ /* Get link modes supported by hardware and check against modes
+ * requested by the user. Return an error if unsupported mode was set.
+ */
+ i40e_phy_type_to_ethtool(pf, &safe_ks);
+ if (!bitmap_subset(copy_ks.link_modes.advertising,
+ safe_ks.link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS))
+ return -EINVAL;
- /* set autoneg and speed back to what they currently are */
- copy_cmd.base.autoneg = safe_cmd.base.autoneg;
- ethtool_convert_link_mode_to_legacy_u32(
- &tmp, safe_cmd.link_modes.advertising);
- ethtool_convert_legacy_u32_to_link_mode(
- copy_cmd.link_modes.advertising, tmp);
+ /* get our own copy of the bits to check against */
+ memset(&safe_ks, 0, sizeof(struct ethtool_link_ksettings));
+ safe_ks.base.cmd = copy_ks.base.cmd;
+ safe_ks.base.link_mode_masks_nwords =
+ copy_ks.base.link_mode_masks_nwords;
+ i40e_get_link_ksettings(netdev, &safe_ks);
- copy_cmd.base.cmd = safe_cmd.base.cmd;
+ /* set autoneg back to what it currently is */
+ copy_ks.base.autoneg = safe_ks.base.autoneg;
- /* If copy_cmd and safe_cmd are not the same now, then they are
- * trying to set something that we do not support
+ /* If copy_ks.base and safe_ks.base are not the same now, then they are
+ * trying to set something that we do not support.
*/
- if (memcmp(&copy_cmd, &safe_cmd, sizeof(struct ethtool_link_ksettings)))
+ if (memcmp(&copy_ks.base, &safe_ks.base,
+ sizeof(struct ethtool_link_settings)))
return -EOPNOTSUPP;
while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
@@ -784,8 +916,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* If autoneg was not already enabled */
if (!(hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED)) {
/* If autoneg is not supported, return error */
- if (!ethtool_link_ksettings_test_link_mode(
- &safe_cmd, supported, Autoneg)) {
+ if (!ethtool_link_ksettings_test_link_mode(&safe_ks,
+ supported,
+ Autoneg)) {
netdev_info(netdev, "Autoneg not supported on this phy\n");
err = -EINVAL;
goto done;
@@ -793,7 +926,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* Autoneg is allowed to change */
config.abilities = abilities.abilities |
I40E_AQ_PHY_ENABLE_AN;
- change = true;
+ autoneg_changed = true;
}
} else {
/* If autoneg is currently enabled */
@@ -801,8 +934,9 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* If autoneg is supported 10GBASE_T is the only PHY
* that can disable it, so otherwise return error
*/
- if (ethtool_link_ksettings_test_link_mode(
- &safe_cmd, supported, Autoneg) &&
+ if (ethtool_link_ksettings_test_link_mode(&safe_ks,
+ supported,
+ Autoneg) &&
hw->phy.link_info.phy_type !=
I40E_PHY_TYPE_10GBASE_T) {
netdev_info(netdev, "Autoneg cannot be disabled on this phy\n");
@@ -812,32 +946,49 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* Autoneg is allowed to change */
config.abilities = abilities.abilities &
~I40E_AQ_PHY_ENABLE_AN;
- change = true;
+ autoneg_changed = true;
}
}
- ethtool_convert_link_mode_to_legacy_u32(&tmp,
- safe_cmd.link_modes.supported);
- if (advertise & ~tmp) {
- err = -EINVAL;
- goto done;
- }
-
- if (advertise & ADVERTISED_100baseT_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 100baseT_Full))
config.link_speed |= I40E_LINK_SPEED_100MB;
- if (advertise & ADVERTISED_1000baseT_Full ||
- advertise & ADVERTISED_1000baseKX_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseT_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseX_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseKX_Full))
config.link_speed |= I40E_LINK_SPEED_1GB;
- if (advertise & ADVERTISED_10000baseT_Full ||
- advertise & ADVERTISED_10000baseKX4_Full ||
- advertise & ADVERTISED_10000baseKR_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseT_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseKX4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseKR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseCR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 10000baseSR_Full))
config.link_speed |= I40E_LINK_SPEED_10GB;
- if (advertise & ADVERTISED_20000baseKR2_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 20000baseKR2_Full))
config.link_speed |= I40E_LINK_SPEED_20GB;
- if (advertise & ADVERTISED_40000baseKR4_Full ||
- advertise & ADVERTISED_40000baseCR4_Full ||
- advertise & ADVERTISED_40000baseSR4_Full ||
- advertise & ADVERTISED_40000baseLR4_Full)
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseCR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseKR_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 25000baseSR_Full))
+ config.link_speed |= I40E_LINK_SPEED_25GB;
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseKR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseCR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseSR4_Full) ||
+ ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 40000baseLR4_Full))
config.link_speed |= I40E_LINK_SPEED_40GB;
/* If speed didn't get set, set it to what it currently is.
@@ -846,8 +997,7 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
*/
if (!config.link_speed)
config.link_speed = abilities.link_speed;
-
- if (change || (abilities.link_speed != config.link_speed)) {
+ if (autoneg_changed || abilities.link_speed != config.link_speed) {
/* copy over the rest of the abilities */
config.phy_type = abilities.phy_type;
config.phy_type_ext = abilities.phy_type_ext;
@@ -874,7 +1024,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
/* make the aq call */
status = i40e_aq_set_phy_config(hw, &config, NULL);
if (status) {
- netdev_info(netdev, "Set phy config failed, err %s aq_err %s\n",
+ netdev_info(netdev,
+ "Set phy config failed, err %s aq_err %s\n",
i40e_stat_str(hw, status),
i40e_aq_str(hw, hw->aq.asq_last_status));
err = -EAGAIN;
@@ -883,7 +1034,8 @@ static int i40e_set_link_ksettings(struct net_device *netdev,
status = i40e_update_link_info(hw);
if (status)
- netdev_dbg(netdev, "Updating link info failed with err %s aq_err %s\n",
+ netdev_dbg(netdev,
+ "Updating link info failed with err %s aq_err %s\n",
i40e_stat_str(hw, status),
i40e_aq_str(hw, hw->aq.asq_last_status));
@@ -2008,7 +2160,9 @@ static int i40e_set_phys_id(struct net_device *netdev,
if (!(pf->hw_features & I40E_HW_PHY_CONTROLS_LEDS)) {
pf->led_status = i40e_led_get(hw);
} else {
- i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL);
+ if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+ i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL,
+ NULL);
ret = i40e_led_get_phy(hw, &temp_status,
&pf->phy_led_val);
pf->led_status = temp_status;
@@ -2033,7 +2187,8 @@ static int i40e_set_phys_id(struct net_device *netdev,
ret = i40e_led_set_phy(hw, false, pf->led_status,
(pf->phy_led_val |
I40E_PHY_LED_MODE_ORIG));
- i40e_aq_set_phy_debug(hw, 0, NULL);
+ if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE))
+ i40e_aq_set_phy_debug(hw, 0, NULL);
}
break;
default:
@@ -2071,14 +2226,13 @@ static int __i40e_get_coalesce(struct net_device *netdev,
ec->tx_max_coalesced_frames_irq = vsi->work_limit;
ec->rx_max_coalesced_frames_irq = vsi->work_limit;
- /* rx and tx usecs has per queue value. If user doesn't specify the queue,
- * return queue 0's value to represent.
+ /* rx and tx usecs has per queue value. If user doesn't specify the
+ * queue, return queue 0's value to represent.
*/
- if (queue < 0) {
+ if (queue < 0)
queue = 0;
- } else if (queue >= vsi->num_queue_pairs) {
+ else if (queue >= vsi->num_queue_pairs)
return -EINVAL;
- }
rx_ring = vsi->rx_rings[queue];
tx_ring = vsi->tx_rings[queue];
@@ -2092,7 +2246,6 @@ static int __i40e_get_coalesce(struct net_device *netdev,
ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC;
ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC;
-
/* we use the _usecs_high to store/set the interrupt rate limit
* that the hardware supports, that almost but not quite
* fits the original intent of the ethtool variable,
@@ -2142,7 +2295,6 @@ static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue,
*
* Change the ITR settings for a specific queue.
**/
-
static void i40e_set_itr_per_queue(struct i40e_vsi *vsi,
struct ethtool_coalesce *ec,
int queue)
@@ -2264,8 +2416,8 @@ static int __i40e_set_coalesce(struct net_device *netdev,
vsi->int_rate_limit);
}
- /* rx and tx usecs has per queue value. If user doesn't specify the queue,
- * apply to all queues.
+ /* rx and tx usecs has per queue value. If user doesn't specify the
+ * queue, apply to all queues.
*/
if (queue < 0) {
for (i = 0; i < vsi->num_queue_pairs; i++)
@@ -2647,7 +2799,7 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
switch (cmd->cmd) {
case ETHTOOL_GRXRINGS:
- cmd->data = vsi->num_queue_pairs;
+ cmd->data = vsi->rss_size;
ret = 0;
break;
case ETHTOOL_GRXFH:
@@ -3892,6 +4044,12 @@ static int i40e_set_channels(struct net_device *dev,
if (vsi->type != I40E_VSI_MAIN)
return -EINVAL;
+ /* We do not support setting channels via ethtool when TCs are
+ * configured through mqprio
+ */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return -EINVAL;
+
/* verify they are not requesting separate vectors */
if (!count || ch->rx_count || ch->tx_count)
return -EINVAL;
@@ -3959,6 +4117,16 @@ static u32 i40e_get_rxfh_indir_size(struct net_device *netdev)
return I40E_HLUT_ARRAY_SIZE;
}
+/**
+ * i40e_get_rxfh - get the rx flow hash indirection table
+ * @netdev: network interface device structure
+ * @indir: indirection table
+ * @key: hash key
+ * @hfunc: hash function
+ *
+ * Reads the indirection table directly from the hardware. Returns 0 on
+ * success.
+ **/
static int i40e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
@@ -4090,7 +4258,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- u64 orig_flags, new_flags, changed_flags;
+ u32 orig_flags, new_flags, changed_flags;
u32 i, j;
orig_flags = READ_ONCE(pf->flags);
@@ -4142,12 +4310,12 @@ flags_complete:
return -EOPNOTSUPP;
/* Compare and exchange the new flags into place. If we failed, that
- * is if cmpxchg64 returns anything but the old value, this means that
+ * is if cmpxchg returns anything but the old value, this means that
* something else has modified the flags variable since we copied it
* originally. We'll just punt with an error and log something in the
* message buffer.
*/
- if (cmpxchg64(&pf->flags, orig_flags, new_flags) != orig_flags) {
+ if (cmpxchg(&pf->flags, orig_flags, new_flags) != orig_flags) {
dev_warn(&pf->pdev->dev,
"Unable to update pf->flags as it was modified by another thread...\n");
return -EAGAIN;
@@ -4189,13 +4357,166 @@ flags_complete:
/* Issue reset to cause things to take effect, as additional bits
* are added we will need to create a mask of bits requiring reset
*/
- if ((changed_flags & I40E_FLAG_VEB_STATS_ENABLED) ||
- ((changed_flags & I40E_FLAG_LEGACY_RX) && netif_running(dev)))
+ if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
+ I40E_FLAG_LEGACY_RX |
+ I40E_FLAG_SOURCE_PRUNING_DISABLED))
i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
return 0;
}
+/**
+ * i40e_get_module_info - get (Q)SFP+ module type info
+ * @netdev: network interface device structure
+ * @modinfo: module EEPROM size and layout information structure
+ **/
+static int i40e_get_module_info(struct net_device *netdev,
+ struct ethtool_modinfo *modinfo)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ u32 sff8472_comp = 0;
+ u32 sff8472_swap = 0;
+ u32 sff8636_rev = 0;
+ i40e_status status;
+ u32 type = 0;
+
+ /* Check if firmware supports reading module EEPROM. */
+ if (!(hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE)) {
+ netdev_err(vsi->netdev, "Module EEPROM memory read not supported. Please update the NVM image.\n");
+ return -EINVAL;
+ }
+
+ status = i40e_update_link_info(hw);
+ if (status)
+ return -EIO;
+
+ if (hw->phy.link_info.phy_type == I40E_PHY_TYPE_EMPTY) {
+ netdev_err(vsi->netdev, "Cannot read module EEPROM memory. No module connected.\n");
+ return -EINVAL;
+ }
+
+ type = hw->phy.link_info.module_type[0];
+
+ switch (type) {
+ case I40E_MODULE_TYPE_SFP:
+ status = i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+ I40E_I2C_EEPROM_DEV_ADDR,
+ I40E_MODULE_SFF_8472_COMP,
+ &sff8472_comp, NULL);
+ if (status)
+ return -EIO;
+
+ status = i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+ I40E_I2C_EEPROM_DEV_ADDR,
+ I40E_MODULE_SFF_8472_SWAP,
+ &sff8472_swap, NULL);
+ if (status)
+ return -EIO;
+
+ /* Check if the module requires address swap to access
+ * the other EEPROM memory page.
+ */
+ if (sff8472_swap & I40E_MODULE_SFF_ADDR_MODE) {
+ netdev_warn(vsi->netdev, "Module address swap to access page 0xA2 is not supported.\n");
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else if (sff8472_comp == 0x00) {
+ /* Module is not SFF-8472 compliant */
+ modinfo->type = ETH_MODULE_SFF_8079;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8472;
+ modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+ }
+ break;
+ case I40E_MODULE_TYPE_QSFP_PLUS:
+ /* Read from memory page 0. */
+ status = i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+ 0,
+ I40E_MODULE_REVISION_ADDR,
+ &sff8636_rev, NULL);
+ if (status)
+ return -EIO;
+ /* Determine revision compliance byte */
+ if (sff8636_rev > 0x02) {
+ /* Module is SFF-8636 compliant */
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+ } else {
+ modinfo->type = ETH_MODULE_SFF_8436;
+ modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+ }
+ break;
+ case I40E_MODULE_TYPE_QSFP28:
+ modinfo->type = ETH_MODULE_SFF_8636;
+ modinfo->eeprom_len = I40E_MODULE_QSFP_MAX_LEN;
+ break;
+ default:
+ netdev_err(vsi->netdev, "Module type unrecognized\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * i40e_get_module_eeprom - fills buffer with (Q)SFP+ module memory contents
+ * @netdev: network interface device structure
+ * @ee: EEPROM dump request structure
+ * @data: buffer to be filled with EEPROM contents
+ **/
+static int i40e_get_module_eeprom(struct net_device *netdev,
+ struct ethtool_eeprom *ee,
+ u8 *data)
+{
+ struct i40e_netdev_priv *np = netdev_priv(netdev);
+ struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ bool is_sfp = false;
+ i40e_status status;
+ u32 value = 0;
+ int i;
+
+ if (!ee || !ee->len || !data)
+ return -EINVAL;
+
+ if (hw->phy.link_info.module_type[0] == I40E_MODULE_TYPE_SFP)
+ is_sfp = true;
+
+ for (i = 0; i < ee->len; i++) {
+ u32 offset = i + ee->offset;
+ u32 addr = is_sfp ? I40E_I2C_EEPROM_DEV_ADDR : 0;
+
+ /* Check if we need to access the other memory page */
+ if (is_sfp) {
+ if (offset >= ETH_MODULE_SFF_8079_LEN) {
+ offset -= ETH_MODULE_SFF_8079_LEN;
+ addr = I40E_I2C_EEPROM_DEV_ADDR2;
+ }
+ } else {
+ while (offset >= ETH_MODULE_SFF_8436_LEN) {
+ /* Compute memory page number and offset. */
+ offset -= ETH_MODULE_SFF_8436_LEN / 2;
+ addr++;
+ }
+ }
+
+ status = i40e_aq_get_phy_register(hw,
+ I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
+ addr, offset, &value, NULL);
+ if (status)
+ return -EIO;
+ data[i] = value;
+ }
+ return 0;
+}
+
static const struct ethtool_ops i40e_ethtool_ops = {
.get_drvinfo = i40e_get_drvinfo,
.get_regs_len = i40e_get_regs_len,
@@ -4228,6 +4549,8 @@ static const struct ethtool_ops i40e_ethtool_ops = {
.set_rxfh = i40e_set_rxfh,
.get_channels = i40e_get_channels,
.set_channels = i40e_set_channels,
+ .get_module_info = i40e_get_module_info,
+ .get_module_eeprom = i40e_get_module_eeprom,
.get_ts_info = i40e_get_ts_info,
.get_priv_flags = i40e_get_priv_flags,
.set_priv_flags = i40e_set_priv_flags,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 6498da8806cb..54ff34faca37 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -600,6 +600,20 @@ static void i40e_stat_update32(struct i40e_hw *hw, u32 reg,
}
/**
+ * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
+ * @hw: ptr to the hardware info
+ * @reg: the hw reg to read and clear
+ * @stat: ptr to the stat
+ **/
+static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
+{
+ u32 new_data = rd32(hw, reg);
+
+ wr32(hw, reg, 1); /* must write a nonzero value to clear register */
+ *stat += new_data;
+}
+
+/**
* i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
* @vsi: the VSI to be updated
**/
@@ -1040,18 +1054,15 @@ static void i40e_update_pf_stats(struct i40e_pf *pf)
&osd->rx_jabber, &nsd->rx_jabber);
/* FDIR stats */
- i40e_stat_update32(hw,
- I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(pf->hw.pf_id)),
- pf->stat_offsets_loaded,
- &osd->fd_atr_match, &nsd->fd_atr_match);
- i40e_stat_update32(hw,
- I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(pf->hw.pf_id)),
- pf->stat_offsets_loaded,
- &osd->fd_sb_match, &nsd->fd_sb_match);
- i40e_stat_update32(hw,
- I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id)),
- pf->stat_offsets_loaded,
- &osd->fd_atr_tunnel_match, &nsd->fd_atr_tunnel_match);
+ i40e_stat_update_and_clear32(hw,
+ I40E_GLQF_PCNT(I40E_FD_ATR_STAT_IDX(hw->pf_id)),
+ &nsd->fd_atr_match);
+ i40e_stat_update_and_clear32(hw,
+ I40E_GLQF_PCNT(I40E_FD_SB_STAT_IDX(hw->pf_id)),
+ &nsd->fd_sb_match);
+ i40e_stat_update_and_clear32(hw,
+ I40E_GLQF_PCNT(I40E_FD_ATR_TUNNEL_STAT_IDX(hw->pf_id)),
+ &nsd->fd_atr_tunnel_match);
val = rd32(hw, I40E_PRTPM_EEE_STAT);
nsd->tx_lpi_status =
@@ -1578,6 +1589,170 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
}
/**
+ * i40e_config_rss_aq - Prepare for RSS using AQ commands
+ * @vsi: vsi structure
+ * @seed: RSS hash seed
+ **/
+static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
+ u8 *lut, u16 lut_size)
+{
+ struct i40e_pf *pf = vsi->back;
+ struct i40e_hw *hw = &pf->hw;
+ int ret = 0;
+
+ if (seed) {
+ struct i40e_aqc_get_set_rss_key_data *seed_dw =
+ (struct i40e_aqc_get_set_rss_key_data *)seed;
+ ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Cannot set RSS key, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+ }
+ if (lut) {
+ bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
+
+ ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Cannot set RSS lut, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/**
+ * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
+ * @vsi: VSI structure
+ **/
+static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
+{
+ struct i40e_pf *pf = vsi->back;
+ u8 seed[I40E_HKEY_ARRAY_SIZE];
+ u8 *lut;
+ int ret;
+
+ if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
+ return 0;
+ if (!vsi->rss_size)
+ vsi->rss_size = min_t(int, pf->alloc_rss_size,
+ vsi->num_queue_pairs);
+ if (!vsi->rss_size)
+ return -EINVAL;
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ /* Use the user configured hash keys and lookup table if there is one,
+ * otherwise use default
+ */
+ if (vsi->rss_lut_user)
+ memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
+ else
+ i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
+ if (vsi->rss_hkey_user)
+ memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+ else
+ netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+ ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
+ kfree(lut);
+ return ret;
+}
+
+/**
+ * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
+ * @vsi: the VSI being configured,
+ * @ctxt: VSI context structure
+ * @enabled_tc: number of traffic classes to enable
+ *
+ * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
+ **/
+static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
+ struct i40e_vsi_context *ctxt,
+ u8 enabled_tc)
+{
+ u16 qcount = 0, max_qcount, qmap, sections = 0;
+ int i, override_q, pow, num_qps, ret;
+ u8 netdev_tc = 0, offset = 0;
+
+ if (vsi->type != I40E_VSI_MAIN)
+ return -EINVAL;
+ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+ sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+ vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
+ vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+ num_qps = vsi->mqprio_qopt.qopt.count[0];
+
+ /* find the next higher power-of-2 of num queue pairs */
+ pow = ilog2(num_qps);
+ if (!is_power_of_2(num_qps))
+ pow++;
+ qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+ (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+ /* Setup queue offset/count for all TCs for given VSI */
+ max_qcount = vsi->mqprio_qopt.qopt.count[0];
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ /* See if the given TC is enabled for the given VSI */
+ if (vsi->tc_config.enabled_tc & BIT(i)) {
+ offset = vsi->mqprio_qopt.qopt.offset[i];
+ qcount = vsi->mqprio_qopt.qopt.count[i];
+ if (qcount > max_qcount)
+ max_qcount = qcount;
+ vsi->tc_config.tc_info[i].qoffset = offset;
+ vsi->tc_config.tc_info[i].qcount = qcount;
+ vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
+ } else {
+ /* TC is not enabled so set the offset to
+ * default queue and allocate one queue
+ * for the given TC.
+ */
+ vsi->tc_config.tc_info[i].qoffset = 0;
+ vsi->tc_config.tc_info[i].qcount = 1;
+ vsi->tc_config.tc_info[i].netdev_tc = 0;
+ }
+ }
+
+ /* Set actual Tx/Rx queue pairs */
+ vsi->num_queue_pairs = offset + qcount;
+
+ /* Setup queue TC[0].qmap for given VSI context */
+ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+ ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+ ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+ ctxt->info.valid_sections |= cpu_to_le16(sections);
+
+ /* Reconfigure RSS for main VSI with max queue count */
+ vsi->rss_size = max_qcount;
+ ret = i40e_vsi_config_rss(vsi);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Failed to reconfig rss for num_queues (%u)\n",
+ max_qcount);
+ return ret;
+ }
+ vsi->reconfig_rss = true;
+ dev_dbg(&vsi->back->pdev->dev,
+ "Reconfigured rss with num_queues (%u)\n", max_qcount);
+
+ /* Find queue count available for channel VSIs and starting offset
+ * for channel VSIs
+ */
+ override_q = vsi->mqprio_qopt.qopt.count[0];
+ if (override_q && override_q < vsi->num_queue_pairs) {
+ vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
+ vsi->next_base_queue = override_q;
+ }
+ return 0;
+}
+
+/**
* i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
* @vsi: the VSI being setup
* @ctxt: VSI context structure
@@ -1615,7 +1790,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
numtc = 1;
}
} else {
- /* At least TC0 is enabled in case of non-DCB case */
+ /* At least TC0 is enabled in non-DCB, non-MQPRIO case */
numtc = 1;
}
@@ -1765,11 +1940,6 @@ static void i40e_set_rx_mode(struct net_device *netdev)
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
}
-
- /* schedule our worker thread which will take care of
- * applying the new filter changes
- */
- i40e_service_event_schedule(vsi->back);
}
/**
@@ -2873,22 +3043,18 @@ static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
**/
static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
{
- struct i40e_vsi *vsi = ring->vsi;
+ int cpu;
- if (!ring->q_vector || !ring->netdev)
+ if (!ring->q_vector || !ring->netdev || ring->ch)
return;
- if ((vsi->tc_config.numtc <= 1) &&
- !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state)) {
- netif_set_xps_queue(ring->netdev,
- get_cpu_mask(ring->q_vector->v_idx),
- ring->queue_index);
- }
+ /* We only initialize XPS once, so as not to overwrite user settings */
+ if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
+ return;
- /* schedule our worker thread which will take care of
- * applying the new filter changes
- */
- i40e_service_event_schedule(vsi->back);
+ cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
+ netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
+ ring->queue_index);
}
/**
@@ -2942,7 +3108,14 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
* initialization. This has to be done regardless of
* DCB as by default everything is mapped to TC0.
*/
- tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+
+ if (ring->ch)
+ tx_ctx.rdylist =
+ le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
+
+ else
+ tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+
tx_ctx.rdylist_act = 0;
/* clear the context in the HMC */
@@ -2964,12 +3137,23 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
}
/* Now associate this queue with this PCI function */
- if (vsi->type == I40E_VSI_VMDQ2) {
- qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
- qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
- I40E_QTX_CTL_VFVM_INDX_MASK;
+ if (ring->ch) {
+ if (ring->ch->type == I40E_VSI_VMDQ2)
+ qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+ else
+ return -EINVAL;
+
+ qtx_ctl |= (ring->ch->vsi_number <<
+ I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+ I40E_QTX_CTL_VFVM_INDX_MASK;
} else {
- qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+ if (vsi->type == I40E_VSI_VMDQ2) {
+ qtx_ctl = I40E_QTX_CTL_VM_QUEUE;
+ qtx_ctl |= ((vsi->id) << I40E_QTX_CTL_VFVM_INDX_SHIFT) &
+ I40E_QTX_CTL_VFVM_INDX_MASK;
+ } else {
+ qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+ }
}
qtx_ctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
@@ -2998,7 +3182,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
struct i40e_hmc_obj_rxq rx_ctx;
i40e_status err = 0;
- ring->state = 0;
+ bitmap_zero(ring->state, __I40E_RING_STATE_NBITS);
/* clear the context structure first */
memset(&rx_ctx, 0, sizeof(rx_ctx));
@@ -3023,7 +3207,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
if (hw->revision_id == 0)
rx_ctx.lrxqthresh = 0;
else
- rx_ctx.lrxqthresh = 2;
+ rx_ctx.lrxqthresh = 1;
rx_ctx.crcstrip = 1;
rx_ctx.l2tsel = 1;
/* this controls whether VLAN is stripped from inner headers */
@@ -3138,6 +3322,7 @@ static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
rx_ring->dcb_tc = 0;
tx_ring->dcb_tc = 0;
}
+ return;
}
for (n = 0; n < I40E_MAX_TRAFFIC_CLASS; n++) {
@@ -3396,15 +3581,14 @@ void i40e_irq_dynamic_disable_icr0(struct i40e_pf *pf)
/**
* i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
* @pf: board private structure
- * @clearpba: true when all pending interrupt events should be cleared
**/
-void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf, bool clearpba)
+void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
{
struct i40e_hw *hw = &pf->hw;
u32 val;
val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
- (clearpba ? I40E_PFINT_DYN_CTL0_CLEARPBA_MASK : 0) |
+ I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
(I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
wr32(hw, I40E_PFINT_DYN_CTL0, val);
@@ -3471,6 +3655,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
int tx_int_idx = 0;
int vector, err;
int irq_num;
+ int cpu;
for (vector = 0; vector < q_vectors; vector++) {
struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
@@ -3506,10 +3691,14 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
q_vector->affinity_notify.release = i40e_irq_affinity_release;
irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
- /* get_cpu_mask returns a static constant mask with
- * a permanent lifetime so it's ok to use here.
+ /* Spread affinity hints out across online CPUs.
+ *
+ * get_cpu_mask returns a static constant mask with
+ * a permanent lifetime so it's ok to pass to
+ * irq_set_affinity_hint without making a copy.
*/
- irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
+ cpu = cpumask_local_spread(q_vector->v_idx, -1);
+ irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
}
vsi->irqs_ready = true;
@@ -3585,7 +3774,7 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
for (i = 0; i < vsi->num_q_vectors; i++)
i40e_irq_dynamic_enable(vsi, i);
} else {
- i40e_irq_dynamic_enable_icr0(pf, true);
+ i40e_irq_dynamic_enable_icr0(pf);
}
i40e_flush(&pf->hw);
@@ -3593,14 +3782,20 @@ static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
}
/**
- * i40e_stop_misc_vector - Stop the vector that handles non-queue events
+ * i40e_free_misc_vector - Free the vector that handles non-queue events
* @pf: board private structure
**/
-static void i40e_stop_misc_vector(struct i40e_pf *pf)
+static void i40e_free_misc_vector(struct i40e_pf *pf)
{
/* Disable ICR 0 */
wr32(&pf->hw, I40E_PFINT_ICR0_ENA, 0);
i40e_flush(&pf->hw);
+
+ if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
+ synchronize_irq(pf->msix_entries[0].vector);
+ free_irq(pf->msix_entries[0].vector, pf);
+ clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
+ }
}
/**
@@ -3728,7 +3923,7 @@ enable_intr:
wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
if (!test_bit(__I40E_DOWN, pf->state)) {
i40e_service_event_schedule(pf);
- i40e_irq_dynamic_enable_icr0(pf, false);
+ i40e_irq_dynamic_enable_icr0(pf);
}
return ret;
@@ -4455,11 +4650,7 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
{
int i;
- i40e_stop_misc_vector(pf);
- if (pf->flags & I40E_FLAG_MSIX_ENABLED && pf->msix_entries) {
- synchronize_irq(pf->msix_entries[0].vector);
- free_irq(pf->msix_entries[0].vector, pf);
- }
+ i40e_free_misc_vector(pf);
i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector,
I40E_IWARP_IRQ_PILE_ID);
@@ -4848,6 +5039,24 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg)
}
/**
+ * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
+ * @pf: PF being queried
+ *
+ * Query the current MQPRIO configuration and return the number of
+ * traffic classes enabled.
+ **/
+static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
+{
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+ u8 num_tc = vsi->mqprio_qopt.qopt.num_tc;
+ u8 enabled_tc = 1, i;
+
+ for (i = 1; i < num_tc; i++)
+ enabled_tc |= BIT(i);
+ return enabled_tc;
+}
+
+/**
* i40e_pf_get_num_tc - Get enabled traffic classes for PF
* @pf: PF being queried
*
@@ -4860,7 +5069,10 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
u8 num_tc = 0;
struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
- /* If DCB is not enabled then always in single TC */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return pf->vsi[pf->lan_vsi]->mqprio_qopt.qopt.num_tc;
+
+ /* If neither MQPRIO nor DCB is enabled, then always use single TC */
if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
return 1;
@@ -4889,7 +5101,12 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
**/
static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
{
- /* If DCB is not enabled for this PF then just return default TC */
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return i40e_mqprio_get_enabled_tc(pf);
+
+ /* If neither MQPRIO nor DCB is enabled for this PF then just return
+ * default TC
+ */
if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
return I40E_DEFAULT_TRAFFIC_CLASS;
@@ -4979,6 +5196,16 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
i40e_status ret;
int i;
+ if (vsi->back->flags & I40E_FLAG_TC_MQPRIO)
+ return 0;
+ if (!vsi->mqprio_qopt.qopt.hw) {
+ ret = i40e_set_bw_limit(vsi, vsi->seid, 0);
+ if (ret)
+ dev_info(&vsi->back->pdev->dev,
+ "Failed to reset tx rate for vsi->seid %u\n",
+ vsi->seid);
+ return ret;
+ }
bw_data.tc_valid_bits = enabled_tc;
for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
bw_data.tc_bw_credits[i] = bw_share[i];
@@ -5041,6 +5268,9 @@ static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
vsi->tc_config.tc_info[i].qoffset);
}
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ return;
+
/* Assign UP2TC map for the VSI */
for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
/* Get the actual TC# for the UP */
@@ -5091,7 +5321,8 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
int i;
/* Check if enabled_tc is same as existing or new TCs */
- if (vsi->tc_config.enabled_tc == enabled_tc)
+ if (vsi->tc_config.enabled_tc == enabled_tc &&
+ vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL)
return ret;
/* Enable ETS TCs with equal BW Share for now across all VSIs */
@@ -5114,15 +5345,37 @@ static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
ctxt.vf_num = 0;
ctxt.uplink_seid = vsi->uplink_seid;
ctxt.info = vsi->info;
- i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+ if (vsi->back->flags & I40E_FLAG_TC_MQPRIO) {
+ ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc);
+ if (ret)
+ goto out;
+ } else {
+ i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+ }
+ /* On destroying the qdisc, reset vsi->rss_size, as number of enabled
+ * queues changed.
+ */
+ if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
+ vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
+ vsi->num_queue_pairs);
+ ret = i40e_vsi_config_rss(vsi);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Failed to reconfig rss for num_queues\n");
+ return ret;
+ }
+ vsi->reconfig_rss = false;
+ }
if (vsi->back->flags & I40E_FLAG_IWARP_ENABLED) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
}
- /* Update the VSI after updating the VSI queue-mapping information */
+ /* Update the VSI after updating the VSI queue-mapping
+ * information
+ */
ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
if (ret) {
dev_info(&vsi->back->pdev->dev,
@@ -5154,6 +5407,739 @@ out:
}
/**
+ * i40e_get_link_speed - Returns link speed for the interface
+ * @vsi: VSI to be configured
+ *
+ **/
+int i40e_get_link_speed(struct i40e_vsi *vsi)
+{
+ struct i40e_pf *pf = vsi->back;
+
+ switch (pf->hw.phy.link_info.link_speed) {
+ case I40E_LINK_SPEED_40GB:
+ return 40000;
+ case I40E_LINK_SPEED_25GB:
+ return 25000;
+ case I40E_LINK_SPEED_20GB:
+ return 20000;
+ case I40E_LINK_SPEED_10GB:
+ return 10000;
+ case I40E_LINK_SPEED_1GB:
+ return 1000;
+ default:
+ return -EINVAL;
+ }
+}
+
+/**
+ * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
+ * @vsi: VSI to be configured
+ * @seid: seid of the channel/VSI
+ * @max_tx_rate: max TX rate to be configured as BW limit
+ *
+ * Helper function to set BW limit for a given VSI
+ **/
+int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
+{
+ struct i40e_pf *pf = vsi->back;
+ u64 credits = 0;
+ int speed = 0;
+ int ret = 0;
+
+ speed = i40e_get_link_speed(vsi);
+ if (max_tx_rate > speed) {
+ dev_err(&pf->pdev->dev,
+ "Invalid max tx rate %llu specified for VSI seid %d.",
+ max_tx_rate, seid);
+ return -EINVAL;
+ }
+ if (max_tx_rate && max_tx_rate < 50) {
+ dev_warn(&pf->pdev->dev,
+ "Setting max tx rate to minimum usable value of 50Mbps.\n");
+ max_tx_rate = 50;
+ }
+
+ /* Tx rate credits are in values of 50Mbps, 0 is disabled */
+ credits = max_tx_rate;
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ ret = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
+ I40E_MAX_BW_INACTIVE_ACCUM, NULL);
+ if (ret)
+ dev_err(&pf->pdev->dev,
+ "Failed set tx rate (%llu Mbps) for vsi->seid %u, err %s aq_err %s\n",
+ max_tx_rate, seid, i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ return ret;
+}
+
+/**
+ * i40e_remove_queue_channels - Remove queue channels for the TCs
+ * @vsi: VSI to be configured
+ *
+ * Remove queue channels for the TCs
+ **/
+static void i40e_remove_queue_channels(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch, *ch_tmp;
+ int ret, i;
+
+ /* Reset rss size that was stored when reconfiguring rss for
+ * channel VSIs with non-power-of-2 queue count.
+ */
+ vsi->current_rss_size = 0;
+
+ /* perform cleanup for channels if they exist */
+ if (list_empty(&vsi->ch_list))
+ return;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ struct i40e_vsi *p_vsi;
+
+ list_del(&ch->list);
+ p_vsi = ch->parent_vsi;
+ if (!p_vsi || !ch->initialized) {
+ kfree(ch);
+ continue;
+ }
+ /* Reset queue contexts */
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+
+ pf_q = ch->base_queue + i;
+ tx_ring = vsi->tx_rings[pf_q];
+ tx_ring->ch = NULL;
+
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->ch = NULL;
+ }
+
+ /* Reset BW configured for this VSI via mqprio */
+ ret = i40e_set_bw_limit(vsi, ch->seid, 0);
+ if (ret)
+ dev_info(&vsi->back->pdev->dev,
+ "Failed to reset tx rate for ch->seid %u\n",
+ ch->seid);
+
+ /* delete VSI from FW */
+ ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
+ NULL);
+ if (ret)
+ dev_err(&vsi->back->pdev->dev,
+ "unable to remove channel (%d) for parent VSI(%d)\n",
+ ch->seid, p_vsi->seid);
+ kfree(ch);
+ }
+ INIT_LIST_HEAD(&vsi->ch_list);
+}
+
+/**
+ * i40e_is_any_channel - channel exist or not
+ * @vsi: ptr to VSI to which channels are associated with
+ *
+ * Returns true or false if channel(s) exist for associated VSI or not
+ **/
+static bool i40e_is_any_channel(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch, *ch_tmp;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ if (ch->initialized)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * i40e_get_max_queues_for_channel
+ * @vsi: ptr to VSI to which channels are associated with
+ *
+ * Helper function which returns max value among the queue counts set on the
+ * channels/TCs created.
+ **/
+static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch, *ch_tmp;
+ int max = 0;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ if (!ch->initialized)
+ continue;
+ if (ch->num_queue_pairs > max)
+ max = ch->num_queue_pairs;
+ }
+
+ return max;
+}
+
+/**
+ * i40e_validate_num_queues - validate num_queues w.r.t channel
+ * @pf: ptr to PF device
+ * @num_queues: number of queues
+ * @vsi: the parent VSI
+ * @reconfig_rss: indicates should the RSS be reconfigured or not
+ *
+ * This function validates number of queues in the context of new channel
+ * which is being established and determines if RSS should be reconfigured
+ * or not for parent VSI.
+ **/
+static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
+ struct i40e_vsi *vsi, bool *reconfig_rss)
+{
+ int max_ch_queues;
+
+ if (!reconfig_rss)
+ return -EINVAL;
+
+ *reconfig_rss = false;
+
+ if (num_queues > I40E_MAX_QUEUES_PER_CH) {
+ dev_err(&pf->pdev->dev,
+ "Failed to create VMDq VSI. User requested num_queues (%d) > I40E_MAX_QUEUES_PER_VSI (%u)\n",
+ num_queues, I40E_MAX_QUEUES_PER_CH);
+ return -EINVAL;
+ }
+
+ if (vsi->current_rss_size) {
+ if (num_queues > vsi->current_rss_size) {
+ dev_dbg(&pf->pdev->dev,
+ "Error: num_queues (%d) > vsi's current_size(%d)\n",
+ num_queues, vsi->current_rss_size);
+ return -EINVAL;
+ } else if ((num_queues < vsi->current_rss_size) &&
+ (!is_power_of_2(num_queues))) {
+ dev_dbg(&pf->pdev->dev,
+ "Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
+ num_queues, vsi->current_rss_size);
+ return -EINVAL;
+ }
+ }
+
+ if (!is_power_of_2(num_queues)) {
+ /* Find the max num_queues configured for channel if channel
+ * exist.
+ * if channel exist, then enforce 'num_queues' to be more than
+ * max ever queues configured for channel.
+ */
+ max_ch_queues = i40e_get_max_queues_for_channel(vsi);
+ if (num_queues < max_ch_queues) {
+ dev_dbg(&pf->pdev->dev,
+ "Error: num_queues (%d) < max queues configured for channel(%d)\n",
+ num_queues, max_ch_queues);
+ return -EINVAL;
+ }
+ *reconfig_rss = true;
+ }
+
+ return 0;
+}
+
+/**
+ * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size
+ * @vsi: the VSI being setup
+ * @rss_size: size of RSS, accordingly LUT gets reprogrammed
+ *
+ * This function reconfigures RSS by reprogramming LUTs using 'rss_size'
+ **/
+static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
+{
+ struct i40e_pf *pf = vsi->back;
+ u8 seed[I40E_HKEY_ARRAY_SIZE];
+ struct i40e_hw *hw = &pf->hw;
+ int local_rss_size;
+ u8 *lut;
+ int ret;
+
+ if (!vsi->rss_size)
+ return -EINVAL;
+
+ if (rss_size > vsi->rss_size)
+ return -EINVAL;
+
+ local_rss_size = min_t(int, vsi->rss_size, rss_size);
+ lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
+ if (!lut)
+ return -ENOMEM;
+
+ /* Ignoring user configured lut if there is one */
+ i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size);
+
+ /* Use user configured hash key if there is one, otherwise
+ * use default.
+ */
+ if (vsi->rss_hkey_user)
+ memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
+ else
+ netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
+
+ ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Cannot set RSS lut, err %s aq_err %s\n",
+ i40e_stat_str(hw, ret),
+ i40e_aq_str(hw, hw->aq.asq_last_status));
+ kfree(lut);
+ return ret;
+ }
+ kfree(lut);
+
+ /* Do the update w.r.t. storing rss_size */
+ if (!vsi->orig_rss_size)
+ vsi->orig_rss_size = vsi->rss_size;
+ vsi->current_rss_size = local_rss_size;
+
+ return ret;
+}
+
+/**
+ * i40e_channel_setup_queue_map - Setup a channel queue map
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ctxt: VSI context structure
+ * @ch: ptr to channel structure
+ *
+ * Setup queue map for a specific channel
+ **/
+static void i40e_channel_setup_queue_map(struct i40e_pf *pf,
+ struct i40e_vsi_context *ctxt,
+ struct i40e_channel *ch)
+{
+ u16 qcount, qmap, sections = 0;
+ u8 offset = 0;
+ int pow;
+
+ sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+ sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+
+ qcount = min_t(int, ch->num_queue_pairs, pf->num_lan_msix);
+ ch->num_queue_pairs = qcount;
+
+ /* find the next higher power-of-2 of num queue pairs */
+ pow = ilog2(qcount);
+ if (!is_power_of_2(qcount))
+ pow++;
+
+ qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+ (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+ /* Setup queue TC[0].qmap for given VSI context */
+ ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
+
+ ctxt->info.up_enable_bits = 0x1; /* TC0 enabled */
+ ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+ ctxt->info.queue_mapping[0] = cpu_to_le16(ch->base_queue);
+ ctxt->info.valid_sections |= cpu_to_le16(sections);
+}
+
+/**
+ * i40e_add_channel - add a channel by adding VSI
+ * @pf: ptr to PF device
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @ch: ptr to channel structure
+ *
+ * Add a channel (VSI) using add_vsi and queue_map
+ **/
+static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
+ struct i40e_channel *ch)
+{
+ struct i40e_hw *hw = &pf->hw;
+ struct i40e_vsi_context ctxt;
+ u8 enabled_tc = 0x1; /* TC0 enabled */
+ int ret;
+
+ if (ch->type != I40E_VSI_VMDQ2) {
+ dev_info(&pf->pdev->dev,
+ "add new vsi failed, ch->type %d\n", ch->type);
+ return -EINVAL;
+ }
+
+ memset(&ctxt, 0, sizeof(ctxt));
+ ctxt.pf_num = hw->pf_id;
+ ctxt.vf_num = 0;
+ ctxt.uplink_seid = uplink_seid;
+ ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
+ if (ch->type == I40E_VSI_VMDQ2)
+ ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+
+ if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED) {
+ ctxt.info.valid_sections |=
+ cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+ ctxt.info.switch_id =
+ cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+ }
+
+ /* Set queue map for a given VSI context */
+ i40e_channel_setup_queue_map(pf, &ctxt, ch);
+
+ /* Now time to create VSI */
+ ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "add new vsi failed, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ return -ENOENT;
+ }
+
+ /* Success, update channel */
+ ch->enabled_tc = enabled_tc;
+ ch->seid = ctxt.seid;
+ ch->vsi_number = ctxt.vsi_number;
+ ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx);
+
+ /* copy just the sections touched not the entire info
+ * since not all sections are valid as returned by
+ * update vsi params
+ */
+ ch->info.mapping_flags = ctxt.info.mapping_flags;
+ memcpy(&ch->info.queue_mapping,
+ &ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
+ memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping,
+ sizeof(ctxt.info.tc_mapping));
+
+ return 0;
+}
+
+static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
+ u8 *bw_share)
+{
+ struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+ i40e_status ret;
+ int i;
+
+ bw_data.tc_valid_bits = ch->enabled_tc;
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+ bw_data.tc_bw_credits[i] = bw_share[i];
+
+ ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
+ &bw_data, NULL);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
+ vsi->back->hw.aq.asq_last_status, ch->seid);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+ ch->info.qs_handle[i] = bw_data.qs_handles[i];
+
+ return 0;
+}
+
+/**
+ * i40e_channel_config_tx_ring - config TX ring associated with new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ *
+ * Configure TX rings associated with channel (VSI) since queues are being
+ * from parent VSI.
+ **/
+static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
+ struct i40e_vsi *vsi,
+ struct i40e_channel *ch)
+{
+ i40e_status ret;
+ int i;
+ u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+
+ /* Enable ETS TCs with equal BW Share for now across all VSIs */
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ if (ch->enabled_tc & BIT(i))
+ bw_share[i] = 1;
+ }
+
+ /* configure BW for new VSI */
+ ret = i40e_channel_config_bw(vsi, ch, bw_share);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "Failed configuring TC map %d for channel (seid %u)\n",
+ ch->enabled_tc, ch->seid);
+ return ret;
+ }
+
+ for (i = 0; i < ch->num_queue_pairs; i++) {
+ struct i40e_ring *tx_ring, *rx_ring;
+ u16 pf_q;
+
+ pf_q = ch->base_queue + i;
+
+ /* Get to TX ring ptr of main VSI, for re-setup TX queue
+ * context
+ */
+ tx_ring = vsi->tx_rings[pf_q];
+ tx_ring->ch = ch;
+
+ /* Get the RX ring ptr */
+ rx_ring = vsi->rx_rings[pf_q];
+ rx_ring->ch = ch;
+ }
+
+ return 0;
+}
+
+/**
+ * i40e_setup_hw_channel - setup new channel
+ * @pf: ptr to PF device
+ * @vsi: the VSI being setup
+ * @ch: ptr to channel structure
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @type: type of channel to be created (VMDq2/VF)
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and configures TX rings accordingly
+ **/
+static inline int i40e_setup_hw_channel(struct i40e_pf *pf,
+ struct i40e_vsi *vsi,
+ struct i40e_channel *ch,
+ u16 uplink_seid, u8 type)
+{
+ int ret;
+
+ ch->initialized = false;
+ ch->base_queue = vsi->next_base_queue;
+ ch->type = type;
+
+ /* Proceed with creation of channel (VMDq2) VSI */
+ ret = i40e_add_channel(pf, uplink_seid, ch);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "failed to add_channel using uplink_seid %u\n",
+ uplink_seid);
+ return ret;
+ }
+
+ /* Mark the successful creation of channel */
+ ch->initialized = true;
+
+ /* Reconfigure TX queues using QTX_CTL register */
+ ret = i40e_channel_config_tx_ring(pf, vsi, ch);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "failed to configure TX rings for channel %u\n",
+ ch->seid);
+ return ret;
+ }
+
+ /* update 'next_base_queue' */
+ vsi->next_base_queue = vsi->next_base_queue + ch->num_queue_pairs;
+ dev_dbg(&pf->pdev->dev,
+ "Added channel: vsi_seid %u, vsi_number %u, stat_counter_idx %u, num_queue_pairs %u, pf->next_base_queue %d\n",
+ ch->seid, ch->vsi_number, ch->stat_counter_idx,
+ ch->num_queue_pairs,
+ vsi->next_base_queue);
+ return ret;
+}
+
+/**
+ * i40e_setup_channel - setup new channel using uplink element
+ * @pf: ptr to PF device
+ * @type: type of channel to be created (VMDq2/VF)
+ * @uplink_seid: underlying HW switching element (VEB) ID
+ * @ch: ptr to channel structure
+ *
+ * Setup new channel (VSI) based on specified type (VMDq2/VF)
+ * and uplink switching element (uplink_seid)
+ **/
+static bool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi,
+ struct i40e_channel *ch)
+{
+ u8 vsi_type;
+ u16 seid;
+ int ret;
+
+ if (vsi->type == I40E_VSI_MAIN) {
+ vsi_type = I40E_VSI_VMDQ2;
+ } else {
+ dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
+ vsi->type);
+ return false;
+ }
+
+ /* underlying switching element */
+ seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+
+ /* create channel (VSI), configure TX rings */
+ ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type);
+ if (ret) {
+ dev_err(&pf->pdev->dev, "failed to setup hw_channel\n");
+ return false;
+ }
+
+ return ch->initialized ? true : false;
+}
+
+/**
+ * i40e_create_queue_channel - function to create channel
+ * @vsi: VSI to be configured
+ * @ch: ptr to channel (it contains channel specific params)
+ *
+ * This function creates channel (VSI) using num_queues specified by user,
+ * reconfigs RSS if needed.
+ **/
+int i40e_create_queue_channel(struct i40e_vsi *vsi,
+ struct i40e_channel *ch)
+{
+ struct i40e_pf *pf = vsi->back;
+ bool reconfig_rss;
+ int err;
+
+ if (!ch)
+ return -EINVAL;
+
+ if (!ch->num_queue_pairs) {
+ dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
+ ch->num_queue_pairs);
+ return -EINVAL;
+ }
+
+ /* validate user requested num_queues for channel */
+ err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
+ &reconfig_rss);
+ if (err) {
+ dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
+ ch->num_queue_pairs);
+ return -EINVAL;
+ }
+
+ /* By default we are in VEPA mode, if this is the first VF/VMDq
+ * VSI to be added switch to VEB mode.
+ */
+ if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) ||
+ (!i40e_is_any_channel(vsi))) {
+ if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) {
+ dev_dbg(&pf->pdev->dev,
+ "Failed to create channel. Override queues (%u) not power of 2\n",
+ vsi->tc_config.tc_info[0].qcount);
+ return -EINVAL;
+ }
+
+ if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+ pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+
+ if (vsi->type == I40E_VSI_MAIN) {
+ if (pf->flags & I40E_FLAG_TC_MQPRIO)
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG,
+ true);
+ else
+ i40e_do_reset_safe(pf,
+ I40E_PF_RESET_FLAG);
+ }
+ }
+ /* now onwards for main VSI, number of queues will be value
+ * of TC0's queue count
+ */
+ }
+
+ /* By this time, vsi->cnt_q_avail shall be set to non-zero and
+ * it should be more than num_queues
+ */
+ if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
+ dev_dbg(&pf->pdev->dev,
+ "Error: cnt_q_avail (%u) less than num_queues %d\n",
+ vsi->cnt_q_avail, ch->num_queue_pairs);
+ return -EINVAL;
+ }
+
+ /* reconfig_rss only if vsi type is MAIN_VSI */
+ if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
+ err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
+ if (err) {
+ dev_info(&pf->pdev->dev,
+ "Error: unable to reconfig rss for num_queues (%u)\n",
+ ch->num_queue_pairs);
+ return -EINVAL;
+ }
+ }
+
+ if (!i40e_setup_channel(pf, vsi, ch)) {
+ dev_info(&pf->pdev->dev, "Failed to setup channel\n");
+ return -EINVAL;
+ }
+
+ dev_info(&pf->pdev->dev,
+ "Setup channel (id:%u) utilizing num_queues %d\n",
+ ch->seid, ch->num_queue_pairs);
+
+ /* configure VSI for BW limit */
+ if (ch->max_tx_rate) {
+ u64 credits = ch->max_tx_rate;
+
+ if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
+ return -EINVAL;
+
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ dev_dbg(&pf->pdev->dev,
+ "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+ ch->max_tx_rate,
+ credits,
+ ch->seid);
+ }
+
+ /* in case of VF, this will be main SRIOV VSI */
+ ch->parent_vsi = vsi;
+
+ /* and update main_vsi's count for queue_available to use */
+ vsi->cnt_q_avail -= ch->num_queue_pairs;
+
+ return 0;
+}
+
+/**
+ * i40e_configure_queue_channels - Add queue channel for the given TCs
+ * @vsi: VSI to be configured
+ *
+ * Configures queue channel mapping to the given TCs
+ **/
+static int i40e_configure_queue_channels(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch;
+ u64 max_rate = 0;
+ int ret = 0, i;
+
+ /* Create app vsi with the TCs. Main VSI with TC0 is already set up */
+ for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ if (vsi->tc_config.enabled_tc & BIT(i)) {
+ ch = kzalloc(sizeof(*ch), GFP_KERNEL);
+ if (!ch) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ INIT_LIST_HEAD(&ch->list);
+ ch->num_queue_pairs =
+ vsi->tc_config.tc_info[i].qcount;
+ ch->base_queue =
+ vsi->tc_config.tc_info[i].qoffset;
+
+ /* Bandwidth limit through tc interface is in bytes/s,
+ * change to Mbit/s
+ */
+ max_rate = vsi->mqprio_qopt.max_rate[i];
+ do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+ ch->max_tx_rate = max_rate;
+
+ list_add_tail(&ch->list, &vsi->ch_list);
+
+ ret = i40e_create_queue_channel(vsi, ch);
+ if (ret) {
+ dev_err(&vsi->back->pdev->dev,
+ "Failed creating queue channel with TC%d: queues %d\n",
+ i, ch->num_queue_pairs);
+ goto err_free;
+ }
+ }
+ }
+ return ret;
+
+err_free:
+ i40e_remove_queue_channels(vsi);
+ return ret;
+}
+
+/**
* i40e_veb_config_tc - Configure TCs for given VEB
* @veb: given VEB
* @enabled_tc: TC bitmap
@@ -5346,13 +6332,14 @@ out:
void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
{
enum i40e_aq_link_speed new_speed;
+ struct i40e_pf *pf = vsi->back;
char *speed = "Unknown";
char *fc = "Unknown";
char *fec = "";
char *req_fec = "";
char *an = "";
- new_speed = vsi->back->hw.phy.link_info.link_speed;
+ new_speed = pf->hw.phy.link_info.link_speed;
if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
return;
@@ -5366,13 +6353,13 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
/* Warn user if link speed on NPAR enabled partition is not at
* least 10GB
*/
- if (vsi->back->hw.func_caps.npar_enable &&
- (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
- vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
+ if (pf->hw.func_caps.npar_enable &&
+ (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
+ pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
netdev_warn(vsi->netdev,
"The partition detected link speed that is less than 10Gbps\n");
- switch (vsi->back->hw.phy.link_info.link_speed) {
+ switch (pf->hw.phy.link_info.link_speed) {
case I40E_LINK_SPEED_40GB:
speed = "40 G";
break;
@@ -5395,7 +6382,7 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
break;
}
- switch (vsi->back->hw.fc.current_mode) {
+ switch (pf->hw.fc.current_mode) {
case I40E_FC_FULL:
fc = "RX/TX";
break;
@@ -5410,18 +6397,18 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
break;
}
- if (vsi->back->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
+ if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
req_fec = ", Requested FEC: None";
fec = ", FEC: None";
an = ", Autoneg: False";
- if (vsi->back->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
+ if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
an = ", Autoneg: True";
- if (vsi->back->hw.phy.link_info.fec_info &
+ if (pf->hw.phy.link_info.fec_info &
I40E_AQ_CONFIG_FEC_KR_ENA)
fec = ", FEC: CL74 FC-FEC/BASE-R";
- else if (vsi->back->hw.phy.link_info.fec_info &
+ else if (pf->hw.phy.link_info.fec_info &
I40E_AQ_CONFIG_FEC_RS_ENA)
fec = ", FEC: CL108 RS-FEC";
@@ -5470,15 +6457,6 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
i40e_print_link_message(vsi, true);
netif_tx_start_all_queues(vsi->netdev);
netif_carrier_on(vsi->netdev);
- } else if (vsi->netdev) {
- i40e_print_link_message(vsi, false);
- /* need to check for qualified module here*/
- if ((pf->hw.phy.link_info.link_info &
- I40E_AQ_MEDIA_AVAILABLE) &&
- (!(pf->hw.phy.link_info.an_info &
- I40E_AQ_QUALIFIED_MODULE)))
- netdev_err(vsi->netdev,
- "the driver failed to link because an unqualified module was detected.");
}
/* replay FDIR SB filters */
@@ -5562,74 +6540,221 @@ void i40e_down(struct i40e_vsi *vsi)
}
/**
+ * i40e_validate_mqprio_qopt- validate queue mapping info
+ * @vsi: the VSI being configured
+ * @mqprio_qopt: queue parametrs
+ **/
+static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
+ struct tc_mqprio_qopt_offload *mqprio_qopt)
+{
+ u64 sum_max_rate = 0;
+ u64 max_rate = 0;
+ int i;
+
+ if (mqprio_qopt->qopt.offset[0] != 0 ||
+ mqprio_qopt->qopt.num_tc < 1 ||
+ mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS)
+ return -EINVAL;
+ for (i = 0; ; i++) {
+ if (!mqprio_qopt->qopt.count[i])
+ return -EINVAL;
+ if (mqprio_qopt->min_rate[i]) {
+ dev_err(&vsi->back->pdev->dev,
+ "Invalid min tx rate (greater than 0) specified\n");
+ return -EINVAL;
+ }
+ max_rate = mqprio_qopt->max_rate[i];
+ do_div(max_rate, I40E_BW_MBPS_DIVISOR);
+ sum_max_rate += max_rate;
+
+ if (i >= mqprio_qopt->qopt.num_tc - 1)
+ break;
+ if (mqprio_qopt->qopt.offset[i + 1] !=
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i]))
+ return -EINVAL;
+ }
+ if (vsi->num_queue_pairs <
+ (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) {
+ return -EINVAL;
+ }
+ if (sum_max_rate > i40e_get_link_speed(vsi)) {
+ dev_err(&vsi->back->pdev->dev,
+ "Invalid max tx rate specified\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * i40e_vsi_set_default_tc_config - set default values for tc configuration
+ * @vsi: the VSI being configured
+ **/
+static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
+{
+ u16 qcount;
+ int i;
+
+ /* Only TC0 is enabled */
+ vsi->tc_config.numtc = 1;
+ vsi->tc_config.enabled_tc = 1;
+ qcount = min_t(int, vsi->alloc_queue_pairs,
+ i40e_pf_get_max_q_per_tc(vsi->back));
+ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+ /* For the TC that is not enabled set the offset to to default
+ * queue and allocate one queue for the given TC.
+ */
+ vsi->tc_config.tc_info[i].qoffset = 0;
+ if (i == 0)
+ vsi->tc_config.tc_info[i].qcount = qcount;
+ else
+ vsi->tc_config.tc_info[i].qcount = 1;
+ vsi->tc_config.tc_info[i].netdev_tc = 0;
+ }
+}
+
+/**
* i40e_setup_tc - configure multiple traffic classes
* @netdev: net device to configure
- * @tc: number of traffic classes to enable
+ * @type_data: tc offload data
**/
-static int i40e_setup_tc(struct net_device *netdev, u8 tc)
+static int i40e_setup_tc(struct net_device *netdev, void *type_data)
{
+ struct tc_mqprio_qopt_offload *mqprio_qopt = type_data;
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
struct i40e_pf *pf = vsi->back;
- u8 enabled_tc = 0;
+ u8 enabled_tc = 0, num_tc, hw;
+ bool need_reset = false;
int ret = -EINVAL;
+ u16 mode;
int i;
- /* Check if DCB enabled to continue */
- if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
- netdev_info(netdev, "DCB is not enabled for adapter\n");
- goto exit;
+ num_tc = mqprio_qopt->qopt.num_tc;
+ hw = mqprio_qopt->qopt.hw;
+ mode = mqprio_qopt->mode;
+ if (!hw) {
+ pf->flags &= ~I40E_FLAG_TC_MQPRIO;
+ memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
+ goto config_tc;
}
/* Check if MFP enabled */
if (pf->flags & I40E_FLAG_MFP_ENABLED) {
- netdev_info(netdev, "Configuring TC not supported in MFP mode\n");
- goto exit;
+ netdev_info(netdev,
+ "Configuring TC not supported in MFP mode\n");
+ return ret;
}
+ switch (mode) {
+ case TC_MQPRIO_MODE_DCB:
+ pf->flags &= ~I40E_FLAG_TC_MQPRIO;
- /* Check whether tc count is within enabled limit */
- if (tc > i40e_pf_get_num_tc(pf)) {
- netdev_info(netdev, "TC count greater than enabled on link for adapter\n");
- goto exit;
+ /* Check if DCB enabled to continue */
+ if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+ netdev_info(netdev,
+ "DCB is not enabled for adapter\n");
+ return ret;
+ }
+
+ /* Check whether tc count is within enabled limit */
+ if (num_tc > i40e_pf_get_num_tc(pf)) {
+ netdev_info(netdev,
+ "TC count greater than enabled on link for adapter\n");
+ return ret;
+ }
+ break;
+ case TC_MQPRIO_MODE_CHANNEL:
+ if (pf->flags & I40E_FLAG_DCB_ENABLED) {
+ netdev_info(netdev,
+ "Full offload of TC Mqprio options is not supported when DCB is enabled\n");
+ return ret;
+ }
+ if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+ return ret;
+ ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt);
+ if (ret)
+ return ret;
+ memcpy(&vsi->mqprio_qopt, mqprio_qopt,
+ sizeof(*mqprio_qopt));
+ pf->flags |= I40E_FLAG_TC_MQPRIO;
+ pf->flags &= ~I40E_FLAG_DCB_ENABLED;
+ break;
+ default:
+ return -EINVAL;
}
+config_tc:
/* Generate TC map for number of tc requested */
- for (i = 0; i < tc; i++)
+ for (i = 0; i < num_tc; i++)
enabled_tc |= BIT(i);
/* Requesting same TC configuration as already enabled */
- if (enabled_tc == vsi->tc_config.enabled_tc)
+ if (enabled_tc == vsi->tc_config.enabled_tc &&
+ mode != TC_MQPRIO_MODE_CHANNEL)
return 0;
/* Quiesce VSI queues */
i40e_quiesce_vsi(vsi);
+ if (!hw && !(pf->flags & I40E_FLAG_TC_MQPRIO))
+ i40e_remove_queue_channels(vsi);
+
/* Configure VSI for enabled TCs */
ret = i40e_vsi_config_tc(vsi, enabled_tc);
if (ret) {
netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
vsi->seid);
+ need_reset = true;
goto exit;
}
- /* Unquiesce VSI */
- i40e_unquiesce_vsi(vsi);
+ if (pf->flags & I40E_FLAG_TC_MQPRIO) {
+ if (vsi->mqprio_qopt.max_rate[0]) {
+ u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+
+ do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+ ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+ if (!ret) {
+ u64 credits = max_tx_rate;
+
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ dev_dbg(&vsi->back->pdev->dev,
+ "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+ max_tx_rate,
+ credits,
+ vsi->seid);
+ } else {
+ need_reset = true;
+ goto exit;
+ }
+ }
+ ret = i40e_configure_queue_channels(vsi);
+ if (ret) {
+ netdev_info(netdev,
+ "Failed configuring queue channels\n");
+ need_reset = true;
+ goto exit;
+ }
+ }
exit:
+ /* Reset the configuration data to defaults, only TC0 is enabled */
+ if (need_reset) {
+ i40e_vsi_set_default_tc_config(vsi);
+ need_reset = false;
+ }
+
+ /* Unquiesce VSI */
+ i40e_unquiesce_vsi(vsi);
return ret;
}
static int __i40e_setup_tc(struct net_device *netdev, enum tc_setup_type type,
void *type_data)
{
- struct tc_mqprio_qopt *mqprio = type_data;
-
if (type != TC_SETUP_MQPRIO)
return -EOPNOTSUPP;
- mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
-
- return i40e_setup_tc(netdev, mqprio->num_tc);
+ return i40e_setup_tc(netdev, type_data);
}
/**
@@ -5747,7 +6872,7 @@ err_setup_rx:
err_setup_tx:
i40e_vsi_free_tx_resources(vsi);
if (vsi == pf->vsi[pf->lan_vsi])
- i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
return err;
}
@@ -5875,7 +7000,7 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
i40e_flush(&pf->hw);
- } else if (reset_flags & BIT_ULL(__I40E_PF_RESET_REQUESTED)) {
+ } else if (reset_flags & I40E_PF_RESET_FLAG) {
/* Request a PF Reset
*
@@ -6226,6 +7351,7 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
hlist_del(&filter->fdir_node);
kfree(filter);
pf->fdir_pf_active_filters--;
+ pf->fd_inv = 0;
}
}
}
@@ -6429,8 +7555,7 @@ static void i40e_link_event(struct i40e_pf *pf)
new_link == netif_carrier_ok(vsi->netdev)))
return;
- if (!test_bit(__I40E_VSI_DOWN, vsi->state))
- i40e_print_link_message(vsi, new_link);
+ i40e_print_link_message(vsi, new_link);
/* Notify the base of the switch tree connected to
* the link. Floating VEBs are not notified.
@@ -6553,12 +7678,26 @@ static void i40e_handle_link_event(struct i40e_pf *pf,
*/
i40e_link_event(pf);
- /* check for unqualified module, if link is down */
- if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
- (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
- (!(status->link_info & I40E_AQ_LINK_UP)))
+ /* Check if module meets thermal requirements */
+ if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
dev_err(&pf->pdev->dev,
- "The driver failed to link because an unqualified module was detected.\n");
+ "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
+ dev_err(&pf->pdev->dev,
+ "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+ } else {
+ /* check for unqualified module, if link is down, suppress
+ * the message if link was forced to be down.
+ */
+ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+ (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
+ (!(status->link_info & I40E_AQ_LINK_UP)) &&
+ (!(pf->flags & I40E_FLAG_LINK_DOWN_ON_CLOSE_ENABLED))) {
+ dev_err(&pf->pdev->dev,
+ "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
+ dev_err(&pf->pdev->dev,
+ "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
+ }
+ }
}
/**
@@ -7017,6 +8156,49 @@ static void i40e_fdir_teardown(struct i40e_pf *pf)
}
/**
+ * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
+ * @vsi: PF main vsi
+ *
+ * Rebuilds channel VSIs if they existed before reset
+ **/
+static int i40e_rebuild_channels(struct i40e_vsi *vsi)
+{
+ struct i40e_channel *ch, *ch_tmp;
+ i40e_status ret;
+
+ if (list_empty(&vsi->ch_list))
+ return 0;
+
+ list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
+ if (!ch->initialized)
+ break;
+ /* Proceed with creation of channel (VMDq2) VSI */
+ ret = i40e_add_channel(vsi->back, vsi->uplink_seid, ch);
+ if (ret) {
+ dev_info(&vsi->back->pdev->dev,
+ "failed to rebuild channels using uplink_seid %u\n",
+ vsi->uplink_seid);
+ return ret;
+ }
+ if (ch->max_tx_rate) {
+ u64 credits = ch->max_tx_rate;
+
+ if (i40e_set_bw_limit(vsi, ch->seid,
+ ch->max_tx_rate))
+ return -EINVAL;
+
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ dev_dbg(&vsi->back->pdev->dev,
+ "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+ ch->max_tx_rate,
+ credits,
+ ch->seid);
+ }
+ }
+ return 0;
+}
+
+/**
* i40e_prep_for_reset - prep for the core to reset
* @pf: board private structure
* @lock_acquired: indicates whether or not the lock has been acquired
@@ -7152,6 +8334,7 @@ static int i40e_reset(struct i40e_pf *pf)
**/
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
+ struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
struct i40e_hw *hw = &pf->hw;
u8 set_fc_aq_fail = 0;
i40e_status ret;
@@ -7234,7 +8417,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
* If there were VEBs but the reconstitution failed, we'll try
* try to recover minimal use by getting the basic PF VSI working.
*/
- if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) {
+ if (vsi->uplink_seid != pf->mac_seid) {
dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
/* find the one VEB connected to the MAC, and find orphans */
for (v = 0; v < I40E_MAX_VEB; v++) {
@@ -7258,8 +8441,7 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
dev_info(&pf->pdev->dev,
"rebuild of switch failed: %d, will try to set up simple PF connection\n",
ret);
- pf->vsi[pf->lan_vsi]->uplink_seid
- = pf->mac_seid;
+ vsi->uplink_seid = pf->mac_seid;
break;
} else if (pf->veb[v]->uplink_seid == 0) {
dev_info(&pf->pdev->dev,
@@ -7270,10 +8452,10 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
}
}
- if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) {
+ if (vsi->uplink_seid == pf->mac_seid) {
dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
/* no VEB, so rebuild only the Main VSI */
- ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]);
+ ret = i40e_add_vsi(vsi);
if (ret) {
dev_info(&pf->pdev->dev,
"rebuild of Main VSI failed: %d\n", ret);
@@ -7281,6 +8463,31 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
}
}
+ if (vsi->mqprio_qopt.max_rate[0]) {
+ u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0];
+ u64 credits = 0;
+
+ do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
+ ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+ if (ret)
+ goto end_unlock;
+
+ credits = max_tx_rate;
+ do_div(credits, I40E_BW_CREDIT_DIVISOR);
+ dev_dbg(&vsi->back->pdev->dev,
+ "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
+ max_tx_rate,
+ credits,
+ vsi->seid);
+ }
+
+ /* PF Main VSI is rebuild by now, go ahead and rebuild channel VSIs
+ * for this main VSI if they exist
+ */
+ ret = i40e_rebuild_channels(vsi);
+ if (ret)
+ goto end_unlock;
+
/* Reconfigure hardware for allowing smaller MSS in the case
* of TSO, so that we avoid the MDD being fired and causing
* a reset in the case of small MSS+TSO.
@@ -7615,9 +8822,9 @@ static void i40e_service_task(struct work_struct *work)
* i40e_service_timer - timer callback
* @data: pointer to PF struct
**/
-static void i40e_service_timer(unsigned long data)
+static void i40e_service_timer(struct timer_list *t)
{
- struct i40e_pf *pf = (struct i40e_pf *)data;
+ struct i40e_pf *pf = from_timer(pf, t, service_timer);
mod_timer(&pf->service_timer,
round_jiffies(jiffies + pf->service_timer_period));
@@ -7674,7 +8881,7 @@ static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
/**
* i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
- * @type: VSI pointer
+ * @vsi: VSI pointer
* @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
*
* On error: returns error code (negative)
@@ -8350,6 +9557,57 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
return 0;
}
+#ifdef CONFIG_PM
+/**
+ * i40e_restore_interrupt_scheme - Restore the interrupt scheme
+ * @pf: private board data structure
+ *
+ * Restore the interrupt scheme that was cleared when we suspended the
+ * device. This should be called during resume to re-allocate the q_vectors
+ * and reacquire IRQs.
+ */
+static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
+{
+ int err, i;
+
+ /* We cleared the MSI and MSI-X flags when disabling the old interrupt
+ * scheme. We need to re-enabled them here in order to attempt to
+ * re-acquire the MSI or MSI-X vectors
+ */
+ pf->flags |= (I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+
+ err = i40e_init_interrupt_scheme(pf);
+ if (err)
+ return err;
+
+ /* Now that we've re-acquired IRQs, we need to remap the vectors and
+ * rings together again.
+ */
+ for (i = 0; i < pf->num_alloc_vsi; i++) {
+ if (pf->vsi[i]) {
+ err = i40e_vsi_alloc_q_vectors(pf->vsi[i]);
+ if (err)
+ goto err_unwind;
+ i40e_vsi_map_rings_to_vectors(pf->vsi[i]);
+ }
+ }
+
+ err = i40e_setup_misc_vector(pf);
+ if (err)
+ goto err_unwind;
+
+ return 0;
+
+err_unwind:
+ while (i--) {
+ if (pf->vsi[i])
+ i40e_vsi_free_q_vectors(pf->vsi[i]);
+ }
+
+ return err;
+}
+#endif /* CONFIG_PM */
+
/**
* i40e_setup_misc_vector - Setup the misc vector to handle non queue events
* @pf: board private structure
@@ -8363,13 +9621,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
struct i40e_hw *hw = &pf->hw;
int err = 0;
- /* Only request the irq if this is the first time through, and
- * not when we're rebuilding after a Reset
- */
- if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) {
+ /* Only request the IRQ once, the first time through. */
+ if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
err = request_irq(pf->msix_entries[0].vector,
i40e_intr, 0, pf->int_name, pf);
if (err) {
+ clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
dev_info(&pf->pdev->dev,
"request_irq for %s failed: %d\n",
pf->int_name, err);
@@ -8385,51 +9642,12 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf)
i40e_flush(hw);
- i40e_irq_dynamic_enable_icr0(pf, true);
+ i40e_irq_dynamic_enable_icr0(pf);
return err;
}
/**
- * i40e_config_rss_aq - Prepare for RSS using AQ commands
- * @vsi: vsi structure
- * @seed: RSS hash seed
- **/
-static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
- u8 *lut, u16 lut_size)
-{
- struct i40e_pf *pf = vsi->back;
- struct i40e_hw *hw = &pf->hw;
- int ret = 0;
-
- if (seed) {
- struct i40e_aqc_get_set_rss_key_data *seed_dw =
- (struct i40e_aqc_get_set_rss_key_data *)seed;
- ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Cannot set RSS key, err %s aq_err %s\n",
- i40e_stat_str(hw, ret),
- i40e_aq_str(hw, hw->aq.asq_last_status));
- return ret;
- }
- }
- if (lut) {
- bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false;
-
- ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Cannot set RSS lut, err %s aq_err %s\n",
- i40e_stat_str(hw, ret),
- i40e_aq_str(hw, hw->aq.asq_last_status));
- return ret;
- }
- }
- return ret;
-}
-
-/**
* i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
* @vsi: Pointer to vsi structure
* @seed: Buffter to store the hash keys
@@ -8476,46 +9694,6 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
}
/**
- * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
- * @vsi: VSI structure
- **/
-static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
-{
- u8 seed[I40E_HKEY_ARRAY_SIZE];
- struct i40e_pf *pf = vsi->back;
- u8 *lut;
- int ret;
-
- if (!(pf->hw_features & I40E_HW_RSS_AQ_CAPABLE))
- return 0;
-
- if (!vsi->rss_size)
- vsi->rss_size = min_t(int, pf->alloc_rss_size,
- vsi->num_queue_pairs);
- if (!vsi->rss_size)
- return -EINVAL;
-
- lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
- if (!lut)
- return -ENOMEM;
- /* Use the user configured hash keys and lookup table if there is one,
- * otherwise use default
- */
- if (vsi->rss_lut_user)
- memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size);
- else
- i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
- if (vsi->rss_hkey_user)
- memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
- else
- netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
- ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size);
- kfree(lut);
-
- return ret;
-}
-
-/**
* i40e_config_rss_reg - Configure RSS keys and lut by writing registers
* @vsi: Pointer to vsi structure
* @seed: RSS hash seed
@@ -8913,8 +10091,8 @@ static int i40e_sw_init(struct i40e_pf *pf)
I40E_FLAG_MSIX_ENABLED;
/* Set default ITR */
- pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
- pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF;
+ pf->rx_itr_default = I40E_ITR_RX_DEF;
+ pf->tx_itr_default = I40E_ITR_TX_DEF;
/* Depending on PF configurations, it is possible that the RSS
* maximum might end up larger than the available queues
@@ -9014,6 +10192,11 @@ static int i40e_sw_init(struct i40e_pf *pf)
(pf->hw.aq.fw_maj_ver >= 5)))
pf->hw_features |= I40E_HW_USE_SET_LLDP_MIB;
+ /* Enable PTP L4 if FW > v6.0 */
+ if (pf->hw.mac.type == I40E_MAC_XL710 &&
+ pf->hw.aq.fw_maj_ver >= 6)
+ pf->hw_features |= I40E_HW_PTP_L4_CAPABLE;
+
if (pf->hw.func_caps.vmdq) {
pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
pf->flags |= I40E_FLAG_VMDQ_ENABLED;
@@ -9154,7 +10337,7 @@ static int i40e_set_features(struct net_device *netdev,
need_reset = i40e_set_ntuple(pf, features);
if (need_reset)
- i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED), true);
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
return 0;
}
@@ -9406,8 +10589,7 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
else
pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
- i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED),
- true);
+ i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
break;
}
}
@@ -9849,6 +11031,31 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
enabled_tc = i40e_pf_get_tc_map(pf);
+ /* Source pruning is enabled by default, so the flag is
+ * negative logic - if it's set, we need to fiddle with
+ * the VSI to disable source pruning.
+ */
+ if (pf->flags & I40E_FLAG_SOURCE_PRUNING_DISABLED) {
+ memset(&ctxt, 0, sizeof(ctxt));
+ ctxt.seid = pf->main_vsi_seid;
+ ctxt.pf_num = pf->hw.pf_id;
+ ctxt.vf_num = 0;
+ ctxt.info.valid_sections |=
+ cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+ ctxt.info.switch_id =
+ cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
+ ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "update vsi failed, err %s aq_err %s\n",
+ i40e_stat_str(&pf->hw, ret),
+ i40e_aq_str(&pf->hw,
+ pf->hw.aq.asq_last_status));
+ ret = -ENOENT;
+ goto err;
+ }
+ }
+
/* MFP mode setup queue map and update VSI */
if ((pf->flags & I40E_FLAG_MFP_ENABLED) &&
!(pf->hw.func_caps.iscsi)) { /* NIC type PF */
@@ -11039,6 +12246,7 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit)
static void i40e_determine_queue_usage(struct i40e_pf *pf)
{
int queues_left;
+ int q_max;
pf->num_lan_qps = 0;
@@ -11085,10 +12293,12 @@ static void i40e_determine_queue_usage(struct i40e_pf *pf)
I40E_FLAG_DCB_ENABLED);
dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
}
- pf->num_lan_qps = max_t(int, pf->rss_size_max,
- num_online_cpus());
- pf->num_lan_qps = min_t(int, pf->num_lan_qps,
- pf->hw.func_caps.num_tx_qp);
+
+ /* limit lan qps to the smaller of qps, cpus or msix */
+ q_max = max_t(int, pf->rss_size_max, num_online_cpus());
+ q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
+ q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
+ pf->num_lan_qps = q_max;
queues_left -= pf->num_lan_qps;
}
@@ -11304,6 +12514,13 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
hw->bus.bus_id = pdev->bus->number;
pf->instance = pfs_found;
+ /* Select something other than the 802.1ad ethertype for the
+ * switch to use internally and drop on ingress.
+ */
+ hw->switch_tag = 0xffff;
+ hw->first_tag = ETH_P_8021AD;
+ hw->second_tag = ETH_P_8021Q;
+
INIT_LIST_HEAD(&pf->l3_flex_pit_list);
INIT_LIST_HEAD(&pf->l4_flex_pit_list);
@@ -11380,11 +12597,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
i40e_nvm_version_str(hw));
if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
- hw->aq.api_min_ver > I40E_FW_API_VERSION_MINOR)
+ hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw))
dev_info(&pdev->dev,
"The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
- else if (hw->aq.api_maj_ver < I40E_FW_API_VERSION_MAJOR ||
- hw->aq.api_min_ver < (I40E_FW_API_VERSION_MINOR - 1))
+ else if (hw->aq.api_maj_ver == 1 && hw->aq.api_min_ver < 4)
dev_info(&pdev->dev,
"The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
@@ -11454,7 +12670,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
#endif /* CONFIG_I40E_DCB */
/* set up periodic task facility */
- setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf);
+ timer_setup(&pf->service_timer, i40e_service_timer, 0);
pf->service_timer_period = HZ;
INIT_WORK(&pf->service_task, i40e_service_task);
@@ -11506,6 +12722,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
goto err_vsis;
}
+ INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
/* Make sure flow control is set according to current settings */
err = i40e_set_fc(hw, &set_fc_aq_fail, true);
@@ -11777,7 +12994,7 @@ static void i40e_remove(struct pci_dev *pdev)
/* no more scheduling of any task */
set_bit(__I40E_SUSPENDED, pf->state);
set_bit(__I40E_DOWN, pf->state);
- if (pf->service_timer.data)
+ if (pf->service_timer.function)
del_timer_sync(&pf->service_timer);
if (pf->service_task.func)
cancel_work_sync(&pf->service_task);
@@ -11937,6 +13154,28 @@ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
}
/**
+ * i40e_pci_error_reset_prepare - prepare device driver for pci reset
+ * @pdev: PCI device information struct
+ */
+static void i40e_pci_error_reset_prepare(struct pci_dev *pdev)
+{
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+ i40e_prep_for_reset(pf, false);
+}
+
+/**
+ * i40e_pci_error_reset_done - pci reset done, device driver reset can begin
+ * @pdev: PCI device information struct
+ */
+static void i40e_pci_error_reset_done(struct pci_dev *pdev)
+{
+ struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+ i40e_reset_and_rebuild(pf, false, false);
+}
+
+/**
* i40e_pci_error_resume - restart operations after PCI error recovery
* @pdev: PCI device information struct
*
@@ -12048,18 +13287,25 @@ static void i40e_shutdown(struct pci_dev *pdev)
#ifdef CONFIG_PM
/**
- * i40e_suspend - PCI callback for moving to D3
- * @pdev: PCI device information struct
+ * i40e_suspend - PM callback for moving to D3
+ * @dev: generic device information structure
**/
-static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
+static int i40e_suspend(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct i40e_pf *pf = pci_get_drvdata(pdev);
struct i40e_hw *hw = &pf->hw;
- int retval = 0;
- set_bit(__I40E_SUSPENDED, pf->state);
+ /* If we're already suspended, then there is nothing to do */
+ if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
+ return 0;
+
set_bit(__I40E_DOWN, pf->state);
+ /* Ensure service task will not be running */
+ del_timer_sync(&pf->service_timer);
+ cancel_work_sync(&pf->service_task);
+
if (pf->wol_en && (pf->hw_features & I40E_HW_WOL_MC_MAGIC_PKT_WAKE))
i40e_enable_mc_magic_wake(pf);
@@ -12068,81 +13314,74 @@ static int i40e_suspend(struct pci_dev *pdev, pm_message_t state)
wr32(hw, I40E_PFPM_APM, (pf->wol_en ? I40E_PFPM_APM_APME_MASK : 0));
wr32(hw, I40E_PFPM_WUFC, (pf->wol_en ? I40E_PFPM_WUFC_MAG_MASK : 0));
- i40e_stop_misc_vector(pf);
- if (pf->msix_entries) {
- synchronize_irq(pf->msix_entries[0].vector);
- free_irq(pf->msix_entries[0].vector, pf);
- }
- retval = pci_save_state(pdev);
- if (retval)
- return retval;
-
- pci_wake_from_d3(pdev, pf->wol_en);
- pci_set_power_state(pdev, PCI_D3hot);
+ /* Clear the interrupt scheme and release our IRQs so that the system
+ * can safely hibernate even when there are a large number of CPUs.
+ * Otherwise hibernation might fail when mapping all the vectors back
+ * to CPU0.
+ */
+ i40e_clear_interrupt_scheme(pf);
- return retval;
+ return 0;
}
/**
- * i40e_resume - PCI callback for waking up from D3
- * @pdev: PCI device information struct
+ * i40e_resume - PM callback for waking up from D3
+ * @dev: generic device information structure
**/
-static int i40e_resume(struct pci_dev *pdev)
+static int i40e_resume(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct i40e_pf *pf = pci_get_drvdata(pdev);
- u32 err;
+ int err;
- pci_set_power_state(pdev, PCI_D0);
- pci_restore_state(pdev);
- /* pci_restore_state() clears dev->state_saves, so
- * call pci_save_state() again to restore it.
- */
- pci_save_state(pdev);
+ /* If we're not suspended, then there is nothing to do */
+ if (!test_bit(__I40E_SUSPENDED, pf->state))
+ return 0;
- err = pci_enable_device_mem(pdev);
+ /* We cleared the interrupt scheme when we suspended, so we need to
+ * restore it now to resume device functionality.
+ */
+ err = i40e_restore_interrupt_scheme(pf);
if (err) {
- dev_err(&pdev->dev, "Cannot enable PCI device from suspend\n");
- return err;
+ dev_err(&pdev->dev, "Cannot restore interrupt scheme: %d\n",
+ err);
}
- pci_set_master(pdev);
- /* no wakeup events while running */
- pci_wake_from_d3(pdev, false);
-
- /* handling the reset will rebuild the device state */
- if (test_and_clear_bit(__I40E_SUSPENDED, pf->state)) {
- clear_bit(__I40E_DOWN, pf->state);
- if (pf->msix_entries) {
- err = request_irq(pf->msix_entries[0].vector,
- i40e_intr, 0, pf->int_name, pf);
- if (err) {
- dev_err(&pf->pdev->dev,
- "request_irq for %s failed: %d\n",
- pf->int_name, err);
- }
- }
- i40e_reset_and_rebuild(pf, false, false);
- }
+ clear_bit(__I40E_DOWN, pf->state);
+ i40e_reset_and_rebuild(pf, false, false);
+
+ /* Clear suspended state last after everything is recovered */
+ clear_bit(__I40E_SUSPENDED, pf->state);
+
+ /* Restart the service task */
+ mod_timer(&pf->service_timer,
+ round_jiffies(jiffies + pf->service_timer_period));
return 0;
}
-#endif
+#endif /* CONFIG_PM */
+
static const struct pci_error_handlers i40e_err_handler = {
.error_detected = i40e_pci_error_detected,
.slot_reset = i40e_pci_error_slot_reset,
+ .reset_prepare = i40e_pci_error_reset_prepare,
+ .reset_done = i40e_pci_error_reset_done,
.resume = i40e_pci_error_resume,
};
+static SIMPLE_DEV_PM_OPS(i40e_pm_ops, i40e_suspend, i40e_resume);
+
static struct pci_driver i40e_driver = {
.name = i40e_driver_name,
.id_table = i40e_pci_tbl,
.probe = i40e_probe,
.remove = i40e_remove,
#ifdef CONFIG_PM
- .suspend = i40e_suspend,
- .resume = i40e_resume,
-#endif
+ .driver = {
+ .pm = &i40e_pm_ops,
+ },
+#endif /* CONFIG_PM */
.shutdown = i40e_shutdown,
.err_handler = &i40e_err_handler,
.sriov_configure = i40e_pci_sriov_configure,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index d591b3e6bd7c..0ccab0a5d717 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -311,13 +311,10 @@ static i40e_status i40e_read_nvm_word_aq(struct i40e_hw *hw, u16 offset,
static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw,
u16 offset, u16 *data)
{
- i40e_status ret_code = 0;
-
if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
- ret_code = i40e_read_nvm_word_aq(hw, offset, data);
- else
- ret_code = i40e_read_nvm_word_srctl(hw, offset, data);
- return ret_code;
+ return i40e_read_nvm_word_aq(hw, offset, data);
+
+ return i40e_read_nvm_word_srctl(hw, offset, data);
}
/**
@@ -331,7 +328,7 @@ static i40e_status __i40e_read_nvm_word(struct i40e_hw *hw,
i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
u16 *data)
{
- i40e_status ret_code = 0;
+ i40e_status ret_code;
ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
if (ret_code)
@@ -446,13 +443,10 @@ static i40e_status __i40e_read_nvm_buffer(struct i40e_hw *hw,
u16 offset, u16 *words,
u16 *data)
{
- i40e_status ret_code = 0;
-
if (hw->flags & I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE)
- ret_code = i40e_read_nvm_buffer_aq(hw, offset, words, data);
- else
- ret_code = i40e_read_nvm_buffer_srctl(hw, offset, words, data);
- return ret_code;
+ return i40e_read_nvm_buffer_aq(hw, offset, words, data);
+
+ return i40e_read_nvm_buffer_srctl(hw, offset, words, data);
}
/**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index a39b13197891..01502561035c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -360,6 +360,15 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
u32 reg_addr, u32 reg_val,
struct i40e_asq_cmd_details *cmd_details);
void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
+i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr,
+ u32 reg_addr, u32 reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr,
+ u32 reg_addr, u32 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+
i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
u16 reg, u8 phy_addr, u16 *value);
i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 86ca27f72f02..c234758dad15 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -2794,7 +2794,7 @@
#define I40E_GLV_RUPP_MAX_INDEX 383
#define I40E_GLV_RUPP_RUPP_SHIFT 0
#define I40E_GLV_RUPP_RUPP_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_RUPP_RUPP_SHIFT)
-#define I40E_GLV_TEPC(_VSI) (0x00344000 + ((_VSI) * 4)) /* _i=0...383 */ /* Reset: CORER */
+#define I40E_GLV_TEPC(_i) (0x00344000 + ((_i) * 8)) /* _i=0...383 */ /* Reset: CORER */
#define I40E_GLV_TEPC_MAX_INDEX 383
#define I40E_GLV_TEPC_TEPC_SHIFT 0
#define I40E_GLV_TEPC_TEPC_MASK I40E_MASK(0xFFFFFFFF, I40E_GLV_TEPC_TEPC_SHIFT)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 2756131495f0..edbc94c4353d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -960,14 +960,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
enum i40e_latency_range new_latency_range = rc->latency_range;
u32 new_itr = rc->itr;
- int bytes_per_int;
+ int bytes_per_usec;
unsigned int usecs, estimated_usecs;
if (rc->total_packets == 0 || !rc->itr)
return false;
usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_int = rc->total_bytes / usecs;
+ bytes_per_usec = rc->total_bytes / usecs;
/* The calculations in this algorithm depend on interrupts actually
* firing at the ITR rate. This may not happen if the packet rate is
@@ -993,18 +993,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
*/
switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
- if (bytes_per_int > 10)
+ if (bytes_per_usec > 10)
new_latency_range = I40E_LOW_LATENCY;
break;
case I40E_LOW_LATENCY:
- if (bytes_per_int > 20)
+ if (bytes_per_usec > 20)
new_latency_range = I40E_BULK_LATENCY;
- else if (bytes_per_int <= 10)
+ else if (bytes_per_usec <= 10)
new_latency_range = I40E_LOWEST_LATENCY;
break;
case I40E_BULK_LATENCY:
default:
- if (bytes_per_int <= 20)
+ if (bytes_per_usec <= 20)
new_latency_range = I40E_LOW_LATENCY;
break;
}
@@ -1407,6 +1407,15 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
union i40e_rx_desc *rx_desc;
struct i40e_rx_buffer *bi;
+ /* Hardware only fetches new descriptors in cache lines of 8,
+ * essentially ignoring the lower 3 bits of the tail register. We want
+ * to ensure our tail writes are aligned to avoid unnecessary work. We
+ * can't simply round down the cleaned count, since we might fail to
+ * allocate some buffers. What we really want is to ensure that
+ * next_to_used + cleaned_count produces an aligned value.
+ */
+ cleaned_count -= (ntu + cleaned_count) & 0x7;
+
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
return false;
@@ -2116,6 +2125,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
if (!skb) {
xdp.data = page_address(rx_buffer->page) +
rx_buffer->page_offset;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_hard_start = xdp.data -
i40e_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
@@ -2210,9 +2220,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
u32 val;
val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- /* Don't clear PBA because that can cause lost interrupts that
- * came in while we were cleaning/polling
- */
+ I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
(type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
(itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
@@ -2249,7 +2257,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
/* If we don't have MSIX, then we only need to re-enable icr0 */
if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED)) {
- i40e_irq_dynamic_enable_icr0(vsi->back, false);
+ i40e_irq_dynamic_enable_icr0(vsi->back);
return;
}
@@ -3175,38 +3183,12 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
/* write last descriptor with EOP bit */
td_cmd |= I40E_TX_DESC_CMD_EOP;
- /* We can OR these values together as they both are checked against
- * 4 below and at this point desc_count will be used as a boolean value
- * after this if/else block.
+ /* We OR these values together to check both against 4 (WB_STRIDE)
+ * below. This is safe since we don't re-use desc_count afterwards.
*/
desc_count |= ++tx_ring->packet_stride;
- /* Algorithm to optimize tail and RS bit setting:
- * if queue is stopped
- * mark RS bit
- * reset packet counter
- * else if xmit_more is supported and is true
- * advance packet counter to 4
- * reset desc_count to 0
- *
- * if desc_count >= 4
- * mark RS bit
- * reset packet counter
- * if desc_count > 0
- * update tail
- *
- * Note: If there are less than 4 descriptors
- * pending and interrupts were disabled the service task will
- * trigger a force WB.
- */
- if (netif_xmit_stopped(txring_txq(tx_ring))) {
- goto do_rs;
- } else if (skb->xmit_more) {
- /* set stride to arm on next packet and reset desc_count */
- tx_ring->packet_stride = WB_STRIDE;
- desc_count = 0;
- } else if (desc_count >= WB_STRIDE) {
-do_rs:
+ if (desc_count >= WB_STRIDE) {
/* write last descriptor with RS bit set */
td_cmd |= I40E_TX_DESC_CMD_RS;
tx_ring->packet_stride = 0;
@@ -3227,7 +3209,7 @@ do_rs:
first->next_to_watch = tx_desc;
/* notify HW of packet */
- if (desc_count) {
+ if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
writel(i, tx_ring->tail);
/* we need this if more than one processor can write to our tail
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 2f848bc5e391..fbae1182e2ea 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -38,8 +38,10 @@
#define I40E_ITR_8K 0x003E
#define I40E_ITR_4K 0x007A
#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF I40E_ITR_20K
-#define I40E_ITR_TX_DEF I40E_ITR_20K
+#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
+ I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
+ I40E_ITR_DYNAMIC)
#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
@@ -206,7 +208,7 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
}
/* How many Rx Buffers do we bundle into one write to the hardware ? */
-#define I40E_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+#define I40E_RX_BUFFER_WRITE 32 /* Must be power of 2 */
#define I40E_RX_INCREMENT(r, i) \
do { \
(i)++; \
@@ -342,6 +344,7 @@ struct i40e_rx_queue_stats {
enum i40e_ring_state_t {
__I40E_TX_FDIR_INIT_DONE,
__I40E_TX_XPS_INIT_DONE,
+ __I40E_RING_STATE_NBITS /* must be last */
};
/* some useful defines for virtchannel interface, which
@@ -366,7 +369,7 @@ struct i40e_ring {
struct i40e_tx_buffer *tx_bi;
struct i40e_rx_buffer *rx_bi;
};
- unsigned long state;
+ DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
u16 queue_index; /* Queue number of ring */
u8 dcb_tc; /* Traffic class of ring */
u8 __iomem *tail;
@@ -423,6 +426,8 @@ struct i40e_ring {
* i40e_clean_rx_ring_irq() is called
* for this ring.
*/
+
+ struct i40e_channel *ch;
} ____cacheline_internodealigned_in_smp;
static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index fd4bbdd88b57..17a99b53acd9 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -46,6 +46,9 @@
/* Max default timeout in ms, */
#define I40E_MAX_NVM_TIMEOUT 18000
+/* Max timeout in ms for the phy to respond */
+#define I40E_MAX_PHY_TIMEOUT 500
+
/* Switch from ms to the 1usec global time (this is the GTIME resolution) */
#define I40E_MS_TO_GTIME(time) ((time) * 1000)
@@ -268,6 +271,10 @@ struct i40e_phy_info {
I40E_PHY_TYPE_OFFSET)
#define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \
I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_AOC BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC + \
+ I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \
+ I40E_PHY_TYPE_OFFSET)
#define I40E_HW_CAP_MAX_GPIO 30
/* Capabilities of a PF or a VF or the whole device */
struct i40e_hw_capabilities {
@@ -428,6 +435,18 @@ struct i40e_nvm_access {
u8 data[1];
};
+/* (Q)SFP module access definitions */
+#define I40E_I2C_EEPROM_DEV_ADDR 0xA0
+#define I40E_I2C_EEPROM_DEV_ADDR2 0xA2
+#define I40E_MODULE_TYPE_ADDR 0x00
+#define I40E_MODULE_REVISION_ADDR 0x01
+#define I40E_MODULE_SFF_8472_COMP 0x5E
+#define I40E_MODULE_SFF_8472_SWAP 0x5C
+#define I40E_MODULE_SFF_ADDR_MODE 0x04
+#define I40E_MODULE_TYPE_QSFP_PLUS 0x0D
+#define I40E_MODULE_TYPE_QSFP28 0x11
+#define I40E_MODULE_QSFP_MAX_LEN 640
+
/* PCI bus types */
enum i40e_bus_type {
i40e_bus_type_unknown = 0,
@@ -598,8 +617,15 @@ struct i40e_hw {
struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
#define I40E_HW_FLAG_AQ_SRCTL_ACCESS_ENABLE BIT_ULL(0)
+#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1)
+#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2)
u64 flags;
+ /* Used in set switch config AQ command */
+ u16 switch_tag;
+ u16 first_tag;
+ u16 second_tag;
+
/* debug mask */
u32 debug_mask;
char err_str[16];
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 4d1e670f490e..f8a794b72462 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -154,15 +154,30 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
/**
* i40e_vc_disable_vf
- * @pf: pointer to the PF info
* @vf: pointer to the VF info
*
- * Disable the VF through a SW reset
+ * Disable the VF through a SW reset.
**/
-static inline void i40e_vc_disable_vf(struct i40e_pf *pf, struct i40e_vf *vf)
+static inline void i40e_vc_disable_vf(struct i40e_vf *vf)
{
+ int i;
+
i40e_vc_notify_vf_reset(vf);
- i40e_reset_vf(vf, false);
+
+ /* We want to ensure that an actual reset occurs initiated after this
+ * function was called. However, we do not want to wait forever, so
+ * we'll give a reasonable time and print a message if we failed to
+ * ensure a reset.
+ */
+ for (i = 0; i < 20; i++) {
+ if (i40e_reset_vf(vf, false))
+ return;
+ usleep_range(10000, 20000);
+ }
+
+ dev_warn(&vf->pf->pdev->dev,
+ "Failed to initiate reset for VF %d after 200 milliseconds\n",
+ vf->vf_id);
}
/**
@@ -258,7 +273,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
struct i40e_hw *hw = &pf->hw;
u16 vsi_queue_id, pf_queue_id;
enum i40e_queue_type qtype;
- u16 next_q, vector_id;
+ u16 next_q, vector_id, size;
u32 reg, reg_idx;
u16 itr_idx = 0;
@@ -288,9 +303,11 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
vsi_queue_id + 1));
}
- next_q = find_first_bit(&linklistmap,
- (I40E_MAX_VSI_QP *
- I40E_VIRTCHNL_SUPPORTED_QTYPES));
+ size = I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES;
+ next_q = find_first_bit(&linklistmap, size);
+ if (unlikely(next_q == size))
+ goto irq_list_done;
+
vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id);
@@ -298,7 +315,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
wr32(hw, reg_idx, reg);
- while (next_q < (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) {
+ while (next_q < size) {
switch (qtype) {
case I40E_QUEUE_TYPE_RX:
reg_idx = I40E_QINT_RQCTL(pf_queue_id);
@@ -312,12 +329,8 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id,
break;
}
- next_q = find_next_bit(&linklistmap,
- (I40E_MAX_VSI_QP *
- I40E_VIRTCHNL_SUPPORTED_QTYPES),
- next_q + 1);
- if (next_q <
- (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) {
+ next_q = find_next_bit(&linklistmap, size, next_q + 1);
+ if (next_q < size) {
vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id,
@@ -423,6 +436,9 @@ static int i40e_config_iwarp_qvlist(struct i40e_vf *vf,
(sizeof(struct virtchnl_iwarp_qv_info) *
(qvlist_info->num_vectors - 1));
vf->qvlist_info = kzalloc(size, GFP_KERNEL);
+ if (!vf->qvlist_info)
+ return -ENOMEM;
+
vf->qvlist_info->num_vectors = qvlist_info->num_vectors;
msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
@@ -621,7 +637,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
rx_ctx.dsize = 1;
/* default values */
- rx_ctx.lrxqthresh = 2;
+ rx_ctx.lrxqthresh = 1;
rx_ctx.crcstrip = 1;
rx_ctx.prefena = 1;
rx_ctx.l2tsel = 1;
@@ -815,6 +831,14 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
*/
clear_bit(I40E_VF_STATE_INIT, &vf->vf_states);
+ /* It's possible the VF had requeuested more queues than the default so
+ * do the accounting here when we're about to free them.
+ */
+ if (vf->num_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF) {
+ pf->queues_left += vf->num_queue_pairs -
+ I40E_DEFAULT_QUEUES_PER_VF;
+ }
+
/* free vsi & disconnect it from the parent uplink */
if (vf->lan_vsi_idx) {
i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]);
@@ -853,7 +877,8 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
}
/* reset some of the state variables keeping track of the resources */
vf->num_queue_pairs = 0;
- vf->vf_states = 0;
+ clear_bit(I40E_VF_STATE_MC_PROMISC, &vf->vf_states);
+ clear_bit(I40E_VF_STATE_UC_PROMISC, &vf->vf_states);
}
/**
@@ -868,12 +893,27 @@ static int i40e_alloc_vf_res(struct i40e_vf *vf)
int total_queue_pairs = 0;
int ret;
+ if (vf->num_req_queues &&
+ vf->num_req_queues <= pf->queues_left + I40E_DEFAULT_QUEUES_PER_VF)
+ pf->num_vf_qps = vf->num_req_queues;
+ else
+ pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
+
/* allocate hw vsi context & associated resources */
ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV);
if (ret)
goto error_alloc;
total_queue_pairs += pf->vsi[vf->lan_vsi_idx]->alloc_queue_pairs;
+ /* We account for each VF to get a default number of queue pairs. If
+ * the VF has now requested more, we need to account for that to make
+ * certain we never request more queues than we actually have left in
+ * HW.
+ */
+ if (total_queue_pairs > I40E_DEFAULT_QUEUES_PER_VF)
+ pf->queues_left -=
+ total_queue_pairs - I40E_DEFAULT_QUEUES_PER_VF;
+
if (vf->trusted)
set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
else
@@ -1008,8 +1048,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
clear_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
/* Do not notify the client during VF init */
- if (test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
- &vf->vf_states))
+ if (!test_and_clear_bit(I40E_VF_STATE_PRE_ENABLE,
+ &vf->vf_states))
i40e_notify_client_of_vf_reset(pf, abs_vf_id);
vf->num_vlan = 0;
}
@@ -1026,9 +1066,9 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf)
* @vf: pointer to the VF structure
* @flr: VFLR was issued or not
*
- * reset the VF
+ * Returns true if the VF is reset, false otherwise.
**/
-void i40e_reset_vf(struct i40e_vf *vf, bool flr)
+bool i40e_reset_vf(struct i40e_vf *vf, bool flr)
{
struct i40e_pf *pf = vf->pf;
struct i40e_hw *hw = &pf->hw;
@@ -1036,9 +1076,11 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
u32 reg;
int i;
- /* If VFs have been disabled, there is no need to reset */
+ /* If the VFs have been disabled, this means something else is
+ * resetting the VF, so we shouldn't continue.
+ */
if (test_and_set_bit(__I40E_VF_DISABLE, pf->state))
- return;
+ return false;
i40e_trigger_vf_reset(vf, flr);
@@ -1075,6 +1117,8 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
i40e_flush(hw);
clear_bit(__I40E_VF_DISABLE, pf->state);
+
+ return true;
}
/**
@@ -1086,8 +1130,10 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
* VF, then do all the waiting in one chunk, and finally finish restoring each
* VF after the wait. This is useful during PF routines which need to reset
* all VFs, as otherwise it must perform these resets in a serialized fashion.
+ *
+ * Returns true if any VFs were reset, and false otherwise.
**/
-void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
+bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
{
struct i40e_hw *hw = &pf->hw;
struct i40e_vf *vf;
@@ -1096,11 +1142,11 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
/* If we don't have any VFs, then there is nothing to reset */
if (!pf->num_alloc_vfs)
- return;
+ return false;
/* If VFs have been disabled, there is no need to reset */
if (test_and_set_bit(__I40E_VF_DISABLE, pf->state))
- return;
+ return false;
/* Begin reset on all VFs at once */
for (v = 0; v < pf->num_alloc_vfs; v++)
@@ -1175,6 +1221,8 @@ void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
i40e_flush(hw);
clear_bit(__I40E_VF_DISABLE, pf->state);
+
+ return true;
}
/**
@@ -1308,7 +1356,7 @@ err_alloc:
i40e_free_vfs(pf);
err_iov:
/* Re-enable interrupt 0. */
- i40e_irq_dynamic_enable_icr0(pf, false);
+ i40e_irq_dynamic_enable_icr0(pf);
return ret;
}
@@ -1377,8 +1425,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (num_vfs) {
if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
- i40e_do_reset_safe(pf,
- BIT_ULL(__I40E_PF_RESET_REQUESTED));
+ i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
}
return i40e_pci_sriov_enable(pdev, num_vfs);
}
@@ -1386,7 +1433,7 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
if (!pci_vfs_assigned(pf->pdev)) {
i40e_free_vfs(pf);
pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
- i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
+ i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
} else {
dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
return -EINVAL;
@@ -1537,6 +1584,8 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_IWARP)) {
vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_IWARP;
set_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states);
+ } else {
+ clear_bit(I40E_VF_STATE_IWARPENA, &vf->vf_states);
}
if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
@@ -1579,6 +1628,9 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg)
VIRTCHNL_VF_OFFLOAD_WB_ON_ITR;
}
+ if (vf->driver_caps & VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)
+ vfres->vf_cap_flags |= VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
+
vfres->num_vsis = num_vsis;
vfres->num_queue_pairs = vf->num_queue_pairs;
vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
@@ -1987,6 +2039,57 @@ error_param:
}
/**
+ * i40e_vc_request_queues_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * VFs get a default number of queues but can use this message to request a
+ * different number. If the request is successful, PF will reset the VF and
+ * return 0. If unsuccessful, PF will send message informing VF of number of
+ * available queues and return result of sending VF a message.
+ **/
+static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg, int msglen)
+{
+ struct virtchnl_vf_res_request *vfres =
+ (struct virtchnl_vf_res_request *)msg;
+ int req_pairs = vfres->num_queue_pairs;
+ int cur_pairs = vf->num_queue_pairs;
+ struct i40e_pf *pf = vf->pf;
+
+ if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states))
+ return -EINVAL;
+
+ if (req_pairs <= 0) {
+ dev_err(&pf->pdev->dev,
+ "VF %d tried to request %d queues. Ignoring.\n",
+ vf->vf_id, req_pairs);
+ } else if (req_pairs > I40E_MAX_VF_QUEUES) {
+ dev_err(&pf->pdev->dev,
+ "VF %d tried to request more than %d queues.\n",
+ vf->vf_id,
+ I40E_MAX_VF_QUEUES);
+ vfres->num_queue_pairs = I40E_MAX_VF_QUEUES;
+ } else if (req_pairs - cur_pairs > pf->queues_left) {
+ dev_warn(&pf->pdev->dev,
+ "VF %d requested %d more queues, but only %d left.\n",
+ vf->vf_id,
+ req_pairs - cur_pairs,
+ pf->queues_left);
+ vfres->num_queue_pairs = pf->queues_left + cur_pairs;
+ } else {
+ /* successful request */
+ vf->num_req_queues = req_pairs;
+ i40e_vc_notify_vf_reset(vf);
+ i40e_reset_vf(vf, false);
+ return 0;
+ }
+
+ return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, 0,
+ (u8 *)vfres, sizeof(vfres));
+}
+
+/**
* i40e_vc_get_stats_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -2708,6 +2811,9 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
ret = i40e_vc_disable_vlan_stripping(vf, msg, msglen);
break;
+ case VIRTCHNL_OP_REQUEST_QUEUES:
+ ret = i40e_vc_request_queues_msg(vf, msg, msglen);
+ break;
case VIRTCHNL_OP_UNKNOWN:
default:
@@ -2779,6 +2885,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
struct i40e_mac_filter *f;
struct i40e_vf *vf;
int ret = 0;
+ struct hlist_node *h;
int bkt;
/* validate the request */
@@ -2817,7 +2924,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
/* Delete all the filters for this VSI - we're going to kill it
* anyway.
*/
- hash_for_each(vsi->mac_filter_hash, bkt, f, hlist)
+ hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
__i40e_del_filter(vsi, f);
spin_unlock_bh(&vsi->mac_filter_hash_lock);
@@ -2840,7 +2947,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
}
/* Force the VF driver stop so it has to reload with new MAC address */
- i40e_vc_disable_vf(pf, vf);
+ i40e_vc_disable_vf(vf);
dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n");
error_param:
@@ -2848,6 +2955,34 @@ error_param:
}
/**
+ * i40e_vsi_has_vlans - True if VSI has configured VLANs
+ * @vsi: pointer to the vsi
+ *
+ * Check if a VSI has configured any VLANs. False if we have a port VLAN or if
+ * we have no configured VLANs. Do not call while holding the
+ * mac_filter_hash_lock.
+ */
+static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi)
+{
+ bool have_vlans;
+
+ /* If we have a port VLAN, then the VSI cannot have any VLANs
+ * configured, as all MAC/VLAN filters will be assigned to the PVID.
+ */
+ if (vsi->info.pvid)
+ return false;
+
+ /* Since we don't have a PVID, we know that if the device is in VLAN
+ * mode it must be because of a VLAN filter configured on this VSI.
+ */
+ spin_lock_bh(&vsi->mac_filter_hash_lock);
+ have_vlans = i40e_is_vsi_in_vlan(vsi);
+ spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+ return have_vlans;
+}
+
+/**
* i40e_ndo_set_vf_port_vlan
* @netdev: network interface device structure
* @vf_id: VF identifier
@@ -2899,10 +3034,7 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
/* duplicate request, so just return success */
goto error_pvid;
- /* Locked once because multiple functions below iterate list */
- spin_lock_bh(&vsi->mac_filter_hash_lock);
-
- if (le16_to_cpu(vsi->info.pvid) == 0 && i40e_is_vsi_in_vlan(vsi)) {
+ if (i40e_vsi_has_vlans(vsi)) {
dev_err(&pf->pdev->dev,
"VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n",
vf_id);
@@ -2910,11 +3042,14 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
* the right thing by reconfiguring his network correctly
* and then reloading the VF driver.
*/
- i40e_vc_disable_vf(pf, vf);
+ i40e_vc_disable_vf(vf);
/* During reset the VF got a new VSI, so refresh the pointer. */
vsi = pf->vsi[vf->lan_vsi_idx];
}
+ /* Locked once because multiple functions below iterate list */
+ spin_lock_bh(&vsi->mac_filter_hash_lock);
+
/* Check for condition where there was already a port VLAN ID
* filter set and now it is being deleted by setting it to zero.
* Additionally check for the condition where there was a port
@@ -2987,8 +3122,6 @@ error_pvid:
return ret;
}
-#define I40E_BW_CREDIT_DIVISOR 50 /* 50Mbps per BW credit */
-#define I40E_MAX_BW_INACTIVE_ACCUM 4 /* device can accumulate 4 credits max */
/**
* i40e_ndo_set_vf_bw
* @netdev: network interface device structure
@@ -3004,7 +3137,6 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
struct i40e_pf *pf = np->vsi->back;
struct i40e_vsi *vsi;
struct i40e_vf *vf;
- int speed = 0;
int ret = 0;
/* validate the request */
@@ -3029,48 +3161,10 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
goto error;
}
- switch (pf->hw.phy.link_info.link_speed) {
- case I40E_LINK_SPEED_40GB:
- speed = 40000;
- break;
- case I40E_LINK_SPEED_25GB:
- speed = 25000;
- break;
- case I40E_LINK_SPEED_20GB:
- speed = 20000;
- break;
- case I40E_LINK_SPEED_10GB:
- speed = 10000;
- break;
- case I40E_LINK_SPEED_1GB:
- speed = 1000;
- break;
- default:
- break;
- }
-
- if (max_tx_rate > speed) {
- dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.\n",
- max_tx_rate, vf->vf_id);
- ret = -EINVAL;
+ ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate);
+ if (ret)
goto error;
- }
-
- if ((max_tx_rate < 50) && (max_tx_rate > 0)) {
- dev_warn(&pf->pdev->dev, "Setting max Tx rate to minimum usable value of 50Mbps.\n");
- max_tx_rate = 50;
- }
- /* Tx rate credits are in values of 50Mbps, 0 is disabled*/
- ret = i40e_aq_config_vsi_bw_limit(&pf->hw, vsi->seid,
- max_tx_rate / I40E_BW_CREDIT_DIVISOR,
- I40E_MAX_BW_INACTIVE_ACCUM, NULL);
- if (ret) {
- dev_err(&pf->pdev->dev, "Unable to set max tx rate, error code %d.\n",
- ret);
- ret = -EIO;
- goto error;
- }
vf->tx_rate = max_tx_rate;
error:
return ret;
@@ -3279,14 +3373,11 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting)
vf = &pf->vf[vf_id];
- if (!vf)
- return -EINVAL;
if (setting == vf->trusted)
goto out;
vf->trusted = setting;
- i40e_vc_notify_vf_reset(vf);
- i40e_reset_vf(vf, false);
+ i40e_vc_disable_vf(vf);
dev_info(&pf->pdev->dev, "VF %u is now %strusted\n",
vf_id, setting ? "" : "un");
out:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
index 1f4b0c504368..5efc4f92bb37 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
@@ -56,7 +56,6 @@ enum i40e_vf_states {
I40E_VF_STATE_INIT = 0,
I40E_VF_STATE_ACTIVE,
I40E_VF_STATE_IWARPENA,
- I40E_VF_STATE_FCOEENA,
I40E_VF_STATE_DISABLED,
I40E_VF_STATE_MC_PROMISC,
I40E_VF_STATE_UC_PROMISC,
@@ -97,6 +96,7 @@ struct i40e_vf {
u16 lan_vsi_id; /* ID as used by firmware */
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
+ u8 num_req_queues; /* num of requested qps */
u64 num_mdd_events; /* num of mdd events detected */
/* num of continuous malformed or invalid msgs detected */
u64 num_invalid_msgs;
@@ -121,8 +121,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
u32 v_retval, u8 *msg, u16 msglen);
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
-void i40e_reset_vf(struct i40e_vf *vf, bool flr);
-void i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
+bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
+bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
/* VF configuration related iplink handlers */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 83e63e55c4b4..463e331a70a9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -34,7 +34,15 @@
*/
#define I40E_FW_API_VERSION_MAJOR 0x0001
-#define I40E_FW_API_VERSION_MINOR 0x0005
+#define I40E_FW_API_VERSION_MINOR_X722 0x0005
+#define I40E_FW_API_VERSION_MINOR_X710 0x0007
+
+#define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
+ I40E_FW_API_VERSION_MINOR_X710 : \
+ I40E_FW_API_VERSION_MINOR_X722)
+
+/* API version 1.7 implements additional link and PHY-specific APIs */
+#define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
struct i40e_aq_desc {
__le16 flags;
@@ -236,6 +244,8 @@ enum i40e_admin_queue_opc {
i40e_aqc_opc_set_phy_debug = 0x0622,
i40e_aqc_opc_upload_ext_phy_fm = 0x0625,
i40e_aqc_opc_run_phy_activity = 0x0626,
+ i40e_aqc_opc_set_phy_register = 0x0628,
+ i40e_aqc_opc_get_phy_register = 0x0629,
/* NVM commands */
i40e_aqc_opc_nvm_read = 0x0701,
@@ -761,7 +771,22 @@ struct i40e_aqc_set_switch_config {
#define I40E_AQ_SET_SWITCH_CFG_PROMISC 0x0001
#define I40E_AQ_SET_SWITCH_CFG_L2_FILTER 0x0002
__le16 valid_flags;
- u8 reserved[12];
+ /* The ethertype in switch_tag is dropped on ingress and used
+ * internally by the switch. Set this to zero for the default
+ * of 0x88a8 (802.1ad). Should be zero for firmware API
+ * versions lower than 1.7.
+ */
+ __le16 switch_tag;
+ /* The ethertypes in first_tag and second_tag are used to
+ * match the outer and inner VLAN tags (respectively) when HW
+ * double VLAN tagging is enabled via the set port parameters
+ * AQ command. Otherwise these are both ignored. Set them to
+ * zero for their defaults of 0x8100 (802.1Q). Should be zero
+ * for firmware API versions lower than 1.7.
+ */
+ __le16 first_tag;
+ __le16 second_tag;
+ u8 reserved[6];
};
I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config);
@@ -1722,6 +1747,8 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_10GBASE_CR1_CU = 0xB,
I40E_PHY_TYPE_10GBASE_AOC = 0xC,
I40E_PHY_TYPE_40GBASE_AOC = 0xD,
+ I40E_PHY_TYPE_UNRECOGNIZED = 0xE,
+ I40E_PHY_TYPE_UNSUPPORTED = 0xF,
I40E_PHY_TYPE_100BASE_TX = 0x11,
I40E_PHY_TYPE_1000BASE_T = 0x12,
I40E_PHY_TYPE_10GBASE_T = 0x13,
@@ -1740,7 +1767,12 @@ enum i40e_aq_phy_type {
I40E_PHY_TYPE_25GBASE_CR = 0x20,
I40E_PHY_TYPE_25GBASE_SR = 0x21,
I40E_PHY_TYPE_25GBASE_LR = 0x22,
- I40E_PHY_TYPE_MAX
+ I40E_PHY_TYPE_25GBASE_AOC = 0x23,
+ I40E_PHY_TYPE_25GBASE_ACC = 0x24,
+ I40E_PHY_TYPE_MAX,
+ I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD,
+ I40E_PHY_TYPE_EMPTY = 0xFE,
+ I40E_PHY_TYPE_DEFAULT = 0xFF,
};
#define I40E_LINK_SPEED_100MB_SHIFT 0x1
@@ -1797,6 +1829,8 @@ struct i40e_aq_get_phy_abilities_resp {
#define I40E_AQ_PHY_TYPE_EXT_25G_CR 0X02
#define I40E_AQ_PHY_TYPE_EXT_25G_SR 0x04
#define I40E_AQ_PHY_TYPE_EXT_25G_LR 0x08
+#define I40E_AQ_PHY_TYPE_EXT_25G_AOC 0x10
+#define I40E_AQ_PHY_TYPE_EXT_25G_ACC 0x20
u8 fec_cfg_curr_mod_ext_info;
#define I40E_AQ_ENABLE_FEC_KR 0x01
#define I40E_AQ_ENABLE_FEC_RS 0x02
@@ -1930,19 +1964,31 @@ struct i40e_aqc_get_link_status {
#define I40E_AQ_25G_SERDES_UCODE_ERR 0X04
#define I40E_AQ_25G_NIMB_UCODE_ERR 0X05
u8 loopback; /* use defines from i40e_aqc_set_lb_mode */
+/* Since firmware API 1.7 loopback field keeps power class info as well */
+#define I40E_AQ_LOOPBACK_MASK 0x07
+#define I40E_AQ_PWR_CLASS_SHIFT_LB 6
+#define I40E_AQ_PWR_CLASS_MASK_LB (0x03 << I40E_AQ_PWR_CLASS_SHIFT_LB)
__le16 max_frame_size;
u8 config;
#define I40E_AQ_CONFIG_FEC_KR_ENA 0x01
#define I40E_AQ_CONFIG_FEC_RS_ENA 0x02
#define I40E_AQ_CONFIG_CRC_ENA 0x04
#define I40E_AQ_CONFIG_PACING_MASK 0x78
- u8 power_desc;
+ union {
+ struct {
+ u8 power_desc;
#define I40E_AQ_LINK_POWER_CLASS_1 0x00
#define I40E_AQ_LINK_POWER_CLASS_2 0x01
#define I40E_AQ_LINK_POWER_CLASS_3 0x02
#define I40E_AQ_LINK_POWER_CLASS_4 0x03
#define I40E_AQ_PWR_CLASS_MASK 0x03
- u8 reserved[4];
+ u8 reserved[4];
+ };
+ struct {
+ u8 link_type[4];
+ u8 link_type_ext;
+ };
+ };
};
I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status);
@@ -2022,6 +2068,22 @@ struct i40e_aqc_run_phy_activity {
I40E_CHECK_CMD_LENGTH(i40e_aqc_run_phy_activity);
+/* Set PHY Register command (0x0628) */
+/* Get PHY Register command (0x0629) */
+struct i40e_aqc_phy_register_access {
+ u8 phy_interface;
+#define I40E_AQ_PHY_REG_ACCESS_INTERNAL 0
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL 1
+#define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2
+ u8 dev_address;
+ u8 reserved1[2];
+ __le32 reg_address;
+ __le32 reg_value;
+ u8 reserved2[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
+
/* NVM Read command (indirect 0x0701)
* NVM Erase commands (direct 0x0702)
* NVM Update commands (indirect 0x0703)
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 8d3a2bfe186a..7d70bf69b249 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -1042,6 +1042,75 @@ do_retry:
}
/**
+ * i40evf_aq_set_phy_register
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @reg_addr: PHY register address
+ * @reg_val: new register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Reset the external PHY.
+ **/
+i40e_status i40evf_aq_set_phy_register(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr,
+ u32 reg_addr, u32 reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_phy_register_access *cmd =
+ (struct i40e_aqc_phy_register_access *)&desc.params.raw;
+ i40e_status status;
+
+ i40evf_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_set_phy_register);
+
+ cmd->phy_interface = phy_select;
+ cmd->dev_address = dev_addr;
+ cmd->reg_address = cpu_to_le32(reg_addr);
+ cmd->reg_value = cpu_to_le32(reg_val);
+
+ status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+ return status;
+}
+
+/**
+ * i40evf_aq_get_phy_register
+ * @hw: pointer to the hw struct
+ * @phy_select: select which phy should be accessed
+ * @dev_addr: PHY device address
+ * @reg_addr: PHY register address
+ * @reg_val: read register value
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Reset the external PHY.
+ **/
+i40e_status i40evf_aq_get_phy_register(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr,
+ u32 reg_addr, u32 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details)
+{
+ struct i40e_aq_desc desc;
+ struct i40e_aqc_phy_register_access *cmd =
+ (struct i40e_aqc_phy_register_access *)&desc.params.raw;
+ i40e_status status;
+
+ i40evf_fill_default_direct_cmd_desc(&desc,
+ i40e_aqc_opc_get_phy_register);
+
+ cmd->phy_interface = phy_select;
+ cmd->dev_address = dev_addr;
+ cmd->reg_address = cpu_to_le32(reg_addr);
+
+ status = i40evf_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+ if (!status)
+ *reg_val = le32_to_cpu(cmd->reg_value);
+
+ return status;
+}
+
+/**
* i40e_aq_send_msg_to_pf
* @hw: pointer to the hardware structure
* @v_opcode: opcodes for VF-PF communication
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index c9836bba487d..b624b5994075 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -111,6 +111,15 @@ i40e_status i40evf_aq_rx_ctl_write_register(struct i40e_hw *hw,
u32 reg_addr, u32 reg_val,
struct i40e_asq_cmd_details *cmd_details);
void i40evf_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
+i40e_status i40e_aq_set_phy_register(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr,
+ u32 reg_addr, u32 reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_phy_register(struct i40e_hw *hw,
+ u8 phy_select, u8 dev_addr,
+ u32 reg_addr, u32 *reg_val,
+ struct i40e_asq_cmd_details *cmd_details);
+
i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page,
u16 reg, u8 phy_addr, u16 *value);
i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page,
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index c32c62462c84..6806ada11490 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -358,14 +358,14 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
{
enum i40e_latency_range new_latency_range = rc->latency_range;
u32 new_itr = rc->itr;
- int bytes_per_int;
+ int bytes_per_usec;
unsigned int usecs, estimated_usecs;
if (rc->total_packets == 0 || !rc->itr)
return false;
usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
- bytes_per_int = rc->total_bytes / usecs;
+ bytes_per_usec = rc->total_bytes / usecs;
/* The calculations in this algorithm depend on interrupts actually
* firing at the ITR rate. This may not happen if the packet rate is
@@ -391,18 +391,18 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
*/
switch (new_latency_range) {
case I40E_LOWEST_LATENCY:
- if (bytes_per_int > 10)
+ if (bytes_per_usec > 10)
new_latency_range = I40E_LOW_LATENCY;
break;
case I40E_LOW_LATENCY:
- if (bytes_per_int > 20)
+ if (bytes_per_usec > 20)
new_latency_range = I40E_BULK_LATENCY;
- else if (bytes_per_int <= 10)
+ else if (bytes_per_usec <= 10)
new_latency_range = I40E_LOWEST_LATENCY;
break;
case I40E_BULK_LATENCY:
default:
- if (bytes_per_int <= 20)
+ if (bytes_per_usec <= 20)
new_latency_range = I40E_LOW_LATENCY;
break;
}
@@ -711,6 +711,15 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
union i40e_rx_desc *rx_desc;
struct i40e_rx_buffer *bi;
+ /* Hardware only fetches new descriptors in cache lines of 8,
+ * essentially ignoring the lower 3 bits of the tail register. We want
+ * to ensure our tail writes are aligned to avoid unnecessary work. We
+ * can't simply round down the cleaned count, since we might fail to
+ * allocate some buffers. What we really want is to ensure that
+ * next_to_used + cleaned_count produces an aligned value.
+ */
+ cleaned_count -= (ntu + cleaned_count) & 0x7;
+
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
return false;
@@ -1409,9 +1418,7 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr)
u32 val;
val = I40E_VFINT_DYN_CTLN1_INTENA_MASK |
- /* Don't clear PBA because that can cause lost interrupts that
- * came in while we were cleaning/polling
- */
+ I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK |
(type << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT) |
(itr << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 0d9f98bc07bd..8d26c85d12e1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -38,8 +38,10 @@
#define I40E_ITR_8K 0x003E
#define I40E_ITR_4K 0x007A
#define I40E_MAX_INTRL 0x3B /* reg uses 4 usec resolution */
-#define I40E_ITR_RX_DEF I40E_ITR_20K
-#define I40E_ITR_TX_DEF I40E_ITR_20K
+#define I40E_ITR_RX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
+ I40E_ITR_DYNAMIC)
+#define I40E_ITR_TX_DEF (ITR_REG_TO_USEC(I40E_ITR_20K) | \
+ I40E_ITR_DYNAMIC)
#define I40E_ITR_DYNAMIC 0x8000 /* use top bit as a flag */
#define I40E_MIN_INT_RATE 250 /* ~= 1000000 / (I40E_MAX_ITR * 2) */
#define I40E_MAX_INT_RATE 500000 /* == 1000000 / (I40E_MIN_ITR * 2) */
@@ -189,7 +191,7 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
}
/* How many Rx Buffers do we bundle into one write to the hardware ? */
-#define I40E_RX_BUFFER_WRITE 16 /* Must be power of 2 */
+#define I40E_RX_BUFFER_WRITE 32 /* Must be power of 2 */
#define I40E_RX_INCREMENT(r, i) \
do { \
(i)++; \
@@ -325,6 +327,7 @@ struct i40e_rx_queue_stats {
enum i40e_ring_state_t {
__I40E_TX_FDIR_INIT_DONE,
__I40E_TX_XPS_INIT_DONE,
+ __I40E_RING_STATE_NBITS /* must be last */
};
/* some useful defines for virtchannel interface, which
@@ -348,7 +351,7 @@ struct i40e_ring {
struct i40e_tx_buffer *tx_bi;
struct i40e_rx_buffer *rx_bi;
};
- unsigned long state;
+ DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
u16 queue_index; /* Queue number of ring */
u8 dcb_tc; /* Traffic class of ring */
u8 __iomem *tail;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 2ea919d9cdcf..213b773dfad6 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -46,6 +46,9 @@
/* Max default timeout in ms, */
#define I40E_MAX_NVM_TIMEOUT 18000
+/* Max timeout in ms for the phy to respond */
+#define I40E_MAX_PHY_TIMEOUT 500
+
/* Switch from ms to the 1usec global time (this is the GTIME resolution) */
#define I40E_MS_TO_GTIME(time) ((time) * 1000)
@@ -401,6 +404,18 @@ struct i40e_nvm_access {
u8 data[1];
};
+/* (Q)SFP module access definitions */
+#define I40E_I2C_EEPROM_DEV_ADDR 0xA0
+#define I40E_I2C_EEPROM_DEV_ADDR2 0xA2
+#define I40E_MODULE_TYPE_ADDR 0x00
+#define I40E_MODULE_REVISION_ADDR 0x01
+#define I40E_MODULE_SFF_8472_COMP 0x5E
+#define I40E_MODULE_SFF_8472_SWAP 0x5C
+#define I40E_MODULE_SFF_ADDR_MODE 0x04
+#define I40E_MODULE_TYPE_QSFP_PLUS 0x0D
+#define I40E_MODULE_TYPE_QSFP28 0x11
+#define I40E_MODULE_QSFP_MAX_LEN 640
+
/* PCI bus types */
enum i40e_bus_type {
i40e_bus_type_unknown = 0,
@@ -556,11 +571,19 @@ struct i40e_hw {
/* LLDP/DCBX Status */
u16 dcbx_status;
+#define I40E_HW_FLAG_802_1AD_CAPABLE BIT_ULL(1)
+#define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE BIT_ULL(2)
+
/* DCBX info */
struct i40e_dcbx_config local_dcbx_config; /* Oper/Local Cfg */
struct i40e_dcbx_config remote_dcbx_config; /* Peer Cfg */
struct i40e_dcbx_config desired_dcbx_config; /* CEE Desired Cfg */
+ /* Used in set switch config AQ command */
+ u16 switch_tag;
+ u16 first_tag;
+ u16 second_tag;
+
/* debug mask */
u32 debug_mask;
char err_str[16];
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 82f69031e5cd..de0af521d602 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -102,6 +102,7 @@ struct i40e_vsi {
#define I40E_TX_CTXTDESC(R, i) \
(&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
#define MAX_QUEUES 16
+#define I40EVF_MAX_REQ_QUEUES 4
#define I40EVF_HKEY_ARRAY_SIZE ((I40E_VFQF_HKEY_MAX_INDEX + 1) * 4)
#define I40EVF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT_MAX_INDEX + 1) * 4)
@@ -200,6 +201,7 @@ struct i40evf_adapter {
struct list_head vlan_filter_list;
char misc_vector_name[IFNAMSIZ + 9];
int num_active_queues;
+ int num_req_queues;
/* TX */
struct i40e_ring *tx_rings;
@@ -220,21 +222,22 @@ struct i40evf_adapter {
u32 flags;
#define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0)
-#define I40EVF_FLAG_IMIR_ENABLED BIT(5)
-#define I40EVF_FLAG_MQ_CAPABLE BIT(6)
-#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8)
-#define I40EVF_FLAG_RESET_PENDING BIT(9)
-#define I40EVF_FLAG_RESET_NEEDED BIT(10)
-#define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(11)
-#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(12)
-#define I40EVF_FLAG_ADDR_SET_BY_PF BIT(13)
-#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(14)
-#define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(15)
-#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE BIT(16)
-#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(17)
-#define I40EVF_FLAG_PROMISC_ON BIT(18)
-#define I40EVF_FLAG_ALLMULTI_ON BIT(19)
-#define I40EVF_FLAG_LEGACY_RX BIT(20)
+#define I40EVF_FLAG_IMIR_ENABLED BIT(1)
+#define I40EVF_FLAG_MQ_CAPABLE BIT(2)
+#define I40EVF_FLAG_PF_COMMS_FAILED BIT(3)
+#define I40EVF_FLAG_RESET_PENDING BIT(4)
+#define I40EVF_FLAG_RESET_NEEDED BIT(5)
+#define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(6)
+#define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(7)
+#define I40EVF_FLAG_ADDR_SET_BY_PF BIT(8)
+#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(9)
+#define I40EVF_FLAG_CLIENT_NEEDS_OPEN BIT(10)
+#define I40EVF_FLAG_CLIENT_NEEDS_CLOSE BIT(11)
+#define I40EVF_FLAG_CLIENT_NEEDS_L2_PARAMS BIT(12)
+#define I40EVF_FLAG_PROMISC_ON BIT(13)
+#define I40EVF_FLAG_ALLMULTI_ON BIT(14)
+#define I40EVF_FLAG_LEGACY_RX BIT(15)
+#define I40EVF_FLAG_REINIT_ITR_NEEDED BIT(16)
/* duplicates for common code */
#define I40E_FLAG_DCB_ENABLED 0
#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED
@@ -349,6 +352,7 @@ void i40evf_deconfigure_queues(struct i40evf_adapter *adapter);
void i40evf_enable_queues(struct i40evf_adapter *adapter);
void i40evf_disable_queues(struct i40evf_adapter *adapter);
void i40evf_map_queues(struct i40evf_adapter *adapter);
+int i40evf_request_queues(struct i40evf_adapter *adapter, int num);
void i40evf_add_ether_addrs(struct i40evf_adapter *adapter);
void i40evf_del_ether_addrs(struct i40evf_adapter *adapter);
void i40evf_add_vlans(struct i40evf_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 65874d6b3ab9..da006fa3fec1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -669,7 +669,7 @@ static void i40evf_get_channels(struct net_device *netdev,
struct i40evf_adapter *adapter = netdev_priv(netdev);
/* Report maximum channels */
- ch->max_combined = adapter->num_active_queues;
+ ch->max_combined = I40EVF_MAX_REQ_QUEUES;
ch->max_other = NONQ_VECS;
ch->other_count = NONQ_VECS;
@@ -678,6 +678,41 @@ static void i40evf_get_channels(struct net_device *netdev,
}
/**
+ * i40evf_set_channels: set the new channel count
+ * @netdev: network interface device structure
+ * @ch: channel information structure
+ *
+ * Negotiate a new number of channels with the PF then do a reset. During
+ * reset we'll realloc queues and fix the RSS table. Returns 0 on success,
+ * negative on failure.
+ **/
+static int i40evf_set_channels(struct net_device *netdev,
+ struct ethtool_channels *ch)
+{
+ struct i40evf_adapter *adapter = netdev_priv(netdev);
+ int num_req = ch->combined_count;
+
+ if (num_req != adapter->num_active_queues &&
+ !(adapter->vf_res->vf_cap_flags &
+ VIRTCHNL_VF_OFFLOAD_REQ_QUEUES)) {
+ dev_info(&adapter->pdev->dev, "PF is not capable of queue negotiation.\n");
+ return -EINVAL;
+ }
+
+ /* All of these should have already been checked by ethtool before this
+ * even gets to us, but just to be sure.
+ */
+ if (num_req <= 0 || num_req > I40EVF_MAX_REQ_QUEUES)
+ return -EINVAL;
+
+ if (ch->rx_count || ch->tx_count || ch->other_count != NONQ_VECS)
+ return -EINVAL;
+
+ adapter->num_req_queues = num_req;
+ return i40evf_request_queues(adapter, num_req);
+}
+
+/**
* i40evf_get_rxfh_key_size - get the RSS hash key size
* @netdev: network interface device structure
*
@@ -785,6 +820,7 @@ static const struct ethtool_ops i40evf_ethtool_ops = {
.get_rxfh = i40evf_get_rxfh,
.set_rxfh = i40evf_set_rxfh,
.get_channels = i40evf_get_channels,
+ .set_channels = i40evf_set_channels,
.get_rxfh_key_size = i40evf_get_rxfh_key_size,
.get_link_ksettings = i40evf_get_link_ksettings,
};
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 1825d956bb00..ca2ebdbd24d7 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -46,7 +46,7 @@ static const char i40evf_driver_string[] =
#define DRV_VERSION_MAJOR 3
#define DRV_VERSION_MINOR 0
-#define DRV_VERSION_BUILD 0
+#define DRV_VERSION_BUILD 1
#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
__stringify(DRV_VERSION_MINOR) "." \
__stringify(DRV_VERSION_BUILD) \
@@ -430,57 +430,26 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx)
* group the rings as "efficiently" as possible. You would add new
* mapping configurations in here.
**/
-static int i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter)
+static void i40evf_map_rings_to_vectors(struct i40evf_adapter *adapter)
{
+ int rings_remaining = adapter->num_active_queues;
+ int ridx = 0, vidx = 0;
int q_vectors;
- int v_start = 0;
- int rxr_idx = 0, txr_idx = 0;
- int rxr_remaining = adapter->num_active_queues;
- int txr_remaining = adapter->num_active_queues;
- int i, j;
- int rqpv, tqpv;
- int err = 0;
q_vectors = adapter->num_msix_vectors - NONQ_VECS;
- /* The ideal configuration...
- * We have enough vectors to map one per queue.
- */
- if (q_vectors >= (rxr_remaining * 2)) {
- for (; rxr_idx < rxr_remaining; v_start++, rxr_idx++)
- i40evf_map_vector_to_rxq(adapter, v_start, rxr_idx);
-
- for (; txr_idx < txr_remaining; v_start++, txr_idx++)
- i40evf_map_vector_to_txq(adapter, v_start, txr_idx);
- goto out;
- }
+ for (; ridx < rings_remaining; ridx++) {
+ i40evf_map_vector_to_rxq(adapter, vidx, ridx);
+ i40evf_map_vector_to_txq(adapter, vidx, ridx);
- /* If we don't have enough vectors for a 1-to-1
- * mapping, we'll have to group them so there are
- * multiple queues per vector.
- * Re-adjusting *qpv takes care of the remainder.
- */
- for (i = v_start; i < q_vectors; i++) {
- rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - i);
- for (j = 0; j < rqpv; j++) {
- i40evf_map_vector_to_rxq(adapter, i, rxr_idx);
- rxr_idx++;
- rxr_remaining--;
- }
- }
- for (i = v_start; i < q_vectors; i++) {
- tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - i);
- for (j = 0; j < tqpv; j++) {
- i40evf_map_vector_to_txq(adapter, i, txr_idx);
- txr_idx++;
- txr_remaining--;
- }
+ /* In the case where we have more queues than vectors, continue
+ * round-robin on vectors until all queues are mapped.
+ */
+ if (++vidx >= q_vectors)
+ vidx = 0;
}
-out:
adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS;
-
- return err;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
@@ -546,6 +515,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
unsigned int vector, q_vectors;
unsigned int rx_int_idx = 0, tx_int_idx = 0;
int irq_num, err;
+ int cpu;
i40evf_irq_disable(adapter);
/* Decrement for Other and TCP Timer vectors */
@@ -584,10 +554,12 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
q_vector->affinity_notify.release =
i40evf_irq_affinity_release;
irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
- /* get_cpu_mask returns a static constant mask with
- * a permanent lifetime so it's ok to use here.
+ /* Spread the IRQ affinity hints across online CPUs. Note that
+ * get_cpu_mask returns a mask with a permanent lifetime so
+ * it's safe to use as a hint for irq_set_affinity_hint.
*/
- irq_set_affinity_hint(irq_num, get_cpu_mask(q_vector->v_idx));
+ cpu = cpumask_local_spread(q_vector->v_idx, -1);
+ irq_set_affinity_hint(irq_num, get_cpu_mask(cpu));
}
return 0;
@@ -908,6 +880,8 @@ i40evf_mac_filter *i40evf_add_filter(struct i40evf_adapter *adapter,
list_add_tail(&f->list, &adapter->mac_filter_list);
f->add = true;
adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER;
+ } else {
+ f->remove = false;
}
clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
@@ -1217,9 +1191,18 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
{
int i, num_active_queues;
- num_active_queues = min_t(int,
- adapter->vsi_res->num_queue_pairs,
- (int)(num_online_cpus()));
+ /* If we're in reset reallocating queues we don't actually know yet for
+ * certain the PF gave us the number of queues we asked for but we'll
+ * assume it did. Once basic reset is finished we'll confirm once we
+ * start negotiating config with PF.
+ */
+ if (adapter->num_req_queues)
+ num_active_queues = adapter->num_req_queues;
+ else
+ num_active_queues = min_t(int,
+ adapter->vsi_res->num_queue_pairs,
+ (int)(num_online_cpus()));
+
adapter->tx_rings = kcalloc(num_active_queues,
sizeof(struct i40e_ring), GFP_KERNEL);
@@ -1240,7 +1223,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
tx_ring->netdev = adapter->netdev;
tx_ring->dev = &adapter->pdev->dev;
tx_ring->count = adapter->tx_desc_count;
- tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF);
+ tx_ring->tx_itr_setting = I40E_ITR_TX_DEF;
if (adapter->flags & I40EVF_FLAG_WB_ON_ITR_CAPABLE)
tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR;
@@ -1249,7 +1232,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter)
rx_ring->netdev = adapter->netdev;
rx_ring->dev = &adapter->pdev->dev;
rx_ring->count = adapter->rx_desc_count;
- rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF);
+ rx_ring->rx_itr_setting = I40E_ITR_RX_DEF;
}
adapter->num_active_queues = num_active_queues;
@@ -1568,12 +1551,53 @@ static void i40evf_free_rss(struct i40evf_adapter *adapter)
}
/**
+ * i40evf_reinit_interrupt_scheme - Reallocate queues and vectors
+ * @adapter: board private structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40evf_reinit_interrupt_scheme(struct i40evf_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int err;
+
+ if (netif_running(netdev))
+ i40evf_free_traffic_irqs(adapter);
+ i40evf_free_misc_irq(adapter);
+ i40evf_reset_interrupt_capability(adapter);
+ i40evf_free_q_vectors(adapter);
+ i40evf_free_queues(adapter);
+
+ err = i40evf_init_interrupt_scheme(adapter);
+ if (err)
+ goto err;
+
+ netif_tx_stop_all_queues(netdev);
+
+ err = i40evf_request_misc_irq(adapter);
+ if (err)
+ goto err;
+
+ set_bit(__I40E_VSI_DOWN, adapter->vsi.state);
+
+ i40evf_map_rings_to_vectors(adapter);
+
+ if (RSS_AQ(adapter))
+ adapter->aq_required |= I40EVF_FLAG_AQ_CONFIGURE_RSS;
+ else
+ err = i40evf_init_rss(adapter);
+err:
+ return err;
+}
+
+/**
* i40evf_watchdog_timer - Periodic call-back timer
* @data: pointer to adapter disguised as unsigned long
**/
-static void i40evf_watchdog_timer(unsigned long data)
+static void i40evf_watchdog_timer(struct timer_list *t)
{
- struct i40evf_adapter *adapter = (struct i40evf_adapter *)data;
+ struct i40evf_adapter *adapter = from_timer(adapter, t,
+ watchdog_timer);
schedule_work(&adapter->watchdog_task);
/* timer will be rescheduled in watchdog task */
@@ -1913,8 +1937,15 @@ continue_reset:
if (err)
dev_info(&adapter->pdev->dev, "Failed to init adminq: %d\n",
err);
+ adapter->aq_required = 0;
- adapter->aq_required = I40EVF_FLAG_AQ_GET_CONFIG;
+ if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) {
+ err = i40evf_reinit_interrupt_scheme(adapter);
+ if (err)
+ goto reset_err;
+ }
+
+ adapter->aq_required |= I40EVF_FLAG_AQ_GET_CONFIG;
adapter->aq_required |= I40EVF_FLAG_AQ_MAP_VECTORS;
/* re-add all MAC filters */
@@ -1944,6 +1975,15 @@ continue_reset:
if (err)
goto reset_err;
+ if (adapter->flags & I40EVF_FLAG_REINIT_ITR_NEEDED) {
+ err = i40evf_request_traffic_irqs(adapter,
+ netdev->name);
+ if (err)
+ goto reset_err;
+
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+ }
+
i40evf_configure(adapter);
i40evf_up_complete(adapter);
@@ -2386,10 +2426,6 @@ out_err:
return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
-#define I40EVF_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_TX |\
- NETIF_F_HW_VLAN_CTAG_RX |\
- NETIF_F_HW_VLAN_CTAG_FILTER)
-
/**
* i40evf_fix_features - fix up the netdev feature bits
* @netdev: our net device
@@ -2402,9 +2438,11 @@ static netdev_features_t i40evf_fix_features(struct net_device *netdev,
{
struct i40evf_adapter *adapter = netdev_priv(netdev);
- features &= ~I40EVF_VLAN_FEATURES;
- if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
- features |= I40EVF_VLAN_FEATURES;
+ if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN))
+ features &= ~(NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX |
+ NETIF_F_HW_VLAN_CTAG_FILTER);
+
return features;
}
@@ -2459,9 +2497,9 @@ static int i40evf_check_reset_complete(struct i40e_hw *hw)
int i40evf_process_config(struct i40evf_adapter *adapter)
{
struct virtchnl_vf_resource *vfres = adapter->vf_res;
+ int i, num_req_queues = adapter->num_req_queues;
struct net_device *netdev = adapter->netdev;
struct i40e_vsi *vsi = &adapter->vsi;
- int i;
netdev_features_t hw_enc_features;
netdev_features_t hw_features;
@@ -2475,6 +2513,23 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
return -ENODEV;
}
+ if (num_req_queues &&
+ num_req_queues != adapter->vsi_res->num_queue_pairs) {
+ /* Problem. The PF gave us fewer queues than what we had
+ * negotiated in our request. Need a reset to see if we can't
+ * get back to a working state.
+ */
+ dev_err(&adapter->pdev->dev,
+ "Requested %d queues, but PF only gave us %d.\n",
+ num_req_queues,
+ adapter->vsi_res->num_queue_pairs);
+ adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+ adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
+ i40evf_schedule_reset(adapter);
+ return -ENODEV;
+ }
+ adapter->num_req_queues = 0;
+
hw_enc_features = NETIF_F_SG |
NETIF_F_IP_CSUM |
NETIF_F_IPV6_CSUM |
@@ -2518,9 +2573,17 @@ int i40evf_process_config(struct i40evf_adapter *adapter)
*/
hw_features = hw_enc_features;
+ /* Enable VLAN features if supported */
+ if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+ hw_features |= (NETIF_F_HW_VLAN_CTAG_TX |
+ NETIF_F_HW_VLAN_CTAG_RX);
+
netdev->hw_features |= hw_features;
- netdev->features |= hw_features | I40EVF_VLAN_FEATURES;
+ netdev->features |= hw_features;
+
+ if (vfres->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)
+ netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
adapter->vsi.id = adapter->vsi_res->vsi_id;
@@ -2686,9 +2749,7 @@ static void i40evf_init_task(struct work_struct *work)
ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr);
}
- init_timer(&adapter->watchdog_timer);
- adapter->watchdog_timer.function = &i40evf_watchdog_timer;
- adapter->watchdog_timer.data = (unsigned long)adapter;
+ timer_setup(&adapter->watchdog_timer, i40evf_watchdog_timer, 0);
mod_timer(&adapter->watchdog_timer, jiffies + 1);
adapter->tx_desc_count = I40EVF_DEFAULT_TXD;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index 85876f4fb1fb..46c8b8a3907c 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -52,7 +52,7 @@ static int i40evf_send_pf_msg(struct i40evf_adapter *adapter,
err = i40e_aq_send_msg_to_pf(hw, op, 0, msg, len, NULL);
if (err)
- dev_err(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n",
+ dev_dbg(&adapter->pdev->dev, "Unable to send opcode %d to PF, err %s, aq_err %s\n",
op, i40evf_stat_str(hw, err),
i40evf_aq_str(hw, hw->aq.asq_last_status));
return err;
@@ -160,7 +160,8 @@ int i40evf_send_vf_config_msg(struct i40evf_adapter *adapter)
VIRTCHNL_VF_OFFLOAD_WB_ON_ITR |
VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 |
VIRTCHNL_VF_OFFLOAD_ENCAP |
- VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM;
+ VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM |
+ VIRTCHNL_VF_OFFLOAD_REQ_QUEUES;
adapter->current_op = VIRTCHNL_OP_GET_VF_RESOURCES;
adapter->aq_required &= ~I40EVF_FLAG_AQ_GET_CONFIG;
@@ -385,6 +386,33 @@ void i40evf_map_queues(struct i40evf_adapter *adapter)
}
/**
+ * i40evf_request_queues
+ * @adapter: adapter structure
+ * @num: number of requested queues
+ *
+ * We get a default number of queues from the PF. This enables us to request a
+ * different number. Returns 0 on success, negative on failure
+ **/
+int i40evf_request_queues(struct i40evf_adapter *adapter, int num)
+{
+ struct virtchnl_vf_res_request vfres;
+
+ if (adapter->current_op != VIRTCHNL_OP_UNKNOWN) {
+ /* bail because we already have a command pending */
+ dev_err(&adapter->pdev->dev, "Cannot request queues, command %d pending\n",
+ adapter->current_op);
+ return -EBUSY;
+ }
+
+ vfres.num_queue_pairs = num;
+
+ adapter->current_op = VIRTCHNL_OP_REQUEST_QUEUES;
+ adapter->flags |= I40EVF_FLAG_REINIT_ITR_NEEDED;
+ return i40evf_send_pf_msg(adapter, VIRTCHNL_OP_REQUEST_QUEUES,
+ (u8 *)&vfres, sizeof(vfres));
+}
+
+/**
* i40evf_add_ether_addrs
* @adapter: adapter structure
* @addrs: the MAC address filters to add (contiguous)
@@ -1068,6 +1096,19 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter,
"Invalid message %d from PF\n", v_opcode);
}
break;
+ case VIRTCHNL_OP_REQUEST_QUEUES: {
+ struct virtchnl_vf_res_request *vfres =
+ (struct virtchnl_vf_res_request *)msg;
+ if (vfres->num_queue_pairs != adapter->num_req_queues) {
+ dev_info(&adapter->pdev->dev,
+ "Requested %d queues, PF can support %d\n",
+ adapter->num_req_queues,
+ vfres->num_queue_pairs);
+ adapter->num_req_queues = 0;
+ adapter->flags &= ~I40EVF_FLAG_REINIT_ITR_NEEDED;
+ }
+ }
+ break;
default:
if (adapter->current_op && (v_opcode != adapter->current_op))
dev_warn(&adapter->pdev->dev, "Expected response %d from PF, received %d\n",
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index fd4a46b03cc8..58d01a211367 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -133,8 +133,8 @@ static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
-static void igb_update_phy_info(unsigned long);
-static void igb_watchdog(unsigned long);
+static void igb_update_phy_info(struct timer_list *);
+static void igb_watchdog(struct timer_list *);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
static void igb_get_stats64(struct net_device *dev,
@@ -2538,10 +2538,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
wr32(E1000_TXPBS, I210_TXPBSIZE_DEFAULT);
}
- setup_timer(&adapter->watchdog_timer, igb_watchdog,
- (unsigned long) adapter);
- setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
- (unsigned long) adapter);
+ timer_setup(&adapter->watchdog_timer, igb_watchdog, 0);
+ timer_setup(&adapter->phy_info_timer, igb_update_phy_info, 0);
INIT_WORK(&adapter->reset_task, igb_reset_task);
INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
@@ -3162,6 +3160,8 @@ static int igb_sw_init(struct igb_adapter *adapter)
/* Setup and initialize a copy of the hw vlan table array */
adapter->shadow_vfta = kcalloc(E1000_VLAN_FILTER_TBL_SIZE, sizeof(u32),
GFP_ATOMIC);
+ if (!adapter->shadow_vfta)
+ return -ENOMEM;
/* This call may decrease the number of queues */
if (igb_init_interrupt_scheme(adapter, true)) {
@@ -4423,9 +4423,9 @@ static void igb_spoof_check(struct igb_adapter *adapter)
/* Need to wait a few seconds after link up to get diagnostic information from
* the phy
*/
-static void igb_update_phy_info(unsigned long data)
+static void igb_update_phy_info(struct timer_list *t)
{
- struct igb_adapter *adapter = (struct igb_adapter *) data;
+ struct igb_adapter *adapter = from_timer(adapter, t, phy_info_timer);
igb_get_phy_info(&adapter->hw);
}
@@ -4512,9 +4512,9 @@ static void igb_check_lvmmc(struct igb_adapter *adapter)
* igb_watchdog - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
-static void igb_watchdog(unsigned long data)
+static void igb_watchdog(struct timer_list *t)
{
- struct igb_adapter *adapter = (struct igb_adapter *)data;
+ struct igb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
/* Do the rest outside of interrupt context */
schedule_work(&adapter->watchdog_task);
}
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 1ed556911b14..713e8df23744 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1915,9 +1915,9 @@ static bool igbvf_has_link(struct igbvf_adapter *adapter)
* igbvf_watchdog - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
-static void igbvf_watchdog(unsigned long data)
+static void igbvf_watchdog(struct timer_list *t)
{
- struct igbvf_adapter *adapter = (struct igbvf_adapter *)data;
+ struct igbvf_adapter *adapter = from_timer(adapter, t, watchdog_timer);
/* Do the rest outside of interrupt context */
schedule_work(&adapter->watchdog_task);
@@ -2878,8 +2878,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->addr_len);
}
- setup_timer(&adapter->watchdog_timer, &igbvf_watchdog,
- (unsigned long)adapter);
+ timer_setup(&adapter->watchdog_timer, igbvf_watchdog, 0);
INIT_WORK(&adapter->reset_task, igbvf_reset_task);
INIT_WORK(&adapter->watchdog_task, igbvf_watchdog_task);
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 5a713199653c..2353c383f0a7 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -83,7 +83,7 @@ static void ixgb_setup_rctl(struct ixgb_adapter *adapter);
static void ixgb_clean_tx_ring(struct ixgb_adapter *adapter);
static void ixgb_clean_rx_ring(struct ixgb_adapter *adapter);
static void ixgb_set_multi(struct net_device *netdev);
-static void ixgb_watchdog(unsigned long data);
+static void ixgb_watchdog(struct timer_list *t);
static netdev_tx_t ixgb_xmit_frame(struct sk_buff *skb,
struct net_device *netdev);
static int ixgb_change_mtu(struct net_device *netdev, int new_mtu);
@@ -508,9 +508,7 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw);
- init_timer(&adapter->watchdog_timer);
- adapter->watchdog_timer.function = ixgb_watchdog;
- adapter->watchdog_timer.data = (unsigned long)adapter;
+ timer_setup(&adapter->watchdog_timer, ixgb_watchdog, 0);
INIT_WORK(&adapter->tx_timeout_task, ixgb_tx_timeout_task);
@@ -1152,9 +1150,9 @@ alloc_failed:
**/
static void
-ixgb_watchdog(unsigned long data)
+ixgb_watchdog(struct timer_list *t)
{
- struct ixgb_adapter *adapter = (struct ixgb_adapter *)data;
+ struct ixgb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
struct net_device *netdev = adapter->netdev;
struct ixgb_desc_ring *txdr = &adapter->tx_ring;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index dd5578756ae0..468c3555a629 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -275,6 +275,7 @@ struct ixgbe_rx_queue_stats {
u64 rsc_count;
u64 rsc_flush;
u64 non_eop_descs;
+ u64 alloc_rx_page;
u64 alloc_rx_page_failed;
u64 alloc_rx_buff_failed;
u64 csum_err;
@@ -434,8 +435,15 @@ static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring)
}
#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring))
+#define IXGBE_ITR_ADAPTIVE_MIN_INC 2
+#define IXGBE_ITR_ADAPTIVE_MIN_USECS 10
+#define IXGBE_ITR_ADAPTIVE_MAX_USECS 126
+#define IXGBE_ITR_ADAPTIVE_LATENCY 0x80
+#define IXGBE_ITR_ADAPTIVE_BULK 0x00
+
struct ixgbe_ring_container {
struct ixgbe_ring *ring; /* pointer to linked list of rings */
+ unsigned long next_update; /* jiffies value of last update */
unsigned int total_bytes; /* total bytes processed this int */
unsigned int total_packets; /* total packets processed this int */
u16 work_limit; /* total work allowed per interrupt */
@@ -655,6 +663,7 @@ struct ixgbe_adapter {
u64 rsc_total_count;
u64 rsc_total_flush;
u64 non_eop_descs;
+ u32 alloc_rx_page;
u32 alloc_rx_page_failed;
u32 alloc_rx_buff_failed;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 6e6ab6f6875e..9bef255f6a18 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -3781,10 +3781,10 @@ s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min,
fw_cmd.ver_build = build;
fw_cmd.ver_sub = sub;
fw_cmd.hdr.checksum = 0;
- fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
- (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
fw_cmd.pad = 0;
fw_cmd.pad2 = 0;
+ fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd,
+ (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len));
for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) {
ret_val = ixgbe_host_interface_command(hw, &fw_cmd,
@@ -4081,8 +4081,8 @@ bool ixgbe_mng_present(struct ixgbe_hw *hw)
return false;
fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM(hw));
- fwsm &= IXGBE_FWSM_MODE_MASK;
- return fwsm == IXGBE_FWSM_FW_MODE_PT;
+
+ return !!(fwsm & IXGBE_FWSM_FW_MODE_PT);
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index c3e7a8191128..0aad1c2a3667 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -104,6 +104,7 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = {
{"tx_flow_control_xoff", IXGBE_STAT(stats.lxofftxc)},
{"rx_flow_control_xoff", IXGBE_STAT(stats.lxoffrxc)},
{"rx_csum_offload_errors", IXGBE_STAT(hw_csum_rx_error)},
+ {"alloc_rx_page", IXGBE_STAT(alloc_rx_page)},
{"alloc_rx_page_failed", IXGBE_STAT(alloc_rx_page_failed)},
{"alloc_rx_buff_failed", IXGBE_STAT(alloc_rx_buff_failed)},
{"rx_no_dma_resources", IXGBE_STAT(hw_rx_no_dma_resources)},
@@ -1916,8 +1917,6 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
unsigned int size)
{
union ixgbe_adv_rx_desc *rx_desc;
- struct ixgbe_rx_buffer *rx_buffer;
- struct ixgbe_tx_buffer *tx_buffer;
u16 rx_ntc, tx_ntc, count = 0;
/* initialize next to clean and descriptor values */
@@ -1925,7 +1924,38 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
tx_ntc = tx_ring->next_to_clean;
rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc);
+ while (tx_ntc != tx_ring->next_to_use) {
+ union ixgbe_adv_tx_desc *tx_desc;
+ struct ixgbe_tx_buffer *tx_buffer;
+
+ tx_desc = IXGBE_TX_DESC(tx_ring, tx_ntc);
+
+ /* if DD is not set transmit has not completed */
+ if (!(tx_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+ return count;
+
+ /* unmap buffer on Tx side */
+ tx_buffer = &tx_ring->tx_buffer_info[tx_ntc];
+
+ /* Free all the Tx ring sk_buffs */
+ dev_kfree_skb_any(tx_buffer->skb);
+
+ /* unmap skb header data */
+ dma_unmap_single(tx_ring->dev,
+ dma_unmap_addr(tx_buffer, dma),
+ dma_unmap_len(tx_buffer, len),
+ DMA_TO_DEVICE);
+ dma_unmap_len_set(tx_buffer, len, 0);
+
+ /* increment Tx next to clean counter */
+ tx_ntc++;
+ if (tx_ntc == tx_ring->count)
+ tx_ntc = 0;
+ }
+
while (rx_desc->wb.upper.length) {
+ struct ixgbe_rx_buffer *rx_buffer;
+
/* check Rx buffer */
rx_buffer = &rx_ring->rx_buffer_info[rx_ntc];
@@ -1938,6 +1968,8 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
/* verify contents of skb */
if (ixgbe_check_lbtest_frame(rx_buffer, size))
count++;
+ else
+ break;
/* sync Rx buffer for device write */
dma_sync_single_for_device(rx_ring->dev,
@@ -1945,26 +1977,10 @@ static u16 ixgbe_clean_test_rings(struct ixgbe_ring *rx_ring,
ixgbe_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
- /* unmap buffer on Tx side */
- tx_buffer = &tx_ring->tx_buffer_info[tx_ntc];
-
- /* Free all the Tx ring sk_buffs */
- dev_kfree_skb_any(tx_buffer->skb);
-
- /* unmap skb header data */
- dma_unmap_single(tx_ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- dma_unmap_len_set(tx_buffer, len, 0);
-
- /* increment Rx/Tx next to clean counters */
+ /* increment Rx next to clean counter */
rx_ntc++;
if (rx_ntc == rx_ring->count)
rx_ntc = 0;
- tx_ntc++;
- if (tx_ntc == tx_ring->count)
- tx_ntc = 0;
/* fetch next descriptor */
rx_desc = IXGBE_RX_DESC(rx_ring, rx_ntc);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index f1bfae0c41d0..8e2a957aca18 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -806,6 +806,7 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring,
ring->next = head->ring;
head->ring = ring;
head->count++;
+ head->next_update = jiffies + 1;
}
/**
@@ -879,8 +880,11 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
/* initialize work limits */
q_vector->tx.work_limit = adapter->tx_work_limit;
- /* initialize pointer to rings */
- ring = q_vector->ring;
+ /* Initialize setting for adaptive ITR */
+ q_vector->tx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
+ IXGBE_ITR_ADAPTIVE_LATENCY;
+ q_vector->rx.itr = IXGBE_ITR_ADAPTIVE_MAX_USECS |
+ IXGBE_ITR_ADAPTIVE_LATENCY;
/* intialize ITR */
if (txr_count && !rxr_count) {
@@ -897,6 +901,9 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
q_vector->itr = adapter->rx_itr_setting;
}
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+
while (txr_count) {
/* assign generic ring traits */
ring->dev = &adapter->pdev->dev;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4d76afd13868..7f503d35eb1c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1620,6 +1620,7 @@ static bool ixgbe_alloc_mapped_page(struct ixgbe_ring *rx_ring,
bi->page = page;
bi->page_offset = ixgbe_rx_offset(rx_ring);
bi->pagecnt_bias = 1;
+ rx_ring->rx_stats.alloc_rx_page++;
return true;
}
@@ -2133,6 +2134,21 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
#if L1_CACHE_BYTES < 128
prefetch(xdp->data + L1_CACHE_BYTES);
#endif
+ /* Note, we get here by enabling legacy-rx via:
+ *
+ * ethtool --set-priv-flags <dev> legacy-rx on
+ *
+ * In this mode, we currently get 0 extra XDP headroom as
+ * opposed to having legacy-rx off, where we process XDP
+ * packets going to stack via ixgbe_build_skb(). The latter
+ * provides us currently with 192 bytes of headroom.
+ *
+ * For ixgbe_construct_skb() mode it means that the
+ * xdp->data_meta will always point to xdp->data, since
+ * the helper cannot expand the head. Should this ever
+ * change in future for legacy-rx mode on, then lets also
+ * add xdp->data_meta handling here.
+ */
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE);
@@ -2165,6 +2181,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
struct xdp_buff *xdp,
union ixgbe_adv_rx_desc *rx_desc)
{
+ unsigned int metasize = xdp->data - xdp->data_meta;
#if (PAGE_SIZE < 8192)
unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
#else
@@ -2174,10 +2191,14 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
#endif
struct sk_buff *skb;
- /* prefetch first cache line of first page */
- prefetch(xdp->data);
+ /* Prefetch first cache line of first page. If xdp->data_meta
+ * is unused, this points extactly as xdp->data, otherwise we
+ * likely have a consumer accessing first few bytes of meta
+ * data, and then actual data.
+ */
+ prefetch(xdp->data_meta);
#if L1_CACHE_BYTES < 128
- prefetch(xdp->data + L1_CACHE_BYTES);
+ prefetch(xdp->data_meta + L1_CACHE_BYTES);
#endif
/* build an skb to around the page buffer */
@@ -2188,6 +2209,8 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
/* update pointers within the skb to store the data */
skb_reserve(skb, xdp->data - xdp->data_hard_start);
__skb_put(skb, xdp->data_end - xdp->data);
+ if (metasize)
+ skb_metadata_set(skb, metasize);
/* record DMA address if this is the start of a chain of buffers */
if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
@@ -2326,6 +2349,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
if (!skb) {
xdp.data = page_address(rx_buffer->page) +
rx_buffer->page_offset;
+ xdp.data_meta = xdp.data;
xdp.data_hard_start = xdp.data -
ixgbe_rx_offset(rx_ring);
xdp.data_end = xdp.data + size;
@@ -2516,50 +2540,174 @@ enum latency_range {
static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector,
struct ixgbe_ring_container *ring_container)
{
- int bytes = ring_container->total_bytes;
- int packets = ring_container->total_packets;
- u32 timepassed_us;
- u64 bytes_perint;
- u8 itr_setting = ring_container->itr;
+ unsigned int itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
+ IXGBE_ITR_ADAPTIVE_LATENCY;
+ unsigned int avg_wire_size, packets, bytes;
+ unsigned long next_update = jiffies;
- if (packets == 0)
+ /* If we don't have any rings just leave ourselves set for maximum
+ * possible latency so we take ourselves out of the equation.
+ */
+ if (!ring_container->ring)
return;
- /* simple throttlerate management
- * 0-10MB/s lowest (100000 ints/s)
- * 10-20MB/s low (20000 ints/s)
- * 20-1249MB/s bulk (12000 ints/s)
+ /* If we didn't update within up to 1 - 2 jiffies we can assume
+ * that either packets are coming in so slow there hasn't been
+ * any work, or that there is so much work that NAPI is dealing
+ * with interrupt moderation and we don't need to do anything.
*/
- /* what was last interrupt timeslice? */
- timepassed_us = q_vector->itr >> 2;
- if (timepassed_us == 0)
- return;
+ if (time_after(next_update, ring_container->next_update))
+ goto clear_counts;
- bytes_perint = bytes / timepassed_us; /* bytes/usec */
+ packets = ring_container->total_packets;
- switch (itr_setting) {
- case lowest_latency:
- if (bytes_perint > 10)
- itr_setting = low_latency;
- break;
- case low_latency:
- if (bytes_perint > 20)
- itr_setting = bulk_latency;
- else if (bytes_perint <= 10)
- itr_setting = lowest_latency;
+ /* We have no packets to actually measure against. This means
+ * either one of the other queues on this vector is active or
+ * we are a Tx queue doing TSO with too high of an interrupt rate.
+ *
+ * When this occurs just tick up our delay by the minimum value
+ * and hope that this extra delay will prevent us from being called
+ * without any work on our queue.
+ */
+ if (!packets) {
+ itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+ if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+ itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+ itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY;
+ goto clear_counts;
+ }
+
+ bytes = ring_container->total_bytes;
+
+ /* If packets are less than 4 or bytes are less than 9000 assume
+ * insufficient data to use bulk rate limiting approach. We are
+ * likely latency driven.
+ */
+ if (packets < 4 && bytes < 9000) {
+ itr = IXGBE_ITR_ADAPTIVE_LATENCY;
+ goto adjust_by_size;
+ }
+
+ /* Between 4 and 48 we can assume that our current interrupt delay
+ * is only slightly too low. As such we should increase it by a small
+ * fixed amount.
+ */
+ if (packets < 48) {
+ itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC;
+ if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
+ itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
+ goto clear_counts;
+ }
+
+ /* Between 48 and 96 is our "goldilocks" zone where we are working
+ * out "just right". Just report that our current ITR is good for us.
+ */
+ if (packets < 96) {
+ itr = q_vector->itr >> 2;
+ goto clear_counts;
+ }
+
+ /* If packet count is 96 or greater we are likely looking at a slight
+ * overrun of the delay we want. Try halving our delay to see if that
+ * will cut the number of packets in half per interrupt.
+ */
+ if (packets < 256) {
+ itr = q_vector->itr >> 3;
+ if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
+ itr = IXGBE_ITR_ADAPTIVE_MIN_USECS;
+ goto clear_counts;
+ }
+
+ /* The paths below assume we are dealing with a bulk ITR since number
+ * of packets is 256 or greater. We are just going to have to compute
+ * a value and try to bring the count under control, though for smaller
+ * packet sizes there isn't much we can do as NAPI polling will likely
+ * be kicking in sooner rather than later.
+ */
+ itr = IXGBE_ITR_ADAPTIVE_BULK;
+
+adjust_by_size:
+ /* If packet counts are 256 or greater we can assume we have a gross
+ * overestimation of what the rate should be. Instead of trying to fine
+ * tune it just use the formula below to try and dial in an exact value
+ * give the current packet size of the frame.
+ */
+ avg_wire_size = bytes / packets;
+
+ /* The following is a crude approximation of:
+ * wmem_default / (size + overhead) = desired_pkts_per_int
+ * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
+ * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
+ *
+ * Assuming wmem_default is 212992 and overhead is 640 bytes per
+ * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
+ * formula down to
+ *
+ * (170 * (size + 24)) / (size + 640) = ITR
+ *
+ * We first do some math on the packet size and then finally bitshift
+ * by 8 after rounding up. We also have to account for PCIe link speed
+ * difference as ITR scales based on this.
+ */
+ if (avg_wire_size <= 60) {
+ /* Start at 50k ints/sec */
+ avg_wire_size = 5120;
+ } else if (avg_wire_size <= 316) {
+ /* 50K ints/sec to 16K ints/sec */
+ avg_wire_size *= 40;
+ avg_wire_size += 2720;
+ } else if (avg_wire_size <= 1084) {
+ /* 16K ints/sec to 9.2K ints/sec */
+ avg_wire_size *= 15;
+ avg_wire_size += 11452;
+ } else if (avg_wire_size <= 1980) {
+ /* 9.2K ints/sec to 8K ints/sec */
+ avg_wire_size *= 5;
+ avg_wire_size += 22420;
+ } else {
+ /* plateau at a limit of 8K ints/sec */
+ avg_wire_size = 32256;
+ }
+
+ /* If we are in low latency mode half our delay which doubles the rate
+ * to somewhere between 100K to 16K ints/sec
+ */
+ if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
+ avg_wire_size >>= 1;
+
+ /* Resultant value is 256 times larger than it needs to be. This
+ * gives us room to adjust the value as needed to either increase
+ * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
+ *
+ * Use addition as we have already recorded the new latency flag
+ * for the ITR value.
+ */
+ switch (q_vector->adapter->link_speed) {
+ case IXGBE_LINK_SPEED_10GB_FULL:
+ case IXGBE_LINK_SPEED_100_FULL:
+ default:
+ itr += DIV_ROUND_UP(avg_wire_size,
+ IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
+ IXGBE_ITR_ADAPTIVE_MIN_INC;
break;
- case bulk_latency:
- if (bytes_perint <= 20)
- itr_setting = low_latency;
+ case IXGBE_LINK_SPEED_2_5GB_FULL:
+ case IXGBE_LINK_SPEED_1GB_FULL:
+ case IXGBE_LINK_SPEED_10_FULL:
+ itr += DIV_ROUND_UP(avg_wire_size,
+ IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
+ IXGBE_ITR_ADAPTIVE_MIN_INC;
break;
}
- /* clear work counters since we have the values we need */
+clear_counts:
+ /* write back value */
+ ring_container->itr = itr;
+
+ /* next update should occur within next jiffy */
+ ring_container->next_update = next_update + 1;
+
ring_container->total_bytes = 0;
ring_container->total_packets = 0;
-
- /* write updated itr to ring container */
- ring_container->itr = itr_setting;
}
/**
@@ -2601,34 +2749,19 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector)
static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector)
{
- u32 new_itr = q_vector->itr;
- u8 current_itr;
+ u32 new_itr;
ixgbe_update_itr(q_vector, &q_vector->tx);
ixgbe_update_itr(q_vector, &q_vector->rx);
- current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+ /* use the smallest value of new ITR delay calculations */
+ new_itr = min(q_vector->rx.itr, q_vector->tx.itr);
- switch (current_itr) {
- /* counts and packets in update_itr are dependent on these numbers */
- case lowest_latency:
- new_itr = IXGBE_100K_ITR;
- break;
- case low_latency:
- new_itr = IXGBE_20K_ITR;
- break;
- case bulk_latency:
- new_itr = IXGBE_12K_ITR;
- break;
- default:
- break;
- }
+ /* Clear latency flag if set, shift into correct position */
+ new_itr &= ~IXGBE_ITR_ADAPTIVE_LATENCY;
+ new_itr <<= 2;
if (new_itr != q_vector->itr) {
- /* do an exponential smoothing */
- new_itr = (10 * new_itr * q_vector->itr) /
- ((9 * new_itr) + q_vector->itr);
-
/* save the algorithm value here */
q_vector->itr = new_itr;
@@ -6771,6 +6904,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot;
u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0;
u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0;
+ u64 alloc_rx_page = 0;
u64 bytes = 0, packets = 0, hw_csum_rx_error = 0;
if (test_bit(__IXGBE_DOWN, &adapter->state) ||
@@ -6791,6 +6925,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
for (i = 0; i < adapter->num_rx_queues; i++) {
struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
non_eop_descs += rx_ring->rx_stats.non_eop_descs;
+ alloc_rx_page += rx_ring->rx_stats.alloc_rx_page;
alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed;
alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed;
hw_csum_rx_error += rx_ring->rx_stats.csum_err;
@@ -6798,6 +6933,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
packets += rx_ring->stats.packets;
}
adapter->non_eop_descs = non_eop_descs;
+ adapter->alloc_rx_page = alloc_rx_page;
adapter->alloc_rx_page_failed = alloc_rx_page_failed;
adapter->alloc_rx_buff_failed = alloc_rx_buff_failed;
adapter->hw_csum_rx_error = hw_csum_rx_error;
@@ -7554,9 +7690,9 @@ static void ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter)
* ixgbe_service_timer - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
-static void ixgbe_service_timer(unsigned long data)
+static void ixgbe_service_timer(struct timer_list *t)
{
- struct ixgbe_adapter *adapter = (struct ixgbe_adapter *)data;
+ struct ixgbe_adapter *adapter = from_timer(adapter, t, service_timer);
unsigned long next_event_offset;
/* poll faster when waiting for link */
@@ -9229,13 +9365,10 @@ free_jump:
return err;
}
-static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
+static int ixgbe_setup_tc_cls_u32(struct ixgbe_adapter *adapter,
struct tc_cls_u32_offload *cls_u32)
{
- struct ixgbe_adapter *adapter = netdev_priv(dev);
-
- if (!is_classid_clsact_ingress(cls_u32->common.classid) ||
- cls_u32->common.chain_index)
+ if (cls_u32->common.chain_index)
return -EOPNOTSUPP;
switch (cls_u32->command) {
@@ -9254,6 +9387,40 @@ static int ixgbe_setup_tc_cls_u32(struct net_device *dev,
}
}
+static int ixgbe_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct ixgbe_adapter *adapter = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSU32:
+ return ixgbe_setup_tc_cls_u32(adapter, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int ixgbe_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, ixgbe_setup_tc_block_cb,
+ adapter, adapter);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, ixgbe_setup_tc_block_cb,
+ adapter);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int ixgbe_setup_tc_mqprio(struct net_device *dev,
struct tc_mqprio_qopt *mqprio)
{
@@ -9265,8 +9432,8 @@ static int __ixgbe_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
switch (type) {
- case TC_SETUP_CLSU32:
- return ixgbe_setup_tc_cls_u32(dev, type_data);
+ case TC_SETUP_BLOCK:
+ return ixgbe_setup_tc_block(dev, type_data);
case TC_SETUP_MQPRIO:
return ixgbe_setup_tc_mqprio(dev, type_data);
default:
@@ -9739,6 +9906,17 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
limit = find_last_bit(&adapter->fwd_bitmask, 32);
adapter->ring_feature[RING_F_VMDQ].limit = limit + 1;
ixgbe_fwd_ring_down(fwd_adapter->netdev, fwd_adapter);
+
+ /* go back to full RSS if we're done with our VMQs */
+ if (adapter->ring_feature[RING_F_VMDQ].limit == 1) {
+ int rss = min_t(int, ixgbe_max_rss_indices(adapter),
+ num_online_cpus());
+
+ adapter->flags &= ~IXGBE_FLAG_VMDQ_ENABLED;
+ adapter->flags &= ~IXGBE_FLAG_SRIOV_ENABLED;
+ adapter->ring_feature[RING_F_RSS].limit = rss;
+ }
+
ixgbe_setup_tc(pdev, netdev_get_num_tc(pdev));
netdev_dbg(pdev, "pool %i:%i queues %i:%i VSI bitmask %lx\n",
fwd_adapter->pool, adapter->num_rx_pools,
@@ -10361,8 +10539,7 @@ skip_sriov:
ether_addr_copy(hw->mac.addr, hw->mac.perm_addr);
ixgbe_mac_set_default_filter(adapter);
- setup_timer(&adapter->service_timer, &ixgbe_service_timer,
- (unsigned long) adapter);
+ timer_setup(&adapter->service_timer, ixgbe_service_timer, 0);
if (ixgbe_removed(hw->hw_addr)) {
err = -EIO;
@@ -10718,6 +10895,9 @@ skip_bad_vf_detection:
if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state))
return PCI_ERS_RESULT_DISCONNECT;
+ if (!netif_device_present(netdev))
+ return PCI_ERS_RESULT_DISCONNECT;
+
rtnl_lock();
netif_device_detach(netdev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index 6ea0d6a5fb90..b8c5fd2a2115 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -619,12 +619,6 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
usleep_range(5000, 10000);
}
- /* Failed to get SW only semaphore */
- if (swmask == IXGBE_GSSR_SW_MNG_SM) {
- hw_dbg(hw, "Failed to get SW only semaphore\n");
- return IXGBE_ERR_SWFW_SYNC;
- }
-
/* If the resource is not released by the FW/HW the SW can assume that
* the FW/HW malfunctions. In that case the SW should set the SW bit(s)
* of the requested resource(s) while ignoring the corresponding FW/HW
@@ -647,7 +641,8 @@ s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask)
*/
if (swfw_sync & swmask) {
u32 rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM |
- IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM;
+ IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM |
+ IXGBE_GSSR_SW_MNG_SM;
if (swi2c_mask)
rmask |= IXGBE_GSSR_I2C_MASK;
@@ -763,6 +758,8 @@ static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw)
**/
void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw)
{
+ u32 rmask;
+
/* First try to grab the semaphore but we don't need to bother
* looking to see whether we got the lock or not since we do
* the same thing regardless of whether we got the lock or not.
@@ -771,6 +768,14 @@ void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw)
*/
ixgbe_get_swfw_sync_semaphore(hw);
ixgbe_release_swfw_sync_semaphore(hw);
+
+ /* Acquire and release all software resources. */
+ rmask = IXGBE_GSSR_EEP_SM | IXGBE_GSSR_PHY0_SM |
+ IXGBE_GSSR_PHY1_SM | IXGBE_GSSR_MAC_CSR_SM |
+ IXGBE_GSSR_SW_MNG_SM | IXGBE_GSSR_I2C_MASK;
+
+ ixgbe_acquire_swfw_sync_X540(hw, rmask);
+ ixgbe_release_swfw_sync_X540(hw, rmask);
}
/**
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 19fbb2f28ea4..cb7da5f9c4da 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -900,6 +900,8 @@ static s32 ixgbe_read_ee_hostif_buffer_X550(struct ixgbe_hw *hw,
/* convert offset from words to bytes */
buffer.address = cpu_to_be32((offset + current_word) * 2);
buffer.length = cpu_to_be16(words_to_read * 2);
+ buffer.pad2 = 0;
+ buffer.pad3 = 0;
status = ixgbe_hic_unlocked(hw, (u32 *)&buffer, sizeof(buffer),
IXGBE_HI_COMMAND_TIMEOUT);
@@ -3192,6 +3194,9 @@ static s32 ixgbe_init_phy_ops_X550em(struct ixgbe_hw *hw)
/* Identify the PHY or SFP module */
ret_val = phy->ops.identify(hw);
+ if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED ||
+ ret_val == IXGBE_ERR_PHY_ADDR_INVALID)
+ return ret_val;
/* Setup function pointers based on detected hardware */
ixgbe_init_mac_link_ops_X550em(hw);
@@ -3394,9 +3399,10 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw)
ixgbe_clear_tx_pending(hw);
/* PHY ops must be identified and initialized prior to reset */
-
- /* Identify PHY and related function pointers */
status = hw->phy.ops.init(hw);
+ if (status == IXGBE_ERR_SFP_NOT_SUPPORTED ||
+ status == IXGBE_ERR_PHY_ADDR_INVALID)
+ return status;
/* start the external PHY */
if (hw->phy.type == ixgbe_phy_x550em_ext_t) {
@@ -3884,7 +3890,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x_fw = {
.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550,
};
-static struct ixgbe_mac_operations mac_ops_x550em_a = {
+static const struct ixgbe_mac_operations mac_ops_x550em_a = {
X550_COMMON_MAC
.led_on = ixgbe_led_on_t_x550em,
.led_off = ixgbe_led_off_t_x550em,
@@ -3905,7 +3911,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = {
.write_iosf_sb_reg = ixgbe_write_iosf_sb_reg_x550a,
};
-static struct ixgbe_mac_operations mac_ops_x550em_a_fw = {
+static const struct ixgbe_mac_operations mac_ops_x550em_a_fw = {
X550_COMMON_MAC
.led_on = ixgbe_led_on_generic,
.led_off = ixgbe_led_off_generic,
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 032f8ac06357..12d3601b1d57 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -2747,9 +2747,10 @@ void ixgbevf_update_stats(struct ixgbevf_adapter *adapter)
* ixgbevf_service_timer - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
-static void ixgbevf_service_timer(unsigned long data)
+static void ixgbevf_service_timer(struct timer_list *t)
{
- struct ixgbevf_adapter *adapter = (struct ixgbevf_adapter *)data;
+ struct ixgbevf_adapter *adapter = from_timer(adapter, t,
+ service_timer);
/* Reset the timer */
mod_timer(&adapter->service_timer, (HZ * 2) + jiffies);
@@ -4120,8 +4121,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_sw_init;
}
- setup_timer(&adapter->service_timer, &ixgbevf_service_timer,
- (unsigned long)adapter);
+ timer_setup(&adapter->service_timer, ixgbevf_service_timer, 0);
INIT_WORK(&adapter->service_task, ixgbevf_service_task);
set_bit(__IXGBEVF_SERVICE_INITED, &adapter->state);
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 3c0a6451273d..7cecd9dbc111 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -4,6 +4,7 @@
* Copyright 2004 IDT Inc. (rischelp@idt.com)
* Copyright 2006 Felix Fietkau <nbd@openwrt.org>
* Copyright 2008 Florian Fainelli <florian@openwrt.org>
+ * Copyright 2017 Roman Yeryomin <roman@advem.lv>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
@@ -64,9 +65,9 @@
#include <asm/mach-rc32434/eth.h>
#include <asm/mach-rc32434/dma_v.h>
-#define DRV_NAME "korina"
-#define DRV_VERSION "0.10"
-#define DRV_RELDATE "04Mar2008"
+#define DRV_NAME "korina"
+#define DRV_VERSION "0.20"
+#define DRV_RELDATE "15Sep2017"
#define STATION_ADDRESS_HIGH(dev) (((dev)->dev_addr[0] << 8) | \
((dev)->dev_addr[1]))
@@ -75,7 +76,7 @@
((dev)->dev_addr[4] << 8) | \
((dev)->dev_addr[5]))
-#define MII_CLOCK 1250000 /* no more than 2.5MHz */
+#define MII_CLOCK 1250000 /* no more than 2.5MHz */
/* the following must be powers of two */
#define KORINA_NUM_RDS 64 /* number of receive descriptors */
@@ -87,15 +88,19 @@
#define KORINA_RBSIZE 1536 /* size of one resource buffer = Ether MTU */
#define KORINA_RDS_MASK (KORINA_NUM_RDS - 1)
#define KORINA_TDS_MASK (KORINA_NUM_TDS - 1)
-#define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc))
+#define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc))
#define TD_RING_SIZE (KORINA_NUM_TDS * sizeof(struct dma_desc))
-#define TX_TIMEOUT (6000 * HZ / 1000)
+#define TX_TIMEOUT (6000 * HZ / 1000)
-enum chain_status { desc_filled, desc_empty };
-#define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0)
-#define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0)
-#define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT)
+enum chain_status {
+ desc_filled,
+ desc_empty
+};
+
+#define IS_DMA_FINISHED(X) (((X) & (DMA_DESC_FINI)) != 0)
+#define IS_DMA_DONE(X) (((X) & (DMA_DESC_DONE)) != 0)
+#define RCVPKT_LENGTH(X) (((X) & ETH_RX_LEN) >> ETH_RX_LEN_BIT)
/* Information that need to be kept for each board. */
struct korina_private {
@@ -122,10 +127,8 @@ struct korina_private {
int rx_irq;
int tx_irq;
- int ovr_irq;
- int und_irq;
- spinlock_t lock; /* NIC xmit lock */
+ spinlock_t lock; /* NIC xmit lock */
int dma_halt_cnt;
int dma_run_cnt;
@@ -148,17 +151,17 @@ static inline void korina_start_dma(struct dma_reg *ch, u32 dma_addr)
static inline void korina_abort_dma(struct net_device *dev,
struct dma_reg *ch)
{
- if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) {
- writel(0x10, &ch->dmac);
+ if (readl(&ch->dmac) & DMA_CHAN_RUN_BIT) {
+ writel(0x10, &ch->dmac);
- while (!(readl(&ch->dmas) & DMA_STAT_HALT))
- netif_trans_update(dev);
+ while (!(readl(&ch->dmas) & DMA_STAT_HALT))
+ netif_trans_update(dev);
- writel(0, &ch->dmas);
- }
+ writel(0, &ch->dmas);
+ }
- writel(0, &ch->dmadptr);
- writel(0, &ch->dmandptr);
+ writel(0, &ch->dmadptr);
+ writel(0, &ch->dmandptr);
}
static inline void korina_chain_dma(struct dma_reg *ch, u32 dma_addr)
@@ -365,59 +368,60 @@ static int korina_rx(struct net_device *dev, int limit)
if ((KORINA_RBSIZE - (u32)DMA_COUNT(rd->control)) == 0)
break;
- /* Update statistics counters */
- if (devcs & ETH_RX_CRC)
- dev->stats.rx_crc_errors++;
- if (devcs & ETH_RX_LOR)
- dev->stats.rx_length_errors++;
- if (devcs & ETH_RX_LE)
- dev->stats.rx_length_errors++;
- if (devcs & ETH_RX_OVR)
- dev->stats.rx_fifo_errors++;
- if (devcs & ETH_RX_CV)
- dev->stats.rx_frame_errors++;
- if (devcs & ETH_RX_CES)
- dev->stats.rx_length_errors++;
- if (devcs & ETH_RX_MP)
- dev->stats.multicast++;
+ /* check that this is a whole packet
+ * WARNING: DMA_FD bit incorrectly set
+ * in Rc32434 (errata ref #077) */
+ if (!(devcs & ETH_RX_LD))
+ goto next;
- if ((devcs & ETH_RX_LD) != ETH_RX_LD) {
- /* check that this is a whole packet
- * WARNING: DMA_FD bit incorrectly set
- * in Rc32434 (errata ref #077) */
+ if (!(devcs & ETH_RX_ROK)) {
+ /* Update statistics counters */
dev->stats.rx_errors++;
dev->stats.rx_dropped++;
- } else if ((devcs & ETH_RX_ROK)) {
- pkt_len = RCVPKT_LENGTH(devcs);
+ if (devcs & ETH_RX_CRC)
+ dev->stats.rx_crc_errors++;
+ if (devcs & ETH_RX_LE)
+ dev->stats.rx_length_errors++;
+ if (devcs & ETH_RX_OVR)
+ dev->stats.rx_fifo_errors++;
+ if (devcs & ETH_RX_CV)
+ dev->stats.rx_frame_errors++;
+ if (devcs & ETH_RX_CES)
+ dev->stats.rx_frame_errors++;
+
+ goto next;
+ }
- /* must be the (first and) last
- * descriptor then */
- pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data;
+ pkt_len = RCVPKT_LENGTH(devcs);
- /* invalidate the cache */
- dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4);
+ /* must be the (first and) last
+ * descriptor then */
+ pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data;
- /* Malloc up new buffer. */
- skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE);
+ /* invalidate the cache */
+ dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4);
- if (!skb_new)
- break;
- /* Do not count the CRC */
- skb_put(skb, pkt_len - 4);
- skb->protocol = eth_type_trans(skb, dev);
+ /* Malloc up new buffer. */
+ skb_new = netdev_alloc_skb_ip_align(dev, KORINA_RBSIZE);
- /* Pass the packet to upper layers */
- netif_receive_skb(skb);
- dev->stats.rx_packets++;
- dev->stats.rx_bytes += pkt_len;
+ if (!skb_new)
+ break;
+ /* Do not count the CRC */
+ skb_put(skb, pkt_len - 4);
+ skb->protocol = eth_type_trans(skb, dev);
- /* Update the mcast stats */
- if (devcs & ETH_RX_MP)
- dev->stats.multicast++;
+ /* Pass the packet to upper layers */
+ napi_gro_receive(&lp->napi, skb);
+ dev->stats.rx_packets++;
+ dev->stats.rx_bytes += pkt_len;
- lp->rx_skb[lp->rx_next_done] = skb_new;
- }
+ /* Update the mcast stats */
+ if (devcs & ETH_RX_MP)
+ dev->stats.multicast++;
+
+ lp->rx_skb[lp->rx_next_done] = skb_new;
+next:
rd->devcs = 0;
/* Restore descriptor's curr_addr */
@@ -686,7 +690,7 @@ static int korina_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
/* ethtool helpers */
static void netdev_get_drvinfo(struct net_device *dev,
- struct ethtool_drvinfo *info)
+ struct ethtool_drvinfo *info)
{
struct korina_private *lp = netdev_priv(dev);
@@ -729,10 +733,10 @@ static u32 netdev_get_link(struct net_device *dev)
}
static const struct ethtool_ops netdev_ethtool_ops = {
- .get_drvinfo = netdev_get_drvinfo,
- .get_link = netdev_get_link,
- .get_link_ksettings = netdev_get_link_ksettings,
- .set_link_ksettings = netdev_set_link_ksettings,
+ .get_drvinfo = netdev_get_drvinfo,
+ .get_link = netdev_get_link,
+ .get_link_ksettings = netdev_get_link_ksettings,
+ .set_link_ksettings = netdev_set_link_ksettings,
};
static int korina_alloc_ring(struct net_device *dev)
@@ -864,7 +868,7 @@ static int korina_init(struct net_device *dev)
/* Management Clock Prescaler Divisor
* Clock independent setting */
writel(((idt_cpu_freq) / MII_CLOCK + 1) & ~1,
- &lp->eth_regs->ethmcp);
+ &lp->eth_regs->ethmcp);
/* don't transmit until fifo contains 48b */
writel(48, &lp->eth_regs->ethfifott);
@@ -891,8 +895,6 @@ static void korina_restart_task(struct work_struct *work)
*/
disable_irq(lp->rx_irq);
disable_irq(lp->tx_irq);
- disable_irq(lp->ovr_irq);
- disable_irq(lp->und_irq);
writel(readl(&lp->tx_dma_regs->dmasm) |
DMA_STAT_FINI | DMA_STAT_ERR,
@@ -911,40 +913,10 @@ static void korina_restart_task(struct work_struct *work)
}
korina_multicast_list(dev);
- enable_irq(lp->und_irq);
- enable_irq(lp->ovr_irq);
enable_irq(lp->tx_irq);
enable_irq(lp->rx_irq);
}
-static void korina_clear_and_restart(struct net_device *dev, u32 value)
-{
- struct korina_private *lp = netdev_priv(dev);
-
- netif_stop_queue(dev);
- writel(value, &lp->eth_regs->ethintfc);
- schedule_work(&lp->restart_task);
-}
-
-/* Ethernet Tx Underflow interrupt */
-static irqreturn_t korina_und_interrupt(int irq, void *dev_id)
-{
- struct net_device *dev = dev_id;
- struct korina_private *lp = netdev_priv(dev);
- unsigned int und;
-
- spin_lock(&lp->lock);
-
- und = readl(&lp->eth_regs->ethintfc);
-
- if (und & ETH_INT_FC_UND)
- korina_clear_and_restart(dev, und & ~ETH_INT_FC_UND);
-
- spin_unlock(&lp->lock);
-
- return IRQ_HANDLED;
-}
-
static void korina_tx_timeout(struct net_device *dev)
{
struct korina_private *lp = netdev_priv(dev);
@@ -952,25 +924,6 @@ static void korina_tx_timeout(struct net_device *dev)
schedule_work(&lp->restart_task);
}
-/* Ethernet Rx Overflow interrupt */
-static irqreturn_t
-korina_ovr_interrupt(int irq, void *dev_id)
-{
- struct net_device *dev = dev_id;
- struct korina_private *lp = netdev_priv(dev);
- unsigned int ovr;
-
- spin_lock(&lp->lock);
- ovr = readl(&lp->eth_regs->ethintfc);
-
- if (ovr & ETH_INT_FC_OVR)
- korina_clear_and_restart(dev, ovr & ~ETH_INT_FC_OVR);
-
- spin_unlock(&lp->lock);
-
- return IRQ_HANDLED;
-}
-
#ifdef CONFIG_NET_POLL_CONTROLLER
static void korina_poll_controller(struct net_device *dev)
{
@@ -993,48 +946,26 @@ static int korina_open(struct net_device *dev)
}
/* Install the interrupt handler
- * that handles the Done Finished
- * Ovr and Und Events */
+ * that handles the Done Finished */
ret = request_irq(lp->rx_irq, korina_rx_dma_interrupt,
0, "Korina ethernet Rx", dev);
if (ret < 0) {
printk(KERN_ERR "%s: unable to get Rx DMA IRQ %d\n",
- dev->name, lp->rx_irq);
+ dev->name, lp->rx_irq);
goto err_release;
}
ret = request_irq(lp->tx_irq, korina_tx_dma_interrupt,
0, "Korina ethernet Tx", dev);
if (ret < 0) {
printk(KERN_ERR "%s: unable to get Tx DMA IRQ %d\n",
- dev->name, lp->tx_irq);
+ dev->name, lp->tx_irq);
goto err_free_rx_irq;
}
- /* Install handler for overrun error. */
- ret = request_irq(lp->ovr_irq, korina_ovr_interrupt,
- 0, "Ethernet Overflow", dev);
- if (ret < 0) {
- printk(KERN_ERR "%s: unable to get OVR IRQ %d\n",
- dev->name, lp->ovr_irq);
- goto err_free_tx_irq;
- }
-
- /* Install handler for underflow error. */
- ret = request_irq(lp->und_irq, korina_und_interrupt,
- 0, "Ethernet Underflow", dev);
- if (ret < 0) {
- printk(KERN_ERR "%s: unable to get UND IRQ %d\n",
- dev->name, lp->und_irq);
- goto err_free_ovr_irq;
- }
mod_timer(&lp->media_check_timer, jiffies + 1);
out:
return ret;
-err_free_ovr_irq:
- free_irq(lp->ovr_irq, dev);
-err_free_tx_irq:
- free_irq(lp->tx_irq, dev);
err_free_rx_irq:
free_irq(lp->rx_irq, dev);
err_release:
@@ -1052,8 +983,6 @@ static int korina_close(struct net_device *dev)
/* Disable interrupts */
disable_irq(lp->rx_irq);
disable_irq(lp->tx_irq);
- disable_irq(lp->ovr_irq);
- disable_irq(lp->und_irq);
korina_abort_tx(dev);
tmp = readl(&lp->tx_dma_regs->dmasm);
@@ -1073,8 +1002,6 @@ static int korina_close(struct net_device *dev)
free_irq(lp->rx_irq, dev);
free_irq(lp->tx_irq, dev);
- free_irq(lp->ovr_irq, dev);
- free_irq(lp->und_irq, dev);
return 0;
}
@@ -1113,8 +1040,6 @@ static int korina_probe(struct platform_device *pdev)
lp->rx_irq = platform_get_irq_byname(pdev, "korina_rx");
lp->tx_irq = platform_get_irq_byname(pdev, "korina_tx");
- lp->ovr_irq = platform_get_irq_byname(pdev, "korina_ovr");
- lp->und_irq = platform_get_irq_byname(pdev, "korina_und");
r = platform_get_resource_byname(pdev, IORESOURCE_MEM, "korina_regs");
dev->base_addr = r->start;
@@ -1162,7 +1087,7 @@ static int korina_probe(struct platform_device *pdev)
dev->netdev_ops = &korina_netdev_ops;
dev->ethtool_ops = &netdev_ethtool_ops;
dev->watchdog_timeo = TX_TIMEOUT;
- netif_napi_add(dev, &lp->napi, korina_poll, 64);
+ netif_napi_add(dev, &lp->napi, korina_poll, NAPI_POLL_WEIGHT);
lp->phy_addr = (((lp->rx_irq == 0x2c? 1:0) << 8) | 0x05);
lp->mii_if.dev = dev;
@@ -1226,5 +1151,6 @@ module_platform_driver(korina_driver);
MODULE_AUTHOR("Philip Rischel <rischelp@idt.com>");
MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
MODULE_AUTHOR("Florian Fainelli <florian@openwrt.org>");
+MODULE_AUTHOR("Roman Yeryomin <roman@advem.lv>");
MODULE_DESCRIPTION("IDT RC32434 (Korina) Ethernet driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 9c86cb7cb988..97efe4733661 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -6460,7 +6460,6 @@ static int mvpp2_tx(struct sk_buff *skb, struct net_device *dev)
if (mvpp2_tx_frag_process(port, skb, aggr_txq, txq)) {
tx_desc_unmap_put(port, txq, tx_desc);
frags = 0;
- goto out;
}
}
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 993724959a7c..91b1c154fd29 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1496,9 +1496,8 @@ static int pxa168_eth_probe(struct platform_device *pdev)
netif_napi_add(dev, &pep->napi, pxa168_rx_poll, pep->rx_ring_size);
memset(&pep->timeout, 0, sizeof(struct timer_list));
- init_timer(&pep->timeout);
- pep->timeout.function = rxq_refill_timer_wrapper;
- pep->timeout.data = (unsigned long)pep;
+ setup_timer(&pep->timeout, rxq_refill_timer_wrapper,
+ (unsigned long)pep);
pep->smi_bus = mdiobus_alloc();
if (!pep->smi_bus) {
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 5e81a7263654..54adfd967858 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1817,7 +1817,7 @@ static int mtk_open(struct net_device *dev)
struct mtk_eth *eth = mac->hw;
/* we run 2 netdevs on the same dma ring so we only bring it up once */
- if (!atomic_read(&eth->dma_refcnt)) {
+ if (!refcount_read(&eth->dma_refcnt)) {
int err = mtk_start_dma(eth);
if (err)
@@ -1827,8 +1827,10 @@ static int mtk_open(struct net_device *dev)
napi_enable(&eth->rx_napi);
mtk_tx_irq_enable(eth, MTK_TX_DONE_INT);
mtk_rx_irq_enable(eth, MTK_RX_DONE_INT);
+ refcount_set(&eth->dma_refcnt, 1);
}
- atomic_inc(&eth->dma_refcnt);
+ else
+ refcount_inc(&eth->dma_refcnt);
phy_start(dev->phydev);
netif_start_queue(dev);
@@ -1868,7 +1870,7 @@ static int mtk_stop(struct net_device *dev)
phy_stop(dev->phydev);
/* only shutdown DMA if this is the last user */
- if (!atomic_dec_and_test(&eth->dma_refcnt))
+ if (!refcount_dec_and_test(&eth->dma_refcnt))
return 0;
mtk_tx_irq_disable(eth, MTK_TX_DONE_INT);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 3d3c24a28112..a3af4660de81 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,8 @@
#ifndef MTK_ETH_H
#define MTK_ETH_H
+#include <linux/refcount.h>
+
#define MTK_QDMA_PAGE_SIZE 2048
#define MTK_MAX_RX_LENGTH 1536
#define MTK_TX_DMA_BUF_LEN 0x3fff
@@ -632,7 +634,7 @@ struct mtk_eth {
struct regmap *pctl;
u32 chip_id;
bool hwlro;
- atomic_t dma_refcnt;
+ refcount_t dma_refcnt;
struct mtk_tx_ring tx_ring;
struct mtk_rx_ring rx_ring[MTK_MAX_RX_RING_NUM];
struct mtk_rx_ring rx_ring_qdma;
diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c
index 53daa6ca5d83..de0f9e5e42ec 100644
--- a/drivers/net/ethernet/mellanox/mlx4/catas.c
+++ b/drivers/net/ethernet/mellanox/mlx4/catas.c
@@ -277,7 +277,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
phys_addr_t addr;
INIT_LIST_HEAD(&priv->catas_err.list);
- init_timer(&priv->catas_err.timer);
+ setup_timer(&priv->catas_err.timer, poll_catas, (unsigned long)dev);
priv->catas_err.map = NULL;
if (!mlx4_is_slave(dev)) {
@@ -293,8 +293,6 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
}
}
- priv->catas_err.timer.data = (unsigned long) dev;
- priv->catas_err.timer.function = poll_catas;
priv->catas_err.timer.expires =
round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL);
add_timer(&priv->catas_err.timer);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 72eb50cd5ecd..d8e9a323122e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -69,7 +69,7 @@ void mlx4_cq_tasklet_cb(unsigned long data)
list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) {
list_del_init(&mcq->tasklet_ctx.list);
mcq->tasklet_ctx.comp(mcq);
- if (atomic_dec_and_test(&mcq->refcount))
+ if (refcount_dec_and_test(&mcq->refcount))
complete(&mcq->free);
if (time_after(jiffies, end))
break;
@@ -92,7 +92,7 @@ static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq)
* still arrive.
*/
if (list_empty_careful(&cq->tasklet_ctx.list)) {
- atomic_inc(&cq->refcount);
+ refcount_inc(&cq->refcount);
kick = list_empty(&tasklet_ctx->list);
list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
if (kick)
@@ -344,7 +344,7 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
cq->cons_index = 0;
cq->arm_sn = 1;
cq->uar = uar;
- atomic_set(&cq->refcount, 1);
+ refcount_set(&cq->refcount, 1);
init_completion(&cq->free);
cq->comp = mlx4_add_cq_to_tasklet;
cq->tasklet_ctx.priv =
@@ -386,7 +386,7 @@ void mlx4_cq_free(struct mlx4_dev *dev, struct mlx4_cq *cq)
priv->eq_table.eq[MLX4_EQ_ASYNC].irq)
synchronize_irq(priv->eq_table.eq[MLX4_EQ_ASYNC].irq);
- if (atomic_dec_and_test(&cq->refcount))
+ if (refcount_dec_and_test(&cq->refcount))
complete(&cq->free);
wait_for_completion(&cq->free);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 3d4e4a5d00d1..bf1f04164885 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1742,13 +1742,18 @@ static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
return err;
}
+static int mlx4_en_get_max_num_rx_rings(struct net_device *dev)
+{
+ return min_t(int, num_online_cpus(), MAX_RX_RINGS);
+}
+
static void mlx4_en_get_channels(struct net_device *dev,
struct ethtool_channels *channel)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
- channel->max_rx = MAX_RX_RINGS;
- channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
+ channel->max_rx = mlx4_en_get_max_num_rx_rings(dev);
+ channel->max_tx = priv->mdev->profile.max_num_tx_rings_p_up;
channel->rx_count = priv->rx_ring_num;
channel->tx_count = priv->tx_ring_num[TX] /
@@ -1777,7 +1782,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
mutex_lock(&mdev->state_lock);
xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
if (channel->tx_count * priv->prof->num_up + xdp_count >
- MAX_TX_RINGS) {
+ priv->mdev->profile.max_num_tx_rings_p_up * priv->prof->num_up) {
err = -EINVAL;
en_err(priv,
"Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 686e18de9a97..2c2965497ed3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -153,7 +153,7 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
int i;
params->udp_rss = udp_rss;
- params->num_tx_rings_p_up = mlx4_low_memory_profile() ?
+ params->max_num_tx_rings_p_up = mlx4_low_memory_profile() ?
MLX4_EN_MIN_TX_RING_P_UP :
min_t(int, num_online_cpus(), MLX4_EN_MAX_TX_RING_P_UP);
@@ -170,8 +170,8 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
params->prof[i].num_up = MLX4_EN_NUM_UP_LOW;
- params->prof[i].num_tx_rings_p_up = params->num_tx_rings_p_up;
- params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
+ params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up;
+ params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up *
params->prof[i].num_up;
params->prof[i].rss_rings = 0;
params->prof[i].inline_thold = inline_thold;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 9c218f1cfc6c..d611df2f274d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev)
mlx4_en_arm_cq(priv, cq);
} else {
+ mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring);
mlx4_en_init_recycle_ring(priv, i);
/* XDP TX CQ should never be armed */
}
@@ -3305,7 +3306,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
priv->pflags = MLX4_EN_PRIV_FLAGS_BLUEFLAME;
priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
MLX4_WQE_CTRL_SOLICITED);
- priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up;
+ priv->num_tx_rings_p_up = mdev->profile.max_num_tx_rings_p_up;
priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK;
netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key));
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 5a47f9669621..6883ac75d37f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -53,7 +53,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
if (is_tx) {
context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4);
if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)
- context->params2 |= MLX4_QP_BIT_FPP;
+ context->params2 |= cpu_to_be32(MLX4_QP_BIT_FPP);
} else {
context->sq_size_stride = ilog2(TXBB_SIZE) - 4;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index b97a55c827eb..92aec17f4b4d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -254,8 +254,7 @@ void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
DEF_RX_RINGS));
num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS :
- min_t(int, num_of_eqs,
- netif_get_num_default_rss_queues());
+ min_t(int, num_of_eqs, num_online_cpus());
mdev->profile.prof[i].rx_ring_num =
rounddown_pow_of_two(num_rx_rings);
}
@@ -762,6 +761,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
xdp.data_hard_start = va - frags[0].page_offset;
xdp.data = va;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + length;
orig_data = xdp.data;
@@ -778,7 +778,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
case XDP_PASS:
break;
case XDP_TX:
- if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
+ if (likely(!mlx4_en_xmit_frame(ring, frags, priv,
length, cq_ring,
&doorbell_pending))) {
frags[0].page = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 8a32a8f7f9c0..596445a4a241 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -718,7 +718,7 @@ void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring)
#else
iowrite32be(
#endif
- ring->doorbell_qpn,
+ (__force u32)ring->doorbell_qpn,
ring->bf.uar->map + MLX4_SEND_DOORBELL);
}
@@ -1085,13 +1085,35 @@ tx_drop:
#define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
/ 16) & 0x3f)
+void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring)
+{
+ int i;
+
+ for (i = 0; i < ring->size; i++) {
+ struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
+ struct mlx4_en_tx_desc *tx_desc = ring->buf +
+ (i << LOG_TXBB_SIZE);
+
+ tx_info->map0_byte_count = PAGE_SIZE;
+ tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
+ tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
+ tx_info->ts_requested = 0;
+ tx_info->nr_maps = 1;
+ tx_info->linear = 1;
+ tx_info->inl = 0;
+
+ tx_desc->data.lkey = ring->mr_key;
+ tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+ tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+ }
+}
+
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
struct mlx4_en_rx_alloc *frame,
- struct net_device *dev, unsigned int length,
+ struct mlx4_en_priv *priv, unsigned int length,
int tx_ind, bool *doorbell_pending)
{
- struct mlx4_en_priv *priv = netdev_priv(dev);
- union mlx4_wqe_qpn_vlan qpn_vlan = {};
struct mlx4_en_tx_desc *tx_desc;
struct mlx4_en_tx_info *tx_info;
struct mlx4_wqe_data_seg *data;
@@ -1123,25 +1145,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
tx_info->page = frame->page;
frame->page = NULL;
tx_info->map0_dma = dma;
- tx_info->map0_byte_count = PAGE_SIZE;
- tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
- tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
- tx_info->ts_requested = 0;
- tx_info->nr_maps = 1;
- tx_info->linear = 1;
- tx_info->inl = 0;
dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
length, PCI_DMA_TODEVICE);
data->addr = cpu_to_be64(dma + frame->page_offset);
- data->lkey = ring->mr_key;
dma_wmb();
data->byte_count = cpu_to_be32(length);
/* tx completion can avoid cache line miss for common cases */
- tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
((ring->prod & ring->size) ?
@@ -1152,10 +1165,13 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
ring->prod += MLX4_EN_XDP_TX_NRTXBB;
- qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+ /* Ensure new descriptor hits memory
+ * before setting ownership of this descriptor to HW
+ */
+ dma_wmb();
+ tx_desc->ctrl.owner_opcode = op_own;
+ ring->xmit_more++;
- mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
- op_own, false, false);
*doorbell_pending = true;
return NETDEV_TX_OK;
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 16c09949afd5..634f603f941c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -57,12 +57,12 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)");
#define MLX4_GET(dest, source, offset) \
do { \
void *__p = (char *) (source) + (offset); \
- u64 val; \
- switch (sizeof(dest)) { \
+ __be64 val; \
+ switch (sizeof(dest)) { \
case 1: (dest) = *(u8 *) __p; break; \
case 2: (dest) = be16_to_cpup(__p); break; \
case 4: (dest) = be32_to_cpup(__p); break; \
- case 8: val = get_unaligned((u64 *)__p); \
+ case 8: val = get_unaligned((__be64 *)__p); \
(dest) = be64_to_cpu(val); break; \
default: __buggy_use_of_MLX4_GET(); \
} \
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index fdb3ad0cbe54..1856e279a7e0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -399,7 +399,7 @@ struct mlx4_en_profile {
u32 active_ports;
u32 small_pkt_int;
u8 no_reset;
- u8 num_tx_rings_p_up;
+ u8 max_num_tx_rings_p_up;
struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1];
};
@@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
struct mlx4_en_rx_alloc *frame,
- struct net_device *dev, unsigned int length,
+ struct mlx4_en_priv *priv, unsigned int length,
int tx_ind, bool *doorbell_pending);
void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
@@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
int node, int queue_index);
void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring **pring);
+void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+ struct mlx4_en_tx_ring *ring);
int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
struct mlx4_en_tx_ring *ring,
int cq, int user_prio);
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 728a2fb1f5c0..769598f7b6c8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -55,7 +55,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
qp = __mlx4_qp_lookup(dev, qpn);
if (qp)
- atomic_inc(&qp->refcount);
+ refcount_inc(&qp->refcount);
spin_unlock(&qp_table->lock);
@@ -66,7 +66,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type)
qp->event(qp, event_type);
- if (atomic_dec_and_test(&qp->refcount))
+ if (refcount_dec_and_test(&qp->refcount))
complete(&qp->free);
}
@@ -420,7 +420,7 @@ int mlx4_qp_alloc(struct mlx4_dev *dev, int qpn, struct mlx4_qp *qp)
if (err)
goto err_icm;
- atomic_set(&qp->refcount, 1);
+ refcount_set(&qp->refcount, 1);
init_completion(&qp->free);
return 0;
@@ -520,7 +520,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_remove);
void mlx4_qp_free(struct mlx4_dev *dev, struct mlx4_qp *qp)
{
- if (atomic_dec_and_test(&qp->refcount))
+ if (refcount_dec_and_test(&qp->refcount))
complete(&qp->free);
wait_for_completion(&qp->free);
@@ -925,7 +925,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
context->flags &= cpu_to_be32(~(0xf << 28));
context->flags |= cpu_to_be32(states[i + 1] << 28);
if (states[i + 1] != MLX4_QP_STATE_RTR)
- context->params2 &= ~MLX4_QP_BIT_FPP;
+ context->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP);
err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1],
context, 0, 0, qp);
if (err) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index fabb53379727..04304dd894c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -3185,7 +3185,7 @@ static int verify_qp_parameters(struct mlx4_dev *dev,
optpar = be32_to_cpu(*(__be32 *) inbox->buf);
if (slave != mlx4_master_func_num(dev)) {
- qp_ctx->params2 &= ~MLX4_QP_BIT_FPP;
+ qp_ctx->params2 &= ~cpu_to_be32(MLX4_QP_BIT_FPP);
/* setting QP rate-limit is disallowed for VFs */
if (qp_ctx->rate_limit_params)
return -EPERM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index bedf52126824..cbe4d9746ddf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -49,7 +49,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1));
rcu_read_unlock();
if (srq)
- atomic_inc(&srq->refcount);
+ refcount_inc(&srq->refcount);
else {
mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn);
return;
@@ -57,7 +57,7 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type)
srq->event(srq, event_type);
- if (atomic_dec_and_test(&srq->refcount))
+ if (refcount_dec_and_test(&srq->refcount))
complete(&srq->free);
}
@@ -203,7 +203,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
if (err)
goto err_radix;
- atomic_set(&srq->refcount, 1);
+ refcount_set(&srq->refcount, 1);
init_completion(&srq->free);
return 0;
@@ -232,7 +232,7 @@ void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq)
radix_tree_delete(&srq_table->tree, srq->srqn);
spin_unlock_irq(&srq_table->lock);
- if (atomic_dec_and_test(&srq->refcount))
+ if (refcount_dec_and_test(&srq->refcount))
complete(&srq->free);
wait_for_completion(&srq->free);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index fdaef00465d7..25deaa5a534c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -6,6 +6,7 @@ config MLX5_CORE
tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver"
depends on MAY_USE_DEVLINK
depends on PCI
+ imply PTP_1588_CLOCK
default n
---help---
Core driver for low level functionality of the ConnectX-4 and
@@ -29,7 +30,6 @@ config MLX5_CORE_EN
bool "Mellanox Technologies ConnectX-4 Ethernet support"
depends on NETDEVICES && ETHERNET && INET && PCI && MLX5_CORE
depends on IPV6=y || IPV6=n || MLX5_CORE=m
- imply PTP_1588_CLOCK
default n
---help---
Ethernet support in Mellanox Technologies ConnectX-4 NIC.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 87a3099808f3..100fe4ecad9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -4,7 +4,7 @@ subdir-ccflags-y += -I$(src)
mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
- fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \
+ fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o lib/clock.o \
diag/fs_tracepoint.o
mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o
@@ -13,7 +13,7 @@ mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \
fpga/ipsec.o
mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
- en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
+ en_tx.o en_rx.o en_rx_am.o en_txrx.o vxlan.o \
en_arfs.o en_fs_ethtool.o en_selftest.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
@@ -22,7 +22,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc.
mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o
-mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
+mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o ipoib/ipoib_vlan.o
mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
en_accel/ipsec_stats.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 336d4738b807..1016e05c7ec7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -58,7 +58,7 @@ void mlx5_cq_tasklet_cb(unsigned long data)
tasklet_ctx.list) {
list_del_init(&mcq->tasklet_ctx.list);
mcq->tasklet_ctx.comp(mcq);
- if (atomic_dec_and_test(&mcq->refcount))
+ if (refcount_dec_and_test(&mcq->refcount))
complete(&mcq->free);
if (time_after(jiffies, end))
break;
@@ -80,7 +80,7 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq)
* still arrive.
*/
if (list_empty_careful(&cq->tasklet_ctx.list)) {
- atomic_inc(&cq->refcount);
+ refcount_inc(&cq->refcount);
list_add_tail(&cq->tasklet_ctx.list, &tasklet_ctx->list);
}
spin_unlock_irqrestore(&tasklet_ctx->lock, flags);
@@ -94,7 +94,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
spin_lock(&table->lock);
cq = radix_tree_lookup(&table->tree, cqn);
if (likely(cq))
- atomic_inc(&cq->refcount);
+ refcount_inc(&cq->refcount);
spin_unlock(&table->lock);
if (!cq) {
@@ -106,7 +106,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn)
cq->comp(cq);
- if (atomic_dec_and_test(&cq->refcount))
+ if (refcount_dec_and_test(&cq->refcount))
complete(&cq->free);
}
@@ -119,7 +119,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
cq = radix_tree_lookup(&table->tree, cqn);
if (cq)
- atomic_inc(&cq->refcount);
+ refcount_inc(&cq->refcount);
spin_unlock(&table->lock);
@@ -130,7 +130,7 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type)
cq->event(cq, event_type);
- if (atomic_dec_and_test(&cq->refcount))
+ if (refcount_dec_and_test(&cq->refcount))
complete(&cq->free);
}
@@ -159,7 +159,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->cqn = MLX5_GET(create_cq_out, out, cqn);
cq->cons_index = 0;
cq->arm_sn = 0;
- atomic_set(&cq->refcount, 1);
+ refcount_set(&cq->refcount, 1);
init_completion(&cq->free);
if (!cq->comp)
cq->comp = mlx5_add_cq_to_tasklet;
@@ -222,7 +222,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
synchronize_irq(cq->irqn);
mlx5_debug_cq_remove(dev, cq);
- if (atomic_dec_and_test(&cq->refcount))
+ if (refcount_dec_and_test(&cq->refcount))
complete(&cq->free);
wait_for_completion(&cq->free);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index cc13d3dbd366..e613ce02216d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -267,28 +267,6 @@ struct mlx5e_dcbx {
};
#endif
-#define MAX_PIN_NUM 8
-struct mlx5e_pps {
- u8 pin_caps[MAX_PIN_NUM];
- struct work_struct out_work;
- u64 start[MAX_PIN_NUM];
- u8 enabled;
-};
-
-struct mlx5e_tstamp {
- rwlock_t lock;
- struct cyclecounter cycles;
- struct timecounter clock;
- struct hwtstamp_config hwtstamp_config;
- u32 nominal_c_mult;
- unsigned long overflow_period;
- struct delayed_work overflow_work;
- struct mlx5_core_dev *mdev;
- struct ptp_clock *ptp;
- struct ptp_clock_info ptp_info;
- struct mlx5e_pps pps_info;
-};
-
enum {
MLX5E_RQ_STATE_ENABLED,
MLX5E_RQ_STATE_AM,
@@ -375,9 +353,10 @@ struct mlx5e_txqsq {
u8 min_inline_mode;
u16 edge;
struct device *pdev;
- struct mlx5e_tstamp *tstamp;
__be32 mkey_be;
unsigned long state;
+ struct hwtstamp_config *tstamp;
+ struct mlx5_clock *clock;
/* control path */
struct mlx5_wq_ctrl wq_ctrl;
@@ -543,10 +522,11 @@ struct mlx5e_rq {
struct mlx5e_channel *channel;
struct device *pdev;
struct net_device *netdev;
- struct mlx5e_tstamp *tstamp;
struct mlx5e_rq_stats stats;
struct mlx5e_cq cq;
struct mlx5e_page_cache page_cache;
+ struct hwtstamp_config *tstamp;
+ struct mlx5_clock *clock;
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
mlx5e_fp_post_rx_wqes post_wqes;
@@ -588,7 +568,7 @@ struct mlx5e_channel {
/* control */
struct mlx5e_priv *priv;
struct mlx5_core_dev *mdev;
- struct mlx5e_tstamp *tstamp;
+ struct hwtstamp_config *tstamp;
int ix;
};
@@ -789,7 +769,7 @@ struct mlx5e_priv {
struct mlx5_core_dev *mdev;
struct net_device *netdev;
struct mlx5e_stats stats;
- struct mlx5e_tstamp tstamp;
+ struct hwtstamp_config tstamp;
u16 q_counter;
#ifdef CONFIG_MLX5_CORE_EN_DCB
struct mlx5e_dcbx dcbx;
@@ -873,12 +853,6 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv);
void mlx5e_ethtool_cleanup_steering(struct mlx5e_priv *priv);
void mlx5e_set_rx_mode_work(struct work_struct *work);
-void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp,
- struct skb_shared_hwtstamps *hwts);
-void mlx5e_timestamp_init(struct mlx5e_priv *priv);
-void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv);
-void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
- struct ptp_clock_event *event);
int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr);
int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr);
int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val);
@@ -889,6 +863,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,
u16 vid);
void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv);
void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv);
+void mlx5e_timestamp_set(struct mlx5e_priv *priv);
struct mlx5e_redirect_rqt_param {
bool is_rss;
@@ -1081,6 +1056,9 @@ int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
struct ethtool_flash *flash);
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv);
+
/* mlx5e generic netdev management API */
struct net_device*
mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 4614ddfa91eb..6a7c8b04447e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -256,7 +256,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
goto drop;
}
mdata = mlx5e_ipsec_add_metadata(skb);
- if (unlikely(IS_ERR(mdata))) {
+ if (IS_ERR(mdata)) {
atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
goto drop;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
deleted file mode 100644
index 84dd63e74041..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ /dev/null
@@ -1,619 +0,0 @@
-/*
- * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- * - Redistributions of source code must retain the above
- * copyright notice, this list of conditions and the following
- * disclaimer.
- *
- * - Redistributions in binary form must reproduce the above
- * copyright notice, this list of conditions and the following
- * disclaimer in the documentation and/or other materials
- * provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/clocksource.h>
-#include "en.h"
-
-enum {
- MLX5E_CYCLES_SHIFT = 23
-};
-
-enum {
- MLX5E_PIN_MODE_IN = 0x0,
- MLX5E_PIN_MODE_OUT = 0x1,
-};
-
-enum {
- MLX5E_OUT_PATTERN_PULSE = 0x0,
- MLX5E_OUT_PATTERN_PERIODIC = 0x1,
-};
-
-enum {
- MLX5E_EVENT_MODE_DISABLE = 0x0,
- MLX5E_EVENT_MODE_REPETETIVE = 0x1,
- MLX5E_EVENT_MODE_ONCE_TILL_ARM = 0x2,
-};
-
-enum {
- MLX5E_MTPPS_FS_ENABLE = BIT(0x0),
- MLX5E_MTPPS_FS_PATTERN = BIT(0x2),
- MLX5E_MTPPS_FS_PIN_MODE = BIT(0x3),
- MLX5E_MTPPS_FS_TIME_STAMP = BIT(0x4),
- MLX5E_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
- MLX5E_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
-};
-
-void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp,
- struct skb_shared_hwtstamps *hwts)
-{
- u64 nsec;
-
- read_lock(&tstamp->lock);
- nsec = timecounter_cyc2time(&tstamp->clock, timestamp);
- read_unlock(&tstamp->lock);
-
- hwts->hwtstamp = ns_to_ktime(nsec);
-}
-
-static u64 mlx5e_read_internal_timer(const struct cyclecounter *cc)
-{
- struct mlx5e_tstamp *tstamp = container_of(cc, struct mlx5e_tstamp,
- cycles);
-
- return mlx5_read_internal_timer(tstamp->mdev) & cc->mask;
-}
-
-static void mlx5e_pps_out(struct work_struct *work)
-{
- struct mlx5e_pps *pps_info = container_of(work, struct mlx5e_pps,
- out_work);
- struct mlx5e_tstamp *tstamp = container_of(pps_info, struct mlx5e_tstamp,
- pps_info);
- u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
- unsigned long flags;
- int i;
-
- for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
- u64 tstart;
-
- write_lock_irqsave(&tstamp->lock, flags);
- tstart = tstamp->pps_info.start[i];
- tstamp->pps_info.start[i] = 0;
- write_unlock_irqrestore(&tstamp->lock, flags);
- if (!tstart)
- continue;
-
- MLX5_SET(mtpps_reg, in, pin, i);
- MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
- MLX5_SET(mtpps_reg, in, field_select, MLX5E_MTPPS_FS_TIME_STAMP);
- mlx5_set_mtpps(tstamp->mdev, in, sizeof(in));
- }
-}
-
-static void mlx5e_timestamp_overflow(struct work_struct *work)
-{
- struct delayed_work *dwork = to_delayed_work(work);
- struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
- overflow_work);
- struct mlx5e_priv *priv = container_of(tstamp, struct mlx5e_priv, tstamp);
- unsigned long flags;
-
- write_lock_irqsave(&tstamp->lock, flags);
- timecounter_read(&tstamp->clock);
- write_unlock_irqrestore(&tstamp->lock, flags);
- queue_delayed_work(priv->wq, &tstamp->overflow_work,
- msecs_to_jiffies(tstamp->overflow_period * 1000));
-}
-
-int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
-{
- struct hwtstamp_config config;
- int err;
-
- if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
- return -EOPNOTSUPP;
-
- if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
- return -EFAULT;
-
- /* TX HW timestamp */
- switch (config.tx_type) {
- case HWTSTAMP_TX_OFF:
- case HWTSTAMP_TX_ON:
- break;
- default:
- return -ERANGE;
- }
-
- mutex_lock(&priv->state_lock);
- /* RX HW timestamp */
- switch (config.rx_filter) {
- case HWTSTAMP_FILTER_NONE:
- /* Reset CQE compression to Admin default */
- mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
- break;
- case HWTSTAMP_FILTER_ALL:
- case HWTSTAMP_FILTER_SOME:
- case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
- case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
- case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
- case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
- case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
- case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
- case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
- case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
- case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
- case HWTSTAMP_FILTER_PTP_V2_EVENT:
- case HWTSTAMP_FILTER_PTP_V2_SYNC:
- case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
- case HWTSTAMP_FILTER_NTP_ALL:
- /* Disable CQE compression */
- netdev_warn(priv->netdev, "Disabling cqe compression");
- err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
- if (err) {
- netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
- mutex_unlock(&priv->state_lock);
- return err;
- }
- config.rx_filter = HWTSTAMP_FILTER_ALL;
- break;
- default:
- mutex_unlock(&priv->state_lock);
- return -ERANGE;
- }
-
- memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config));
- mutex_unlock(&priv->state_lock);
-
- return copy_to_user(ifr->ifr_data, &config,
- sizeof(config)) ? -EFAULT : 0;
-}
-
-int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
-{
- struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config;
-
- if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
- return -EOPNOTSUPP;
-
- return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
-}
-
-static int mlx5e_ptp_settime(struct ptp_clock_info *ptp,
- const struct timespec64 *ts)
-{
- struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
- ptp_info);
- u64 ns = timespec64_to_ns(ts);
- unsigned long flags;
-
- write_lock_irqsave(&tstamp->lock, flags);
- timecounter_init(&tstamp->clock, &tstamp->cycles, ns);
- write_unlock_irqrestore(&tstamp->lock, flags);
-
- return 0;
-}
-
-static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp,
- struct timespec64 *ts)
-{
- struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
- ptp_info);
- u64 ns;
- unsigned long flags;
-
- write_lock_irqsave(&tstamp->lock, flags);
- ns = timecounter_read(&tstamp->clock);
- write_unlock_irqrestore(&tstamp->lock, flags);
-
- *ts = ns_to_timespec64(ns);
-
- return 0;
-}
-
-static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
-{
- struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
- ptp_info);
- unsigned long flags;
-
- write_lock_irqsave(&tstamp->lock, flags);
- timecounter_adjtime(&tstamp->clock, delta);
- write_unlock_irqrestore(&tstamp->lock, flags);
-
- return 0;
-}
-
-static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
-{
- u64 adj;
- u32 diff;
- unsigned long flags;
- int neg_adj = 0;
- struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
- ptp_info);
-
- if (delta < 0) {
- neg_adj = 1;
- delta = -delta;
- }
-
- adj = tstamp->nominal_c_mult;
- adj *= delta;
- diff = div_u64(adj, 1000000000ULL);
-
- write_lock_irqsave(&tstamp->lock, flags);
- timecounter_read(&tstamp->clock);
- tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff :
- tstamp->nominal_c_mult + diff;
- write_unlock_irqrestore(&tstamp->lock, flags);
-
- return 0;
-}
-
-static int mlx5e_extts_configure(struct ptp_clock_info *ptp,
- struct ptp_clock_request *rq,
- int on)
-{
- struct mlx5e_tstamp *tstamp =
- container_of(ptp, struct mlx5e_tstamp, ptp_info);
- struct mlx5e_priv *priv =
- container_of(tstamp, struct mlx5e_priv, tstamp);
- u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
- u32 field_select = 0;
- u8 pin_mode = 0;
- u8 pattern = 0;
- int pin = -1;
- int err = 0;
-
- if (!MLX5_PPS_CAP(priv->mdev))
- return -EOPNOTSUPP;
-
- if (rq->extts.index >= tstamp->ptp_info.n_pins)
- return -EINVAL;
-
- if (on) {
- pin = ptp_find_pin(tstamp->ptp, PTP_PF_EXTTS, rq->extts.index);
- if (pin < 0)
- return -EBUSY;
- pin_mode = MLX5E_PIN_MODE_IN;
- pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
- field_select = MLX5E_MTPPS_FS_PIN_MODE |
- MLX5E_MTPPS_FS_PATTERN |
- MLX5E_MTPPS_FS_ENABLE;
- } else {
- pin = rq->extts.index;
- field_select = MLX5E_MTPPS_FS_ENABLE;
- }
-
- MLX5_SET(mtpps_reg, in, pin, pin);
- MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
- MLX5_SET(mtpps_reg, in, pattern, pattern);
- MLX5_SET(mtpps_reg, in, enable, on);
- MLX5_SET(mtpps_reg, in, field_select, field_select);
-
- err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
- if (err)
- return err;
-
- return mlx5_set_mtppse(priv->mdev, pin, 0,
- MLX5E_EVENT_MODE_REPETETIVE & on);
-}
-
-static int mlx5e_perout_configure(struct ptp_clock_info *ptp,
- struct ptp_clock_request *rq,
- int on)
-{
- struct mlx5e_tstamp *tstamp =
- container_of(ptp, struct mlx5e_tstamp, ptp_info);
- struct mlx5e_priv *priv =
- container_of(tstamp, struct mlx5e_priv, tstamp);
- u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
- u64 nsec_now, nsec_delta, time_stamp = 0;
- u64 cycles_now, cycles_delta;
- struct timespec64 ts;
- unsigned long flags;
- u32 field_select = 0;
- u8 pin_mode = 0;
- u8 pattern = 0;
- int pin = -1;
- int err = 0;
- s64 ns;
-
- if (!MLX5_PPS_CAP(priv->mdev))
- return -EOPNOTSUPP;
-
- if (rq->perout.index >= tstamp->ptp_info.n_pins)
- return -EINVAL;
-
- if (on) {
- pin = ptp_find_pin(tstamp->ptp, PTP_PF_PEROUT,
- rq->perout.index);
- if (pin < 0)
- return -EBUSY;
-
- pin_mode = MLX5E_PIN_MODE_OUT;
- pattern = MLX5E_OUT_PATTERN_PERIODIC;
- ts.tv_sec = rq->perout.period.sec;
- ts.tv_nsec = rq->perout.period.nsec;
- ns = timespec64_to_ns(&ts);
-
- if ((ns >> 1) != 500000000LL)
- return -EINVAL;
-
- ts.tv_sec = rq->perout.start.sec;
- ts.tv_nsec = rq->perout.start.nsec;
- ns = timespec64_to_ns(&ts);
- cycles_now = mlx5_read_internal_timer(tstamp->mdev);
- write_lock_irqsave(&tstamp->lock, flags);
- nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
- nsec_delta = ns - nsec_now;
- cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
- tstamp->cycles.mult);
- write_unlock_irqrestore(&tstamp->lock, flags);
- time_stamp = cycles_now + cycles_delta;
- field_select = MLX5E_MTPPS_FS_PIN_MODE |
- MLX5E_MTPPS_FS_PATTERN |
- MLX5E_MTPPS_FS_ENABLE |
- MLX5E_MTPPS_FS_TIME_STAMP;
- } else {
- pin = rq->perout.index;
- field_select = MLX5E_MTPPS_FS_ENABLE;
- }
-
- MLX5_SET(mtpps_reg, in, pin, pin);
- MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
- MLX5_SET(mtpps_reg, in, pattern, pattern);
- MLX5_SET(mtpps_reg, in, enable, on);
- MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
- MLX5_SET(mtpps_reg, in, field_select, field_select);
-
- err = mlx5_set_mtpps(priv->mdev, in, sizeof(in));
- if (err)
- return err;
-
- return mlx5_set_mtppse(priv->mdev, pin, 0,
- MLX5E_EVENT_MODE_REPETETIVE & on);
-}
-
-static int mlx5e_pps_configure(struct ptp_clock_info *ptp,
- struct ptp_clock_request *rq,
- int on)
-{
- struct mlx5e_tstamp *tstamp =
- container_of(ptp, struct mlx5e_tstamp, ptp_info);
-
- tstamp->pps_info.enabled = !!on;
- return 0;
-}
-
-static int mlx5e_ptp_enable(struct ptp_clock_info *ptp,
- struct ptp_clock_request *rq,
- int on)
-{
- switch (rq->type) {
- case PTP_CLK_REQ_EXTTS:
- return mlx5e_extts_configure(ptp, rq, on);
- case PTP_CLK_REQ_PEROUT:
- return mlx5e_perout_configure(ptp, rq, on);
- case PTP_CLK_REQ_PPS:
- return mlx5e_pps_configure(ptp, rq, on);
- default:
- return -EOPNOTSUPP;
- }
- return 0;
-}
-
-static int mlx5e_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
- enum ptp_pin_function func, unsigned int chan)
-{
- return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0;
-}
-
-static const struct ptp_clock_info mlx5e_ptp_clock_info = {
- .owner = THIS_MODULE,
- .max_adj = 100000000,
- .n_alarm = 0,
- .n_ext_ts = 0,
- .n_per_out = 0,
- .n_pins = 0,
- .pps = 0,
- .adjfreq = mlx5e_ptp_adjfreq,
- .adjtime = mlx5e_ptp_adjtime,
- .gettime64 = mlx5e_ptp_gettime,
- .settime64 = mlx5e_ptp_settime,
- .enable = NULL,
- .verify = NULL,
-};
-
-static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp)
-{
- tstamp->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF;
- tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
-}
-
-static int mlx5e_init_pin_config(struct mlx5e_tstamp *tstamp)
-{
- int i;
-
- tstamp->ptp_info.pin_config =
- kzalloc(sizeof(*tstamp->ptp_info.pin_config) *
- tstamp->ptp_info.n_pins, GFP_KERNEL);
- if (!tstamp->ptp_info.pin_config)
- return -ENOMEM;
- tstamp->ptp_info.enable = mlx5e_ptp_enable;
- tstamp->ptp_info.verify = mlx5e_ptp_verify;
- tstamp->ptp_info.pps = 1;
-
- for (i = 0; i < tstamp->ptp_info.n_pins; i++) {
- snprintf(tstamp->ptp_info.pin_config[i].name,
- sizeof(tstamp->ptp_info.pin_config[i].name),
- "mlx5_pps%d", i);
- tstamp->ptp_info.pin_config[i].index = i;
- tstamp->ptp_info.pin_config[i].func = PTP_PF_NONE;
- tstamp->ptp_info.pin_config[i].chan = i;
- }
-
- return 0;
-}
-
-static void mlx5e_get_pps_caps(struct mlx5e_priv *priv,
- struct mlx5e_tstamp *tstamp)
-{
- u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
-
- mlx5_query_mtpps(priv->mdev, out, sizeof(out));
-
- tstamp->ptp_info.n_pins = MLX5_GET(mtpps_reg, out,
- cap_number_of_pps_pins);
- tstamp->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out,
- cap_max_num_of_pps_in_pins);
- tstamp->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
- cap_max_num_of_pps_out_pins);
-
- tstamp->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
- tstamp->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
- tstamp->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
- tstamp->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
- tstamp->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
- tstamp->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
- tstamp->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
- tstamp->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
-}
-
-void mlx5e_pps_event_handler(struct mlx5e_priv *priv,
- struct ptp_clock_event *event)
-{
- struct net_device *netdev = priv->netdev;
- struct mlx5e_tstamp *tstamp = &priv->tstamp;
- struct timespec64 ts;
- u64 nsec_now, nsec_delta;
- u64 cycles_now, cycles_delta;
- int pin = event->index;
- s64 ns;
- unsigned long flags;
-
- switch (tstamp->ptp_info.pin_config[pin].func) {
- case PTP_PF_EXTTS:
- if (tstamp->pps_info.enabled) {
- event->type = PTP_CLOCK_PPSUSR;
- event->pps_times.ts_real = ns_to_timespec64(event->timestamp);
- } else {
- event->type = PTP_CLOCK_EXTTS;
- }
- ptp_clock_event(tstamp->ptp, event);
- break;
- case PTP_PF_PEROUT:
- mlx5e_ptp_gettime(&tstamp->ptp_info, &ts);
- cycles_now = mlx5_read_internal_timer(tstamp->mdev);
- ts.tv_sec += 1;
- ts.tv_nsec = 0;
- ns = timespec64_to_ns(&ts);
- write_lock_irqsave(&tstamp->lock, flags);
- nsec_now = timecounter_cyc2time(&tstamp->clock, cycles_now);
- nsec_delta = ns - nsec_now;
- cycles_delta = div64_u64(nsec_delta << tstamp->cycles.shift,
- tstamp->cycles.mult);
- tstamp->pps_info.start[pin] = cycles_now + cycles_delta;
- queue_work(priv->wq, &tstamp->pps_info.out_work);
- write_unlock_irqrestore(&tstamp->lock, flags);
- break;
- default:
- netdev_err(netdev, "%s: Unhandled event\n", __func__);
- }
-}
-
-void mlx5e_timestamp_init(struct mlx5e_priv *priv)
-{
- struct mlx5e_tstamp *tstamp = &priv->tstamp;
- u64 ns;
- u64 frac = 0;
- u32 dev_freq;
-
- mlx5e_timestamp_init_config(tstamp);
- dev_freq = MLX5_CAP_GEN(priv->mdev, device_frequency_khz);
- if (!dev_freq) {
- mlx5_core_warn(priv->mdev, "invalid device_frequency_khz, aborting HW clock init\n");
- return;
- }
- rwlock_init(&tstamp->lock);
- tstamp->cycles.read = mlx5e_read_internal_timer;
- tstamp->cycles.shift = MLX5E_CYCLES_SHIFT;
- tstamp->cycles.mult = clocksource_khz2mult(dev_freq,
- tstamp->cycles.shift);
- tstamp->nominal_c_mult = tstamp->cycles.mult;
- tstamp->cycles.mask = CLOCKSOURCE_MASK(41);
- tstamp->mdev = priv->mdev;
-
- timecounter_init(&tstamp->clock, &tstamp->cycles,
- ktime_to_ns(ktime_get_real()));
-
- /* Calculate period in seconds to call the overflow watchdog - to make
- * sure counter is checked at least once every wrap around.
- */
- ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask,
- frac, &frac);
- do_div(ns, NSEC_PER_SEC / 2 / HZ);
- tstamp->overflow_period = ns;
-
- INIT_WORK(&tstamp->pps_info.out_work, mlx5e_pps_out);
- INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
- if (tstamp->overflow_period)
- queue_delayed_work(priv->wq, &tstamp->overflow_work, 0);
- else
- mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n");
-
- /* Configure the PHC */
- tstamp->ptp_info = mlx5e_ptp_clock_info;
- snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp");
-
- /* Initialize 1PPS data structures */
- if (MLX5_PPS_CAP(priv->mdev))
- mlx5e_get_pps_caps(priv, tstamp);
- if (tstamp->ptp_info.n_pins)
- mlx5e_init_pin_config(tstamp);
-
- tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
- &priv->mdev->pdev->dev);
- if (IS_ERR(tstamp->ptp)) {
- mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n",
- PTR_ERR(tstamp->ptp));
- tstamp->ptp = NULL;
- }
-}
-
-void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv)
-{
- struct mlx5e_tstamp *tstamp = &priv->tstamp;
-
- if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
- return;
-
- if (priv->tstamp.ptp) {
- ptp_clock_unregister(priv->tstamp.ptp);
- priv->tstamp.ptp = NULL;
- }
-
- cancel_work_sync(&tstamp->pps_info.out_work);
- cancel_delayed_work_sync(&tstamp->overflow_work);
- kfree(tstamp->ptp_info.pin_config);
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index ece3fb147e3e..157d02917237 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -134,6 +134,7 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
mlx5_core_destroy_mkey(mdev, &res->mkey);
mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
mlx5_core_dealloc_pd(mdev, res->pdn);
+ memset(res, 0, sizeof(*res));
}
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index d12e9fc0d76b..81a112e40fe3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1417,14 +1417,15 @@ static int mlx5e_set_pauseparam(struct net_device *netdev,
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
int ret;
ret = ethtool_op_get_ts_info(priv->netdev, info);
if (ret)
return ret;
- info->phc_index = priv->tstamp.ptp ?
- ptp_clock_index(priv->tstamp.ptp) : -1;
+ info->phc_index = mdev->clock.ptp ?
+ ptp_clock_index(mdev->clock.ptp) : -1;
if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
return 0;
@@ -1754,7 +1755,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
if (!MLX5_CAP_GEN(mdev, cqe_compression))
return -EOPNOTSUPP;
- if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) {
+ if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index cc11bbbd0309..560b208c0483 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -373,8 +373,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
enum mlx5_dev_event event, unsigned long param)
{
struct mlx5e_priv *priv = vpriv;
- struct ptp_clock_event ptp_event;
- struct mlx5_eqe *eqe = NULL;
if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state))
return;
@@ -384,14 +382,6 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
case MLX5_DEV_EVENT_PORT_DOWN:
queue_work(priv->wq, &priv->update_carrier_work);
break;
- case MLX5_DEV_EVENT_PPS:
- eqe = (struct mlx5_eqe *)param;
- ptp_event.index = eqe->data.pps.pin;
- ptp_event.timestamp =
- timecounter_cyc2time(&priv->tstamp.clock,
- be64_to_cpu(eqe->data.pps.time_stamp));
- mlx5e_pps_event_handler(vpriv, &ptp_event);
- break;
default:
break;
}
@@ -585,6 +575,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq->pdev = c->pdev;
rq->netdev = c->netdev;
rq->tstamp = c->tstamp;
+ rq->clock = &mdev->clock;
rq->channel = c;
rq->ix = c->ix;
rq->mdev = mdev;
@@ -1123,6 +1114,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
sq->pdev = c->pdev;
sq->tstamp = c->tstamp;
+ sq->clock = &mdev->clock;
sq->mkey_be = c->mkey_be;
sq->channel = c;
sq->txq_ix = txq_ix;
@@ -2678,6 +2670,12 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
netif_carrier_on(netdev);
}
+void mlx5e_timestamp_set(struct mlx5e_priv *priv)
+{
+ priv->tstamp.tx_type = HWTSTAMP_TX_OFF;
+ priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
+}
+
int mlx5e_open_locked(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -2693,7 +2691,7 @@ int mlx5e_open_locked(struct net_device *netdev)
mlx5e_activate_priv_channels(priv);
if (priv->profile->update_carrier)
priv->profile->update_carrier(priv);
- mlx5e_timestamp_init(priv);
+ mlx5e_timestamp_set(priv);
if (priv->profile->update_stats)
queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
@@ -2731,7 +2729,6 @@ int mlx5e_close_locked(struct net_device *netdev)
clear_bit(MLX5E_STATE_OPENED, &priv->state);
- mlx5e_timestamp_cleanup(priv);
netif_carrier_off(priv->netdev);
mlx5e_deactivate_priv_channels(priv);
mlx5e_close_channels(&priv->channels);
@@ -3086,13 +3083,10 @@ out:
}
#ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *cls_flower)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
- if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
- cls_flower->common.chain_index)
+ if (cls_flower->common.chain_index)
return -EOPNOTSUPP;
switch (cls_flower->command) {
@@ -3106,15 +3100,49 @@ static int mlx5e_setup_tc_cls_flower(struct net_device *dev,
return -EOPNOTSUPP;
}
}
+
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct mlx5e_priv *priv = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_setup_tc_cls_flower(priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, mlx5e_setup_tc_block_cb,
+ priv, priv);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, mlx5e_setup_tc_block_cb,
+ priv);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
#endif
-static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
- void *type_data)
+int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
{
switch (type) {
#ifdef CONFIG_MLX5_ESWITCH
- case TC_SETUP_CLSFLOWER:
- return mlx5e_setup_tc_cls_flower(dev, type_data);
+ case TC_SETUP_BLOCK:
+ return mlx5e_setup_tc_block(dev, type_data);
#endif
case TC_SETUP_MQPRIO:
return mlx5e_setup_tc_mqprio(dev, type_data);
@@ -3403,6 +3431,80 @@ out:
return err;
}
+int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+ struct hwtstamp_config config;
+ int err;
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+ return -EOPNOTSUPP;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ /* TX HW timestamp */
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ mutex_lock(&priv->state_lock);
+ /* RX HW timestamp */
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ /* Reset CQE compression to Admin default */
+ mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ /* Disable CQE compression */
+ netdev_warn(priv->netdev, "Disabling cqe compression");
+ err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
+ if (err) {
+ netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
+ mutex_unlock(&priv->state_lock);
+ return err;
+ }
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ default:
+ mutex_unlock(&priv->state_lock);
+ return -ERANGE;
+ }
+
+ memcpy(&priv->tstamp, &config, sizeof(config));
+ mutex_unlock(&priv->state_lock);
+
+ return copy_to_user(ifr->ifr_data, &config,
+ sizeof(config)) ? -EFAULT : 0;
+}
+
+int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
+{
+ struct hwtstamp_config *cfg = &priv->tstamp;
+
+ if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+ return -EOPNOTSUPP;
+
+ return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
+}
+
static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
struct mlx5e_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 45e03c427faf..0edb7065d811 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -34,6 +34,7 @@
#include <linux/mlx5/fs.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
+#include <net/act_api.h>
#include <net/netevent.h>
#include <net/arp.h>
@@ -658,23 +659,12 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
}
static int
-mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *cls_flower)
{
- struct mlx5e_priv *priv = netdev_priv(dev);
-
- if (!is_classid_clsact_ingress(cls_flower->common.classid) ||
- cls_flower->common.chain_index)
+ if (cls_flower->common.chain_index)
return -EOPNOTSUPP;
- if (cls_flower->egress_dev) {
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-
- dev = mlx5_eswitch_get_uplink_netdev(esw);
- return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
- cls_flower);
- }
-
switch (cls_flower->command) {
case TC_CLSFLOWER_REPLACE:
return mlx5e_configure_flower(priv, cls_flower);
@@ -687,12 +677,45 @@ mlx5e_rep_setup_tc_cls_flower(struct net_device *dev,
}
}
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv)
+{
+ struct mlx5e_priv *priv = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return mlx5e_rep_setup_tc_cls_flower(priv, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlx5e_rep_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, mlx5e_rep_setup_tc_cb,
+ priv, priv);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, mlx5e_rep_setup_tc_cb, priv);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
switch (type) {
- case TC_SETUP_CLSFLOWER:
- return mlx5e_rep_setup_tc_cls_flower(dev, type_data);
+ case TC_SETUP_BLOCK:
+ return mlx5e_rep_setup_tc_block(dev, type_data);
default:
return -EOPNOTSUPP;
}
@@ -986,6 +1009,7 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
{
struct mlx5e_rep_priv *rpriv;
struct net_device *netdev;
+ struct mlx5e_priv *upriv;
int err;
rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
@@ -1017,15 +1041,25 @@ mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
goto err_detach_netdev;
}
+ upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(esw));
+ err = tc_setup_cb_egdev_register(netdev, mlx5e_setup_tc_block_cb,
+ upriv);
+ if (err)
+ goto err_neigh_cleanup;
+
err = register_netdev(netdev);
if (err) {
pr_warn("Failed to register representor netdev for vport %d\n",
rep->vport);
- goto err_neigh_cleanup;
+ goto err_egdev_cleanup;
}
return 0;
+err_egdev_cleanup:
+ tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
+ upriv);
+
err_neigh_cleanup:
mlx5e_rep_neigh_cleanup(rpriv);
@@ -1045,9 +1079,12 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
void *ppriv = priv->ppriv;
+ struct mlx5e_priv *upriv;
unregister_netdev(rep->netdev);
-
+ upriv = netdev_priv(mlx5_eswitch_get_uplink_netdev(esw));
+ tc_setup_cb_egdev_unregister(netdev, mlx5e_setup_tc_block_cb,
+ upriv);
mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_detach_netdev(priv);
mlx5e_destroy_netdev(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 15a1687483cc..6d7df4750e0f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -42,10 +42,11 @@
#include "en_rep.h"
#include "ipoib/ipoib.h"
#include "en_accel/ipsec_rxtx.h"
+#include "lib/clock.h"
-static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
+static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
{
- return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
+ return config->rx_filter == HWTSTAMP_FILTER_ALL;
}
static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
@@ -661,7 +662,6 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
struct sk_buff *skb)
{
struct net_device *netdev = rq->netdev;
- struct mlx5e_tstamp *tstamp = rq->tstamp;
int lro_num_seg;
lro_num_seg = be32_to_cpu(cqe->srqn) >> 24;
@@ -676,8 +676,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
rq->stats.lro_bytes += cqe_bcnt;
}
- if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
- mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb));
+ if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
+ skb_hwtstamps(skb)->hwtstamp =
+ mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
skb_record_rx_queue(skb, rq->ix);
@@ -797,6 +798,7 @@ static inline int mlx5e_xdp_handle(struct mlx5e_rq *rq,
return false;
xdp.data = va + *rx_headroom;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
xdp.data_hard_start = va;
@@ -1162,12 +1164,25 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
u32 cqe_bcnt,
struct sk_buff *skb)
{
- struct net_device *netdev = rq->netdev;
- struct mlx5e_tstamp *tstamp = rq->tstamp;
+ struct net_device *netdev;
char *pseudo_header;
+ u32 qpn;
u8 *dgid;
u8 g;
+ qpn = be32_to_cpu(cqe->sop_drop_qpn) & 0xffffff;
+ netdev = mlx5i_pkey_get_netdev(rq->netdev, qpn);
+
+ /* No mapping present, cannot process SKB. This might happen if a child
+ * interface is going down while having unprocessed CQEs on parent RQ
+ */
+ if (unlikely(!netdev)) {
+ /* TODO: add drop counters support */
+ skb->dev = NULL;
+ pr_warn_once("Unable to map QPN %u to dev - dropping skb\n", qpn);
+ return;
+ }
+
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
if ((!g) || dgid[0] != 0xff)
@@ -1188,8 +1203,9 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
- if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
- mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb));
+ if (unlikely(mlx5e_rx_hw_stamp(rq->tstamp)))
+ skb_hwtstamps(skb)->hwtstamp =
+ mlx5_timecounter_cyc2time(rq->clock, get_cqe_ts(cqe));
skb_record_rx_queue(skb, rq->ix);
@@ -1229,6 +1245,10 @@ void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
goto wq_free_wqe;
mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
+ if (unlikely(!skb->dev)) {
+ dev_kfree_skb_any(skb);
+ goto wq_free_wqe;
+ }
napi_gro_receive(rq->cq.napi, skb);
wq_free_wqe:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 1d6925d4369a..a7c208a1ad83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -35,6 +35,7 @@
#include "en.h"
#include "ipoib/ipoib.h"
#include "en_accel/ipsec_rxtx.h"
+#include "lib/clock.h"
#define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS
#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
@@ -452,8 +453,9 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
SKBTX_HW_TSTAMP)) {
struct skb_shared_hwtstamps hwts = {};
- mlx5e_fill_hwstamp(sq->tstamp,
- get_cqe_ts(cqe), &hwts);
+ hwts.hwtstamp =
+ mlx5_timecounter_cyc2time(sq->clock,
+ get_cqe_ts(cqe));
skb_tstamp_tx(skb, &hwts);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index fc606bfd1d6e..60771865c99c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -491,8 +491,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
break;
case MLX5_EVENT_TYPE_PPS_EVENT:
- if (dev->event)
- dev->event(dev, MLX5_DEV_EVENT_PPS, (unsigned long)eqe);
+ mlx5_pps_event(dev, eqe);
break;
case MLX5_EVENT_TYPE_FPGA_ERROR:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 36ecc2b2e187..881e2e55840c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -40,7 +40,8 @@
#include "eswitch.h"
int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft, u32 underlay_qpn)
+ struct mlx5_flow_table *ft, u32 underlay_qpn,
+ bool disconnect)
{
u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0};
u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
@@ -52,7 +53,15 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
MLX5_SET(set_flow_table_root_in, in, opcode,
MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
- MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+
+ if (disconnect) {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 1);
+ MLX5_SET(set_flow_table_root_in, in, table_id, 0);
+ } else {
+ MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+ MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
+ }
+
MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
if (ft->vport) {
MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index c6d7bdf255b6..71e2d0f37ad9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -71,8 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev,
unsigned int index);
int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
- struct mlx5_flow_table *ft,
- u32 underlay_qpn);
+ struct mlx5_flow_table *ft, u32 underlay_qpn,
+ bool disconnect);
int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id);
int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 5a7bea688ec8..c7fa1389bace 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -145,10 +145,10 @@ static struct init_tree_node {
}
};
-enum fs_i_mutex_lock_class {
- FS_MUTEX_GRANDPARENT,
- FS_MUTEX_PARENT,
- FS_MUTEX_CHILD
+enum fs_i_lock_class {
+ FS_LOCK_GRANDPARENT,
+ FS_LOCK_PARENT,
+ FS_LOCK_CHILD
};
static const struct rhashtable_params rhash_fte = {
@@ -168,10 +168,16 @@ static const struct rhashtable_params rhash_fg = {
};
-static void del_rule(struct fs_node *node);
-static void del_flow_table(struct fs_node *node);
-static void del_flow_group(struct fs_node *node);
-static void del_fte(struct fs_node *node);
+static void del_hw_flow_table(struct fs_node *node);
+static void del_hw_flow_group(struct fs_node *node);
+static void del_hw_fte(struct fs_node *node);
+static void del_sw_flow_table(struct fs_node *node);
+static void del_sw_flow_group(struct fs_node *node);
+static void del_sw_fte(struct fs_node *node);
+/* Delete rule (destination) is special case that
+ * requires to lock the FTE for all the deletion process.
+ */
+static void del_sw_hw_rule(struct fs_node *node);
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
struct mlx5_flow_destination *d2);
static struct mlx5_flow_rule *
@@ -179,20 +185,22 @@ find_flow_rule(struct fs_fte *fte,
struct mlx5_flow_destination *dest);
static void tree_init_node(struct fs_node *node,
- unsigned int refcount,
- void (*remove_func)(struct fs_node *))
+ void (*del_hw_func)(struct fs_node *),
+ void (*del_sw_func)(struct fs_node *))
{
- atomic_set(&node->refcount, refcount);
+ refcount_set(&node->refcount, 1);
INIT_LIST_HEAD(&node->list);
INIT_LIST_HEAD(&node->children);
- mutex_init(&node->lock);
- node->remove_func = remove_func;
+ init_rwsem(&node->lock);
+ node->del_hw_func = del_hw_func;
+ node->del_sw_func = del_sw_func;
+ node->active = false;
}
static void tree_add_node(struct fs_node *node, struct fs_node *parent)
{
if (parent)
- atomic_inc(&parent->refcount);
+ refcount_inc(&parent->refcount);
node->parent = parent;
/* Parent is the root */
@@ -202,58 +210,78 @@ static void tree_add_node(struct fs_node *node, struct fs_node *parent)
node->root = parent->root;
}
-static void tree_get_node(struct fs_node *node)
+static int tree_get_node(struct fs_node *node)
{
- atomic_inc(&node->refcount);
+ return refcount_inc_not_zero(&node->refcount);
}
-static void nested_lock_ref_node(struct fs_node *node,
- enum fs_i_mutex_lock_class class)
+static void nested_down_read_ref_node(struct fs_node *node,
+ enum fs_i_lock_class class)
{
if (node) {
- mutex_lock_nested(&node->lock, class);
- atomic_inc(&node->refcount);
+ down_read_nested(&node->lock, class);
+ refcount_inc(&node->refcount);
}
}
-static void lock_ref_node(struct fs_node *node)
+static void nested_down_write_ref_node(struct fs_node *node,
+ enum fs_i_lock_class class)
{
if (node) {
- mutex_lock(&node->lock);
- atomic_inc(&node->refcount);
+ down_write_nested(&node->lock, class);
+ refcount_inc(&node->refcount);
}
}
-static void unlock_ref_node(struct fs_node *node)
+static void down_write_ref_node(struct fs_node *node)
{
if (node) {
- atomic_dec(&node->refcount);
- mutex_unlock(&node->lock);
+ down_write(&node->lock);
+ refcount_inc(&node->refcount);
}
}
+static void up_read_ref_node(struct fs_node *node)
+{
+ refcount_dec(&node->refcount);
+ up_read(&node->lock);
+}
+
+static void up_write_ref_node(struct fs_node *node)
+{
+ refcount_dec(&node->refcount);
+ up_write(&node->lock);
+}
+
static void tree_put_node(struct fs_node *node)
{
struct fs_node *parent_node = node->parent;
- lock_ref_node(parent_node);
- if (atomic_dec_and_test(&node->refcount)) {
- if (parent_node)
+ if (refcount_dec_and_test(&node->refcount)) {
+ if (node->del_hw_func)
+ node->del_hw_func(node);
+ if (parent_node) {
+ /* Only root namespace doesn't have parent and we just
+ * need to free its node.
+ */
+ down_write_ref_node(parent_node);
list_del_init(&node->list);
- if (node->remove_func)
- node->remove_func(node);
- kfree(node);
+ if (node->del_sw_func)
+ node->del_sw_func(node);
+ up_write_ref_node(parent_node);
+ } else {
+ kfree(node);
+ }
node = NULL;
}
- unlock_ref_node(parent_node);
if (!node && parent_node)
tree_put_node(parent_node);
}
static int tree_remove_node(struct fs_node *node)
{
- if (atomic_read(&node->refcount) > 1) {
- atomic_dec(&node->refcount);
+ if (refcount_read(&node->refcount) > 1) {
+ refcount_dec(&node->refcount);
return -EEXIST;
}
tree_put_node(node);
@@ -362,6 +390,15 @@ static struct mlx5_flow_root_namespace *find_root(struct fs_node *node)
return container_of(ns, struct mlx5_flow_root_namespace, ns);
}
+static inline struct mlx5_flow_steering *get_steering(struct fs_node *node)
+{
+ struct mlx5_flow_root_namespace *root = find_root(node);
+
+ if (root)
+ return root->dev->priv.steering;
+ return NULL;
+}
+
static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
{
struct mlx5_flow_root_namespace *root = find_root(node);
@@ -371,26 +408,36 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
return NULL;
}
-static void del_flow_table(struct fs_node *node)
+static void del_hw_flow_table(struct fs_node *node)
{
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
- struct fs_prio *prio;
int err;
fs_get_obj(ft, node);
dev = get_dev(&ft->node);
- err = mlx5_cmd_destroy_flow_table(dev, ft);
- if (err)
- mlx5_core_warn(dev, "flow steering can't destroy ft\n");
- ida_destroy(&ft->fte_allocator);
+ if (node->active) {
+ err = mlx5_cmd_destroy_flow_table(dev, ft);
+ if (err)
+ mlx5_core_warn(dev, "flow steering can't destroy ft\n");
+ }
+}
+
+static void del_sw_flow_table(struct fs_node *node)
+{
+ struct mlx5_flow_table *ft;
+ struct fs_prio *prio;
+
+ fs_get_obj(ft, node);
+
rhltable_destroy(&ft->fgs_hash);
fs_get_obj(prio, ft->node.parent);
prio->num_ft--;
+ kfree(ft);
}
-static void del_rule(struct fs_node *node)
+static void del_sw_hw_rule(struct fs_node *node)
{
struct mlx5_flow_rule *rule;
struct mlx5_flow_table *ft;
@@ -406,7 +453,6 @@ static void del_rule(struct fs_node *node)
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
trace_mlx5_fs_del_rule(rule);
- list_del(&rule->node.list);
if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
mutex_lock(&rule->dest_attr.ft->lock);
list_del(&rule->next_ft);
@@ -434,117 +480,203 @@ out:
"%s can't del rule fg id=%d fte_index=%d\n",
__func__, fg->id, fte->index);
}
+ kfree(rule);
}
-static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg)
+static void del_hw_fte(struct fs_node *node)
{
struct mlx5_flow_table *ft;
- int ret;
+ struct mlx5_flow_group *fg;
+ struct mlx5_core_dev *dev;
+ struct fs_fte *fte;
+ int err;
- ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte);
- WARN_ON(ret);
- fte->status = 0;
+ fs_get_obj(fte, node);
+ fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
- ida_simple_remove(&ft->fte_allocator, fte->index);
+
+ trace_mlx5_fs_del_fte(fte);
+ dev = get_dev(&ft->node);
+ if (node->active) {
+ err = mlx5_cmd_delete_fte(dev, ft,
+ fte->index);
+ if (err)
+ mlx5_core_warn(dev,
+ "flow steering can't delete fte in index %d of flow group id %d\n",
+ fte->index, fg->id);
+ }
}
-static void del_fte(struct fs_node *node)
+static void del_sw_fte(struct fs_node *node)
{
- struct mlx5_flow_table *ft;
+ struct mlx5_flow_steering *steering = get_steering(node);
struct mlx5_flow_group *fg;
- struct mlx5_core_dev *dev;
struct fs_fte *fte;
int err;
fs_get_obj(fte, node);
fs_get_obj(fg, fte->node.parent);
- fs_get_obj(ft, fg->node.parent);
- trace_mlx5_fs_del_fte(fte);
- dev = get_dev(&ft->node);
- err = mlx5_cmd_delete_fte(dev, ft,
- fte->index);
- if (err)
- mlx5_core_warn(dev,
- "flow steering can't delete fte in index %d of flow group id %d\n",
- fte->index, fg->id);
-
- destroy_fte(fte, fg);
+ err = rhashtable_remove_fast(&fg->ftes_hash,
+ &fte->hash,
+ rhash_fte);
+ WARN_ON(err);
+ ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
+ kmem_cache_free(steering->ftes_cache, fte);
}
-static void del_flow_group(struct fs_node *node)
+static void del_hw_flow_group(struct fs_node *node)
{
struct mlx5_flow_group *fg;
struct mlx5_flow_table *ft;
struct mlx5_core_dev *dev;
- int err;
fs_get_obj(fg, node);
fs_get_obj(ft, fg->node.parent);
dev = get_dev(&ft->node);
trace_mlx5_fs_del_fg(fg);
- if (ft->autogroup.active)
- ft->autogroup.num_groups--;
+ if (fg->node.active && mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
+ mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
+ fg->id, ft->id);
+}
+
+static void del_sw_flow_group(struct fs_node *node)
+{
+ struct mlx5_flow_steering *steering = get_steering(node);
+ struct mlx5_flow_group *fg;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ fs_get_obj(fg, node);
+ fs_get_obj(ft, fg->node.parent);
rhashtable_destroy(&fg->ftes_hash);
+ ida_destroy(&fg->fte_allocator);
+ if (ft->autogroup.active)
+ ft->autogroup.num_groups--;
err = rhltable_remove(&ft->fgs_hash,
&fg->hash,
rhash_fg);
WARN_ON(err);
- if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id))
- mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n",
- fg->id, ft->id);
+ kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte)
+{
+ int index;
+ int ret;
+
+ index = ida_simple_get(&fg->fte_allocator, 0, fg->max_ftes, GFP_KERNEL);
+ if (index < 0)
+ return index;
+
+ fte->index = index + fg->start_index;
+ ret = rhashtable_insert_fast(&fg->ftes_hash,
+ &fte->hash,
+ rhash_fte);
+ if (ret)
+ goto err_ida_remove;
+
+ tree_add_node(&fte->node, &fg->node);
+ list_add_tail(&fte->node.list, &fg->node.children);
+ return 0;
+
+err_ida_remove:
+ ida_simple_remove(&fg->fte_allocator, index);
+ return ret;
}
-static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act,
+static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft,
u32 *match_value,
- unsigned int index)
+ struct mlx5_flow_act *flow_act)
{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
struct fs_fte *fte;
- fte = kzalloc(sizeof(*fte), GFP_KERNEL);
+ fte = kmem_cache_zalloc(steering->ftes_cache, GFP_KERNEL);
if (!fte)
return ERR_PTR(-ENOMEM);
memcpy(fte->val, match_value, sizeof(fte->val));
fte->node.type = FS_TYPE_FLOW_ENTRY;
fte->flow_tag = flow_act->flow_tag;
- fte->index = index;
fte->action = flow_act->action;
fte->encap_id = flow_act->encap_id;
fte->modify_id = flow_act->modify_id;
+ tree_init_node(&fte->node, del_hw_fte, del_sw_fte);
+
return fte;
}
-static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
+static void dealloc_flow_group(struct mlx5_flow_steering *steering,
+ struct mlx5_flow_group *fg)
+{
+ rhashtable_destroy(&fg->ftes_hash);
+ kmem_cache_free(steering->fgs_cache, fg);
+}
+
+static struct mlx5_flow_group *alloc_flow_group(struct mlx5_flow_steering *steering,
+ u8 match_criteria_enable,
+ void *match_criteria,
+ int start_index,
+ int end_index)
{
struct mlx5_flow_group *fg;
- void *match_criteria = MLX5_ADDR_OF(create_flow_group_in,
- create_fg_in, match_criteria);
- u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
- create_fg_in,
- match_criteria_enable);
int ret;
- fg = kzalloc(sizeof(*fg), GFP_KERNEL);
+ fg = kmem_cache_zalloc(steering->fgs_cache, GFP_KERNEL);
if (!fg)
return ERR_PTR(-ENOMEM);
ret = rhashtable_init(&fg->ftes_hash, &rhash_fte);
if (ret) {
- kfree(fg);
+ kmem_cache_free(steering->fgs_cache, fg);
return ERR_PTR(ret);
- }
+}
+ ida_init(&fg->fte_allocator);
fg->mask.match_criteria_enable = match_criteria_enable;
memcpy(&fg->mask.match_criteria, match_criteria,
sizeof(fg->mask.match_criteria));
fg->node.type = FS_TYPE_FLOW_GROUP;
- fg->start_index = MLX5_GET(create_flow_group_in, create_fg_in,
- start_flow_index);
- fg->max_ftes = MLX5_GET(create_flow_group_in, create_fg_in,
- end_flow_index) - fg->start_index + 1;
+ fg->start_index = start_index;
+ fg->max_ftes = end_index - start_index + 1;
+
+ return fg;
+}
+
+static struct mlx5_flow_group *alloc_insert_flow_group(struct mlx5_flow_table *ft,
+ u8 match_criteria_enable,
+ void *match_criteria,
+ int start_index,
+ int end_index,
+ struct list_head *prev)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *fg;
+ int ret;
+
+ fg = alloc_flow_group(steering, match_criteria_enable, match_criteria,
+ start_index, end_index);
+ if (IS_ERR(fg))
+ return fg;
+
+ /* initialize refcnt, add to parent list */
+ ret = rhltable_insert(&ft->fgs_hash,
+ &fg->hash,
+ rhash_fg);
+ if (ret) {
+ dealloc_flow_group(steering, fg);
+ return ERR_PTR(ret);
+ }
+
+ tree_init_node(&fg->node, del_hw_flow_group, del_sw_flow_group);
+ tree_add_node(&fg->node, &ft->node);
+ /* Add node to group list */
+ list_add(&fg->node.list, prev);
+ atomic_inc(&ft->node.version);
+
return fg;
}
@@ -575,7 +707,6 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
ft->flags = flags;
INIT_LIST_HEAD(&ft->fwd_rules);
mutex_init(&ft->lock);
- ida_init(&ft->fte_allocator);
return ft;
}
@@ -693,8 +824,10 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
*prio)
{
struct mlx5_flow_root_namespace *root = find_root(&prio->node);
+ struct mlx5_ft_underlay_qp *uqp;
int min_level = INT_MAX;
int err;
+ u32 qpn;
if (root->root_ft)
min_level = root->root_ft->level;
@@ -702,10 +835,24 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
if (ft->level >= min_level)
return 0;
- err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn);
+ if (list_empty(&root->underlay_qpns)) {
+ /* Don't set any QPN (zero) in case QPN list is empty */
+ qpn = 0;
+ err = mlx5_cmd_update_root_ft(root->dev, ft, qpn, false);
+ } else {
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ qpn = uqp->qpn;
+ err = mlx5_cmd_update_root_ft(root->dev, ft, qpn,
+ false);
+ if (err)
+ break;
+ }
+ }
+
if (err)
- mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
- ft->id);
+ mlx5_core_warn(root->dev,
+ "Update root flow table of id(%u) qpn(%d) failed\n",
+ ft->id, qpn);
else
root->root_ft = ft;
@@ -724,7 +871,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
fs_get_obj(fte, rule->node.parent);
if (!(fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return -EINVAL;
- lock_ref_node(&fte->node);
+ down_write_ref_node(&fte->node);
fs_get_obj(fg, fte->node.parent);
fs_get_obj(ft, fg->node.parent);
@@ -733,7 +880,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
ft, fg->id,
modify_mask,
fte);
- unlock_ref_node(&fte->node);
+ up_write_ref_node(&fte->node);
return err;
}
@@ -870,7 +1017,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
goto unlock_root;
}
- tree_init_node(&ft->node, 1, del_flow_table);
+ tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
next_ft = find_next_chained_ft(fs_prio);
err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
@@ -882,17 +1029,17 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
err = connect_flow_table(root->dev, ft, fs_prio);
if (err)
goto destroy_ft;
- lock_ref_node(&fs_prio->node);
+ ft->node.active = true;
+ down_write_ref_node(&fs_prio->node);
tree_add_node(&ft->node, &fs_prio->node);
list_add_flow_table(ft, fs_prio);
fs_prio->num_ft++;
- unlock_ref_node(&fs_prio->node);
+ up_write_ref_node(&fs_prio->node);
mutex_unlock(&root->chain_lock);
return ft;
destroy_ft:
mlx5_cmd_destroy_flow_table(root->dev, ft);
free_ft:
- ida_destroy(&ft->fte_allocator);
kfree(ft);
unlock_root:
mutex_unlock(&root->chain_lock);
@@ -960,54 +1107,6 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
}
EXPORT_SYMBOL(mlx5_create_auto_grouped_flow_table);
-/* Flow table should be locked */
-static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table *ft,
- u32 *fg_in,
- struct list_head
- *prev_fg,
- bool is_auto_fg)
-{
- struct mlx5_flow_group *fg;
- struct mlx5_core_dev *dev = get_dev(&ft->node);
- int err;
-
- if (!dev)
- return ERR_PTR(-ENODEV);
-
- fg = alloc_flow_group(fg_in);
- if (IS_ERR(fg))
- return fg;
-
- err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg);
- if (err)
- goto err_free_fg;
-
- err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
- if (err)
- goto err_remove_fg;
-
- if (ft->autogroup.active)
- ft->autogroup.num_groups++;
- /* Add node to tree */
- tree_init_node(&fg->node, !is_auto_fg, del_flow_group);
- tree_add_node(&fg->node, &ft->node);
- /* Add node to group list */
- list_add(&fg->node.list, prev_fg);
-
- trace_mlx5_fs_add_fg(fg);
- return fg;
-
-err_remove_fg:
- WARN_ON(rhltable_remove(&ft->fgs_hash,
- &fg->hash,
- rhash_fg));
-err_free_fg:
- rhashtable_destroy(&fg->ftes_hash);
- kfree(fg);
-
- return ERR_PTR(err);
-}
-
struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
u32 *fg_in)
{
@@ -1016,7 +1115,13 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
u8 match_criteria_enable = MLX5_GET(create_flow_group_in,
fg_in,
match_criteria_enable);
+ int start_index = MLX5_GET(create_flow_group_in, fg_in,
+ start_flow_index);
+ int end_index = MLX5_GET(create_flow_group_in, fg_in,
+ end_flow_index);
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
struct mlx5_flow_group *fg;
+ int err;
if (!check_valid_mask(match_criteria_enable, match_criteria))
return ERR_PTR(-EINVAL);
@@ -1024,9 +1129,21 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft,
if (ft->autogroup.active)
return ERR_PTR(-EPERM);
- lock_ref_node(&ft->node);
- fg = create_flow_group_common(ft, fg_in, ft->node.children.prev, false);
- unlock_ref_node(&ft->node);
+ down_write_ref_node(&ft->node);
+ fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria,
+ start_index, end_index,
+ ft->node.children.prev);
+ up_write_ref_node(&ft->node);
+ if (IS_ERR(fg))
+ return fg;
+
+ err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id);
+ if (err) {
+ tree_put_node(&fg->node);
+ return ERR_PTR(err);
+ }
+ trace_mlx5_fs_add_fg(fg);
+ fg->node.active = true;
return fg;
}
@@ -1067,7 +1184,7 @@ static void destroy_flow_handle(struct fs_fte *fte,
int i)
{
for (; --i >= 0;) {
- if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) {
+ if (refcount_dec_and_test(&handle->rule[i]->node.refcount)) {
fte->dests_size--;
list_del(&handle->rule[i]->node.list);
kfree(handle->rule[i]);
@@ -1098,7 +1215,7 @@ create_flow_handle(struct fs_fte *fte,
if (dest) {
rule = find_flow_rule(fte, dest + i);
if (rule) {
- atomic_inc(&rule->node.refcount);
+ refcount_inc(&rule->node.refcount);
goto rule_found;
}
}
@@ -1111,7 +1228,7 @@ create_flow_handle(struct fs_fte *fte,
/* Add dest to dests list- we need flow tables to be in the
* end of the list for forward to next prio rules.
*/
- tree_init_node(&rule->node, 1, del_rule);
+ tree_init_node(&rule->node, NULL, del_sw_hw_rule);
if (dest &&
dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
list_add(&rule->node.list, &fte->node.children);
@@ -1167,7 +1284,9 @@ add_rule_fte(struct fs_fte *fte,
if (err)
goto free_handle;
+ fte->node.active = true;
fte->status |= FS_FTE_STATUS_EXISTING;
+ atomic_inc(&fte->node.version);
out:
return handle;
@@ -1177,59 +1296,17 @@ free_handle:
return ERR_PTR(err);
}
-static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
- u32 *match_value,
- struct mlx5_flow_act *flow_act)
-{
- struct mlx5_flow_table *ft;
- struct fs_fte *fte;
- int index;
- int ret;
-
- fs_get_obj(ft, fg->node.parent);
- index = ida_simple_get(&ft->fte_allocator, fg->start_index,
- fg->start_index + fg->max_ftes,
- GFP_KERNEL);
- if (index < 0)
- return ERR_PTR(index);
-
- fte = alloc_fte(flow_act, match_value, index);
- if (IS_ERR(fte)) {
- ret = PTR_ERR(fte);
- goto err_alloc;
- }
- ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte);
- if (ret)
- goto err_hash;
-
- return fte;
-
-err_hash:
- kfree(fte);
-err_alloc:
- ida_simple_remove(&ft->fte_allocator, index);
- return ERR_PTR(ret);
-}
-
-static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
- u8 match_criteria_enable,
- u32 *match_criteria)
+static struct mlx5_flow_group *alloc_auto_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec)
{
- int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct list_head *prev = &ft->node.children;
- unsigned int candidate_index = 0;
struct mlx5_flow_group *fg;
- void *match_criteria_addr;
+ unsigned int candidate_index = 0;
unsigned int group_size = 0;
- u32 *in;
if (!ft->autogroup.active)
return ERR_PTR(-ENOENT);
- in = kvzalloc(inlen, GFP_KERNEL);
- if (!in)
- return ERR_PTR(-ENOMEM);
-
if (ft->autogroup.num_groups < ft->autogroup.required_groups)
/* We save place for flow groups in addition to max types */
group_size = ft->max_fte / (ft->autogroup.required_groups + 1);
@@ -1247,25 +1324,55 @@ static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft,
prev = &fg->node.list;
}
- if (candidate_index + group_size > ft->max_fte) {
- fg = ERR_PTR(-ENOSPC);
+ if (candidate_index + group_size > ft->max_fte)
+ return ERR_PTR(-ENOSPC);
+
+ fg = alloc_insert_flow_group(ft,
+ spec->match_criteria_enable,
+ spec->match_criteria,
+ candidate_index,
+ candidate_index + group_size - 1,
+ prev);
+ if (IS_ERR(fg))
goto out;
- }
+
+ ft->autogroup.num_groups++;
+
+out:
+ return fg;
+}
+
+static int create_auto_flow_group(struct mlx5_flow_table *ft,
+ struct mlx5_flow_group *fg)
+{
+ struct mlx5_core_dev *dev = get_dev(&ft->node);
+ int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+ void *match_criteria_addr;
+ int err;
+ u32 *in;
+
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
MLX5_SET(create_flow_group_in, in, match_criteria_enable,
- match_criteria_enable);
- MLX5_SET(create_flow_group_in, in, start_flow_index, candidate_index);
- MLX5_SET(create_flow_group_in, in, end_flow_index, candidate_index +
- group_size - 1);
+ fg->mask.match_criteria_enable);
+ MLX5_SET(create_flow_group_in, in, start_flow_index, fg->start_index);
+ MLX5_SET(create_flow_group_in, in, end_flow_index, fg->start_index +
+ fg->max_ftes - 1);
match_criteria_addr = MLX5_ADDR_OF(create_flow_group_in,
in, match_criteria);
- memcpy(match_criteria_addr, match_criteria,
- MLX5_ST_SZ_BYTES(fte_match_param));
+ memcpy(match_criteria_addr, fg->mask.match_criteria,
+ sizeof(fg->mask.match_criteria));
+
+ err = mlx5_cmd_create_flow_group(dev, ft, in, &fg->id);
+ if (!err) {
+ fg->node.active = true;
+ trace_mlx5_fs_add_fg(fg);
+ }
- fg = create_flow_group_common(ft, in, prev, true);
-out:
kvfree(in);
- return fg;
+ return err;
}
static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
@@ -1340,60 +1447,30 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
struct fs_fte *fte)
{
struct mlx5_flow_handle *handle;
- struct mlx5_flow_table *ft;
+ int old_action;
int i;
+ int ret;
- if (fte) {
- int old_action;
- int ret;
-
- nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
- ret = check_conflicting_ftes(fte, flow_act);
- if (ret) {
- handle = ERR_PTR(ret);
- goto unlock_fte;
- }
-
- old_action = fte->action;
- fte->action |= flow_act->action;
- handle = add_rule_fte(fte, fg, dest, dest_num,
- old_action != flow_act->action);
- if (IS_ERR(handle)) {
- fte->action = old_action;
- goto unlock_fte;
- } else {
- trace_mlx5_fs_set_fte(fte, false);
- goto add_rules;
- }
- }
- fs_get_obj(ft, fg->node.parent);
+ ret = check_conflicting_ftes(fte, flow_act);
+ if (ret)
+ return ERR_PTR(ret);
- fte = create_fte(fg, match_value, flow_act);
- if (IS_ERR(fte))
- return (void *)fte;
- tree_init_node(&fte->node, 0, del_fte);
- nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
- handle = add_rule_fte(fte, fg, dest, dest_num, false);
+ old_action = fte->action;
+ fte->action |= flow_act->action;
+ handle = add_rule_fte(fte, fg, dest, dest_num,
+ old_action != flow_act->action);
if (IS_ERR(handle)) {
- unlock_ref_node(&fte->node);
- destroy_fte(fte, fg);
- kfree(fte);
+ fte->action = old_action;
return handle;
}
+ trace_mlx5_fs_set_fte(fte, false);
- tree_add_node(&fte->node, &fg->node);
- /* fte list isn't sorted */
- list_add_tail(&fte->node.list, &fg->node.children);
- trace_mlx5_fs_set_fte(fte, true);
-add_rules:
for (i = 0; i < handle->num_rules; i++) {
- if (atomic_read(&handle->rule[i]->node.refcount) == 1) {
+ if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
tree_add_node(&handle->rule[i]->node, &fte->node);
trace_mlx5_fs_add_rule(handle->rule[i]);
}
}
-unlock_fte:
- unlock_ref_node(&fte->node);
return handle;
}
@@ -1441,93 +1518,197 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
return true;
}
-static struct mlx5_flow_handle *
-try_add_to_existing_fg(struct mlx5_flow_table *ft,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_act *flow_act,
- struct mlx5_flow_destination *dest,
- int dest_num)
-{
+struct match_list {
+ struct list_head list;
struct mlx5_flow_group *g;
- struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT);
+};
+
+struct match_list_head {
+ struct list_head list;
+ struct match_list first;
+};
+
+static void free_match_list(struct match_list_head *head)
+{
+ if (!list_empty(&head->list)) {
+ struct match_list *iter, *match_tmp;
+
+ list_del(&head->first.list);
+ tree_put_node(&head->first.g->node);
+ list_for_each_entry_safe(iter, match_tmp, &head->list,
+ list) {
+ tree_put_node(&iter->g->node);
+ list_del(&iter->list);
+ kfree(iter);
+ }
+ }
+}
+
+static int build_match_list(struct match_list_head *match_head,
+ struct mlx5_flow_table *ft,
+ struct mlx5_flow_spec *spec)
+{
struct rhlist_head *tmp, *list;
- struct match_list {
- struct list_head list;
- struct mlx5_flow_group *g;
- } match_list, *iter;
- LIST_HEAD(match_head);
+ struct mlx5_flow_group *g;
+ int err = 0;
rcu_read_lock();
+ INIT_LIST_HEAD(&match_head->list);
/* Collect all fgs which has a matching match_criteria */
list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg);
+ /* RCU is atomic, we can't execute FW commands here */
rhl_for_each_entry_rcu(g, tmp, list, hash) {
struct match_list *curr_match;
- if (likely(list_empty(&match_head))) {
- match_list.g = g;
- list_add_tail(&match_list.list, &match_head);
+ if (likely(list_empty(&match_head->list))) {
+ if (!tree_get_node(&g->node))
+ continue;
+ match_head->first.g = g;
+ list_add_tail(&match_head->first.list,
+ &match_head->list);
continue;
}
- curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
+ curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC);
if (!curr_match) {
- rcu_read_unlock();
- rule = ERR_PTR(-ENOMEM);
- goto free_list;
+ free_match_list(match_head);
+ err = -ENOMEM;
+ goto out;
+ }
+ if (!tree_get_node(&g->node)) {
+ kfree(curr_match);
+ continue;
}
curr_match->g = g;
- list_add_tail(&curr_match->list, &match_head);
+ list_add_tail(&curr_match->list, &match_head->list);
}
+out:
rcu_read_unlock();
+ return err;
+}
+
+static u64 matched_fgs_get_version(struct list_head *match_head)
+{
+ struct match_list *iter;
+ u64 version = 0;
+
+ list_for_each_entry(iter, match_head, list)
+ version += (u64)atomic_read(&iter->g->node.version);
+ return version;
+}
+static struct mlx5_flow_handle *
+try_add_to_existing_fg(struct mlx5_flow_table *ft,
+ struct list_head *match_head,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_act *flow_act,
+ struct mlx5_flow_destination *dest,
+ int dest_num,
+ int ft_version)
+{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
+ struct mlx5_flow_group *g;
+ struct mlx5_flow_handle *rule;
+ struct match_list *iter;
+ bool take_write = false;
+ struct fs_fte *fte;
+ u64 version;
+ int err;
+
+ fte = alloc_fte(ft, spec->match_value, flow_act);
+ if (IS_ERR(fte))
+ return ERR_PTR(-ENOMEM);
+
+ list_for_each_entry(iter, match_head, list) {
+ nested_down_read_ref_node(&iter->g->node, FS_LOCK_PARENT);
+ ida_pre_get(&iter->g->fte_allocator, GFP_KERNEL);
+ }
+
+search_again_locked:
+ version = matched_fgs_get_version(match_head);
/* Try to find a fg that already contains a matching fte */
- list_for_each_entry(iter, &match_head, list) {
- struct fs_fte *fte;
+ list_for_each_entry(iter, match_head, list) {
+ struct fs_fte *fte_tmp;
g = iter->g;
- nested_lock_ref_node(&g->node, FS_MUTEX_PARENT);
- fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value,
- rhash_fte);
- if (fte) {
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, fte);
- unlock_ref_node(&g->node);
- goto free_list;
+ fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value,
+ rhash_fte);
+ if (!fte_tmp || !tree_get_node(&fte_tmp->node))
+ continue;
+
+ nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
+ if (!take_write) {
+ list_for_each_entry(iter, match_head, list)
+ up_read_ref_node(&iter->g->node);
+ } else {
+ list_for_each_entry(iter, match_head, list)
+ up_write_ref_node(&iter->g->node);
}
- unlock_ref_node(&g->node);
+
+ rule = add_rule_fg(g, spec->match_value,
+ flow_act, dest, dest_num, fte_tmp);
+ up_write_ref_node(&fte_tmp->node);
+ tree_put_node(&fte_tmp->node);
+ kmem_cache_free(steering->ftes_cache, fte);
+ return rule;
}
/* No group with matching fte found. Try to add a new fte to any
* matching fg.
*/
- list_for_each_entry(iter, &match_head, list) {
- g = iter->g;
- nested_lock_ref_node(&g->node, FS_MUTEX_PARENT);
- rule = add_rule_fg(g, spec->match_value,
- flow_act, dest, dest_num, NULL);
- if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) {
- unlock_ref_node(&g->node);
- goto free_list;
- }
- unlock_ref_node(&g->node);
+ if (!take_write) {
+ list_for_each_entry(iter, match_head, list)
+ up_read_ref_node(&iter->g->node);
+ list_for_each_entry(iter, match_head, list)
+ nested_down_write_ref_node(&iter->g->node,
+ FS_LOCK_PARENT);
+ take_write = true;
}
-free_list:
- if (!list_empty(&match_head)) {
- struct match_list *match_tmp;
+ /* Check the ft version, for case that new flow group
+ * was added while the fgs weren't locked
+ */
+ if (atomic_read(&ft->node.version) != ft_version) {
+ rule = ERR_PTR(-EAGAIN);
+ goto out;
+ }
- /* The most common case is having one FG. Since we want to
- * optimize this case, we save the first on the stack.
- * Therefore, no need to free it.
- */
- list_del(&list_first_entry(&match_head, typeof(*iter), list)->list);
- list_for_each_entry_safe(iter, match_tmp, &match_head, list) {
- list_del(&iter->list);
- kfree(iter);
+ /* Check the fgs version, for case the new FTE with the
+ * same values was added while the fgs weren't locked
+ */
+ if (version != matched_fgs_get_version(match_head))
+ goto search_again_locked;
+
+ list_for_each_entry(iter, match_head, list) {
+ g = iter->g;
+
+ if (!g->node.active)
+ continue;
+ err = insert_fte(g, fte);
+ if (err) {
+ if (err == -ENOSPC)
+ continue;
+ list_for_each_entry(iter, match_head, list)
+ up_write_ref_node(&iter->g->node);
+ kmem_cache_free(steering->ftes_cache, fte);
+ return ERR_PTR(err);
}
- }
+ nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+ list_for_each_entry(iter, match_head, list)
+ up_write_ref_node(&iter->g->node);
+ rule = add_rule_fg(g, spec->match_value,
+ flow_act, dest, dest_num, fte);
+ up_write_ref_node(&fte->node);
+ tree_put_node(&fte->node);
+ return rule;
+ }
+ rule = ERR_PTR(-ENOENT);
+out:
+ list_for_each_entry(iter, match_head, list)
+ up_write_ref_node(&iter->g->node);
+ kmem_cache_free(steering->ftes_cache, fte);
return rule;
}
@@ -1539,8 +1720,14 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
int dest_num)
{
+ struct mlx5_flow_steering *steering = get_steering(&ft->node);
struct mlx5_flow_group *g;
struct mlx5_flow_handle *rule;
+ struct match_list_head match_head;
+ bool take_write = false;
+ struct fs_fte *fte;
+ int version;
+ int err;
int i;
if (!check_valid_spec(spec))
@@ -1550,33 +1737,73 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft,
if (!dest_is_valid(&dest[i], flow_act->action, ft))
return ERR_PTR(-EINVAL);
}
+ nested_down_read_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+search_again_locked:
+ version = atomic_read(&ft->node.version);
+
+ /* Collect all fgs which has a matching match_criteria */
+ err = build_match_list(&match_head, ft, spec);
+ if (err)
+ return ERR_PTR(err);
- nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
- rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num);
- if (!IS_ERR(rule))
- goto unlock;
+ if (!take_write)
+ up_read_ref_node(&ft->node);
- g = create_autogroup(ft, spec->match_criteria_enable,
- spec->match_criteria);
+ rule = try_add_to_existing_fg(ft, &match_head.list, spec, flow_act, dest,
+ dest_num, version);
+ free_match_list(&match_head);
+ if (!IS_ERR(rule) ||
+ (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN))
+ return rule;
+
+ if (!take_write) {
+ nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT);
+ take_write = true;
+ }
+
+ if (PTR_ERR(rule) == -EAGAIN ||
+ version != atomic_read(&ft->node.version))
+ goto search_again_locked;
+
+ g = alloc_auto_flow_group(ft, spec);
if (IS_ERR(g)) {
rule = (void *)g;
- goto unlock;
+ up_write_ref_node(&ft->node);
+ return rule;
}
- rule = add_rule_fg(g, spec->match_value, flow_act, dest,
- dest_num, NULL);
- if (IS_ERR(rule)) {
- /* Remove assumes refcount > 0 and autogroup creates a group
- * with a refcount = 0.
- */
- unlock_ref_node(&ft->node);
- tree_get_node(&g->node);
- tree_remove_node(&g->node);
- return rule;
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ up_write_ref_node(&ft->node);
+
+ err = create_auto_flow_group(ft, g);
+ if (err)
+ goto err_release_fg;
+
+ fte = alloc_fte(ft, spec->match_value, flow_act);
+ if (IS_ERR(fte)) {
+ err = PTR_ERR(fte);
+ goto err_release_fg;
}
-unlock:
- unlock_ref_node(&ft->node);
+
+ err = insert_fte(g, fte);
+ if (err) {
+ kmem_cache_free(steering->ftes_cache, fte);
+ goto err_release_fg;
+ }
+
+ nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD);
+ up_write_ref_node(&g->node);
+ rule = add_rule_fg(g, spec->match_value, flow_act, dest,
+ dest_num, fte);
+ up_write_ref_node(&fte->node);
+ tree_put_node(&fte->node);
+ tree_put_node(&g->node);
return rule;
+
+err_release_fg:
+ up_write_ref_node(&g->node);
+ tree_put_node(&g->node);
+ return ERR_PTR(err);
}
static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
@@ -1661,23 +1888,43 @@ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
static int update_root_ft_destroy(struct mlx5_flow_table *ft)
{
struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+ struct mlx5_ft_underlay_qp *uqp;
struct mlx5_flow_table *new_root_ft = NULL;
+ int err = 0;
+ u32 qpn;
if (root->root_ft != ft)
return 0;
new_root_ft = find_next_ft(ft);
- if (new_root_ft) {
- int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
- root->underlay_qpn);
- if (err) {
- mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n",
- ft->id);
- return err;
+ if (!new_root_ft) {
+ root->root_ft = NULL;
+ return 0;
+ }
+
+ if (list_empty(&root->underlay_qpns)) {
+ /* Don't set any QPN (zero) in case QPN list is empty */
+ qpn = 0;
+ err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, qpn,
+ false);
+ } else {
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ qpn = uqp->qpn;
+ err = mlx5_cmd_update_root_ft(root->dev, new_root_ft,
+ qpn, false);
+ if (err)
+ break;
}
}
- root->root_ft = new_root_ft;
+
+ if (err)
+ mlx5_core_warn(root->dev,
+ "Update root flow table of id(%u) qpn(%d) failed\n",
+ ft->id, qpn);
+ else
+ root->root_ft = new_root_ft;
+
return 0;
}
@@ -1817,7 +2064,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
return ERR_PTR(-ENOMEM);
fs_prio->node.type = FS_TYPE_PRIO;
- tree_init_node(&fs_prio->node, 1, NULL);
+ tree_init_node(&fs_prio->node, NULL, NULL);
tree_add_node(&fs_prio->node, &ns->node);
fs_prio->num_levels = num_levels;
fs_prio->prio = prio;
@@ -1843,7 +2090,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
return ERR_PTR(-ENOMEM);
fs_init_namespace(ns);
- tree_init_node(&ns->node, 1, NULL);
+ tree_init_node(&ns->node, NULL, NULL);
tree_add_node(&ns->node, &prio->node);
list_add_tail(&ns->node.list, &prio->node.children);
@@ -1965,10 +2212,12 @@ static struct mlx5_flow_root_namespace *create_root_ns(struct mlx5_flow_steering
root_ns->dev = steering->dev;
root_ns->table_type = table_type;
+ INIT_LIST_HEAD(&root_ns->underlay_qpns);
+
ns = &root_ns->ns;
fs_init_namespace(ns);
mutex_init(&root_ns->chain_lock);
- tree_init_node(&ns->node, 1, NULL);
+ tree_init_node(&ns->node, NULL, NULL);
tree_add_node(&ns->node, NULL);
return root_ns;
@@ -2066,8 +2315,10 @@ static void clean_tree(struct fs_node *node)
struct fs_node *iter;
struct fs_node *temp;
+ tree_get_node(node);
list_for_each_entry_safe(iter, temp, &node->children, list)
clean_tree(iter);
+ tree_put_node(node);
tree_remove_node(node);
}
}
@@ -2091,6 +2342,8 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
mlx5_cleanup_fc_stats(dev);
+ kmem_cache_destroy(steering->ftes_cache);
+ kmem_cache_destroy(steering->fgs_cache);
kfree(steering);
}
@@ -2196,6 +2449,16 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
steering->dev = dev;
dev->priv.steering = steering;
+ steering->fgs_cache = kmem_cache_create("mlx5_fs_fgs",
+ sizeof(struct mlx5_flow_group), 0,
+ 0, NULL);
+ steering->ftes_cache = kmem_cache_create("mlx5_fs_ftes", sizeof(struct fs_fte), 0,
+ 0, NULL);
+ if (!steering->ftes_cache || !steering->fgs_cache) {
+ err = -ENOMEM;
+ goto err;
+ }
+
if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) &&
(MLX5_CAP_GEN(dev, nic_flow_table))) ||
((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
@@ -2245,17 +2508,76 @@ err:
int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
{
struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+ struct mlx5_ft_underlay_qp *new_uqp;
+ int err = 0;
+
+ new_uqp = kzalloc(sizeof(*new_uqp), GFP_KERNEL);
+ if (!new_uqp)
+ return -ENOMEM;
+
+ mutex_lock(&root->chain_lock);
+
+ if (!root->root_ft) {
+ err = -EINVAL;
+ goto update_ft_fail;
+ }
+
+ err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, false);
+ if (err) {
+ mlx5_core_warn(dev, "Failed adding underlay QPN (%u) to root FT err(%d)\n",
+ underlay_qpn, err);
+ goto update_ft_fail;
+ }
+
+ new_uqp->qpn = underlay_qpn;
+ list_add_tail(&new_uqp->list, &root->underlay_qpns);
+
+ mutex_unlock(&root->chain_lock);
- root->underlay_qpn = underlay_qpn;
return 0;
+
+update_ft_fail:
+ mutex_unlock(&root->chain_lock);
+ kfree(new_uqp);
+ return err;
}
EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn);
int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn)
{
struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns;
+ struct mlx5_ft_underlay_qp *uqp;
+ bool found = false;
+ int err = 0;
+
+ mutex_lock(&root->chain_lock);
+ list_for_each_entry(uqp, &root->underlay_qpns, list) {
+ if (uqp->qpn == underlay_qpn) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ mlx5_core_warn(dev, "Failed finding underlay qp (%u) in qpn list\n",
+ underlay_qpn);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = mlx5_cmd_update_root_ft(dev, root->root_ft, underlay_qpn, true);
+ if (err)
+ mlx5_core_warn(dev, "Failed removing underlay QPN (%u) from root FT err(%d)\n",
+ underlay_qpn, err);
+
+ list_del(&uqp->list);
+ mutex_unlock(&root->chain_lock);
+ kfree(uqp);
- root->underlay_qpn = 0;
return 0;
+
+out:
+ mutex_unlock(&root->chain_lock);
+ return err;
}
EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 48dd78975062..397d24a621a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -33,6 +33,7 @@
#ifndef _MLX5_FS_CORE_
#define _MLX5_FS_CORE_
+#include <linux/refcount.h>
#include <linux/mlx5/fs.h>
#include <linux/rhashtable.h>
@@ -66,6 +67,8 @@ enum fs_fte_status {
struct mlx5_flow_steering {
struct mlx5_core_dev *dev;
+ struct kmem_cache *fgs_cache;
+ struct kmem_cache *ftes_cache;
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_root_namespace *fdb_root_ns;
struct mlx5_flow_root_namespace *esw_egress_root_ns;
@@ -81,9 +84,12 @@ struct fs_node {
struct fs_node *parent;
struct fs_node *root;
/* lock the node for writing and traversing */
- struct mutex lock;
- atomic_t refcount;
- void (*remove_func)(struct fs_node *);
+ struct rw_semaphore lock;
+ refcount_t refcount;
+ bool active;
+ void (*del_hw_func)(struct fs_node *);
+ void (*del_sw_func)(struct fs_node *);
+ atomic_t version;
};
struct mlx5_flow_rule {
@@ -120,7 +126,6 @@ struct mlx5_flow_table {
/* FWD rules that point on this flow table */
struct list_head fwd_rules;
u32 flags;
- struct ida fte_allocator;
struct rhltable fgs_hash;
};
@@ -147,6 +152,11 @@ struct mlx5_fc {
struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
};
+struct mlx5_ft_underlay_qp {
+ struct list_head list;
+ u32 qpn;
+};
+
#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600
/* Calculate the fte_match_param length and without the reserved length.
* Make sure the reserved field is the last.
@@ -200,6 +210,7 @@ struct mlx5_flow_group {
struct mlx5_flow_group_mask mask;
u32 start_index;
u32 max_ftes;
+ struct ida fte_allocator;
u32 id;
struct rhashtable ftes_hash;
struct rhlist_head hash;
@@ -212,7 +223,7 @@ struct mlx5_flow_root_namespace {
struct mlx5_flow_table *root_ft;
/* Should be held when chaining flow tables */
struct mutex chain_lock;
- u32 underlay_qpn;
+ struct list_head underlay_qpns;
};
int mlx5_init_fc_stats(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 8aea0a065e56..a89a68ce53ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -320,15 +320,13 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev)
{
struct mlx5_core_health *health = &dev->priv.health;
- init_timer(&health->timer);
+ setup_timer(&health->timer, poll_health, (unsigned long)dev);
health->sick = 0;
clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
health->health = &dev->iseg->health;
health->health_counter = &dev->iseg->health_counter;
- health->timer.data = (unsigned long)dev;
- health->timer.function = poll_health;
health->timer.expires = round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL);
add_timer(&health->timer);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
index 43c126c63955..6f338a9219c8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c
@@ -250,3 +250,8 @@ const struct ethtool_ops mlx5i_ethtool_ops = {
.get_link_ksettings = mlx5i_get_link_ksettings,
.get_link = ethtool_op_get_link,
};
+
+const struct ethtool_ops mlx5i_pkey_ethtool_ops = {
+ .get_drvinfo = mlx5i_get_drvinfo,
+ .get_link = ethtool_op_get_link,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 145e392ab849..abf270d7f556 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -40,8 +40,6 @@
static int mlx5i_open(struct net_device *netdev);
static int mlx5i_close(struct net_device *netdev);
-static int mlx5i_dev_init(struct net_device *dev);
-static void mlx5i_dev_cleanup(struct net_device *dev);
static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu);
static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
@@ -70,10 +68,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
}
/* Called directly after IPoIB netdevice was created to initialize SW structs */
-static void mlx5i_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+void mlx5i_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
@@ -108,11 +106,69 @@ static void mlx5i_cleanup(struct mlx5e_priv *priv)
/* Do nothing .. */
}
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_qp *qp = &ipriv->qp;
+ struct mlx5_qp_context *context;
+ int ret;
+
+ /* QP states */
+ context = kzalloc(sizeof(*context), GFP_KERNEL);
+ if (!context)
+ return -ENOMEM;
+
+ context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
+ context->pri_path.port = 1;
+ context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index);
+ context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+ memset(context, 0, sizeof(*context));
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+
+ ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
+ if (ret) {
+ mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
+ goto err_qp_modify_to_err;
+ }
+
+ kfree(context);
+ return 0;
+
+err_qp_modify_to_err:
+ mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp);
+ kfree(context);
+ return ret;
+}
+
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+ struct mlx5_qp_context context;
+ int err;
+
+ err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context,
+ &ipriv->qp);
+ if (err)
+ mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err);
+}
+
#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
-static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
{
- struct mlx5_qp_context *context = NULL;
u32 *in = NULL;
void *addr_path;
int ret = 0;
@@ -140,43 +196,12 @@ static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core
goto out;
}
- /* QP states */
- context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (!context) {
- ret = -ENOMEM;
- goto out;
- }
-
- context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
- context->pri_path.port = 1;
- context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
-
- ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
- if (ret) {
- mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
- goto out;
- }
- memset(context, 0, sizeof(*context));
-
- ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
- if (ret) {
- mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
- goto out;
- }
-
- ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
- if (ret) {
- mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
- goto out;
- }
-
out:
- kfree(context);
kvfree(in);
return ret;
}
-static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
{
mlx5_core_destroy_qp(mdev, qp);
}
@@ -195,10 +220,14 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]);
if (err) {
mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
- return err;
+ goto err_destroy_underlay_qp;
}
return 0;
+
+err_destroy_underlay_qp:
+ mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+ return err;
}
static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
@@ -249,7 +278,6 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
static int mlx5i_init_rx(struct mlx5e_priv *priv)
{
- struct mlx5i_priv *ipriv = priv->ppriv;
int err;
err = mlx5e_create_indirect_rqt(priv);
@@ -268,18 +296,12 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
if (err)
goto err_destroy_indirect_tirs;
- err = mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
- if (err)
- goto err_destroy_direct_tirs;
-
err = mlx5i_create_flow_steering(priv);
if (err)
- goto err_remove_rx_underlay_qpn;
+ goto err_destroy_direct_tirs;
return 0;
-err_remove_rx_underlay_qpn:
- mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
err_destroy_direct_tirs:
mlx5e_destroy_direct_tirs(priv);
err_destroy_indirect_tirs:
@@ -293,9 +315,6 @@ err_destroy_indirect_rqts:
static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
{
- struct mlx5i_priv *ipriv = priv->ppriv;
-
- mlx5_fs_remove_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn);
mlx5i_destroy_flow_steering(priv);
mlx5e_destroy_direct_tirs(priv);
mlx5e_destroy_indirect_tirs(priv);
@@ -351,7 +370,7 @@ out:
return err;
}
-static int mlx5i_dev_init(struct net_device *dev)
+int mlx5i_dev_init(struct net_device *dev)
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
struct mlx5i_priv *ipriv = priv->ppriv;
@@ -361,6 +380,9 @@ static int mlx5i_dev_init(struct net_device *dev)
dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff;
dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff;
+ /* Add QPN to net-device mapping to HT */
+ mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn);
+
return 0;
}
@@ -378,63 +400,84 @@ static int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
}
}
-static void mlx5i_dev_cleanup(struct net_device *dev)
+void mlx5i_dev_cleanup(struct net_device *dev)
{
struct mlx5e_priv *priv = mlx5i_epriv(dev);
- struct mlx5_core_dev *mdev = priv->mdev;
- struct mlx5i_priv *ipriv = priv->ppriv;
- struct mlx5_qp_context context;
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5i_uninit_underlay_qp(priv);
- /* detach qp from flow-steering by reset it */
- mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, &ipriv->qp);
+ /* Delete QPN to net-device mapping from HT */
+ mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn);
}
static int mlx5i_open(struct net_device *netdev)
{
- struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
int err;
- mutex_lock(&priv->state_lock);
+ mutex_lock(&epriv->state_lock);
- set_bit(MLX5E_STATE_OPENED, &priv->state);
+ set_bit(MLX5E_STATE_OPENED, &epriv->state);
- err = mlx5e_open_channels(priv, &priv->channels);
- if (err)
+ err = mlx5i_init_underlay_qp(epriv);
+ if (err) {
+ mlx5_core_warn(mdev, "prepare underlay qp state failed, %d\n", err);
goto err_clear_state_opened_flag;
+ }
- mlx5e_refresh_tirs(priv, false);
- mlx5e_activate_priv_channels(priv);
- mlx5e_timestamp_init(priv);
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ if (err) {
+ mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err);
+ goto err_reset_qp;
+ }
- mutex_unlock(&priv->state_lock);
+ err = mlx5e_open_channels(epriv, &epriv->channels);
+ if (err)
+ goto err_remove_fs_underlay_qp;
+
+ mlx5e_refresh_tirs(epriv, false);
+ mlx5e_activate_priv_channels(epriv);
+ mlx5e_timestamp_set(epriv);
+
+ mutex_unlock(&epriv->state_lock);
return 0;
+err_remove_fs_underlay_qp:
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+err_reset_qp:
+ mlx5i_uninit_underlay_qp(epriv);
err_clear_state_opened_flag:
- clear_bit(MLX5E_STATE_OPENED, &priv->state);
- mutex_unlock(&priv->state_lock);
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+ mutex_unlock(&epriv->state_lock);
return err;
}
static int mlx5i_close(struct net_device *netdev)
{
- struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
/* May already be CLOSED in case a previous configuration operation
* (e.g RX/TX queue size change) that involves close&open failed.
*/
- mutex_lock(&priv->state_lock);
+ mutex_lock(&epriv->state_lock);
- if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ if (!test_bit(MLX5E_STATE_OPENED, &epriv->state))
goto unlock;
- clear_bit(MLX5E_STATE_OPENED, &priv->state);
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
- mlx5e_timestamp_cleanup(priv);
- netif_carrier_off(priv->netdev);
- mlx5e_deactivate_priv_channels(priv);
- mlx5e_close_channels(&priv->channels);
+ netif_carrier_off(epriv->netdev);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5i_uninit_underlay_qp(epriv);
+ mlx5e_deactivate_priv_channels(epriv);
+ mlx5e_close_channels(&epriv->channels);;
unlock:
- mutex_unlock(&priv->state_lock);
+ mutex_unlock(&epriv->state_lock);
return 0;
}
@@ -492,6 +535,13 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey);
}
+static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+ ipriv->pkey_index = (u16)id;
+}
+
static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
{
if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
@@ -510,12 +560,13 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
const char *name,
void (*setup)(struct net_device *))
{
- const struct mlx5e_profile *profile = &mlx5i_nic_profile;
- int nch = profile->max_nch(mdev);
+ const struct mlx5e_profile *profile;
struct net_device *netdev;
struct mlx5i_priv *ipriv;
struct mlx5e_priv *epriv;
struct rdma_netdev *rn;
+ bool sub_interface;
+ int nch;
int err;
if (mlx5i_check_required_hca_cap(mdev)) {
@@ -523,10 +574,15 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
return ERR_PTR(-EOPNOTSUPP);
}
- /* This function should only be called once per mdev */
- err = mlx5e_create_mdev_resources(mdev);
- if (err)
- return NULL;
+ /* TODO: Need to find a better way to check if child device*/
+ sub_interface = (mdev->mlx5e_res.pdn != 0);
+
+ if (sub_interface)
+ profile = mlx5i_pkey_get_profile();
+ else
+ profile = &mlx5i_nic_profile;
+
+ nch = profile->max_nch(mdev);
netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
name, NET_NAME_UNKNOWN,
@@ -535,7 +591,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
nch);
if (!netdev) {
mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
- goto free_mdev_resources;
+ return NULL;
}
ipriv = netdev_priv(netdev);
@@ -545,6 +601,20 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
if (!epriv->wq)
goto err_free_netdev;
+ ipriv->sub_interface = sub_interface;
+ if (!ipriv->sub_interface) {
+ err = mlx5i_pkey_qpn_ht_init(netdev);
+ if (err) {
+ mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n");
+ goto destroy_wq;
+ }
+
+ /* This should only be called once per mdev */
+ err = mlx5e_create_mdev_resources(mdev);
+ if (err)
+ goto destroy_ht;
+ }
+
profile->init(mdev, netdev, profile, ipriv);
mlx5e_attach_netdev(epriv);
@@ -556,13 +626,16 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
rn->send = mlx5i_xmit;
rn->attach_mcast = mlx5i_attach_mcast;
rn->detach_mcast = mlx5i_detach_mcast;
+ rn->set_id = mlx5i_set_pkey_index;
return netdev;
+destroy_ht:
+ mlx5i_pkey_qpn_ht_cleanup(netdev);
+destroy_wq:
+ destroy_workqueue(epriv->wq);
err_free_netdev:
free_netdev(netdev);
-free_mdev_resources:
- mlx5e_destroy_mdev_resources(mdev);
return NULL;
}
@@ -570,15 +643,18 @@ EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
void mlx5_rdma_netdev_free(struct net_device *netdev)
{
- struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
const struct mlx5e_profile *profile = priv->profile;
- struct mlx5_core_dev *mdev = priv->mdev;
mlx5e_detach_netdev(priv);
profile->cleanup(priv);
destroy_workqueue(priv->wq);
- free_netdev(netdev);
- mlx5e_destroy_mdev_resources(mdev);
+ if (!ipriv->sub_interface) {
+ mlx5i_pkey_qpn_ht_cleanup(netdev);
+ mlx5e_destroy_mdev_resources(priv->mdev);
+ }
+ free_netdev(netdev);
}
EXPORT_SYMBOL(mlx5_rdma_netdev_free);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index a0f405f520f7..49008022c306 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -39,6 +39,7 @@
#define MLX5I_MAX_NUM_TC 1
extern const struct ethtool_ops mlx5i_ethtool_ops;
+extern const struct ethtool_ops mlx5i_pkey_ethtool_ops;
#define MLX5_IB_GRH_BYTES 40
#define MLX5_IPOIB_ENCAP_LEN 4
@@ -49,10 +50,45 @@ extern const struct ethtool_ops mlx5i_ethtool_ops;
struct mlx5i_priv {
struct rdma_netdev rn; /* keep this first */
struct mlx5_core_qp qp;
+ bool sub_interface;
u32 qkey;
+ u16 pkey_index;
+ struct mlx5i_pkey_qpn_ht *qpn_htbl;
char *mlx5e_priv[0];
};
+/* Underlay QP create/destroy functions */
+int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+
+/* Underlay QP state modification init/uninit functions */
+int mlx5i_init_underlay_qp(struct mlx5e_priv *priv);
+void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv);
+
+/* Allocate/Free underlay QPN to net-device hash table */
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev);
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev);
+
+/* Add/Remove an underlay QPN to net-device mapping to/from the hash table */
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn);
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn);
+
+/* Get the net-device corresponding to the given underlay QPN */
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn);
+
+/* Shared ndo functionts */
+int mlx5i_dev_init(struct net_device *dev);
+void mlx5i_dev_cleanup(struct net_device *dev);
+
+/* Parent profile functions */
+void mlx5i_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv);
+
+/* Get child interface nic profile */
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
+
/* Extract mlx5e_priv from IPoIB netdev */
#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
new file mode 100644
index 000000000000..531b02cc979b
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/hash.h>
+#include "ipoib.h"
+
+#define MLX5I_MAX_LOG_PKEY_SUP 7
+
+struct qpn_to_netdev {
+ struct net_device *netdev;
+ struct hlist_node hlist;
+ u32 underlay_qpn;
+};
+
+struct mlx5i_pkey_qpn_ht {
+ struct hlist_head buckets[1 << MLX5I_MAX_LOG_PKEY_SUP];
+ spinlock_t ht_lock; /* Synchronise with NAPI */
+};
+
+int mlx5i_pkey_qpn_ht_init(struct net_device *netdev)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct mlx5i_pkey_qpn_ht *qpn_htbl;
+
+ qpn_htbl = kzalloc(sizeof(*qpn_htbl), GFP_KERNEL);
+ if (!qpn_htbl)
+ return -ENOMEM;
+
+ ipriv->qpn_htbl = qpn_htbl;
+ spin_lock_init(&qpn_htbl->ht_lock);
+
+ return 0;
+}
+
+void mlx5i_pkey_qpn_ht_cleanup(struct net_device *netdev)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+
+ kfree(ipriv->qpn_htbl);
+}
+
+static struct qpn_to_netdev *mlx5i_find_qpn_to_netdev_node(struct hlist_head *buckets,
+ u32 qpn)
+{
+ struct hlist_head *h = &buckets[hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP)];
+ struct qpn_to_netdev *node;
+
+ hlist_for_each_entry(node, h, hlist) {
+ if (node->underlay_qpn == qpn)
+ return node;
+ }
+
+ return NULL;
+}
+
+int mlx5i_pkey_add_qpn(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+ u8 key = hash_32(qpn, MLX5I_MAX_LOG_PKEY_SUP);
+ struct qpn_to_netdev *new_node;
+
+ new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node)
+ return -ENOMEM;
+
+ new_node->netdev = netdev;
+ new_node->underlay_qpn = qpn;
+ spin_lock_bh(&ht->ht_lock);
+ hlist_add_head(&new_node->hlist, &ht->buckets[key]);
+ spin_unlock_bh(&ht->ht_lock);
+
+ return 0;
+}
+
+int mlx5i_pkey_del_qpn(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5i_pkey_qpn_ht *ht = ipriv->qpn_htbl;
+ struct qpn_to_netdev *node;
+
+ node = mlx5i_find_qpn_to_netdev_node(ht->buckets, qpn);
+ if (!node) {
+ mlx5_core_warn(epriv->mdev, "QPN to netdev delete from HT failed\n");
+ return -EINVAL;
+ }
+
+ spin_lock_bh(&ht->ht_lock);
+ hlist_del_init(&node->hlist);
+ spin_unlock_bh(&ht->ht_lock);
+ kfree(node);
+
+ return 0;
+}
+
+struct net_device *mlx5i_pkey_get_netdev(struct net_device *netdev, u32 qpn)
+{
+ struct mlx5i_priv *ipriv = netdev_priv(netdev);
+ struct qpn_to_netdev *node;
+
+ node = mlx5i_find_qpn_to_netdev_node(ipriv->qpn_htbl->buckets, qpn);
+ if (!node)
+ return NULL;
+
+ return node->netdev;
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev);
+static int mlx5i_pkey_close(struct net_device *netdev);
+static int mlx5i_pkey_dev_init(struct net_device *dev);
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev);
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu);
+
+static const struct net_device_ops mlx5i_pkey_netdev_ops = {
+ .ndo_open = mlx5i_pkey_open,
+ .ndo_stop = mlx5i_pkey_close,
+ .ndo_init = mlx5i_pkey_dev_init,
+ .ndo_uninit = mlx5i_pkey_dev_cleanup,
+ .ndo_change_mtu = mlx5i_pkey_change_mtu,
+};
+
+/* Child NDOs */
+static int mlx5i_pkey_dev_init(struct net_device *dev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5i_priv *ipriv, *parent_ipriv;
+ struct net_device *parent_dev;
+ int parent_ifindex;
+
+ ipriv = priv->ppriv;
+
+ /* Get QPN to netdevice hash table from parent */
+ parent_ifindex = dev->netdev_ops->ndo_get_iflink(dev);
+ parent_dev = dev_get_by_index(dev_net(dev), parent_ifindex);
+ if (!parent_dev) {
+ mlx5_core_warn(priv->mdev, "failed to get parent device\n");
+ return -EINVAL;
+ }
+
+ parent_ipriv = netdev_priv(parent_dev);
+ ipriv->qpn_htbl = parent_ipriv->qpn_htbl;
+ dev_put(parent_dev);
+
+ return mlx5i_dev_init(dev);
+}
+
+static void mlx5i_pkey_dev_cleanup(struct net_device *netdev)
+{
+ return mlx5i_dev_cleanup(netdev);
+}
+
+static int mlx5i_pkey_open(struct net_device *netdev)
+{
+ struct mlx5e_priv *epriv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = epriv->ppriv;
+ struct mlx5_core_dev *mdev = epriv->mdev;
+ int err;
+
+ mutex_lock(&epriv->state_lock);
+
+ set_bit(MLX5E_STATE_OPENED, &epriv->state);
+
+ err = mlx5i_init_underlay_qp(epriv);
+ if (err) {
+ mlx5_core_warn(mdev, "prepare child underlay qp state failed, %d\n", err);
+ goto err_release_lock;
+ }
+
+ err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ if (err) {
+ mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err);
+ goto err_unint_underlay_qp;
+ }
+
+ err = mlx5e_create_tis(mdev, 0 /* tc */, ipriv->qp.qpn, &epriv->tisn[0]);
+ if (err) {
+ mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
+ goto err_remove_rx_uderlay_qp;
+ }
+
+ err = mlx5e_open_channels(epriv, &epriv->channels);
+ if (err) {
+ mlx5_core_warn(mdev, "opening child channels failed, %d\n", err);
+ goto err_clear_state_opened_flag;
+ }
+ mlx5e_refresh_tirs(epriv, false);
+ mlx5e_activate_priv_channels(epriv);
+ mutex_unlock(&epriv->state_lock);
+
+ return 0;
+
+err_clear_state_opened_flag:
+ mlx5e_destroy_tis(mdev, epriv->tisn[0]);
+err_remove_rx_uderlay_qp:
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+err_unint_underlay_qp:
+ mlx5i_uninit_underlay_qp(epriv);
+err_release_lock:
+ clear_bit(MLX5E_STATE_OPENED, &epriv->state);
+ mutex_unlock(&epriv->state_lock);
+ return err;
+}
+
+static int mlx5i_pkey_close(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ struct mlx5_core_dev *mdev = priv->mdev;
+
+ mutex_lock(&priv->state_lock);
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ goto unlock;
+
+ clear_bit(MLX5E_STATE_OPENED, &priv->state);
+
+ netif_carrier_off(priv->netdev);
+ mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+ mlx5i_uninit_underlay_qp(priv);
+ mlx5e_deactivate_priv_channels(priv);
+ mlx5e_close_channels(&priv->channels);
+ mlx5e_destroy_tis(mdev, priv->tisn[0]);
+unlock:
+ mutex_unlock(&priv->state_lock);
+ return 0;
+}
+
+static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ mutex_lock(&priv->state_lock);
+ netdev->mtu = new_mtu;
+ mutex_unlock(&priv->state_lock);
+
+ return 0;
+}
+
+/* Called directly after IPoIB netdevice was created to initialize SW structs */
+static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+
+ mlx5i_init(mdev, netdev, profile, ppriv);
+
+ /* Override parent ndo */
+ netdev->netdev_ops = &mlx5i_pkey_netdev_ops;
+
+ /* Set child limited ethtool support */
+ netdev->ethtool_ops = &mlx5i_pkey_ethtool_ops;
+
+ /* Use dummy rqs */
+ priv->channels.params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+}
+
+/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
+static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
+{
+ /* Do nothing .. */
+}
+
+static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+ int err;
+
+ err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+ if (err) {
+ mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv)
+{
+ struct mlx5i_priv *ipriv = priv->ppriv;
+
+ mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+}
+
+static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv)
+{
+ /* Since the rx resources are shared between child and parent, the
+ * parent interface is taking care of rx resource allocation and init
+ */
+ return 0;
+}
+
+static void mlx5i_pkey_cleanup_rx(struct mlx5e_priv *priv)
+{
+ /* Since the rx resources are shared between child and parent, the
+ * parent interface is taking care of rx resource free and de-init
+ */
+}
+
+static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
+ .init = mlx5i_pkey_init,
+ .cleanup = mlx5i_pkey_cleanup,
+ .init_tx = mlx5i_pkey_init_tx,
+ .cleanup_tx = mlx5i_pkey_cleanup_tx,
+ .init_rx = mlx5i_pkey_init_rx,
+ .cleanup_rx = mlx5i_pkey_cleanup_rx,
+ .enable = NULL,
+ .disable = NULL,
+ .update_stats = NULL,
+ .max_nch = mlx5e_get_max_num_channels,
+ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
+ .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
+ .max_tc = MLX5I_MAX_NUM_TC,
+};
+
+const struct mlx5e_profile *mlx5i_pkey_get_profile(void)
+{
+ return &mlx5i_pkey_nic_profile;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
new file mode 100644
index 000000000000..fa8aed62b231
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -0,0 +1,525 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/clocksource.h>
+#include "en.h"
+
+enum {
+ MLX5_CYCLES_SHIFT = 23
+};
+
+enum {
+ MLX5_PIN_MODE_IN = 0x0,
+ MLX5_PIN_MODE_OUT = 0x1,
+};
+
+enum {
+ MLX5_OUT_PATTERN_PULSE = 0x0,
+ MLX5_OUT_PATTERN_PERIODIC = 0x1,
+};
+
+enum {
+ MLX5_EVENT_MODE_DISABLE = 0x0,
+ MLX5_EVENT_MODE_REPETETIVE = 0x1,
+ MLX5_EVENT_MODE_ONCE_TILL_ARM = 0x2,
+};
+
+enum {
+ MLX5_MTPPS_FS_ENABLE = BIT(0x0),
+ MLX5_MTPPS_FS_PATTERN = BIT(0x2),
+ MLX5_MTPPS_FS_PIN_MODE = BIT(0x3),
+ MLX5_MTPPS_FS_TIME_STAMP = BIT(0x4),
+ MLX5_MTPPS_FS_OUT_PULSE_DURATION = BIT(0x5),
+ MLX5_MTPPS_FS_ENH_OUT_PER_ADJ = BIT(0x7),
+};
+
+static u64 read_internal_timer(const struct cyclecounter *cc)
+{
+ struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles);
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
+
+ return mlx5_read_internal_timer(mdev) & cc->mask;
+}
+
+static void mlx5_pps_out(struct work_struct *work)
+{
+ struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
+ out_work);
+ struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock,
+ pps_info);
+ struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev,
+ clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ unsigned long flags;
+ int i;
+
+ for (i = 0; i < clock->ptp_info.n_pins; i++) {
+ u64 tstart;
+
+ write_lock_irqsave(&clock->lock, flags);
+ tstart = clock->pps_info.start[i];
+ clock->pps_info.start[i] = 0;
+ write_unlock_irqrestore(&clock->lock, flags);
+ if (!tstart)
+ continue;
+
+ MLX5_SET(mtpps_reg, in, pin, i);
+ MLX5_SET64(mtpps_reg, in, time_stamp, tstart);
+ MLX5_SET(mtpps_reg, in, field_select, MLX5_MTPPS_FS_TIME_STAMP);
+ mlx5_set_mtpps(mdev, in, sizeof(in));
+ }
+}
+
+static void mlx5_timestamp_overflow(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct mlx5_clock *clock = container_of(dwork, struct mlx5_clock,
+ overflow_work);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_read(&clock->tc);
+ write_unlock_irqrestore(&clock->lock, flags);
+ schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
+}
+
+static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ u64 ns = timespec64_to_ns(ts);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_init(&clock->tc, &clock->cycles, ns);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ u64 ns;
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ ns = timecounter_read(&clock->tc);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ *ts = ns_to_timespec64(ns);
+
+ return 0;
+}
+
+static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+ unsigned long flags;
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_adjtime(&clock->tc, delta);
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+ u64 adj;
+ u32 diff;
+ unsigned long flags;
+ int neg_adj = 0;
+ struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock,
+ ptp_info);
+
+ if (delta < 0) {
+ neg_adj = 1;
+ delta = -delta;
+ }
+
+ adj = clock->nominal_c_mult;
+ adj *= delta;
+ diff = div_u64(adj, 1000000000ULL);
+
+ write_lock_irqsave(&clock->lock, flags);
+ timecounter_read(&clock->tc);
+ clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
+ clock->nominal_c_mult + diff;
+ write_unlock_irqrestore(&clock->lock, flags);
+
+ return 0;
+}
+
+static int mlx5_extts_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev =
+ container_of(clock, struct mlx5_core_dev, clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ u32 field_select = 0;
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->extts.index >= clock->ptp_info.n_pins)
+ return -EINVAL;
+
+ if (on) {
+ pin = ptp_find_pin(clock->ptp, PTP_PF_EXTTS, rq->extts.index);
+ if (pin < 0)
+ return -EBUSY;
+ pin_mode = MLX5_PIN_MODE_IN;
+ pattern = !!(rq->extts.flags & PTP_FALLING_EDGE);
+ field_select = MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_ENABLE;
+ } else {
+ pin = rq->extts.index;
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ }
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ return mlx5_set_mtppse(mdev, pin, 0,
+ MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_perout_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+ struct mlx5_core_dev *mdev =
+ container_of(clock, struct mlx5_core_dev, clock);
+ u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+ u64 nsec_now, nsec_delta, time_stamp = 0;
+ u64 cycles_now, cycles_delta;
+ struct timespec64 ts;
+ unsigned long flags;
+ u32 field_select = 0;
+ u8 pin_mode = 0;
+ u8 pattern = 0;
+ int pin = -1;
+ int err = 0;
+ s64 ns;
+
+ if (!MLX5_PPS_CAP(mdev))
+ return -EOPNOTSUPP;
+
+ if (rq->perout.index >= clock->ptp_info.n_pins)
+ return -EINVAL;
+
+ if (on) {
+ pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT,
+ rq->perout.index);
+ if (pin < 0)
+ return -EBUSY;
+
+ pin_mode = MLX5_PIN_MODE_OUT;
+ pattern = MLX5_OUT_PATTERN_PERIODIC;
+ ts.tv_sec = rq->perout.period.sec;
+ ts.tv_nsec = rq->perout.period.nsec;
+ ns = timespec64_to_ns(&ts);
+
+ if ((ns >> 1) != 500000000LL)
+ return -EINVAL;
+
+ ts.tv_sec = rq->perout.start.sec;
+ ts.tv_nsec = rq->perout.start.nsec;
+ ns = timespec64_to_ns(&ts);
+ cycles_now = mlx5_read_internal_timer(mdev);
+ write_lock_irqsave(&clock->lock, flags);
+ nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
+ nsec_delta = ns - nsec_now;
+ cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
+ clock->cycles.mult);
+ write_unlock_irqrestore(&clock->lock, flags);
+ time_stamp = cycles_now + cycles_delta;
+ field_select = MLX5_MTPPS_FS_PIN_MODE |
+ MLX5_MTPPS_FS_PATTERN |
+ MLX5_MTPPS_FS_ENABLE |
+ MLX5_MTPPS_FS_TIME_STAMP;
+ } else {
+ pin = rq->perout.index;
+ field_select = MLX5_MTPPS_FS_ENABLE;
+ }
+
+ MLX5_SET(mtpps_reg, in, pin, pin);
+ MLX5_SET(mtpps_reg, in, pin_mode, pin_mode);
+ MLX5_SET(mtpps_reg, in, pattern, pattern);
+ MLX5_SET(mtpps_reg, in, enable, on);
+ MLX5_SET64(mtpps_reg, in, time_stamp, time_stamp);
+ MLX5_SET(mtpps_reg, in, field_select, field_select);
+
+ err = mlx5_set_mtpps(mdev, in, sizeof(in));
+ if (err)
+ return err;
+
+ return mlx5_set_mtppse(mdev, pin, 0,
+ MLX5_EVENT_MODE_REPETETIVE & on);
+}
+
+static int mlx5_pps_configure(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ struct mlx5_clock *clock =
+ container_of(ptp, struct mlx5_clock, ptp_info);
+
+ clock->pps_info.enabled = !!on;
+ return 0;
+}
+
+static int mlx5_ptp_enable(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq,
+ int on)
+{
+ switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ return mlx5_extts_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PEROUT:
+ return mlx5_perout_configure(ptp, rq, on);
+ case PTP_CLK_REQ_PPS:
+ return mlx5_pps_configure(ptp, rq, on);
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int mlx5_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+ enum ptp_pin_function func, unsigned int chan)
+{
+ return (func == PTP_PF_PHYSYNC) ? -EOPNOTSUPP : 0;
+}
+
+static const struct ptp_clock_info mlx5_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "mlx5_p2p",
+ .max_adj = 100000000,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .n_pins = 0,
+ .pps = 0,
+ .adjfreq = mlx5_ptp_adjfreq,
+ .adjtime = mlx5_ptp_adjtime,
+ .gettime64 = mlx5_ptp_gettime,
+ .settime64 = mlx5_ptp_settime,
+ .enable = NULL,
+ .verify = NULL,
+};
+
+static int mlx5_init_pin_config(struct mlx5_clock *clock)
+{
+ int i;
+
+ clock->ptp_info.pin_config =
+ kzalloc(sizeof(*clock->ptp_info.pin_config) *
+ clock->ptp_info.n_pins, GFP_KERNEL);
+ if (!clock->ptp_info.pin_config)
+ return -ENOMEM;
+ clock->ptp_info.enable = mlx5_ptp_enable;
+ clock->ptp_info.verify = mlx5_ptp_verify;
+ clock->ptp_info.pps = 1;
+
+ for (i = 0; i < clock->ptp_info.n_pins; i++) {
+ snprintf(clock->ptp_info.pin_config[i].name,
+ sizeof(clock->ptp_info.pin_config[i].name),
+ "mlx5_pps%d", i);
+ clock->ptp_info.pin_config[i].index = i;
+ clock->ptp_info.pin_config[i].func = PTP_PF_NONE;
+ clock->ptp_info.pin_config[i].chan = i;
+ }
+
+ return 0;
+}
+
+static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0};
+
+ mlx5_query_mtpps(mdev, out, sizeof(out));
+
+ clock->ptp_info.n_pins = MLX5_GET(mtpps_reg, out,
+ cap_number_of_pps_pins);
+ clock->ptp_info.n_ext_ts = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_in_pins);
+ clock->ptp_info.n_per_out = MLX5_GET(mtpps_reg, out,
+ cap_max_num_of_pps_out_pins);
+
+ clock->pps_info.pin_caps[0] = MLX5_GET(mtpps_reg, out, cap_pin_0_mode);
+ clock->pps_info.pin_caps[1] = MLX5_GET(mtpps_reg, out, cap_pin_1_mode);
+ clock->pps_info.pin_caps[2] = MLX5_GET(mtpps_reg, out, cap_pin_2_mode);
+ clock->pps_info.pin_caps[3] = MLX5_GET(mtpps_reg, out, cap_pin_3_mode);
+ clock->pps_info.pin_caps[4] = MLX5_GET(mtpps_reg, out, cap_pin_4_mode);
+ clock->pps_info.pin_caps[5] = MLX5_GET(mtpps_reg, out, cap_pin_5_mode);
+ clock->pps_info.pin_caps[6] = MLX5_GET(mtpps_reg, out, cap_pin_6_mode);
+ clock->pps_info.pin_caps[7] = MLX5_GET(mtpps_reg, out, cap_pin_7_mode);
+}
+
+void mlx5_pps_event(struct mlx5_core_dev *mdev,
+ struct mlx5_eqe *eqe)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ struct ptp_clock_event ptp_event;
+ struct timespec64 ts;
+ u64 nsec_now, nsec_delta;
+ u64 cycles_now, cycles_delta;
+ int pin = eqe->data.pps.pin;
+ s64 ns;
+ unsigned long flags;
+
+ switch (clock->ptp_info.pin_config[pin].func) {
+ case PTP_PF_EXTTS:
+ if (clock->pps_info.enabled) {
+ ptp_event.type = PTP_CLOCK_PPSUSR;
+ ptp_event.pps_times.ts_real = ns_to_timespec64(eqe->data.pps.time_stamp);
+ } else {
+ ptp_event.type = PTP_CLOCK_EXTTS;
+ }
+ ptp_clock_event(clock->ptp, &ptp_event);
+ break;
+ case PTP_PF_PEROUT:
+ mlx5_ptp_gettime(&clock->ptp_info, &ts);
+ cycles_now = mlx5_read_internal_timer(mdev);
+ ts.tv_sec += 1;
+ ts.tv_nsec = 0;
+ ns = timespec64_to_ns(&ts);
+ write_lock_irqsave(&clock->lock, flags);
+ nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
+ nsec_delta = ns - nsec_now;
+ cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
+ clock->cycles.mult);
+ clock->pps_info.start[pin] = cycles_now + cycles_delta;
+ schedule_work(&clock->pps_info.out_work);
+ write_unlock_irqrestore(&clock->lock, flags);
+ break;
+ default:
+ mlx5_core_err(mdev, " Unhandled event\n");
+ }
+}
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+ u64 ns;
+ u64 frac = 0;
+ u32 dev_freq;
+
+ dev_freq = MLX5_CAP_GEN(mdev, device_frequency_khz);
+ if (!dev_freq) {
+ mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+ return;
+ }
+ rwlock_init(&clock->lock);
+ clock->cycles.read = read_internal_timer;
+ clock->cycles.shift = MLX5_CYCLES_SHIFT;
+ clock->cycles.mult = clocksource_khz2mult(dev_freq,
+ clock->cycles.shift);
+ clock->nominal_c_mult = clock->cycles.mult;
+ clock->cycles.mask = CLOCKSOURCE_MASK(41);
+
+ timecounter_init(&clock->tc, &clock->cycles,
+ ktime_to_ns(ktime_get_real()));
+
+ /* Calculate period in seconds to call the overflow watchdog - to make
+ * sure counter is checked at least once every wrap around.
+ */
+ ns = cyclecounter_cyc2ns(&clock->cycles, clock->cycles.mask,
+ frac, &frac);
+ do_div(ns, NSEC_PER_SEC / 2 / HZ);
+ clock->overflow_period = ns;
+
+ INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
+ INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
+ if (clock->overflow_period)
+ schedule_delayed_work(&clock->overflow_work, 0);
+ else
+ mlx5_core_warn(mdev, "invalid overflow period, overflow_work is not scheduled\n");
+
+ /* Configure the PHC */
+ clock->ptp_info = mlx5_ptp_clock_info;
+
+ /* Initialize 1PPS data structures */
+ if (MLX5_PPS_CAP(mdev))
+ mlx5_get_pps_caps(mdev);
+ if (clock->ptp_info.n_pins)
+ mlx5_init_pin_config(clock);
+
+ clock->ptp = ptp_clock_register(&clock->ptp_info,
+ &mdev->pdev->dev);
+ if (IS_ERR(clock->ptp)) {
+ mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n",
+ PTR_ERR(clock->ptp));
+ clock->ptp = NULL;
+ }
+}
+
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_clock *clock = &mdev->clock;
+
+ if (!MLX5_CAP_GEN(mdev, device_frequency_khz))
+ return;
+
+ if (clock->ptp) {
+ ptp_clock_unregister(clock->ptp);
+ clock->ptp = NULL;
+ }
+
+ cancel_work_sync(&clock->pps_info.out_work);
+ cancel_delayed_work_sync(&clock->overflow_work);
+ kfree(clock->ptp_info.pin_config);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
new file mode 100644
index 000000000000..a8eecedd46c2
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __LIB_CLOCK_H__
+#define __LIB_CLOCK_H__
+
+void mlx5_init_clock(struct mlx5_core_dev *mdev);
+void mlx5_cleanup_clock(struct mlx5_core_dev *mdev);
+
+static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
+ u64 timestamp)
+{
+ u64 nsec;
+
+ read_lock(&clock->lock);
+ nsec = timecounter_cyc2time(&clock->tc, timestamp);
+ read_unlock(&clock->lock);
+
+ return ns_to_ktime(nsec);
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0d2c8dcd6eae..ecbe9fad22d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -59,6 +59,7 @@
#include "lib/mlx5.h"
#include "fpga/core.h"
#include "accel/ipsec.h"
+#include "lib/clock.h"
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
@@ -889,6 +890,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
mlx5_init_reserved_gids(dev);
+ mlx5_init_clock(dev);
+
err = mlx5_init_rl_table(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init rate limiting\n");
@@ -949,6 +952,7 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
mlx5_eswitch_cleanup(dev->priv.eswitch);
mlx5_mpfs_cleanup(dev);
mlx5_cleanup_rl_table(dev);
+ mlx5_cleanup_clock(dev);
mlx5_cleanup_reserved_gids(dev);
mlx5_cleanup_mkey_table(dev);
mlx5_cleanup_srq_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index b7c2900b75f9..8f00de2fe283 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -93,6 +93,7 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param);
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
struct mlx5_pagefault *pfault);
+void mlx5_pps_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force);
void mlx5_disable_device(struct mlx5_core_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 891ff418bb5e..4816504419fc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -17,7 +17,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_kvdl.o spectrum_acl_tcam.o \
spectrum_acl.o spectrum_flower.o \
spectrum_cnt.o spectrum_fid.o \
- spectrum_ipip.o
+ spectrum_ipip.o spectrum_acl_flex_actions.o \
+ spectrum_mr.o spectrum_mr_tcam.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
index 5ae110172c22..6a979a09ab72 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c
@@ -399,23 +399,25 @@ u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block)
}
EXPORT_SYMBOL(mlxsw_afa_block_first_set_kvdl_index);
-void mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
+int mlxsw_afa_block_continue(struct mlxsw_afa_block *block)
{
- if (WARN_ON(block->finished))
- return;
+ if (block->finished)
+ return -EINVAL;
mlxsw_afa_set_goto_set(block->cur_set,
MLXSW_AFA_SET_GOTO_BINDING_CMD_NONE, 0);
block->finished = true;
+ return 0;
}
EXPORT_SYMBOL(mlxsw_afa_block_continue);
-void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
+int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id)
{
- if (WARN_ON(block->finished))
- return;
+ if (block->finished)
+ return -EINVAL;
mlxsw_afa_set_goto_set(block->cur_set,
MLXSW_AFA_SET_GOTO_BINDING_CMD_JUMP, group_id);
block->finished = true;
+ return 0;
}
EXPORT_SYMBOL(mlxsw_afa_block_jump);
@@ -674,6 +676,7 @@ enum mlxsw_afa_trapdisc_trap_action {
MLXSW_ITEM32(afa, trapdisc, trap_action, 0x00, 24, 4);
enum mlxsw_afa_trapdisc_forward_action {
+ MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD = 1,
MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD = 3,
};
@@ -712,7 +715,7 @@ int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block)
}
EXPORT_SYMBOL(mlxsw_afa_block_append_drop);
-int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block)
+int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id)
{
char *act = mlxsw_afa_block_append_action(block,
MLXSW_AFA_TRAPDISC_CODE,
@@ -722,11 +725,27 @@ int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block)
return -ENOBUFS;
mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
MLXSW_AFA_TRAPDISC_FORWARD_ACTION_DISCARD,
- MLXSW_TRAP_ID_ACL0);
+ trap_id);
return 0;
}
EXPORT_SYMBOL(mlxsw_afa_block_append_trap);
+int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
+ u16 trap_id)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_TRAPDISC_CODE,
+ MLXSW_AFA_TRAPDISC_SIZE);
+
+ if (!act)
+ return -ENOBUFS;
+ mlxsw_afa_trapdisc_pack(act, MLXSW_AFA_TRAPDISC_TRAP_ACTION_TRAP,
+ MLXSW_AFA_TRAPDISC_FORWARD_ACTION_FORWARD,
+ trap_id);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_trap_and_forward);
+
/* Forwarding Action
* -----------------
* Forwarding Action can be used to implement Policy Based Switching (PBS)
@@ -891,3 +910,74 @@ int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid)
return 0;
}
EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set);
+
+/* MC Routing Action
+ * -----------------
+ * The Multicast router action. Can be used by RMFT_V2 - Router Multicast
+ * Forwarding Table Version 2 Register.
+ */
+
+#define MLXSW_AFA_MCROUTER_CODE 0x10
+#define MLXSW_AFA_MCROUTER_SIZE 2
+
+enum mlxsw_afa_mcrouter_rpf_action {
+ MLXSW_AFA_MCROUTER_RPF_ACTION_NOP,
+ MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP,
+ MLXSW_AFA_MCROUTER_RPF_ACTION_DISCARD_ERROR,
+};
+
+/* afa_mcrouter_rpf_action */
+MLXSW_ITEM32(afa, mcrouter, rpf_action, 0x00, 28, 3);
+
+/* afa_mcrouter_expected_irif */
+MLXSW_ITEM32(afa, mcrouter, expected_irif, 0x00, 0, 16);
+
+/* afa_mcrouter_min_mtu */
+MLXSW_ITEM32(afa, mcrouter, min_mtu, 0x08, 0, 16);
+
+enum mlxsw_afa_mrouter_vrmid {
+ MLXSW_AFA_MCROUTER_VRMID_INVALID,
+ MLXSW_AFA_MCROUTER_VRMID_VALID
+};
+
+/* afa_mcrouter_vrmid
+ * Valid RMID: rigr_rmid_index is used as RMID
+ */
+MLXSW_ITEM32(afa, mcrouter, vrmid, 0x0C, 31, 1);
+
+/* afa_mcrouter_rigr_rmid_index
+ * When the vrmid field is set to invalid, the field is used as pointer to
+ * Router Interface Group (RIGR) Table in the KVD linear.
+ * When the vrmid is set to valid, the field is used as RMID index, ranged
+ * from 0 to max_mid - 1. The index is to the Port Group Table.
+ */
+MLXSW_ITEM32(afa, mcrouter, rigr_rmid_index, 0x0C, 0, 24);
+
+static inline void
+mlxsw_afa_mcrouter_pack(char *payload,
+ enum mlxsw_afa_mcrouter_rpf_action rpf_action,
+ u16 expected_irif, u16 min_mtu,
+ enum mlxsw_afa_mrouter_vrmid vrmid, u32 rigr_rmid_index)
+
+{
+ mlxsw_afa_mcrouter_rpf_action_set(payload, rpf_action);
+ mlxsw_afa_mcrouter_expected_irif_set(payload, expected_irif);
+ mlxsw_afa_mcrouter_min_mtu_set(payload, min_mtu);
+ mlxsw_afa_mcrouter_vrmid_set(payload, vrmid);
+ mlxsw_afa_mcrouter_rigr_rmid_index_set(payload, rigr_rmid_index);
+}
+
+int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
+ u16 expected_irif, u16 min_mtu,
+ bool rmid_valid, u32 kvdl_index)
+{
+ char *act = mlxsw_afa_block_append_action(block,
+ MLXSW_AFA_MCROUTER_CODE,
+ MLXSW_AFA_MCROUTER_SIZE);
+ if (!act)
+ return -ENOBUFS;
+ mlxsw_afa_mcrouter_pack(act, MLXSW_AFA_MCROUTER_RPF_ACTION_TRAP,
+ expected_irif, min_mtu, rmid_valid, kvdl_index);
+ return 0;
+}
+EXPORT_SYMBOL(mlxsw_afa_block_append_mcrouter);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
index f99c341b2497..a8d3314c3a24 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h
@@ -57,10 +57,12 @@ void mlxsw_afa_block_destroy(struct mlxsw_afa_block *block);
int mlxsw_afa_block_commit(struct mlxsw_afa_block *block);
char *mlxsw_afa_block_first_set(struct mlxsw_afa_block *block);
u32 mlxsw_afa_block_first_set_kvdl_index(struct mlxsw_afa_block *block);
-void mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
-void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
+int mlxsw_afa_block_continue(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id);
int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block);
-int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block);
+int mlxsw_afa_block_append_trap(struct mlxsw_afa_block *block, u16 trap_id);
+int mlxsw_afa_block_append_trap_and_forward(struct mlxsw_afa_block *block,
+ u16 trap_id);
int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block,
u8 local_port, bool in_port);
int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
@@ -68,5 +70,8 @@ int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block,
int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block,
u32 counter_index);
int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid);
+int mlxsw_afa_block_append_mcrouter(struct mlxsw_afa_block *block,
+ u16 expected_irif, u16 min_mtu,
+ bool rmid_valid, u32 kvdl_index);
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 4afc8486eb9a..a3f31f425550 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -2142,15 +2142,14 @@ MLXSW_REG_DEFINE(pefa, MLXSW_REG_PEFA_ID, MLXSW_REG_PEFA_LEN);
*/
MLXSW_ITEM32(reg, pefa, index, 0x00, 0, 24);
-#define MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN 0xA8
+#define MLXSW_REG_FLEX_ACTION_SET_LEN 0xA8
/* reg_pefa_flex_action_set
* Action-set to perform when rule is matched.
* Must be zero padded if action set is shorter.
* Access: RW
*/
-MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08,
- MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN);
+MLXSW_ITEM_BUF(reg, pefa, flex_action_set, 0x08, MLXSW_REG_FLEX_ACTION_SET_LEN);
static inline void mlxsw_reg_pefa_pack(char *payload, u32 index,
const char *flex_action_set)
@@ -2243,7 +2242,7 @@ MLXSW_ITEM_BUF(reg, ptce2, mask, 0x80,
* Access: RW
*/
MLXSW_ITEM_BUF(reg, ptce2, flex_action_set, 0xE0,
- MLXSW_REG_PXXX_FLEX_ACTION_SET_LEN);
+ MLXSW_REG_FLEX_ACTION_SET_LEN);
static inline void mlxsw_reg_ptce2_pack(char *payload, bool valid,
enum mlxsw_reg_ptce2_op op,
@@ -3682,12 +3681,15 @@ enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST,
MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS,
MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF,
MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
@@ -3992,6 +3994,12 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1);
*/
MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1);
+/* reg_ritr_ipv4_mc
+ * IPv4 multicast routing enable.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4_mc, 0x00, 27, 1);
+
enum mlxsw_reg_ritr_if_type {
/* VLAN interface. */
MLXSW_REG_RITR_VLAN_IF,
@@ -4049,6 +4057,14 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1);
*/
MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1);
+/* reg_ritr_ipv4_mc_fe
+ * IPv4 Multicast Forwarding Enable.
+ * When disabled, forwarding is blocked but local traffic (traps and IP to me)
+ * will be enabled.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ritr, ipv4_mc_fe, 0x04, 27, 1);
+
/* reg_ritr_lb_en
* Loop-back filter enable for unicast packets.
* If the flag is set then loop-back filter for unicast packets is
@@ -4271,11 +4287,13 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
mlxsw_reg_ritr_enable_set(payload, enable);
mlxsw_reg_ritr_ipv4_set(payload, 1);
mlxsw_reg_ritr_ipv6_set(payload, 1);
+ mlxsw_reg_ritr_ipv4_mc_set(payload, 1);
mlxsw_reg_ritr_type_set(payload, type);
mlxsw_reg_ritr_op_set(payload, op);
mlxsw_reg_ritr_rif_set(payload, rif);
mlxsw_reg_ritr_ipv4_fe_set(payload, 1);
mlxsw_reg_ritr_ipv6_fe_set(payload, 1);
+ mlxsw_reg_ritr_ipv4_mc_fe_set(payload, 1);
mlxsw_reg_ritr_lb_en_set(payload, 1);
mlxsw_reg_ritr_virtual_router_set(payload, vr_id);
mlxsw_reg_ritr_mtu_set(payload, mtu);
@@ -4311,6 +4329,57 @@ mlxsw_reg_ritr_loopback_ipip4_pack(char *payload,
mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip);
}
+/* RTAR - Router TCAM Allocation Register
+ * --------------------------------------
+ * This register is used for allocation of regions in the TCAM table.
+ */
+#define MLXSW_REG_RTAR_ID 0x8004
+#define MLXSW_REG_RTAR_LEN 0x20
+
+MLXSW_REG_DEFINE(rtar, MLXSW_REG_RTAR_ID, MLXSW_REG_RTAR_LEN);
+
+enum mlxsw_reg_rtar_op {
+ MLXSW_REG_RTAR_OP_ALLOCATE,
+ MLXSW_REG_RTAR_OP_RESIZE,
+ MLXSW_REG_RTAR_OP_DEALLOCATE,
+};
+
+/* reg_rtar_op
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, op, 0x00, 28, 4);
+
+enum mlxsw_reg_rtar_key_type {
+ MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST = 1,
+ MLXSW_REG_RTAR_KEY_TYPE_IPV6_MULTICAST = 3
+};
+
+/* reg_rtar_key_type
+ * TCAM key type for the region.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, key_type, 0x00, 0, 8);
+
+/* reg_rtar_region_size
+ * TCAM region size. When allocating/resizing this is the requested
+ * size, the response is the actual size.
+ * Note: Actual size may be larger than requested.
+ * Reserved for op = Deallocate
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rtar, region_size, 0x04, 0, 16);
+
+static inline void mlxsw_reg_rtar_pack(char *payload,
+ enum mlxsw_reg_rtar_op op,
+ enum mlxsw_reg_rtar_key_type key_type,
+ u16 region_size)
+{
+ MLXSW_REG_ZERO(rtar, payload);
+ mlxsw_reg_rtar_op_set(payload, op);
+ mlxsw_reg_rtar_key_type_set(payload, key_type);
+ mlxsw_reg_rtar_region_size_set(payload, region_size);
+}
+
/* RATR - Router Adjacency Table Register
* --------------------------------------
* The RATR register is used to configure the Router Adjacency (next-hop)
@@ -4480,6 +4549,27 @@ MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32);
*/
MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24);
+enum mlxsw_reg_flow_counter_set_type {
+ /* No count */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00,
+ /* Count packets and bytes */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
+ /* Count only packets */
+ MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05,
+};
+
+/* reg_ratr_counter_set_type
+ * Counter set type for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, counter_set_type, 0x28, 24, 8);
+
+/* reg_ratr_counter_index
+ * Counter index for flow counters
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ratr, counter_index, 0x28, 0, 24);
+
static inline void
mlxsw_reg_ratr_pack(char *payload,
enum mlxsw_reg_ratr_op op, bool valid,
@@ -4507,6 +4597,20 @@ static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip)
mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip);
}
+static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index,
+ bool counter_enable)
+{
+ enum mlxsw_reg_flow_counter_set_type set_type;
+
+ if (counter_enable)
+ set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES;
+ else
+ set_type = MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT;
+
+ mlxsw_reg_ratr_counter_index_set(payload, counter_index);
+ mlxsw_reg_ratr_counter_set_type_set(payload, set_type);
+}
+
/* RICNT - Router Interface Counter Register
* -----------------------------------------
* The RICNT register retrieves per port performance counters
@@ -4630,6 +4734,65 @@ static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index,
MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC);
}
+/* RRCR - Router Rules Copy Register Layout
+ * ----------------------------------------
+ * This register is used for moving and copying route entry rules.
+ */
+#define MLXSW_REG_RRCR_ID 0x800F
+#define MLXSW_REG_RRCR_LEN 0x24
+
+MLXSW_REG_DEFINE(rrcr, MLXSW_REG_RRCR_ID, MLXSW_REG_RRCR_LEN);
+
+enum mlxsw_reg_rrcr_op {
+ /* Move rules */
+ MLXSW_REG_RRCR_OP_MOVE,
+ /* Copy rules */
+ MLXSW_REG_RRCR_OP_COPY,
+};
+
+/* reg_rrcr_op
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rrcr, op, 0x00, 28, 4);
+
+/* reg_rrcr_offset
+ * Offset within the region from which to copy/move.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, offset, 0x00, 0, 16);
+
+/* reg_rrcr_size
+ * The number of rules to copy/move.
+ * Access: WO
+ */
+MLXSW_ITEM32(reg, rrcr, size, 0x04, 0, 16);
+
+/* reg_rrcr_table_id
+ * Identifier of the table on which to perform the operation. Encoding is the
+ * same as in RTAR.key_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, table_id, 0x10, 0, 4);
+
+/* reg_rrcr_dest_offset
+ * Offset within the region to which to copy/move
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rrcr, dest_offset, 0x20, 0, 16);
+
+static inline void mlxsw_reg_rrcr_pack(char *payload, enum mlxsw_reg_rrcr_op op,
+ u16 offset, u16 size,
+ enum mlxsw_reg_rtar_key_type table_id,
+ u16 dest_offset)
+{
+ MLXSW_REG_ZERO(rrcr, payload);
+ mlxsw_reg_rrcr_op_set(payload, op);
+ mlxsw_reg_rrcr_offset_set(payload, offset);
+ mlxsw_reg_rrcr_size_set(payload, size);
+ mlxsw_reg_rrcr_table_id_set(payload, table_id);
+ mlxsw_reg_rrcr_dest_offset_set(payload, dest_offset);
+}
+
/* RALTA - Router Algorithmic LPM Tree Allocation Register
* -------------------------------------------------------
* RALTA is used to allocate the LPM trees of the SHSPM method.
@@ -5169,15 +5332,6 @@ enum mlxsw_reg_rauht_trap_id {
*/
MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9);
-enum mlxsw_reg_flow_counter_set_type {
- /* No count */
- MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00,
- /* Count packets and bytes */
- MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03,
- /* Count only packets */
- MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05,
-};
-
/* reg_rauht_counter_set_type
* Counter set type for flow counters
* Access: RW
@@ -5596,6 +5750,229 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif,
mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key);
}
+/* RIGR-V2 - Router Interface Group Register Version 2
+ * ---------------------------------------------------
+ * The RIGR_V2 register is used to add, remove and query egress interface list
+ * of a multicast forwarding entry.
+ */
+#define MLXSW_REG_RIGR2_ID 0x8023
+#define MLXSW_REG_RIGR2_LEN 0xB0
+
+#define MLXSW_REG_RIGR2_MAX_ERIFS 32
+
+MLXSW_REG_DEFINE(rigr2, MLXSW_REG_RIGR2_ID, MLXSW_REG_RIGR2_LEN);
+
+/* reg_rigr2_rigr_index
+ * KVD Linear index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rigr2, rigr_index, 0x04, 0, 24);
+
+/* reg_rigr2_vnext
+ * Next RIGR Index is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, vnext, 0x08, 31, 1);
+
+/* reg_rigr2_next_rigr_index
+ * Next RIGR Index. The index is to the KVD linear.
+ * Reserved when vnxet = '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, next_rigr_index, 0x08, 0, 24);
+
+/* reg_rigr2_vrmid
+ * RMID Index is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, vrmid, 0x20, 31, 1);
+
+/* reg_rigr2_rmid_index
+ * RMID Index.
+ * Range 0 .. max_mid - 1
+ * Reserved when vrmid = '0'.
+ * The index is to the Port Group Table (PGT)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rigr2, rmid_index, 0x20, 0, 16);
+
+/* reg_rigr2_erif_entry_v
+ * Egress Router Interface is valid.
+ * Note that low-entries must be set if high-entries are set. For
+ * example: if erif_entry[2].v is set then erif_entry[1].v and
+ * erif_entry[0].v must be set.
+ * Index can be from 0 to cap_mc_erif_list_entries-1
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_v, 0x24, 31, 1, 4, 0, false);
+
+/* reg_rigr2_erif_entry_erif
+ * Egress Router Interface.
+ * Valid range is from 0 to cap_max_router_interfaces - 1
+ * Index can be from 0 to MLXSW_REG_RIGR2_MAX_ERIFS - 1
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, rigr2, erif_entry_erif, 0x24, 0, 16, 4, 0, false);
+
+static inline void mlxsw_reg_rigr2_pack(char *payload, u32 rigr_index,
+ bool vnext, u32 next_rigr_index)
+{
+ MLXSW_REG_ZERO(rigr2, payload);
+ mlxsw_reg_rigr2_rigr_index_set(payload, rigr_index);
+ mlxsw_reg_rigr2_vnext_set(payload, vnext);
+ mlxsw_reg_rigr2_next_rigr_index_set(payload, next_rigr_index);
+ mlxsw_reg_rigr2_vrmid_set(payload, 0);
+ mlxsw_reg_rigr2_rmid_index_set(payload, 0);
+}
+
+static inline void mlxsw_reg_rigr2_erif_entry_pack(char *payload, int index,
+ bool v, u16 erif)
+{
+ mlxsw_reg_rigr2_erif_entry_v_set(payload, index, v);
+ mlxsw_reg_rigr2_erif_entry_erif_set(payload, index, erif);
+}
+
+/* RMFT-V2 - Router Multicast Forwarding Table Version 2 Register
+ * --------------------------------------------------------------
+ * The RMFT_V2 register is used to configure and query the multicast table.
+ */
+#define MLXSW_REG_RMFT2_ID 0x8027
+#define MLXSW_REG_RMFT2_LEN 0x174
+
+MLXSW_REG_DEFINE(rmft2, MLXSW_REG_RMFT2_ID, MLXSW_REG_RMFT2_LEN);
+
+/* reg_rmft2_v
+ * Valid
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, v, 0x00, 31, 1);
+
+enum mlxsw_reg_rmft2_type {
+ MLXSW_REG_RMFT2_TYPE_IPV4,
+ MLXSW_REG_RMFT2_TYPE_IPV6
+};
+
+/* reg_rmft2_type
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rmft2, type, 0x00, 28, 2);
+
+enum mlxsw_sp_reg_rmft2_op {
+ /* For Write:
+ * Write operation. Used to write a new entry to the table. All RW
+ * fields are relevant for new entry. Activity bit is set for new
+ * entries - Note write with v (Valid) 0 will delete the entry.
+ * For Query:
+ * Read operation
+ */
+ MLXSW_REG_RMFT2_OP_READ_WRITE,
+};
+
+/* reg_rmft2_op
+ * Operation.
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, rmft2, op, 0x00, 20, 2);
+
+/* reg_rmft2_a
+ * Activity. Set for new entries. Set if a packet lookup has hit on the specific
+ * entry.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, rmft2, a, 0x00, 16, 1);
+
+/* reg_rmft2_offset
+ * Offset within the multicast forwarding table to write to.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, rmft2, offset, 0x00, 0, 16);
+
+/* reg_rmft2_virtual_router
+ * Virtual Router ID. Range from 0..cap_max_virtual_routers-1
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, virtual_router, 0x04, 0, 16);
+
+enum mlxsw_reg_rmft2_irif_mask {
+ MLXSW_REG_RMFT2_IRIF_MASK_IGNORE,
+ MLXSW_REG_RMFT2_IRIF_MASK_COMPARE
+};
+
+/* reg_rmft2_irif_mask
+ * Ingress RIF mask.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, irif_mask, 0x08, 24, 1);
+
+/* reg_rmft2_irif
+ * Ingress RIF index.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, irif, 0x08, 0, 16);
+
+/* reg_rmft2_dip4
+ * Destination IPv4 address
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, dip4, 0x1C, 0, 32);
+
+/* reg_rmft2_dip4_mask
+ * A bit that is set directs the TCAM to compare the corresponding bit in key. A
+ * bit that is clear directs the TCAM to ignore the corresponding bit in key.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, dip4_mask, 0x2C, 0, 32);
+
+/* reg_rmft2_sip4
+ * Source IPv4 address
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, sip4, 0x3C, 0, 32);
+
+/* reg_rmft2_sip4_mask
+ * A bit that is set directs the TCAM to compare the corresponding bit in key. A
+ * bit that is clear directs the TCAM to ignore the corresponding bit in key.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, rmft2, sip4_mask, 0x4C, 0, 32);
+
+/* reg_rmft2_flexible_action_set
+ * ACL action set. The only supported action types in this field and in any
+ * action-set pointed from here are as follows:
+ * 00h: ACTION_NULL
+ * 01h: ACTION_MAC_TTL, only TTL configuration is supported.
+ * 03h: ACTION_TRAP
+ * 06h: ACTION_QOS
+ * 08h: ACTION_POLICING_MONITORING
+ * 10h: ACTION_ROUTER_MC
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, rmft2, flexible_action_set, 0x80,
+ MLXSW_REG_FLEX_ACTION_SET_LEN);
+
+static inline void
+mlxsw_reg_rmft2_ipv4_pack(char *payload, bool v, u16 offset, u16 virtual_router,
+ enum mlxsw_reg_rmft2_irif_mask irif_mask, u16 irif,
+ u32 dip4, u32 dip4_mask, u32 sip4, u32 sip4_mask,
+ const char *flexible_action_set)
+{
+ MLXSW_REG_ZERO(rmft2, payload);
+ mlxsw_reg_rmft2_v_set(payload, v);
+ mlxsw_reg_rmft2_type_set(payload, MLXSW_REG_RMFT2_TYPE_IPV4);
+ mlxsw_reg_rmft2_op_set(payload, MLXSW_REG_RMFT2_OP_READ_WRITE);
+ mlxsw_reg_rmft2_offset_set(payload, offset);
+ mlxsw_reg_rmft2_virtual_router_set(payload, virtual_router);
+ mlxsw_reg_rmft2_irif_mask_set(payload, irif_mask);
+ mlxsw_reg_rmft2_irif_set(payload, irif);
+ mlxsw_reg_rmft2_dip4_set(payload, dip4);
+ mlxsw_reg_rmft2_dip4_mask_set(payload, dip4_mask);
+ mlxsw_reg_rmft2_sip4_set(payload, sip4);
+ mlxsw_reg_rmft2_sip4_mask_set(payload, sip4_mask);
+ if (flexible_action_set)
+ mlxsw_reg_rmft2_flexible_action_set_memcpy_to(payload,
+ flexible_action_set);
+}
+
/* MFCR - Management Fan Control Register
* --------------------------------------
* This register controls the settings of the Fan Speed PWM mechanism.
@@ -6886,9 +7263,11 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(hpkt),
MLXSW_REG(rgcr),
MLXSW_REG(ritr),
+ MLXSW_REG(rtar),
MLXSW_REG(ratr),
MLXSW_REG(rtdp),
MLXSW_REG(ricnt),
+ MLXSW_REG(rrcr),
MLXSW_REG(ralta),
MLXSW_REG(ralst),
MLXSW_REG(raltb),
@@ -6896,6 +7275,8 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(rauht),
MLXSW_REG(raleu),
MLXSW_REG(rauhtd),
+ MLXSW_REG(rigr2),
+ MLXSW_REG(rmft2),
MLXSW_REG(mfcr),
MLXSW_REG(mfsc),
MLXSW_REG(mfsm),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 9556d934714b..087aad52c195 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -63,6 +63,7 @@ enum mlxsw_res_id {
MLXSW_RES_ID_MAX_CPU_POLICERS,
MLXSW_RES_ID_MAX_VRS,
MLXSW_RES_ID_MAX_RIFS,
+ MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES,
MLXSW_RES_ID_MAX_LPM_TREES,
/* Internal resources.
@@ -100,6 +101,7 @@ static u16 mlxsw_res_ids[] = {
[MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
[MLXSW_RES_ID_MAX_VRS] = 0x2C01,
[MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
+ [MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10,
[MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30,
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 696b99e65a5a..4d73a6f7759e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -53,6 +53,7 @@
#include <linux/notifier.h>
#include <linux/dcbnl.h>
#include <linux/inetdevice.h>
+#include <linux/netlink.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
@@ -69,6 +70,7 @@
#include "txheader.h"
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
+#include "spectrum_acl_flex_actions.h"
#include "../mlxfw/mlxfw.h"
#define MLXSW_FWREV_MAJOR 13
@@ -1695,17 +1697,9 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
}
static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
- struct tc_cls_matchall_offload *f)
+ struct tc_cls_matchall_offload *f,
+ bool ingress)
{
- bool ingress;
-
- if (is_classid_clsact_ingress(f->common.classid))
- ingress = true;
- else if (is_classid_clsact_egress(f->common.classid))
- ingress = false;
- else
- return -EOPNOTSUPP;
-
if (f->common.chain_index)
return -EOPNOTSUPP;
@@ -1723,17 +1717,9 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
static int
mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
- struct tc_cls_flower_offload *f)
+ struct tc_cls_flower_offload *f,
+ bool ingress)
{
- bool ingress;
-
- if (is_classid_clsact_ingress(f->common.classid))
- ingress = true;
- else if (is_classid_clsact_egress(f->common.classid))
- ingress = false;
- else
- return -EOPNOTSUPP;
-
switch (f->command) {
case TC_CLSFLOWER_REPLACE:
return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f);
@@ -1747,16 +1733,67 @@ mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port,
}
}
+static int mlxsw_sp_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv, bool ingress)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data,
+ ingress);
+ case TC_SETUP_CLSFLOWER:
+ return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data,
+ ingress);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int mlxsw_sp_setup_tc_block_cb_ig(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ return mlxsw_sp_setup_tc_block_cb(type, type_data, cb_priv, true);
+}
+
+static int mlxsw_sp_setup_tc_block_cb_eg(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ return mlxsw_sp_setup_tc_block_cb(type, type_data, cb_priv, false);
+}
+
+static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_block_offload *f)
+{
+ tc_setup_cb_t *cb;
+
+ if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ cb = mlxsw_sp_setup_tc_block_cb_ig;
+ else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ cb = mlxsw_sp_setup_tc_block_cb_eg;
+ else
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, cb, mlxsw_sp_port,
+ mlxsw_sp_port);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, cb, mlxsw_sp_port);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
switch (type) {
- case TC_SETUP_CLSMATCHALL:
- return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data);
- case TC_SETUP_CLSFLOWER:
- return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data);
+ case TC_SETUP_BLOCK:
+ return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data);
default:
return -EOPNOTSUPP;
}
@@ -3311,6 +3348,14 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
}
+static void mlxsw_sp_rx_listener_mr_mark_func(struct sk_buff *skb,
+ u8 local_port, void *priv)
+{
+ skb->offload_mr_fwd_mark = 1;
+ skb->offload_fwd_mark = 1;
+ return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
+}
+
static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port,
void *priv)
{
@@ -3354,6 +3399,10 @@ out:
MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action, \
_is_ctrl, SP_##_trap_group, DISCARD)
+#define MLXSW_SP_RXL_MR_MARK(_trap_id, _action, _trap_group, _is_ctrl) \
+ MLXSW_RXL(mlxsw_sp_rx_listener_mr_mark_func, _trap_id, _action, \
+ _is_ctrl, SP_##_trap_group, DISCARD)
+
#define MLXSW_SP_EVENTL(_func, _trap_id) \
MLXSW_EVENTL(_func, _trap_id, SP_EVENT)
@@ -3420,6 +3469,11 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
false, SP_IP2ME, DISCARD),
/* ACL trap */
MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false),
+ /* Multicast Router Traps */
+ MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false),
+ MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
+ MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
+ MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
};
static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
@@ -3445,6 +3499,8 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
rate = 128;
burst_size = 7;
break;
@@ -3460,6 +3516,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
rate = 1024;
burst_size = 7;
break;
@@ -3505,6 +3562,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
priority = 5;
tc = 5;
break;
@@ -3521,12 +3579,14 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
break;
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_RPF:
priority = 2;
tc = 2;
break;
case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
priority = 1;
tc = 1;
break;
@@ -3642,6 +3702,9 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
}
+static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
+
static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
const struct mlxsw_bus_info *mlxsw_bus_info)
{
@@ -3693,12 +3756,34 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_switchdev_init;
}
+ err = mlxsw_sp_counter_pool_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n");
+ goto err_counter_pool_init;
+ }
+
+ err = mlxsw_sp_afa_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize ACL actions\n");
+ goto err_afa_init;
+ }
+
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
goto err_router_init;
}
+ /* Initialize netdevice notifier after router is initialized, so that
+ * the event handler can use router structures.
+ */
+ mlxsw_sp->netdevice_nb.notifier_call = mlxsw_sp_netdevice_event;
+ err = register_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to register netdev notifier\n");
+ goto err_netdev_notifier;
+ }
+
err = mlxsw_sp_span_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to init span system\n");
@@ -3711,12 +3796,6 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_acl_init;
}
- err = mlxsw_sp_counter_pool_init(mlxsw_sp);
- if (err) {
- dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n");
- goto err_counter_pool_init;
- }
-
err = mlxsw_sp_dpipe_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n");
@@ -3734,14 +3813,18 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
err_ports_create:
mlxsw_sp_dpipe_fini(mlxsw_sp);
err_dpipe_init:
- mlxsw_sp_counter_pool_fini(mlxsw_sp);
-err_counter_pool_init:
mlxsw_sp_acl_fini(mlxsw_sp);
err_acl_init:
mlxsw_sp_span_fini(mlxsw_sp);
err_span_init:
+ unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
+err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
+ mlxsw_sp_afa_fini(mlxsw_sp);
+err_afa_init:
+ mlxsw_sp_counter_pool_fini(mlxsw_sp);
+err_counter_pool_init:
mlxsw_sp_switchdev_fini(mlxsw_sp);
err_switchdev_init:
mlxsw_sp_lag_fini(mlxsw_sp);
@@ -3760,10 +3843,12 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_dpipe_fini(mlxsw_sp);
- mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_acl_fini(mlxsw_sp);
mlxsw_sp_span_fini(mlxsw_sp);
+ unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_afa_fini(mlxsw_sp);
+ mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
mlxsw_sp_lag_fini(mlxsw_sp);
mlxsw_sp_buffers_fini(mlxsw_sp);
@@ -3986,14 +4071,21 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
static bool
mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
struct net_device *lag_dev,
- struct netdev_lag_upper_info *lag_upper_info)
+ struct netdev_lag_upper_info *lag_upper_info,
+ struct netlink_ext_ack *extack)
{
u16 lag_id;
- if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0)
+ if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
+ NL_SET_ERR_MSG(extack,
+ "spectrum: Exceeded number of supported LAG devices");
return false;
- if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
+ }
+ if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
+ NL_SET_ERR_MSG(extack,
+ "spectrum: LAG device using unsupported Tx type");
return false;
+ }
return true;
}
@@ -4198,6 +4290,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
{
struct netdev_notifier_changeupper_info *info;
struct mlxsw_sp_port *mlxsw_sp_port;
+ struct netlink_ext_ack *extack;
struct net_device *upper_dev;
struct mlxsw_sp *mlxsw_sp;
int err = 0;
@@ -4205,6 +4298,7 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
mlxsw_sp_port = netdev_priv(dev);
mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
info = ptr;
+ extack = netdev_notifier_info_to_extack(&info->info);
switch (event) {
case NETDEV_PRECHANGEUPPER:
@@ -4212,25 +4306,43 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
if (!is_vlan_dev(upper_dev) &&
!netif_is_lag_master(upper_dev) &&
!netif_is_bridge_master(upper_dev) &&
- !netif_is_ovs_master(upper_dev))
+ !netif_is_ovs_master(upper_dev)) {
+ NL_SET_ERR_MSG(extack,
+ "spectrum: Unknown upper device type");
return -EINVAL;
+ }
if (!info->linking)
break;
- if (netdev_has_any_upper_dev(upper_dev))
+ if (netdev_has_any_upper_dev(upper_dev)) {
+ NL_SET_ERR_MSG(extack,
+ "spectrum: Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
+ }
if (netif_is_lag_master(upper_dev) &&
!mlxsw_sp_master_lag_check(mlxsw_sp, upper_dev,
- info->upper_info))
+ info->upper_info, extack))
return -EINVAL;
- if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev))
+ if (netif_is_lag_master(upper_dev) && vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack,
+ "spectrum: Master device is a LAG master and this device has a VLAN");
return -EINVAL;
+ }
if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) &&
- !netif_is_lag_master(vlan_dev_real_dev(upper_dev)))
+ !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) {
+ NL_SET_ERR_MSG(extack,
+ "spectrum: Can not put a VLAN on a LAG port");
return -EINVAL;
- if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev))
+ }
+ if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) {
+ NL_SET_ERR_MSG(extack,
+ "spectrum: Master device is an OVS master and this device has a VLAN");
return -EINVAL;
- if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev))
+ }
+ if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) {
+ NL_SET_ERR_MSG(extack,
+ "spectrum: Can not put a VLAN on an OVS port");
return -EINVAL;
+ }
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
@@ -4238,7 +4350,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
if (info->linking)
err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
lower_dev,
- upper_dev);
+ upper_dev,
+ extack);
else
mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
lower_dev,
@@ -4329,18 +4442,25 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
struct netdev_notifier_changeupper_info *info = ptr;
+ struct netlink_ext_ack *extack;
struct net_device *upper_dev;
int err = 0;
+ extack = netdev_notifier_info_to_extack(&info->info);
+
switch (event) {
case NETDEV_PRECHANGEUPPER:
upper_dev = info->upper_dev;
- if (!netif_is_bridge_master(upper_dev))
+ if (!netif_is_bridge_master(upper_dev)) {
+ NL_SET_ERR_MSG(extack, "spectrum: VLAN devices only support bridge and VRF uppers");
return -EINVAL;
+ }
if (!info->linking)
break;
- if (netdev_has_any_upper_dev(upper_dev))
+ if (netdev_has_any_upper_dev(upper_dev)) {
+ NL_SET_ERR_MSG(extack, "spectrum: Enslaving a port to a device that already has an upper device is not supported");
return -EINVAL;
+ }
break;
case NETDEV_CHANGEUPPER:
upper_dev = info->upper_dev;
@@ -4348,7 +4468,8 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
if (info->linking)
err = mlxsw_sp_port_bridge_join(mlxsw_sp_port,
vlan_dev,
- upper_dev);
+ upper_dev,
+ extack);
else
mlxsw_sp_port_bridge_leave(mlxsw_sp_port,
vlan_dev,
@@ -4411,13 +4532,17 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
return netif_is_l3_master(info->upper_dev);
}
-static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
+static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct mlxsw_sp *mlxsw_sp;
int err = 0;
- if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
+ mlxsw_sp = container_of(nb, struct mlxsw_sp, netdevice_nb);
+ if (mlxsw_sp_netdev_is_ipip(mlxsw_sp, dev))
+ err = mlxsw_sp_netdevice_ipip_event(mlxsw_sp, dev, event, ptr);
+ else if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU)
err = mlxsw_sp_netdevice_router_port_event(dev);
else if (mlxsw_sp_is_vrf_event(event, ptr))
err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr);
@@ -4431,13 +4556,16 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
return notifier_from_errno(err);
}
-static struct notifier_block mlxsw_sp_netdevice_nb __read_mostly = {
- .notifier_call = mlxsw_sp_netdevice_event,
+static struct notifier_block mlxsw_sp_inetaddr_valid_nb __read_mostly = {
+ .notifier_call = mlxsw_sp_inetaddr_valid_event,
};
static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = {
.notifier_call = mlxsw_sp_inetaddr_event,
- .priority = 10, /* Must be called before FIB notifier block */
+};
+
+static struct notifier_block mlxsw_sp_inet6addr_valid_nb __read_mostly = {
+ .notifier_call = mlxsw_sp_inet6addr_valid_event,
};
static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = {
@@ -4462,8 +4590,9 @@ static int __init mlxsw_sp_module_init(void)
{
int err;
- register_netdevice_notifier(&mlxsw_sp_netdevice_nb);
+ register_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
+ register_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
register_netevent_notifier(&mlxsw_sp_router_netevent_nb);
@@ -4482,8 +4611,9 @@ err_pci_driver_register:
err_core_driver_register:
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
+ unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
- unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
+ unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
return err;
}
@@ -4493,8 +4623,9 @@ static void __exit mlxsw_sp_module_exit(void)
mlxsw_core_driver_unregister(&mlxsw_sp_driver);
unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb);
+ unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
- unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb);
+ unregister_inetaddr_validator_notifier(&mlxsw_sp_inetaddr_valid_nb);
}
module_init(mlxsw_sp_module_init);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 84ce83acdc19..2a2472a09d8c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -94,7 +94,8 @@ struct mlxsw_sp_mid {
unsigned char addr[ETH_ALEN];
u16 fid;
u16 mid;
- unsigned int ref_count;
+ bool in_hw;
+ unsigned long *ports_in_mid; /* bits array */
};
enum mlxsw_sp_span_type {
@@ -138,6 +139,7 @@ struct mlxsw_sp_port_mall_tc_entry {
struct mlxsw_sp_sb;
struct mlxsw_sp_bridge;
struct mlxsw_sp_router;
+struct mlxsw_sp_mr;
struct mlxsw_sp_acl;
struct mlxsw_sp_counter_pool;
struct mlxsw_sp_fid_core;
@@ -152,11 +154,14 @@ struct mlxsw_sp {
struct mlxsw_sp_sb *sb;
struct mlxsw_sp_bridge *bridge;
struct mlxsw_sp_router *router;
+ struct mlxsw_sp_mr *mr;
+ struct mlxsw_afa *afa;
struct mlxsw_sp_acl *acl;
struct mlxsw_sp_fid_core *fid_core;
struct {
DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE);
} kvdl;
+ struct notifier_block netdevice_nb;
struct mlxsw_sp_counter_pool *counter_pool;
struct {
@@ -322,7 +327,8 @@ void
mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *brport_dev,
- struct net_device *br_dev);
+ struct net_device *br_dev,
+ struct netlink_ext_ack *extack);
void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *brport_dev,
struct net_device *br_dev);
@@ -385,10 +391,21 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused,
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
unsigned long event, void *ptr);
+int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
unsigned long event, void *ptr);
+int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
+ unsigned long event, void *ptr);
int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
struct netdev_notifier_changeupper_info *info);
+bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev);
+int
+mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *l3_dev,
+ unsigned long event,
+ struct netdev_notifier_changeupper_info *info);
void
mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
@@ -466,9 +483,9 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
enum mlxsw_afk_element element,
const char *key_value,
const char *mask_value, unsigned int len);
-void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
-void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
- u16 group_id);
+int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei);
+int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
+ u16 group_id);
int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 4b2455e3e079..93dcd315f7d6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -52,7 +52,6 @@
struct mlxsw_sp_acl {
struct mlxsw_sp *mlxsw_sp;
struct mlxsw_afk *afk;
- struct mlxsw_afa *afa;
struct mlxsw_sp_fid *dummy_fid;
const struct mlxsw_sp_acl_ops *ops;
struct rhashtable ruleset_ht;
@@ -333,7 +332,7 @@ mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl)
rulei = kzalloc(sizeof(*rulei), GFP_KERNEL);
if (!rulei)
return NULL;
- rulei->act_block = mlxsw_afa_block_create(acl->afa);
+ rulei->act_block = mlxsw_afa_block_create(acl->mlxsw_sp->afa);
if (IS_ERR(rulei->act_block)) {
err = PTR_ERR(rulei->act_block);
goto err_afa_block_create;
@@ -379,15 +378,15 @@ void mlxsw_sp_acl_rulei_keymask_buf(struct mlxsw_sp_acl_rule_info *rulei,
key_value, mask_value, len);
}
-void mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei)
+int mlxsw_sp_acl_rulei_act_continue(struct mlxsw_sp_acl_rule_info *rulei)
{
- mlxsw_afa_block_continue(rulei->act_block);
+ return mlxsw_afa_block_continue(rulei->act_block);
}
-void mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
- u16 group_id)
+int mlxsw_sp_acl_rulei_act_jump(struct mlxsw_sp_acl_rule_info *rulei,
+ u16 group_id)
{
- mlxsw_afa_block_jump(rulei->act_block, group_id);
+ return mlxsw_afa_block_jump(rulei->act_block, group_id);
}
int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
@@ -397,7 +396,8 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei)
int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei)
{
- return mlxsw_afa_block_append_trap(rulei->act_block);
+ return mlxsw_afa_block_append_trap(rulei->act_block,
+ MLXSW_TRAP_ID_ACL0);
}
int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
@@ -653,85 +653,6 @@ int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp,
return 0;
}
-#define MLXSW_SP_KDVL_ACT_EXT_SIZE 1
-
-static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
- char *enc_actions, bool is_first)
-{
- struct mlxsw_sp *mlxsw_sp = priv;
- char pefa_pl[MLXSW_REG_PEFA_LEN];
- u32 kvdl_index;
- int err;
-
- /* The first action set of a TCAM entry is stored directly in TCAM,
- * not KVD linear area.
- */
- if (is_first)
- return 0;
-
- err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE,
- &kvdl_index);
- if (err)
- return err;
- mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
- if (err)
- goto err_pefa_write;
- *p_kvdl_index = kvdl_index;
- return 0;
-
-err_pefa_write:
- mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
- return err;
-}
-
-static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
- bool is_first)
-{
- struct mlxsw_sp *mlxsw_sp = priv;
-
- if (is_first)
- return;
- mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
-}
-
-static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
- u8 local_port)
-{
- struct mlxsw_sp *mlxsw_sp = priv;
- char ppbs_pl[MLXSW_REG_PPBS_LEN];
- u32 kvdl_index;
- int err;
-
- err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index);
- if (err)
- return err;
- mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
- err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl);
- if (err)
- goto err_ppbs_write;
- *p_kvdl_index = kvdl_index;
- return 0;
-
-err_ppbs_write:
- mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
- return err;
-}
-
-static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index)
-{
- struct mlxsw_sp *mlxsw_sp = priv;
-
- mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
-}
-
-static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
- .kvdl_set_add = mlxsw_sp_act_kvdl_set_add,
- .kvdl_set_del = mlxsw_sp_act_kvdl_set_del,
- .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add,
- .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del,
-};
-
int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
{
const struct mlxsw_sp_acl_ops *acl_ops = &mlxsw_sp_acl_tcam_ops;
@@ -753,14 +674,6 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp)
goto err_afk_create;
}
- acl->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
- ACL_ACTIONS_PER_SET),
- &mlxsw_sp_act_afa_ops, mlxsw_sp);
- if (IS_ERR(acl->afa)) {
- err = PTR_ERR(acl->afa);
- goto err_afa_create;
- }
-
err = rhashtable_init(&acl->ruleset_ht,
&mlxsw_sp_acl_ruleset_ht_params);
if (err)
@@ -792,8 +705,6 @@ err_acl_ops_init:
err_fid_get:
rhashtable_destroy(&acl->ruleset_ht);
err_rhashtable_init:
- mlxsw_afa_destroy(acl->afa);
-err_afa_create:
mlxsw_afk_destroy(acl->afk);
err_afk_create:
kfree(acl);
@@ -810,7 +721,6 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp)
WARN_ON(!list_empty(&acl->rules));
mlxsw_sp_fid_put(acl->dummy_fid);
rhashtable_destroy(&acl->ruleset_ht);
- mlxsw_afa_destroy(acl->afa);
mlxsw_afk_destroy(acl->afk);
kfree(acl);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
new file mode 100644
index 000000000000..4d3340ed0291
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -0,0 +1,129 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spectrum_acl_flex_actions.h"
+#include "core_acl_flex_actions.h"
+
+#define MLXSW_SP_KVDL_ACT_EXT_SIZE 1
+
+static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index,
+ char *enc_actions, bool is_first)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ char pefa_pl[MLXSW_REG_PEFA_LEN];
+ u32 kvdl_index;
+ int err;
+
+ /* The first action set of a TCAM entry is stored directly in TCAM,
+ * not KVD linear area.
+ */
+ if (is_first)
+ return 0;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ACT_EXT_SIZE,
+ &kvdl_index);
+ if (err)
+ return err;
+ mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl);
+ if (err)
+ goto err_pefa_write;
+ *p_kvdl_index = kvdl_index;
+ return 0;
+
+err_pefa_write:
+ mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+ return err;
+}
+
+static void mlxsw_sp_act_kvdl_set_del(void *priv, u32 kvdl_index,
+ bool is_first)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ if (is_first)
+ return;
+ mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+}
+
+static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index,
+ u8 local_port)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ char ppbs_pl[MLXSW_REG_PPBS_LEN];
+ u32 kvdl_index;
+ int err;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index);
+ if (err)
+ return err;
+ mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl);
+ if (err)
+ goto err_ppbs_write;
+ *p_kvdl_index = kvdl_index;
+ return 0;
+
+err_ppbs_write:
+ mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+ return err;
+}
+
+static void mlxsw_sp_act_kvdl_fwd_entry_del(void *priv, u32 kvdl_index)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+
+ mlxsw_sp_kvdl_free(mlxsw_sp, kvdl_index);
+}
+
+static const struct mlxsw_afa_ops mlxsw_sp_act_afa_ops = {
+ .kvdl_set_add = mlxsw_sp_act_kvdl_set_add,
+ .kvdl_set_del = mlxsw_sp_act_kvdl_set_del,
+ .kvdl_fwd_entry_add = mlxsw_sp_act_kvdl_fwd_entry_add,
+ .kvdl_fwd_entry_del = mlxsw_sp_act_kvdl_fwd_entry_del,
+};
+
+int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_sp->afa = mlxsw_afa_create(MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ ACL_ACTIONS_PER_SET),
+ &mlxsw_sp_act_afa_ops, mlxsw_sp);
+ return PTR_ERR_OR_ZERO(mlxsw_sp->afa);
+}
+
+void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ mlxsw_afa_destroy(mlxsw_sp->afa);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
new file mode 100644
index 000000000000..2726192836ad
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
@@ -0,0 +1,44 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
+#define _MLXSW_SPECTRUM_ACL_FLEX_KEYS_H
+
+#include "spectrum.h"
+
+int mlxsw_sp_afa_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_afa_fini(struct mlxsw_sp *mlxsw_sp);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index 50b40de1fb91..7e8284b46968 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -608,7 +608,10 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
goto err_rulei_create;
}
- mlxsw_sp_acl_rulei_act_continue(rulei);
+ err = mlxsw_sp_acl_rulei_act_continue(rulei);
+ if (WARN_ON(err))
+ goto err_rulei_act_continue;
+
err = mlxsw_sp_acl_rulei_commit(rulei);
if (err)
goto err_rulei_commit;
@@ -623,6 +626,7 @@ mlxsw_sp_acl_tcam_region_catchall_add(struct mlxsw_sp *mlxsw_sp,
err_rule_insert:
err_rulei_commit:
+err_rulei_act_continue:
mlxsw_sp_acl_rulei_destroy(rulei);
err_rulei_create:
parman_item_remove(region->parman, parman_prio, parman_item);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
index 51e6846da72b..a056f23d3a0e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c
@@ -43,21 +43,36 @@ enum mlxsw_sp_field_metadata_id {
MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
+ MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX,
+ MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX,
};
static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = {
- { .name = "erif_port",
- .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
- .bitwidth = 32,
- .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX,
+ {
+ .name = "erif_port",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT,
+ .bitwidth = 32,
+ .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX,
},
- { .name = "l3_forward",
- .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
- .bitwidth = 1,
+ {
+ .name = "l3_forward",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD,
+ .bitwidth = 1,
},
- { .name = "l3_drop",
- .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
- .bitwidth = 1,
+ {
+ .name = "l3_drop",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP,
+ .bitwidth = 1,
+ },
+ {
+ .name = "adj_index",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX,
+ .bitwidth = 32,
+ },
+ {
+ .name = "adj_hash_index",
+ .id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX,
+ .bitwidth = 32,
},
};
@@ -826,6 +841,359 @@ static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp)
MLXSW_SP_DPIPE_TABLE_NAME_HOST6);
}
+static int mlxsw_sp_dpipe_table_adj_matches_dump(void *priv,
+ struct sk_buff *skb)
+{
+ struct devlink_dpipe_match match = {0};
+ int err;
+
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &mlxsw_sp_dpipe_header_metadata;
+ match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX;
+
+ err = devlink_dpipe_match_put(skb, &match);
+ if (err)
+ return err;
+
+ match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match.header = &mlxsw_sp_dpipe_header_metadata;
+ match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX;
+
+ return devlink_dpipe_match_put(skb, &match);
+}
+
+static int mlxsw_sp_dpipe_table_adj_actions_dump(void *priv,
+ struct sk_buff *skb)
+{
+ struct devlink_dpipe_action action = {0};
+ int err;
+
+ action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action.header = &devlink_dpipe_header_ethernet;
+ action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+
+ err = devlink_dpipe_action_put(skb, &action);
+ if (err)
+ return err;
+
+ action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action.header = &mlxsw_sp_dpipe_header_metadata;
+ action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+
+ return devlink_dpipe_action_put(skb, &action);
+}
+
+static u64 mlxsw_sp_dpipe_table_adj_size(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_nexthop *nh;
+ u64 size = 0;
+
+ mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router)
+ if (mlxsw_sp_nexthop_offload(nh) &&
+ !mlxsw_sp_nexthop_group_has_ipip(nh))
+ size++;
+ return size;
+}
+
+enum mlxsw_sp_dpipe_table_adj_match {
+ MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX,
+ MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX,
+ MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT,
+};
+
+enum mlxsw_sp_dpipe_table_adj_action {
+ MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC,
+ MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT,
+ MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT,
+};
+
+static void
+mlxsw_sp_dpipe_table_adj_match_action_prepare(struct devlink_dpipe_match *matches,
+ struct devlink_dpipe_action *actions)
+{
+ struct devlink_dpipe_action *action;
+ struct devlink_dpipe_match *match;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match->header = &mlxsw_sp_dpipe_header_metadata;
+ match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_INDEX;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+ match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT;
+ match->header = &mlxsw_sp_dpipe_header_metadata;
+ match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ADJ_HASH_INDEX;
+
+ action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+ action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action->header = &devlink_dpipe_header_ethernet;
+ action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC;
+
+ action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+ action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY;
+ action->header = &mlxsw_sp_dpipe_header_metadata;
+ action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entry_prepare(struct devlink_dpipe_entry *entry,
+ struct devlink_dpipe_value *match_values,
+ struct devlink_dpipe_match *matches,
+ struct devlink_dpipe_value *action_values,
+ struct devlink_dpipe_action *actions)
+{ struct devlink_dpipe_value *action_value;
+ struct devlink_dpipe_value *match_value;
+ struct devlink_dpipe_action *action;
+ struct devlink_dpipe_match *match;
+
+ entry->match_values = match_values;
+ entry->match_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT;
+
+ entry->action_values = action_values;
+ entry->action_values_count = MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+
+ match_value->match = match;
+ match_value->value_size = sizeof(u32);
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ match = &matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+ match_value = &match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+
+ match_value->match = match;
+ match_value->value_size = sizeof(u32);
+ match_value->value = kmalloc(match_value->value_size, GFP_KERNEL);
+ if (!match_value->value)
+ return -ENOMEM;
+
+ action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+ action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+
+ action_value->action = action;
+ action_value->value_size = sizeof(u64);
+ action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+ if (!action_value->value)
+ return -ENOMEM;
+
+ action = &actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+ action_value = &action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+
+ action_value->action = action;
+ action_value->value_size = sizeof(u32);
+ action_value->value = kmalloc(action_value->value_size, GFP_KERNEL);
+ if (!action_value->value)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void
+__mlxsw_sp_dpipe_table_adj_entry_fill(struct devlink_dpipe_entry *entry,
+ u32 adj_index, u32 adj_hash_index,
+ unsigned char *ha,
+ struct mlxsw_sp_rif *rif)
+{
+ struct devlink_dpipe_value *value;
+ u32 *p_rif_value;
+ u32 *p_index;
+
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_INDEX];
+ p_index = value->value;
+ *p_index = adj_index;
+
+ value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_HASH_INDEX];
+ p_index = value->value;
+ *p_index = adj_hash_index;
+
+ value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_DST_MAC];
+ ether_addr_copy(value->value, ha);
+
+ value = &entry->action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_ERIF_PORT];
+ p_rif_value = value->value;
+ *p_rif_value = mlxsw_sp_rif_index(rif);
+ value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif);
+ value->mapping_valid = true;
+}
+
+static void mlxsw_sp_dpipe_table_adj_entry_fill(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh,
+ struct devlink_dpipe_entry *entry)
+{
+ struct mlxsw_sp_rif *rif = mlxsw_sp_nexthop_rif(nh);
+ unsigned char *ha = mlxsw_sp_nexthop_ha(nh);
+ u32 adj_hash_index = 0;
+ u32 adj_index = 0;
+ int err;
+
+ mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index);
+ __mlxsw_sp_dpipe_table_adj_entry_fill(entry, adj_index,
+ adj_hash_index, ha, rif);
+ err = mlxsw_sp_nexthop_counter_get(mlxsw_sp, nh, &entry->counter);
+ if (!err)
+ entry->counter_valid = true;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entries_get(struct mlxsw_sp *mlxsw_sp,
+ struct devlink_dpipe_entry *entry,
+ bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct mlxsw_sp_nexthop *nh;
+ int entry_index = 0;
+ int nh_count_max;
+ int nh_count = 0;
+ int nh_skip;
+ int j;
+ int err;
+
+ rtnl_lock();
+ nh_count_max = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
+start_again:
+ err = devlink_dpipe_entry_ctx_prepare(dump_ctx);
+ if (err)
+ goto err_ctx_prepare;
+ j = 0;
+ nh_skip = nh_count;
+ mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
+ if (!mlxsw_sp_nexthop_offload(nh) ||
+ mlxsw_sp_nexthop_group_has_ipip(nh))
+ continue;
+
+ if (nh_count < nh_skip)
+ goto skip;
+
+ mlxsw_sp_dpipe_table_adj_entry_fill(mlxsw_sp, nh, entry);
+ entry->index = entry_index;
+ err = devlink_dpipe_entry_ctx_append(dump_ctx, entry);
+ if (err) {
+ if (err == -EMSGSIZE) {
+ if (!j)
+ goto err_entry_append;
+ break;
+ }
+ goto err_entry_append;
+ }
+ entry_index++;
+ j++;
+skip:
+ nh_count++;
+ }
+
+ devlink_dpipe_entry_ctx_close(dump_ctx);
+ if (nh_count != nh_count_max)
+ goto start_again;
+ rtnl_unlock();
+
+ return 0;
+
+err_ctx_prepare:
+err_entry_append:
+ rtnl_unlock();
+ return err;
+}
+
+static int
+mlxsw_sp_dpipe_table_adj_entries_dump(void *priv, bool counters_enabled,
+ struct devlink_dpipe_dump_ctx *dump_ctx)
+{
+ struct devlink_dpipe_value action_values[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT];
+ struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT];
+ struct devlink_dpipe_action actions[MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT];
+ struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT];
+ struct devlink_dpipe_entry entry = {0};
+ struct mlxsw_sp *mlxsw_sp = priv;
+ int err;
+
+ memset(matches, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT *
+ sizeof(matches[0]));
+ memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_MATCH_COUNT *
+ sizeof(match_values[0]));
+ memset(actions, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT *
+ sizeof(actions[0]));
+ memset(action_values, 0, MLXSW_SP_DPIPE_TABLE_ADJ_ACTION_COUNT *
+ sizeof(action_values[0]));
+
+ mlxsw_sp_dpipe_table_adj_match_action_prepare(matches, actions);
+ err = mlxsw_sp_dpipe_table_adj_entry_prepare(&entry,
+ match_values, matches,
+ action_values, actions);
+ if (err)
+ goto out;
+
+ err = mlxsw_sp_dpipe_table_adj_entries_get(mlxsw_sp, &entry,
+ counters_enabled, dump_ctx);
+out:
+ devlink_dpipe_entry_clear(&entry);
+ return err;
+}
+
+static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ struct mlxsw_sp_nexthop *nh;
+ u32 adj_hash_index = 0;
+ u32 adj_index = 0;
+
+ mlxsw_sp_nexthop_for_each(nh, mlxsw_sp->router) {
+ if (!mlxsw_sp_nexthop_offload(nh) ||
+ mlxsw_sp_nexthop_group_has_ipip(nh))
+ continue;
+
+ mlxsw_sp_nexthop_indexes(nh, &adj_index, &adj_hash_index);
+ if (enable)
+ mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+ else
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ mlxsw_sp_nexthop_update(mlxsw_sp,
+ adj_index + adj_hash_index, nh);
+ }
+ return 0;
+}
+
+static u64
+mlxsw_sp_dpipe_table_adj_size_get(void *priv)
+{
+ struct mlxsw_sp *mlxsw_sp = priv;
+ u64 size;
+
+ rtnl_lock();
+ size = mlxsw_sp_dpipe_table_adj_size(mlxsw_sp);
+ rtnl_unlock();
+
+ return size;
+}
+
+static struct devlink_dpipe_table_ops mlxsw_sp_dpipe_table_adj_ops = {
+ .matches_dump = mlxsw_sp_dpipe_table_adj_matches_dump,
+ .actions_dump = mlxsw_sp_dpipe_table_adj_actions_dump,
+ .entries_dump = mlxsw_sp_dpipe_table_adj_entries_dump,
+ .counters_set_update = mlxsw_sp_dpipe_table_adj_counters_update,
+ .size_get = mlxsw_sp_dpipe_table_adj_size_get,
+};
+
+static int mlxsw_sp_dpipe_adj_table_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ return devlink_dpipe_table_register(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ADJ,
+ &mlxsw_sp_dpipe_table_adj_ops,
+ mlxsw_sp, false);
+}
+
+static void mlxsw_sp_dpipe_adj_table_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+
+ devlink_dpipe_table_unregister(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ADJ);
+}
+
int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
{
struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
@@ -846,8 +1214,14 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp)
err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp);
if (err)
goto err_host6_table_init;
- return 0;
+ err = mlxsw_sp_dpipe_adj_table_init(mlxsw_sp);
+ if (err)
+ goto err_adj_table_init;
+
+ return 0;
+err_adj_table_init:
+ mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp);
err_host6_table_init:
mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp);
err_host4_table_init:
@@ -861,6 +1235,7 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
{
struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+ mlxsw_sp_dpipe_adj_table_fini(mlxsw_sp);
mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp);
mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp);
mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
index 283fde4e6783..815d543cf114 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h
@@ -56,5 +56,6 @@ static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp)
#define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif"
#define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4"
#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6"
+#define MLXSW_SP_DPIPE_TABLE_NAME_ADJ "mlxsw_adj"
#endif /* _MLXSW_PIPELINE_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 8aace9a06a5d..2f0e57857ea4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -63,7 +63,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
tcf_exts_to_list(exts, &actions);
list_for_each_entry(a, &actions, list) {
- if (is_tcf_gact_shot(a)) {
+ if (is_tcf_gact_ok(a)) {
+ err = mlxsw_sp_acl_rulei_act_continue(rulei);
+ if (err)
+ return err;
+ } else if (is_tcf_gact_shot(a)) {
err = mlxsw_sp_acl_rulei_act_drop(rulei);
if (err)
return err;
@@ -84,7 +88,9 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
return PTR_ERR(ruleset);
group_id = mlxsw_sp_acl_ruleset_group_id(ruleset);
- mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
+ err = mlxsw_sp_acl_rulei_act_jump(rulei, group_id);
+ if (err)
+ return err;
} else if (is_tcf_mirred_egress_redirect(a)) {
int ifindex = tcf_mirred_ifindex(a);
struct net_device *out_dev;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
index 1c2db831d83b..6fb49129ce87 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h
@@ -47,7 +47,6 @@ struct mlxsw_sp_ipip_entry {
enum mlxsw_sp_ipip_type ipipt;
struct net_device *ol_dev; /* Overlay. */
struct mlxsw_sp_rif_ipip_lb *ol_lb;
- unsigned int ref_count; /* Number of next hops using the tunnel. */
struct mlxsw_sp_fib_entry *decap_fib_entry;
struct list_head ipip_list_node;
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
new file mode 100644
index 000000000000..1f84bb8e9135
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
@@ -0,0 +1,1011 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/rhashtable.h>
+
+#include "spectrum_mr.h"
+#include "spectrum_router.h"
+
+struct mlxsw_sp_mr {
+ const struct mlxsw_sp_mr_ops *mr_ops;
+ void *catchall_route_priv;
+ struct delayed_work stats_update_dw;
+ struct list_head table_list;
+#define MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL 5000 /* ms */
+ unsigned long priv[0];
+ /* priv has to be always the last item */
+};
+
+struct mlxsw_sp_mr_vif {
+ struct net_device *dev;
+ const struct mlxsw_sp_rif *rif;
+ unsigned long vif_flags;
+
+ /* A list of route_vif_entry structs that point to routes that the VIF
+ * instance is used as one of the egress VIFs
+ */
+ struct list_head route_evif_list;
+
+ /* A list of route_vif_entry structs that point to routes that the VIF
+ * instance is used as an ingress VIF
+ */
+ struct list_head route_ivif_list;
+};
+
+struct mlxsw_sp_mr_route_vif_entry {
+ struct list_head vif_node;
+ struct list_head route_node;
+ struct mlxsw_sp_mr_vif *mr_vif;
+ struct mlxsw_sp_mr_route *mr_route;
+};
+
+struct mlxsw_sp_mr_table {
+ struct list_head node;
+ enum mlxsw_sp_l3proto proto;
+ struct mlxsw_sp *mlxsw_sp;
+ u32 vr_id;
+ struct mlxsw_sp_mr_vif vifs[MAXVIFS];
+ struct list_head route_list;
+ struct rhashtable route_ht;
+ char catchall_route_priv[0];
+ /* catchall_route_priv has to be always the last item */
+};
+
+struct mlxsw_sp_mr_route {
+ struct list_head node;
+ struct rhash_head ht_node;
+ struct mlxsw_sp_mr_route_key key;
+ enum mlxsw_sp_mr_route_action route_action;
+ u16 min_mtu;
+ struct mfc_cache *mfc4;
+ void *route_priv;
+ const struct mlxsw_sp_mr_table *mr_table;
+ /* A list of route_vif_entry structs that point to the egress VIFs */
+ struct list_head evif_list;
+ /* A route_vif_entry struct that point to the ingress VIF */
+ struct mlxsw_sp_mr_route_vif_entry ivif;
+};
+
+static const struct rhashtable_params mlxsw_sp_mr_route_ht_params = {
+ .key_len = sizeof(struct mlxsw_sp_mr_route_key),
+ .key_offset = offsetof(struct mlxsw_sp_mr_route, key),
+ .head_offset = offsetof(struct mlxsw_sp_mr_route, ht_node),
+ .automatic_shrinking = true,
+};
+
+static bool mlxsw_sp_mr_vif_regular(const struct mlxsw_sp_mr_vif *vif)
+{
+ return !(vif->vif_flags & (VIFF_TUNNEL | VIFF_REGISTER));
+}
+
+static bool mlxsw_sp_mr_vif_valid(const struct mlxsw_sp_mr_vif *vif)
+{
+ return mlxsw_sp_mr_vif_regular(vif) && vif->dev && vif->rif;
+}
+
+static bool mlxsw_sp_mr_vif_exists(const struct mlxsw_sp_mr_vif *vif)
+{
+ return vif->dev;
+}
+
+static bool
+mlxsw_sp_mr_route_ivif_in_evifs(const struct mlxsw_sp_mr_route *mr_route)
+{
+ vifi_t ivif;
+
+ switch (mr_route->mr_table->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ ivif = mr_route->mfc4->mfc_parent;
+ return mr_route->mfc4->mfc_un.res.ttls[ivif] != 255;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ /* fall through */
+ default:
+ WARN_ON_ONCE(1);
+ }
+ return false;
+}
+
+static int
+mlxsw_sp_mr_route_valid_evifs_num(const struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+ int valid_evifs;
+
+ valid_evifs = 0;
+ list_for_each_entry(rve, &mr_route->evif_list, route_node)
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
+ valid_evifs++;
+ return valid_evifs;
+}
+
+static bool mlxsw_sp_mr_route_starg(const struct mlxsw_sp_mr_route *mr_route)
+{
+ switch (mr_route->mr_table->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ return mr_route->key.source_mask.addr4 == INADDR_ANY;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ /* fall through */
+ default:
+ WARN_ON_ONCE(1);
+ }
+ return false;
+}
+
+static enum mlxsw_sp_mr_route_action
+mlxsw_sp_mr_route_action(const struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+
+ /* If the ingress port is not regular and resolved, trap the route */
+ if (!mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
+ return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+ /* The kernel does not match a (*,G) route that the ingress interface is
+ * not one of the egress interfaces, so trap these kind of routes.
+ */
+ if (mlxsw_sp_mr_route_starg(mr_route) &&
+ !mlxsw_sp_mr_route_ivif_in_evifs(mr_route))
+ return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+ /* If the route has no valid eVIFs, trap it. */
+ if (!mlxsw_sp_mr_route_valid_evifs_num(mr_route))
+ return MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+
+ /* If one of the eVIFs has no RIF, trap-and-forward the route as there
+ * is some more routing to do in software too.
+ */
+ list_for_each_entry(rve, &mr_route->evif_list, route_node)
+ if (mlxsw_sp_mr_vif_exists(rve->mr_vif) && !rve->mr_vif->rif)
+ return MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD;
+
+ return MLXSW_SP_MR_ROUTE_ACTION_FORWARD;
+}
+
+static enum mlxsw_sp_mr_route_prio
+mlxsw_sp_mr_route_prio(const struct mlxsw_sp_mr_route *mr_route)
+{
+ return mlxsw_sp_mr_route_starg(mr_route) ?
+ MLXSW_SP_MR_ROUTE_PRIO_STARG : MLXSW_SP_MR_ROUTE_PRIO_SG;
+}
+
+static void mlxsw_sp_mr_route4_key(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_key *key,
+ const struct mfc_cache *mfc)
+{
+ bool starg = (mfc->mfc_origin == INADDR_ANY);
+
+ memset(key, 0, sizeof(*key));
+ key->vrid = mr_table->vr_id;
+ key->proto = mr_table->proto;
+ key->group.addr4 = mfc->mfc_mcastgrp;
+ key->group_mask.addr4 = 0xffffffff;
+ key->source.addr4 = mfc->mfc_origin;
+ key->source_mask.addr4 = starg ? 0 : 0xffffffff;
+}
+
+static int mlxsw_sp_mr_route_evif_link(struct mlxsw_sp_mr_route *mr_route,
+ struct mlxsw_sp_mr_vif *mr_vif)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+
+ rve = kzalloc(sizeof(*rve), GFP_KERNEL);
+ if (!rve)
+ return -ENOMEM;
+ rve->mr_route = mr_route;
+ rve->mr_vif = mr_vif;
+ list_add_tail(&rve->route_node, &mr_route->evif_list);
+ list_add_tail(&rve->vif_node, &mr_vif->route_evif_list);
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_route_evif_unlink(struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ list_del(&rve->route_node);
+ list_del(&rve->vif_node);
+ kfree(rve);
+}
+
+static void mlxsw_sp_mr_route_ivif_link(struct mlxsw_sp_mr_route *mr_route,
+ struct mlxsw_sp_mr_vif *mr_vif)
+{
+ mr_route->ivif.mr_route = mr_route;
+ mr_route->ivif.mr_vif = mr_vif;
+ list_add_tail(&mr_route->ivif.vif_node, &mr_vif->route_ivif_list);
+}
+
+static void mlxsw_sp_mr_route_ivif_unlink(struct mlxsw_sp_mr_route *mr_route)
+{
+ list_del(&mr_route->ivif.vif_node);
+}
+
+static int
+mlxsw_sp_mr_route_info_create(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route,
+ struct mlxsw_sp_mr_route_info *route_info)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+ u16 *erif_indices;
+ u16 irif_index;
+ u16 erif = 0;
+
+ erif_indices = kmalloc_array(MAXVIFS, sizeof(*erif_indices),
+ GFP_KERNEL);
+ if (!erif_indices)
+ return -ENOMEM;
+
+ list_for_each_entry(rve, &mr_route->evif_list, route_node) {
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
+ u16 rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
+
+ erif_indices[erif++] = rifi;
+ }
+ }
+
+ if (mlxsw_sp_mr_vif_valid(mr_route->ivif.mr_vif))
+ irif_index = mlxsw_sp_rif_index(mr_route->ivif.mr_vif->rif);
+ else
+ irif_index = 0;
+
+ route_info->irif_index = irif_index;
+ route_info->erif_indices = erif_indices;
+ route_info->min_mtu = mr_route->min_mtu;
+ route_info->route_action = mr_route->route_action;
+ route_info->erif_num = erif;
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_route_info_destroy(struct mlxsw_sp_mr_route_info *route_info)
+{
+ kfree(route_info->erif_indices);
+}
+
+static int mlxsw_sp_mr_route_write(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route,
+ bool replace)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr_route_info route_info;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ int err;
+
+ err = mlxsw_sp_mr_route_info_create(mr_table, mr_route, &route_info);
+ if (err)
+ return err;
+
+ if (!replace) {
+ struct mlxsw_sp_mr_route_params route_params;
+
+ mr_route->route_priv = kzalloc(mr->mr_ops->route_priv_size,
+ GFP_KERNEL);
+ if (!mr_route->route_priv) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ route_params.key = mr_route->key;
+ route_params.value = route_info;
+ route_params.prio = mlxsw_sp_mr_route_prio(mr_route);
+ err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
+ mr_route->route_priv,
+ &route_params);
+ if (err)
+ kfree(mr_route->route_priv);
+ } else {
+ err = mr->mr_ops->route_update(mlxsw_sp, mr_route->route_priv,
+ &route_info);
+ }
+out:
+ mlxsw_sp_mr_route_info_destroy(&route_info);
+ return err;
+}
+
+static void mlxsw_sp_mr_route_erase(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+ mr->mr_ops->route_destroy(mlxsw_sp, mr->priv, mr_route->route_priv);
+ kfree(mr_route->route_priv);
+}
+
+static struct mlxsw_sp_mr_route *
+mlxsw_sp_mr_route4_create(struct mlxsw_sp_mr_table *mr_table,
+ struct mfc_cache *mfc)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
+ struct mlxsw_sp_mr_route *mr_route;
+ int err = 0;
+ int i;
+
+ /* Allocate and init a new route and fill it with parameters */
+ mr_route = kzalloc(sizeof(*mr_route), GFP_KERNEL);
+ if (!mr_route)
+ return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&mr_route->evif_list);
+ mlxsw_sp_mr_route4_key(mr_table, &mr_route->key, mfc);
+
+ /* Find min_mtu and link iVIF and eVIFs */
+ mr_route->min_mtu = ETH_MAX_MTU;
+ ipmr_cache_hold(mfc);
+ mr_route->mfc4 = mfc;
+ mr_route->mr_table = mr_table;
+ for (i = 0; i < MAXVIFS; i++) {
+ if (mfc->mfc_un.res.ttls[i] != 255) {
+ err = mlxsw_sp_mr_route_evif_link(mr_route,
+ &mr_table->vifs[i]);
+ if (err)
+ goto err;
+ if (mr_table->vifs[i].dev &&
+ mr_table->vifs[i].dev->mtu < mr_route->min_mtu)
+ mr_route->min_mtu = mr_table->vifs[i].dev->mtu;
+ }
+ }
+ mlxsw_sp_mr_route_ivif_link(mr_route, &mr_table->vifs[mfc->mfc_parent]);
+
+ mr_route->route_action = mlxsw_sp_mr_route_action(mr_route);
+ return mr_route;
+err:
+ ipmr_cache_put(mfc);
+ list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
+ mlxsw_sp_mr_route_evif_unlink(rve);
+ kfree(mr_route);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_mr_route4_destroy(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve, *tmp;
+
+ mlxsw_sp_mr_route_ivif_unlink(mr_route);
+ ipmr_cache_put(mr_route->mfc4);
+ list_for_each_entry_safe(rve, tmp, &mr_route->evif_list, route_node)
+ mlxsw_sp_mr_route_evif_unlink(rve);
+ kfree(mr_route);
+}
+
+static void mlxsw_sp_mr_route_destroy(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ switch (mr_table->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ /* fall through */
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
+static void mlxsw_sp_mr_mfc_offload_set(struct mlxsw_sp_mr_route *mr_route,
+ bool offload)
+{
+ switch (mr_route->mr_table->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ if (offload)
+ mr_route->mfc4->mfc_flags |= MFC_OFFLOAD;
+ else
+ mr_route->mfc4->mfc_flags &= ~MFC_OFFLOAD;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ /* fall through */
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
+static void mlxsw_sp_mr_mfc_offload_update(struct mlxsw_sp_mr_route *mr_route)
+{
+ bool offload;
+
+ offload = mr_route->route_action != MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+ mlxsw_sp_mr_mfc_offload_set(mr_route, offload);
+}
+
+static void __mlxsw_sp_mr_route_del(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ mlxsw_sp_mr_mfc_offload_set(mr_route, false);
+ mlxsw_sp_mr_route_erase(mr_table, mr_route);
+ rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
+ mlxsw_sp_mr_route_ht_params);
+ list_del(&mr_route->node);
+ mlxsw_sp_mr_route_destroy(mr_table, mr_route);
+}
+
+int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
+ struct mfc_cache *mfc, bool replace)
+{
+ struct mlxsw_sp_mr_route *mr_orig_route = NULL;
+ struct mlxsw_sp_mr_route *mr_route;
+ int err;
+
+ /* If the route is a (*,*) route, abort, as these kind of routes are
+ * used for proxy routes.
+ */
+ if (mfc->mfc_origin == INADDR_ANY && mfc->mfc_mcastgrp == INADDR_ANY) {
+ dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+ "Offloading proxy routes is not supported.\n");
+ return -EINVAL;
+ }
+
+ /* Create a new route */
+ mr_route = mlxsw_sp_mr_route4_create(mr_table, mfc);
+ if (IS_ERR(mr_route))
+ return PTR_ERR(mr_route);
+
+ /* Find any route with a matching key */
+ mr_orig_route = rhashtable_lookup_fast(&mr_table->route_ht,
+ &mr_route->key,
+ mlxsw_sp_mr_route_ht_params);
+ if (replace) {
+ /* On replace case, make the route point to the new route_priv.
+ */
+ if (WARN_ON(!mr_orig_route)) {
+ err = -ENOENT;
+ goto err_no_orig_route;
+ }
+ mr_route->route_priv = mr_orig_route->route_priv;
+ } else if (mr_orig_route) {
+ /* On non replace case, if another route with the same key was
+ * found, abort, as duplicate routes are used for proxy routes.
+ */
+ dev_warn(mr_table->mlxsw_sp->bus_info->dev,
+ "Offloading proxy routes is not supported.\n");
+ err = -EINVAL;
+ goto err_duplicate_route;
+ }
+
+ /* Put it in the table data-structures */
+ list_add_tail(&mr_route->node, &mr_table->route_list);
+ err = rhashtable_insert_fast(&mr_table->route_ht,
+ &mr_route->ht_node,
+ mlxsw_sp_mr_route_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ /* Write the route to the hardware */
+ err = mlxsw_sp_mr_route_write(mr_table, mr_route, replace);
+ if (err)
+ goto err_mr_route_write;
+
+ /* Destroy the original route */
+ if (replace) {
+ rhashtable_remove_fast(&mr_table->route_ht,
+ &mr_orig_route->ht_node,
+ mlxsw_sp_mr_route_ht_params);
+ list_del(&mr_orig_route->node);
+ mlxsw_sp_mr_route4_destroy(mr_table, mr_orig_route);
+ }
+
+ mlxsw_sp_mr_mfc_offload_update(mr_route);
+ return 0;
+
+err_mr_route_write:
+ rhashtable_remove_fast(&mr_table->route_ht, &mr_route->ht_node,
+ mlxsw_sp_mr_route_ht_params);
+err_rhashtable_insert:
+ list_del(&mr_route->node);
+err_no_orig_route:
+err_duplicate_route:
+ mlxsw_sp_mr_route4_destroy(mr_table, mr_route);
+ return err;
+}
+
+void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
+ struct mfc_cache *mfc)
+{
+ struct mlxsw_sp_mr_route *mr_route;
+ struct mlxsw_sp_mr_route_key key;
+
+ mlxsw_sp_mr_route4_key(mr_table, &key, mfc);
+ mr_route = rhashtable_lookup_fast(&mr_table->route_ht, &key,
+ mlxsw_sp_mr_route_ht_params);
+ if (mr_route)
+ __mlxsw_sp_mr_route_del(mr_table, mr_route);
+}
+
+/* Should be called after the VIF struct is updated */
+static int
+mlxsw_sp_mr_route_ivif_resolve(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ enum mlxsw_sp_mr_route_action route_action;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ u16 irif_index;
+ int err;
+
+ route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+ if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+ return 0;
+
+ /* rve->mr_vif->rif is guaranteed to be valid at this stage */
+ irif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
+ err = mr->mr_ops->route_irif_update(mlxsw_sp, rve->mr_route->route_priv,
+ irif_index);
+ if (err)
+ return err;
+
+ err = mr->mr_ops->route_action_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ route_action);
+ if (err)
+ /* No need to rollback here because the iRIF change only takes
+ * place after the action has been updated.
+ */
+ return err;
+
+ rve->mr_route->route_action = route_action;
+ mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_route_ivif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+ mr->mr_ops->route_action_update(mlxsw_sp, rve->mr_route->route_priv,
+ MLXSW_SP_MR_ROUTE_ACTION_TRAP);
+ rve->mr_route->route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+ mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+}
+
+/* Should be called after the RIF struct is updated */
+static int
+mlxsw_sp_mr_route_evif_resolve(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ enum mlxsw_sp_mr_route_action route_action;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ u16 erif_index = 0;
+ int err;
+
+ /* Update the route action, as the new eVIF can be a tunnel or a pimreg
+ * device which will require updating the action.
+ */
+ route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+ if (route_action != rve->mr_route->route_action) {
+ err = mr->mr_ops->route_action_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ route_action);
+ if (err)
+ return err;
+ }
+
+ /* Add the eRIF */
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
+ erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
+ err = mr->mr_ops->route_erif_add(mlxsw_sp,
+ rve->mr_route->route_priv,
+ erif_index);
+ if (err)
+ goto err_route_erif_add;
+ }
+
+ /* Update the minimum MTU */
+ if (rve->mr_vif->dev->mtu < rve->mr_route->min_mtu) {
+ rve->mr_route->min_mtu = rve->mr_vif->dev->mtu;
+ err = mr->mr_ops->route_min_mtu_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ rve->mr_route->min_mtu);
+ if (err)
+ goto err_route_min_mtu_update;
+ }
+
+ rve->mr_route->route_action = route_action;
+ mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+ return 0;
+
+err_route_min_mtu_update:
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
+ mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
+ erif_index);
+err_route_erif_add:
+ if (route_action != rve->mr_route->route_action)
+ mr->mr_ops->route_action_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ rve->mr_route->route_action);
+ return err;
+}
+
+/* Should be called before the RIF struct is updated */
+static void
+mlxsw_sp_mr_route_evif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+ struct mlxsw_sp_mr_route_vif_entry *rve)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ enum mlxsw_sp_mr_route_action route_action;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ u16 rifi;
+
+ /* If the unresolved RIF was not valid, no need to delete it */
+ if (!mlxsw_sp_mr_vif_valid(rve->mr_vif))
+ return;
+
+ /* Update the route action: if there is only one valid eVIF in the
+ * route, set the action to trap as the VIF deletion will lead to zero
+ * valid eVIFs. On any other case, use the mlxsw_sp_mr_route_action to
+ * determine the route action.
+ */
+ if (mlxsw_sp_mr_route_valid_evifs_num(rve->mr_route) == 1)
+ route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP;
+ else
+ route_action = mlxsw_sp_mr_route_action(rve->mr_route);
+ if (route_action != rve->mr_route->route_action)
+ mr->mr_ops->route_action_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ route_action);
+
+ /* Delete the erif from the route */
+ rifi = mlxsw_sp_rif_index(rve->mr_vif->rif);
+ mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv, rifi);
+ rve->mr_route->route_action = route_action;
+ mlxsw_sp_mr_mfc_offload_update(rve->mr_route);
+}
+
+static int mlxsw_sp_mr_vif_resolve(struct mlxsw_sp_mr_table *mr_table,
+ struct net_device *dev,
+ struct mlxsw_sp_mr_vif *mr_vif,
+ unsigned long vif_flags,
+ const struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_mr_route_vif_entry *irve, *erve;
+ int err;
+
+ /* Update the VIF */
+ mr_vif->dev = dev;
+ mr_vif->rif = rif;
+ mr_vif->vif_flags = vif_flags;
+
+ /* Update all routes where this VIF is used as an unresolved iRIF */
+ list_for_each_entry(irve, &mr_vif->route_ivif_list, vif_node) {
+ err = mlxsw_sp_mr_route_ivif_resolve(mr_table, irve);
+ if (err)
+ goto err_irif_unresolve;
+ }
+
+ /* Update all routes where this VIF is used as an unresolved eRIF */
+ list_for_each_entry(erve, &mr_vif->route_evif_list, vif_node) {
+ err = mlxsw_sp_mr_route_evif_resolve(mr_table, erve);
+ if (err)
+ goto err_erif_unresolve;
+ }
+ return 0;
+
+err_erif_unresolve:
+ list_for_each_entry_from_reverse(erve, &mr_vif->route_evif_list,
+ vif_node)
+ mlxsw_sp_mr_route_evif_unresolve(mr_table, erve);
+err_irif_unresolve:
+ list_for_each_entry_from_reverse(irve, &mr_vif->route_ivif_list,
+ vif_node)
+ mlxsw_sp_mr_route_ivif_unresolve(mr_table, irve);
+ mr_vif->rif = NULL;
+ return err;
+}
+
+static void mlxsw_sp_mr_vif_unresolve(struct mlxsw_sp_mr_table *mr_table,
+ struct net_device *dev,
+ struct mlxsw_sp_mr_vif *mr_vif)
+{
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+
+ /* Update all routes where this VIF is used as an unresolved eRIF */
+ list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node)
+ mlxsw_sp_mr_route_evif_unresolve(mr_table, rve);
+
+ /* Update all routes where this VIF is used as an unresolved iRIF */
+ list_for_each_entry(rve, &mr_vif->route_ivif_list, vif_node)
+ mlxsw_sp_mr_route_ivif_unresolve(mr_table, rve);
+
+ /* Update the VIF */
+ mr_vif->dev = dev;
+ mr_vif->rif = NULL;
+}
+
+int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
+ struct net_device *dev, vifi_t vif_index,
+ unsigned long vif_flags, const struct mlxsw_sp_rif *rif)
+{
+ struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
+
+ if (WARN_ON(vif_index >= MAXVIFS))
+ return -EINVAL;
+ if (mr_vif->dev)
+ return -EEXIST;
+ return mlxsw_sp_mr_vif_resolve(mr_table, dev, mr_vif, vif_flags, rif);
+}
+
+void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index)
+{
+ struct mlxsw_sp_mr_vif *mr_vif = &mr_table->vifs[vif_index];
+
+ if (WARN_ON(vif_index >= MAXVIFS))
+ return;
+ if (WARN_ON(!mr_vif->dev))
+ return;
+ mlxsw_sp_mr_vif_unresolve(mr_table, NULL, mr_vif);
+}
+
+struct mlxsw_sp_mr_vif *
+mlxsw_sp_mr_dev_vif_lookup(struct mlxsw_sp_mr_table *mr_table,
+ const struct net_device *dev)
+{
+ vifi_t vif_index;
+
+ for (vif_index = 0; vif_index < MAXVIFS; vif_index++)
+ if (mr_table->vifs[vif_index].dev == dev)
+ return &mr_table->vifs[vif_index];
+ return NULL;
+}
+
+int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif)
+{
+ const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+ struct mlxsw_sp_mr_vif *mr_vif;
+
+ if (!rif_dev)
+ return 0;
+
+ mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+ if (!mr_vif)
+ return 0;
+ return mlxsw_sp_mr_vif_resolve(mr_table, mr_vif->dev, mr_vif,
+ mr_vif->vif_flags, rif);
+}
+
+void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif)
+{
+ const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+ struct mlxsw_sp_mr_vif *mr_vif;
+
+ if (!rif_dev)
+ return;
+
+ mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+ if (!mr_vif)
+ return;
+ mlxsw_sp_mr_vif_unresolve(mr_table, mr_vif->dev, mr_vif);
+}
+
+void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif, int mtu)
+{
+ const struct net_device *rif_dev = mlxsw_sp_rif_dev(rif);
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr_route_vif_entry *rve;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ struct mlxsw_sp_mr_vif *mr_vif;
+
+ if (!rif_dev)
+ return;
+
+ /* Search for a VIF that use that RIF */
+ mr_vif = mlxsw_sp_mr_dev_vif_lookup(mr_table, rif_dev);
+ if (!mr_vif)
+ return;
+
+ /* Update all the routes that uses that VIF as eVIF */
+ list_for_each_entry(rve, &mr_vif->route_evif_list, vif_node) {
+ if (mtu < rve->mr_route->min_mtu) {
+ rve->mr_route->min_mtu = mtu;
+ mr->mr_ops->route_min_mtu_update(mlxsw_sp,
+ rve->mr_route->route_priv,
+ mtu);
+ }
+ }
+}
+
+struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
+ u32 vr_id,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_mr_route_params catchall_route_params = {
+ .prio = MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+ .key = {
+ .vrid = vr_id,
+ },
+ .value = {
+ .route_action = MLXSW_SP_MR_ROUTE_ACTION_TRAP,
+ }
+ };
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ struct mlxsw_sp_mr_table *mr_table;
+ int err;
+ int i;
+
+ mr_table = kzalloc(sizeof(*mr_table) + mr->mr_ops->route_priv_size,
+ GFP_KERNEL);
+ if (!mr_table)
+ return ERR_PTR(-ENOMEM);
+
+ mr_table->vr_id = vr_id;
+ mr_table->mlxsw_sp = mlxsw_sp;
+ mr_table->proto = proto;
+ INIT_LIST_HEAD(&mr_table->route_list);
+
+ err = rhashtable_init(&mr_table->route_ht,
+ &mlxsw_sp_mr_route_ht_params);
+ if (err)
+ goto err_route_rhashtable_init;
+
+ for (i = 0; i < MAXVIFS; i++) {
+ INIT_LIST_HEAD(&mr_table->vifs[i].route_evif_list);
+ INIT_LIST_HEAD(&mr_table->vifs[i].route_ivif_list);
+ }
+
+ err = mr->mr_ops->route_create(mlxsw_sp, mr->priv,
+ mr_table->catchall_route_priv,
+ &catchall_route_params);
+ if (err)
+ goto err_ops_route_create;
+ list_add_tail(&mr_table->node, &mr->table_list);
+ return mr_table;
+
+err_ops_route_create:
+ rhashtable_destroy(&mr_table->route_ht);
+err_route_rhashtable_init:
+ kfree(mr_table);
+ return ERR_PTR(err);
+}
+
+void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_table->mlxsw_sp;
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+ WARN_ON(!mlxsw_sp_mr_table_empty(mr_table));
+ list_del(&mr_table->node);
+ mr->mr_ops->route_destroy(mlxsw_sp, mr->priv,
+ &mr_table->catchall_route_priv);
+ rhashtable_destroy(&mr_table->route_ht);
+ kfree(mr_table);
+}
+
+void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table)
+{
+ struct mlxsw_sp_mr_route *mr_route, *tmp;
+ int i;
+
+ list_for_each_entry_safe(mr_route, tmp, &mr_table->route_list, node)
+ __mlxsw_sp_mr_route_del(mr_table, mr_route);
+
+ for (i = 0; i < MAXVIFS; i++) {
+ mr_table->vifs[i].dev = NULL;
+ mr_table->vifs[i].rif = NULL;
+ }
+}
+
+bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table)
+{
+ int i;
+
+ for (i = 0; i < MAXVIFS; i++)
+ if (mr_table->vifs[i].dev)
+ return false;
+ return list_empty(&mr_table->route_list);
+}
+
+static void mlxsw_sp_mr_route_stats_update(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_route *mr_route)
+{
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+ u64 packets, bytes;
+
+ if (mr_route->route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+ return;
+
+ mr->mr_ops->route_stats(mlxsw_sp, mr_route->route_priv, &packets,
+ &bytes);
+
+ switch (mr_route->mr_table->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ if (mr_route->mfc4->mfc_un.res.pkt != packets)
+ mr_route->mfc4->mfc_un.res.lastuse = jiffies;
+ mr_route->mfc4->mfc_un.res.pkt = packets;
+ mr_route->mfc4->mfc_un.res.bytes = bytes;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ /* fall through */
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
+static void mlxsw_sp_mr_stats_update(struct work_struct *work)
+{
+ struct mlxsw_sp_mr *mr = container_of(work, struct mlxsw_sp_mr,
+ stats_update_dw.work);
+ struct mlxsw_sp_mr_table *mr_table;
+ struct mlxsw_sp_mr_route *mr_route;
+ unsigned long interval;
+
+ rtnl_lock();
+ list_for_each_entry(mr_table, &mr->table_list, node)
+ list_for_each_entry(mr_route, &mr_table->route_list, node)
+ mlxsw_sp_mr_route_stats_update(mr_table->mlxsw_sp,
+ mr_route);
+ rtnl_unlock();
+
+ interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
+ mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
+}
+
+int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_mr_ops *mr_ops)
+{
+ struct mlxsw_sp_mr *mr;
+ unsigned long interval;
+ int err;
+
+ mr = kzalloc(sizeof(*mr) + mr_ops->priv_size, GFP_KERNEL);
+ if (!mr)
+ return -ENOMEM;
+ mr->mr_ops = mr_ops;
+ mlxsw_sp->mr = mr;
+ INIT_LIST_HEAD(&mr->table_list);
+
+ err = mr_ops->init(mlxsw_sp, mr->priv);
+ if (err)
+ goto err;
+
+ /* Create the delayed work for counter updates */
+ INIT_DELAYED_WORK(&mr->stats_update_dw, mlxsw_sp_mr_stats_update);
+ interval = msecs_to_jiffies(MLXSW_SP_MR_ROUTES_COUNTER_UPDATE_INTERVAL);
+ mlxsw_core_schedule_dw(&mr->stats_update_dw, interval);
+ return 0;
+err:
+ kfree(mr);
+ return err;
+}
+
+void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_mr *mr = mlxsw_sp->mr;
+
+ cancel_delayed_work_sync(&mr->stats_update_dw);
+ mr->mr_ops->fini(mr->priv);
+ kfree(mr);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
new file mode 100644
index 000000000000..5d26a122af49
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
@@ -0,0 +1,134 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_MCROUTER_H
+#define _MLXSW_SPECTRUM_MCROUTER_H
+
+#include <linux/mroute.h>
+#include "spectrum_router.h"
+#include "spectrum.h"
+
+enum mlxsw_sp_mr_route_action {
+ MLXSW_SP_MR_ROUTE_ACTION_FORWARD,
+ MLXSW_SP_MR_ROUTE_ACTION_TRAP,
+ MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD,
+};
+
+enum mlxsw_sp_mr_route_prio {
+ MLXSW_SP_MR_ROUTE_PRIO_SG,
+ MLXSW_SP_MR_ROUTE_PRIO_STARG,
+ MLXSW_SP_MR_ROUTE_PRIO_CATCHALL,
+ __MLXSW_SP_MR_ROUTE_PRIO_MAX
+};
+
+#define MLXSW_SP_MR_ROUTE_PRIO_MAX (__MLXSW_SP_MR_ROUTE_PRIO_MAX - 1)
+
+struct mlxsw_sp_mr_route_key {
+ int vrid;
+ enum mlxsw_sp_l3proto proto;
+ union mlxsw_sp_l3addr group;
+ union mlxsw_sp_l3addr group_mask;
+ union mlxsw_sp_l3addr source;
+ union mlxsw_sp_l3addr source_mask;
+};
+
+struct mlxsw_sp_mr_route_info {
+ enum mlxsw_sp_mr_route_action route_action;
+ u16 irif_index;
+ u16 *erif_indices;
+ size_t erif_num;
+ u16 min_mtu;
+};
+
+struct mlxsw_sp_mr_route_params {
+ struct mlxsw_sp_mr_route_key key;
+ struct mlxsw_sp_mr_route_info value;
+ enum mlxsw_sp_mr_route_prio prio;
+};
+
+struct mlxsw_sp_mr_ops {
+ int priv_size;
+ int route_priv_size;
+ int (*init)(struct mlxsw_sp *mlxsw_sp, void *priv);
+ int (*route_create)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_params *route_params);
+ int (*route_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ struct mlxsw_sp_mr_route_info *route_info);
+ int (*route_stats)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u64 *packets, u64 *bytes);
+ int (*route_action_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ enum mlxsw_sp_mr_route_action route_action);
+ int (*route_min_mtu_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u16 min_mtu);
+ int (*route_irif_update)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u16 irif_index);
+ int (*route_erif_add)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u16 erif_index);
+ int (*route_erif_del)(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ u16 erif_index);
+ void (*route_destroy)(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv);
+ void (*fini)(void *priv);
+};
+
+struct mlxsw_sp_mr;
+struct mlxsw_sp_mr_table;
+
+int mlxsw_sp_mr_init(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_mr_ops *mr_ops);
+void mlxsw_sp_mr_fini(struct mlxsw_sp *mlxsw_sp);
+int mlxsw_sp_mr_route4_add(struct mlxsw_sp_mr_table *mr_table,
+ struct mfc_cache *mfc, bool replace);
+void mlxsw_sp_mr_route4_del(struct mlxsw_sp_mr_table *mr_table,
+ struct mfc_cache *mfc);
+int mlxsw_sp_mr_vif_add(struct mlxsw_sp_mr_table *mr_table,
+ struct net_device *dev, vifi_t vif_index,
+ unsigned long vif_flags,
+ const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_vif_del(struct mlxsw_sp_mr_table *mr_table, vifi_t vif_index);
+int mlxsw_sp_mr_rif_add(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_rif_del(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif);
+void mlxsw_sp_mr_rif_mtu_update(struct mlxsw_sp_mr_table *mr_table,
+ const struct mlxsw_sp_rif *rif, int mtu);
+struct mlxsw_sp_mr_table *mlxsw_sp_mr_table_create(struct mlxsw_sp *mlxsw_sp,
+ u32 tb_id,
+ enum mlxsw_sp_l3proto proto);
+void mlxsw_sp_mr_table_destroy(struct mlxsw_sp_mr_table *mr_table);
+void mlxsw_sp_mr_table_flush(struct mlxsw_sp_mr_table *mr_table);
+bool mlxsw_sp_mr_table_empty(const struct mlxsw_sp_mr_table *mr_table);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
new file mode 100644
index 000000000000..39c21c70ac32
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
@@ -0,0 +1,838 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.c
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/parman.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "core_acl_flex_actions.h"
+#include "spectrum_mr.h"
+
+struct mlxsw_sp_mr_tcam_region {
+ struct mlxsw_sp *mlxsw_sp;
+ enum mlxsw_reg_rtar_key_type rtar_key_type;
+ struct parman *parman;
+ struct parman_prio *parman_prios;
+};
+
+struct mlxsw_sp_mr_tcam {
+ struct mlxsw_sp_mr_tcam_region ipv4_tcam_region;
+};
+
+/* This struct maps to one RIGR2 register entry */
+struct mlxsw_sp_mr_erif_sublist {
+ struct list_head list;
+ u32 rigr2_kvdl_index;
+ int num_erifs;
+ u16 erif_indices[MLXSW_REG_RIGR2_MAX_ERIFS];
+ bool synced;
+};
+
+struct mlxsw_sp_mr_tcam_erif_list {
+ struct list_head erif_sublists;
+ u32 kvdl_index;
+};
+
+static bool
+mlxsw_sp_mr_erif_sublist_full(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist)
+{
+ int erif_list_entries = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MC_ERIF_LIST_ENTRIES);
+
+ return erif_sublist->num_erifs == erif_list_entries;
+}
+
+static void
+mlxsw_sp_mr_erif_list_init(struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ INIT_LIST_HEAD(&erif_list->erif_sublists);
+}
+
+#define MLXSW_SP_KVDL_RIGR2_SIZE 1
+
+static struct mlxsw_sp_mr_erif_sublist *
+mlxsw_sp_mr_erif_sublist_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist;
+ int err;
+
+ erif_sublist = kzalloc(sizeof(*erif_sublist), GFP_KERNEL);
+ if (!erif_sublist)
+ return ERR_PTR(-ENOMEM);
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_RIGR2_SIZE,
+ &erif_sublist->rigr2_kvdl_index);
+ if (err) {
+ kfree(erif_sublist);
+ return ERR_PTR(err);
+ }
+
+ list_add_tail(&erif_sublist->list, &erif_list->erif_sublists);
+ return erif_sublist;
+}
+
+static void
+mlxsw_sp_mr_erif_sublist_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist)
+{
+ list_del(&erif_sublist->list);
+ mlxsw_sp_kvdl_free(mlxsw_sp, erif_sublist->rigr2_kvdl_index);
+ kfree(erif_sublist);
+}
+
+static int
+mlxsw_sp_mr_erif_list_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list,
+ u16 erif_index)
+{
+ struct mlxsw_sp_mr_erif_sublist *sublist;
+
+ /* If either there is no erif_entry or the last one is full, allocate a
+ * new one.
+ */
+ if (list_empty(&erif_list->erif_sublists)) {
+ sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp, erif_list);
+ if (IS_ERR(sublist))
+ return PTR_ERR(sublist);
+ erif_list->kvdl_index = sublist->rigr2_kvdl_index;
+ } else {
+ sublist = list_last_entry(&erif_list->erif_sublists,
+ struct mlxsw_sp_mr_erif_sublist,
+ list);
+ sublist->synced = false;
+ if (mlxsw_sp_mr_erif_sublist_full(mlxsw_sp, sublist)) {
+ sublist = mlxsw_sp_mr_erif_sublist_create(mlxsw_sp,
+ erif_list);
+ if (IS_ERR(sublist))
+ return PTR_ERR(sublist);
+ }
+ }
+
+ /* Add the eRIF to the last entry's last index */
+ sublist->erif_indices[sublist->num_erifs++] = erif_index;
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_erif_list_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist, *tmp;
+
+ list_for_each_entry_safe(erif_sublist, tmp, &erif_list->erif_sublists,
+ list)
+ mlxsw_sp_mr_erif_sublist_destroy(mlxsw_sp, erif_sublist);
+}
+
+static int
+mlxsw_sp_mr_erif_list_commit(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ struct mlxsw_sp_mr_erif_sublist *curr_sublist;
+ char rigr2_pl[MLXSW_REG_RIGR2_LEN];
+ int err;
+ int i;
+
+ list_for_each_entry(curr_sublist, &erif_list->erif_sublists, list) {
+ if (curr_sublist->synced)
+ continue;
+
+ /* If the sublist is not the last one, pack the next index */
+ if (list_is_last(&curr_sublist->list,
+ &erif_list->erif_sublists)) {
+ mlxsw_reg_rigr2_pack(rigr2_pl,
+ curr_sublist->rigr2_kvdl_index,
+ false, 0);
+ } else {
+ struct mlxsw_sp_mr_erif_sublist *next_sublist;
+
+ next_sublist = list_next_entry(curr_sublist, list);
+ mlxsw_reg_rigr2_pack(rigr2_pl,
+ curr_sublist->rigr2_kvdl_index,
+ true,
+ next_sublist->rigr2_kvdl_index);
+ }
+
+ /* Pack all the erifs */
+ for (i = 0; i < curr_sublist->num_erifs; i++) {
+ u16 erif_index = curr_sublist->erif_indices[i];
+
+ mlxsw_reg_rigr2_erif_entry_pack(rigr2_pl, i, true,
+ erif_index);
+ }
+
+ /* Write the entry */
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rigr2),
+ rigr2_pl);
+ if (err)
+ /* No need of a rollback here because this
+ * hardware entry should not be pointed yet.
+ */
+ return err;
+ curr_sublist->synced = true;
+ }
+ return 0;
+}
+
+static void mlxsw_sp_mr_erif_list_move(struct mlxsw_sp_mr_tcam_erif_list *to,
+ struct mlxsw_sp_mr_tcam_erif_list *from)
+{
+ list_splice(&from->erif_sublists, &to->erif_sublists);
+ to->kvdl_index = from->kvdl_index;
+}
+
+struct mlxsw_sp_mr_tcam_route {
+ struct mlxsw_sp_mr_tcam_erif_list erif_list;
+ struct mlxsw_afa_block *afa_block;
+ u32 counter_index;
+ struct parman_item parman_item;
+ struct parman_prio *parman_prio;
+ enum mlxsw_sp_mr_route_action action;
+ struct mlxsw_sp_mr_route_key key;
+ u16 irif_index;
+ u16 min_mtu;
+};
+
+static struct mlxsw_afa_block *
+mlxsw_sp_mr_tcam_afa_block_create(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_mr_route_action route_action,
+ u16 irif_index, u32 counter_index,
+ u16 min_mtu,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list)
+{
+ struct mlxsw_afa_block *afa_block;
+ int err;
+
+ afa_block = mlxsw_afa_block_create(mlxsw_sp->afa);
+ if (!afa_block)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlxsw_afa_block_append_counter(afa_block, counter_index);
+ if (err)
+ goto err;
+
+ switch (route_action) {
+ case MLXSW_SP_MR_ROUTE_ACTION_TRAP:
+ err = mlxsw_afa_block_append_trap(afa_block,
+ MLXSW_TRAP_ID_ACL1);
+ if (err)
+ goto err;
+ break;
+ case MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD:
+ case MLXSW_SP_MR_ROUTE_ACTION_FORWARD:
+ /* If we are about to append a multicast router action, commit
+ * the erif_list.
+ */
+ err = mlxsw_sp_mr_erif_list_commit(mlxsw_sp, erif_list);
+ if (err)
+ goto err;
+
+ err = mlxsw_afa_block_append_mcrouter(afa_block, irif_index,
+ min_mtu, false,
+ erif_list->kvdl_index);
+ if (err)
+ goto err;
+
+ if (route_action == MLXSW_SP_MR_ROUTE_ACTION_TRAP_AND_FORWARD) {
+ err = mlxsw_afa_block_append_trap_and_forward(afa_block,
+ MLXSW_TRAP_ID_ACL2);
+ if (err)
+ goto err;
+ }
+ break;
+ default:
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = mlxsw_afa_block_commit(afa_block);
+ if (err)
+ goto err;
+ return afa_block;
+err:
+ mlxsw_afa_block_destroy(afa_block);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_mr_tcam_afa_block_destroy(struct mlxsw_afa_block *afa_block)
+{
+ mlxsw_afa_block_destroy(afa_block);
+}
+
+static int mlxsw_sp_mr_tcam_route_replace(struct mlxsw_sp *mlxsw_sp,
+ struct parman_item *parman_item,
+ struct mlxsw_sp_mr_route_key *key,
+ struct mlxsw_afa_block *afa_block)
+{
+ char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+ switch (key->proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, true, parman_item->index,
+ key->vrid,
+ MLXSW_REG_RMFT2_IRIF_MASK_IGNORE, 0,
+ ntohl(key->group.addr4),
+ ntohl(key->group_mask.addr4),
+ ntohl(key->source.addr4),
+ ntohl(key->source_mask.addr4),
+ mlxsw_afa_block_first_set(afa_block));
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static int mlxsw_sp_mr_tcam_route_remove(struct mlxsw_sp *mlxsw_sp, int vrid,
+ struct parman_item *parman_item)
+{
+ char rmft2_pl[MLXSW_REG_RMFT2_LEN];
+
+ mlxsw_reg_rmft2_ipv4_pack(rmft2_pl, false, parman_item->index, vrid,
+ 0, 0, 0, 0, 0, 0, NULL);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rmft2), rmft2_pl);
+}
+
+static int
+mlxsw_sp_mr_tcam_erif_populate(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_erif_list *erif_list,
+ struct mlxsw_sp_mr_route_info *route_info)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < route_info->erif_num; i++) {
+ u16 erif_index = route_info->erif_indices[i];
+
+ err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, erif_list,
+ erif_index);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
+mlxsw_sp_mr_tcam_route_parman_item_add(struct mlxsw_sp_mr_tcam *mr_tcam,
+ struct mlxsw_sp_mr_tcam_route *route,
+ enum mlxsw_sp_mr_route_prio prio)
+{
+ struct parman_prio *parman_prio = NULL;
+ int err;
+
+ switch (route->key.proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ parman_prio = &mr_tcam->ipv4_tcam_region.parman_prios[prio];
+ err = parman_item_add(mr_tcam->ipv4_tcam_region.parman,
+ parman_prio, &route->parman_item);
+ if (err)
+ return err;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ default:
+ WARN_ON_ONCE(1);
+ }
+ route->parman_prio = parman_prio;
+ return 0;
+}
+
+static void
+mlxsw_sp_mr_tcam_route_parman_item_remove(struct mlxsw_sp_mr_tcam *mr_tcam,
+ struct mlxsw_sp_mr_tcam_route *route)
+{
+ switch (route->key.proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ parman_item_remove(mr_tcam->ipv4_tcam_region.parman,
+ route->parman_prio, &route->parman_item);
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6:
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
+static int
+mlxsw_sp_mr_tcam_route_create(struct mlxsw_sp *mlxsw_sp, void *priv,
+ void *route_priv,
+ struct mlxsw_sp_mr_route_params *route_params)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+ int err;
+
+ route->key = route_params->key;
+ route->irif_index = route_params->value.irif_index;
+ route->min_mtu = route_params->value.min_mtu;
+ route->action = route_params->value.route_action;
+
+ /* Create the egress RIFs list */
+ mlxsw_sp_mr_erif_list_init(&route->erif_list);
+ err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &route->erif_list,
+ &route_params->value);
+ if (err)
+ goto err_erif_populate;
+
+ /* Create the flow counter */
+ err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &route->counter_index);
+ if (err)
+ goto err_counter_alloc;
+
+ /* Create the flexible action block */
+ route->afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+ route->action,
+ route->irif_index,
+ route->counter_index,
+ route->min_mtu,
+ &route->erif_list);
+ if (IS_ERR(route->afa_block)) {
+ err = PTR_ERR(route->afa_block);
+ goto err_afa_block_create;
+ }
+
+ /* Allocate place in the TCAM */
+ err = mlxsw_sp_mr_tcam_route_parman_item_add(mr_tcam, route,
+ route_params->prio);
+ if (err)
+ goto err_parman_item_add;
+
+ /* Write the route to the TCAM */
+ err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+ &route->key, route->afa_block);
+ if (err)
+ goto err_route_replace;
+ return 0;
+
+err_route_replace:
+ mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
+err_parman_item_add:
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+err_afa_block_create:
+ mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
+err_erif_populate:
+err_counter_alloc:
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+ return err;
+}
+
+static void mlxsw_sp_mr_tcam_route_destroy(struct mlxsw_sp *mlxsw_sp,
+ void *priv, void *route_priv)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+
+ mlxsw_sp_mr_tcam_route_remove(mlxsw_sp, route->key.vrid,
+ &route->parman_item);
+ mlxsw_sp_mr_tcam_route_parman_item_remove(mr_tcam, route);
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ mlxsw_sp_flow_counter_free(mlxsw_sp, route->counter_index);
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+}
+
+static int mlxsw_sp_mr_tcam_route_stats(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u64 *packets,
+ u64 *bytes)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+
+ return mlxsw_sp_flow_counter_get(mlxsw_sp, route->counter_index,
+ packets, bytes);
+}
+
+static int
+mlxsw_sp_mr_tcam_route_action_update(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv,
+ enum mlxsw_sp_mr_route_action route_action)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_afa_block *afa_block;
+ int err;
+
+ /* Create a new flexible action block */
+ afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route_action,
+ route->irif_index,
+ route->counter_index,
+ route->min_mtu,
+ &route->erif_list);
+ if (IS_ERR(afa_block))
+ return PTR_ERR(afa_block);
+
+ /* Update the TCAM route entry */
+ err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+ &route->key, afa_block);
+ if (err)
+ goto err;
+
+ /* Delete the old one */
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ route->afa_block = afa_block;
+ route->action = route_action;
+ return 0;
+err:
+ mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+ return err;
+}
+
+static int mlxsw_sp_mr_tcam_route_min_mtu_update(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u16 min_mtu)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_afa_block *afa_block;
+ int err;
+
+ /* Create a new flexible action block */
+ afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+ route->action,
+ route->irif_index,
+ route->counter_index,
+ min_mtu,
+ &route->erif_list);
+ if (IS_ERR(afa_block))
+ return PTR_ERR(afa_block);
+
+ /* Update the TCAM route entry */
+ err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+ &route->key, afa_block);
+ if (err)
+ goto err;
+
+ /* Delete the old one */
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ route->afa_block = afa_block;
+ route->min_mtu = min_mtu;
+ return 0;
+err:
+ mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+ return err;
+}
+
+static int mlxsw_sp_mr_tcam_route_irif_update(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u16 irif_index)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+
+ if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+ return -EINVAL;
+ route->irif_index = irif_index;
+ return 0;
+}
+
+static int mlxsw_sp_mr_tcam_route_erif_add(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u16 erif_index)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ int err;
+
+ err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &route->erif_list,
+ erif_index);
+ if (err)
+ return err;
+
+ /* Commit the action only if the route action is not TRAP */
+ if (route->action != MLXSW_SP_MR_ROUTE_ACTION_TRAP)
+ return mlxsw_sp_mr_erif_list_commit(mlxsw_sp,
+ &route->erif_list);
+ return 0;
+}
+
+static int mlxsw_sp_mr_tcam_route_erif_del(struct mlxsw_sp *mlxsw_sp,
+ void *route_priv, u16 erif_index)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp_mr_erif_sublist *erif_sublist;
+ struct mlxsw_sp_mr_tcam_erif_list erif_list;
+ struct mlxsw_afa_block *afa_block;
+ int err;
+ int i;
+
+ /* Create a copy of the original erif_list without the deleted entry */
+ mlxsw_sp_mr_erif_list_init(&erif_list);
+ list_for_each_entry(erif_sublist, &route->erif_list.erif_sublists, list) {
+ for (i = 0; i < erif_sublist->num_erifs; i++) {
+ u16 curr_erif = erif_sublist->erif_indices[i];
+
+ if (curr_erif == erif_index)
+ continue;
+ err = mlxsw_sp_mr_erif_list_add(mlxsw_sp, &erif_list,
+ curr_erif);
+ if (err)
+ goto err_erif_list_add;
+ }
+ }
+
+ /* Create the flexible action block pointing to the new erif_list */
+ afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp, route->action,
+ route->irif_index,
+ route->counter_index,
+ route->min_mtu,
+ &erif_list);
+ if (IS_ERR(afa_block)) {
+ err = PTR_ERR(afa_block);
+ goto err_afa_block_create;
+ }
+
+ /* Update the TCAM route entry */
+ err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+ &route->key, afa_block);
+ if (err)
+ goto err_route_write;
+
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+ route->afa_block = afa_block;
+ mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list);
+ return 0;
+
+err_route_write:
+ mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+err_afa_block_create:
+err_erif_list_add:
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list);
+ return err;
+}
+
+static int
+mlxsw_sp_mr_tcam_route_update(struct mlxsw_sp *mlxsw_sp, void *route_priv,
+ struct mlxsw_sp_mr_route_info *route_info)
+{
+ struct mlxsw_sp_mr_tcam_route *route = route_priv;
+ struct mlxsw_sp_mr_tcam_erif_list erif_list;
+ struct mlxsw_afa_block *afa_block;
+ int err;
+
+ /* Create a new erif_list */
+ mlxsw_sp_mr_erif_list_init(&erif_list);
+ err = mlxsw_sp_mr_tcam_erif_populate(mlxsw_sp, &erif_list, route_info);
+ if (err)
+ goto err_erif_populate;
+
+ /* Create the flexible action block pointing to the new erif_list */
+ afa_block = mlxsw_sp_mr_tcam_afa_block_create(mlxsw_sp,
+ route_info->route_action,
+ route_info->irif_index,
+ route->counter_index,
+ route_info->min_mtu,
+ &erif_list);
+ if (IS_ERR(afa_block)) {
+ err = PTR_ERR(afa_block);
+ goto err_afa_block_create;
+ }
+
+ /* Update the TCAM route entry */
+ err = mlxsw_sp_mr_tcam_route_replace(mlxsw_sp, &route->parman_item,
+ &route->key, afa_block);
+ if (err)
+ goto err_route_write;
+
+ mlxsw_sp_mr_tcam_afa_block_destroy(route->afa_block);
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &route->erif_list);
+ route->afa_block = afa_block;
+ mlxsw_sp_mr_erif_list_move(&route->erif_list, &erif_list);
+ route->action = route_info->route_action;
+ route->irif_index = route_info->irif_index;
+ route->min_mtu = route_info->min_mtu;
+ return 0;
+
+err_route_write:
+ mlxsw_sp_mr_tcam_afa_block_destroy(afa_block);
+err_afa_block_create:
+err_erif_populate:
+ mlxsw_sp_mr_erif_list_flush(mlxsw_sp, &erif_list);
+ return err;
+}
+
+#define MLXSW_SP_MR_TCAM_REGION_BASE_COUNT 16
+#define MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP 16
+
+static int
+mlxsw_sp_mr_tcam_region_alloc(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+ char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+ mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_ALLOCATE,
+ mr_tcam_region->rtar_key_type,
+ MLXSW_SP_MR_TCAM_REGION_BASE_COUNT);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void
+mlxsw_sp_mr_tcam_region_free(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
+{
+ struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+ char rtar_pl[MLXSW_REG_RTAR_LEN];
+
+ mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_DEALLOCATE,
+ mr_tcam_region->rtar_key_type, 0);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static int mlxsw_sp_mr_tcam_region_parman_resize(void *priv,
+ unsigned long new_count)
+{
+ struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
+ struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+ char rtar_pl[MLXSW_REG_RTAR_LEN];
+ u64 max_tcam_rules;
+
+ max_tcam_rules = MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_MAX_TCAM_RULES);
+ if (new_count > max_tcam_rules)
+ return -EINVAL;
+ mlxsw_reg_rtar_pack(rtar_pl, MLXSW_REG_RTAR_OP_RESIZE,
+ mr_tcam_region->rtar_key_type, new_count);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtar), rtar_pl);
+}
+
+static void mlxsw_sp_mr_tcam_region_parman_move(void *priv,
+ unsigned long from_index,
+ unsigned long to_index,
+ unsigned long count)
+{
+ struct mlxsw_sp_mr_tcam_region *mr_tcam_region = priv;
+ struct mlxsw_sp *mlxsw_sp = mr_tcam_region->mlxsw_sp;
+ char rrcr_pl[MLXSW_REG_RRCR_LEN];
+
+ mlxsw_reg_rrcr_pack(rrcr_pl, MLXSW_REG_RRCR_OP_MOVE,
+ from_index, count,
+ mr_tcam_region->rtar_key_type, to_index);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rrcr), rrcr_pl);
+}
+
+static const struct parman_ops mlxsw_sp_mr_tcam_region_parman_ops = {
+ .base_count = MLXSW_SP_MR_TCAM_REGION_BASE_COUNT,
+ .resize_step = MLXSW_SP_MR_TCAM_REGION_RESIZE_STEP,
+ .resize = mlxsw_sp_mr_tcam_region_parman_resize,
+ .move = mlxsw_sp_mr_tcam_region_parman_move,
+ .algo = PARMAN_ALGO_TYPE_LSORT,
+};
+
+static int
+mlxsw_sp_mr_tcam_region_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mr_tcam_region *mr_tcam_region,
+ enum mlxsw_reg_rtar_key_type rtar_key_type)
+{
+ struct parman_prio *parman_prios;
+ struct parman *parman;
+ int err;
+ int i;
+
+ mr_tcam_region->rtar_key_type = rtar_key_type;
+ mr_tcam_region->mlxsw_sp = mlxsw_sp;
+
+ err = mlxsw_sp_mr_tcam_region_alloc(mr_tcam_region);
+ if (err)
+ return err;
+
+ parman = parman_create(&mlxsw_sp_mr_tcam_region_parman_ops,
+ mr_tcam_region);
+ if (!parman) {
+ err = -ENOMEM;
+ goto err_parman_create;
+ }
+ mr_tcam_region->parman = parman;
+
+ parman_prios = kmalloc_array(MLXSW_SP_MR_ROUTE_PRIO_MAX + 1,
+ sizeof(*parman_prios), GFP_KERNEL);
+ if (!parman_prios) {
+ err = -ENOMEM;
+ goto err_parman_prios_alloc;
+ }
+ mr_tcam_region->parman_prios = parman_prios;
+
+ for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+ parman_prio_init(mr_tcam_region->parman,
+ &mr_tcam_region->parman_prios[i], i);
+ return 0;
+
+err_parman_prios_alloc:
+ parman_destroy(parman);
+err_parman_create:
+ mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
+ return err;
+}
+
+static void
+mlxsw_sp_mr_tcam_region_fini(struct mlxsw_sp_mr_tcam_region *mr_tcam_region)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP_MR_ROUTE_PRIO_MAX + 1; i++)
+ parman_prio_fini(&mr_tcam_region->parman_prios[i]);
+ kfree(mr_tcam_region->parman_prios);
+ parman_destroy(mr_tcam_region->parman);
+ mlxsw_sp_mr_tcam_region_free(mr_tcam_region);
+}
+
+static int mlxsw_sp_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
+{
+ struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MC_ERIF_LIST_ENTRIES) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_sp->core, ACL_MAX_TCAM_RULES))
+ return -EIO;
+
+ return mlxsw_sp_mr_tcam_region_init(mlxsw_sp,
+ &mr_tcam->ipv4_tcam_region,
+ MLXSW_REG_RTAR_KEY_TYPE_IPV4_MULTICAST);
+}
+
+static void mlxsw_sp_mr_tcam_fini(void *priv)
+{
+ struct mlxsw_sp_mr_tcam *mr_tcam = priv;
+
+ mlxsw_sp_mr_tcam_region_fini(&mr_tcam->ipv4_tcam_region);
+}
+
+const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops = {
+ .priv_size = sizeof(struct mlxsw_sp_mr_tcam),
+ .route_priv_size = sizeof(struct mlxsw_sp_mr_tcam_route),
+ .init = mlxsw_sp_mr_tcam_init,
+ .route_create = mlxsw_sp_mr_tcam_route_create,
+ .route_update = mlxsw_sp_mr_tcam_route_update,
+ .route_stats = mlxsw_sp_mr_tcam_route_stats,
+ .route_action_update = mlxsw_sp_mr_tcam_route_action_update,
+ .route_min_mtu_update = mlxsw_sp_mr_tcam_route_min_mtu_update,
+ .route_irif_update = mlxsw_sp_mr_tcam_route_irif_update,
+ .route_erif_add = mlxsw_sp_mr_tcam_route_erif_add,
+ .route_erif_del = mlxsw_sp_mr_tcam_route_erif_del,
+ .route_destroy = mlxsw_sp_mr_tcam_route_destroy,
+ .fini = mlxsw_sp_mr_tcam_fini,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h
new file mode 100644
index 000000000000..f9b59ee25406
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h
@@ -0,0 +1,43 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/spectrum_mr_tcam.h
+ * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_SPECTRUM_MCROUTER_TCAM_H
+#define _MLXSW_SPECTRUM_MCROUTER_TCAM_H
+
+#include "spectrum.h"
+#include "spectrum_mr.h"
+
+extern const struct mlxsw_sp_mr_ops mlxsw_sp_mr_tcam_ops;
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 5189022a1c8c..5f2d100e3718 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -65,6 +65,8 @@
#include "spectrum_cnt.h"
#include "spectrum_dpipe.h"
#include "spectrum_ipip.h"
+#include "spectrum_mr.h"
+#include "spectrum_mr_tcam.h"
#include "spectrum_router.h"
struct mlxsw_sp_vr;
@@ -78,6 +80,7 @@ struct mlxsw_sp_router {
struct rhashtable neigh_ht;
struct rhashtable nexthop_group_ht;
struct rhashtable nexthop_ht;
+ struct list_head nexthop_list;
struct {
struct mlxsw_sp_lpm_tree *trees;
unsigned int tree_count;
@@ -458,6 +461,7 @@ struct mlxsw_sp_vr {
unsigned int rif_count;
struct mlxsw_sp_fib *fib4;
struct mlxsw_sp_fib *fib6;
+ struct mlxsw_sp_mr_table *mr4_table;
};
static const struct rhashtable_params mlxsw_sp_fib_ht_params;
@@ -652,7 +656,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
{
- return !!vr->fib4 || !!vr->fib6;
+ return !!vr->fib4 || !!vr->fib6 || !!vr->mr4_table;
}
static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
@@ -692,8 +696,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
{
- /* For our purpose, squash main and local table into one */
- if (tb_id == RT_TABLE_LOCAL)
+ /* For our purpose, squash main, default and local tables into one */
+ if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
tb_id = RT_TABLE_MAIN;
return tb_id;
}
@@ -727,14 +731,17 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
}
static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
- u32 tb_id)
+ u32 tb_id,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_vr *vr;
int err;
vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
- if (!vr)
+ if (!vr) {
+ NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported virtual routers");
return ERR_PTR(-EBUSY);
+ }
vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4);
if (IS_ERR(vr->fib4))
return ERR_CAST(vr->fib4);
@@ -743,9 +750,18 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
err = PTR_ERR(vr->fib6);
goto err_fib6_create;
}
+ vr->mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
+ MLXSW_SP_L3_PROTO_IPV4);
+ if (IS_ERR(vr->mr4_table)) {
+ err = PTR_ERR(vr->mr4_table);
+ goto err_mr_table_create;
+ }
vr->tb_id = tb_id;
return vr;
+err_mr_table_create:
+ mlxsw_sp_fib_destroy(vr->fib6);
+ vr->fib6 = NULL;
err_fib6_create:
mlxsw_sp_fib_destroy(vr->fib4);
vr->fib4 = NULL;
@@ -754,27 +770,31 @@ err_fib6_create:
static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr)
{
+ mlxsw_sp_mr_table_destroy(vr->mr4_table);
+ vr->mr4_table = NULL;
mlxsw_sp_fib_destroy(vr->fib6);
vr->fib6 = NULL;
mlxsw_sp_fib_destroy(vr->fib4);
vr->fib4 = NULL;
}
-static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id)
+static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_vr *vr;
tb_id = mlxsw_sp_fix_tb_id(tb_id);
vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
if (!vr)
- vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id);
+ vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
return vr;
}
static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr)
{
if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
- list_empty(&vr->fib6->node_list))
+ list_empty(&vr->fib6->node_list) &&
+ mlxsw_sp_mr_table_empty(vr->mr4_table))
mlxsw_sp_vr_destroy(vr);
}
@@ -932,7 +952,8 @@ static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_rif_params *params);
+ const struct mlxsw_sp_rif_params *params,
+ struct netlink_ext_ack *extack);
static struct mlxsw_sp_rif_ipip_lb *
mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
@@ -950,7 +971,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
};
- rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common);
+ rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, NULL);
if (IS_ERR(rif))
return ERR_CAST(rif);
return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
@@ -986,9 +1007,8 @@ err_ol_ipip_lb_create:
}
static void
-mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry)
+mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
{
- WARN_ON(ipip_entry->ref_count > 0);
mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
kfree(ipip_entry);
}
@@ -1184,26 +1204,22 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
}
static struct mlxsw_sp_ipip_entry *
-mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
- enum mlxsw_sp_ipip_type ipipt,
- struct net_device *ol_dev)
+mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_ipip_type ipipt,
+ struct net_device *ol_dev)
{
u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
struct mlxsw_sp_router *router = mlxsw_sp->router;
- struct mlxsw_sp_fib_entry *decap_fib_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
enum mlxsw_sp_l3proto ul_proto;
union mlxsw_sp_l3addr saddr;
+ /* The configuration where several tunnels have the same local address
+ * in the same underlay table needs special treatment in the HW. That is
+ * currently not implemented in the driver.
+ */
list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
ipip_list_node) {
- if (ipip_entry->ol_dev == ol_dev)
- goto inc_ref_count;
-
- /* The configuration where several tunnels have the same local
- * address in the same underlay table needs special treatment in
- * the HW. That is currently not implemented in the driver.
- */
ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
@@ -1215,29 +1231,18 @@ mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp,
if (IS_ERR(ipip_entry))
return ipip_entry;
- decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
- if (decap_fib_entry)
- mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
- decap_fib_entry);
-
list_add_tail(&ipip_entry->ipip_list_node,
&mlxsw_sp->router->ipip_list);
-inc_ref_count:
- ++ipip_entry->ref_count;
return ipip_entry;
}
static void
-mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_ipip_entry *ipip_entry)
+mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_ipip_entry *ipip_entry)
{
- if (--ipip_entry->ref_count == 0) {
- list_del(&ipip_entry->ipip_list_node);
- if (ipip_entry->decap_fib_entry)
- mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
- mlxsw_sp_ipip_entry_destroy(ipip_entry);
- }
+ list_del(&ipip_entry->ipip_list_node);
+ mlxsw_sp_ipip_entry_dealloc(ipip_entry);
}
static bool
@@ -1279,6 +1284,168 @@ mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
return NULL;
}
+static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev,
+ enum mlxsw_sp_ipip_type *p_type)
+{
+ struct mlxsw_sp_router *router = mlxsw_sp->router;
+ const struct mlxsw_sp_ipip_ops *ipip_ops;
+ enum mlxsw_sp_ipip_type ipipt;
+
+ for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
+ ipip_ops = router->ipip_ops_arr[ipipt];
+ if (dev->type == ipip_ops->dev_type) {
+ if (p_type)
+ *p_type = ipipt;
+ return true;
+ }
+ }
+ return false;
+}
+
+bool mlxsw_sp_netdev_is_ipip(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev)
+{
+ return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
+}
+
+static struct mlxsw_sp_ipip_entry *
+mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
+ ipip_list_node)
+ if (ipip_entry->ol_dev == ol_dev)
+ return ipip_entry;
+
+ return NULL;
+}
+
+static int mlxsw_sp_netdevice_ipip_reg_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_router *router = mlxsw_sp->router;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ enum mlxsw_sp_ipip_type ipipt;
+
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
+ if (router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
+ MLXSW_SP_L3_PROTO_IPV4) ||
+ router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, ol_dev,
+ MLXSW_SP_L3_PROTO_IPV6)) {
+ ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
+ ol_dev);
+ if (IS_ERR(ipip_entry))
+ return PTR_ERR(ipip_entry);
+ }
+
+ return 0;
+}
+
+static void mlxsw_sp_netdevice_ipip_unreg_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (ipip_entry)
+ mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
+}
+
+static int mlxsw_sp_netdevice_ipip_up_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_fib_entry *decap_fib_entry;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (ipip_entry) {
+ decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
+ ipip_entry);
+ if (decap_fib_entry)
+ mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
+ decap_fib_entry);
+ }
+
+ return 0;
+}
+
+static void mlxsw_sp_netdevice_ipip_down_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (ipip_entry && ipip_entry->decap_fib_entry)
+ mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+}
+
+static int mlxsw_sp_netdevice_ipip_vrf_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev)
+{
+ struct mlxsw_sp_fib_entry *decap_fib_entry;
+ struct mlxsw_sp_ipip_entry *ipip_entry;
+ struct mlxsw_sp_rif_ipip_lb *lb_rif;
+
+ ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (!ipip_entry)
+ return 0;
+
+ /* When a tunneling device is moved to a different VRF, we need to
+ * update the backing loopback. Since RIFs can't be edited, we need to
+ * destroy and recreate it. That might create a window of opportunity
+ * where RALUE and RATR registers end up referencing a RIF that's
+ * already gone. RATRs are handled by the RIF destroy, and to take care
+ * of RALUE, demote the decap route back.
+ */
+ if (ipip_entry->decap_fib_entry)
+ mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
+
+ lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipip_entry->ipipt,
+ ol_dev);
+ if (IS_ERR(lb_rif))
+ return PTR_ERR(lb_rif);
+ mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
+ ipip_entry->ol_lb = lb_rif;
+
+ if (ol_dev->flags & IFF_UP) {
+ decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp,
+ ipip_entry);
+ if (decap_fib_entry)
+ mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
+ decap_fib_entry);
+ }
+
+ return 0;
+}
+
+int mlxsw_sp_netdevice_ipip_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *ol_dev,
+ unsigned long event,
+ struct netdev_notifier_changeupper_info *info)
+{
+ switch (event) {
+ case NETDEV_REGISTER:
+ return mlxsw_sp_netdevice_ipip_reg_event(mlxsw_sp, ol_dev);
+ case NETDEV_UNREGISTER:
+ mlxsw_sp_netdevice_ipip_unreg_event(mlxsw_sp, ol_dev);
+ return 0;
+ case NETDEV_UP:
+ return mlxsw_sp_netdevice_ipip_up_event(mlxsw_sp, ol_dev);
+ case NETDEV_DOWN:
+ mlxsw_sp_netdevice_ipip_down_event(mlxsw_sp, ol_dev);
+ return 0;
+ case NETDEV_CHANGEUPPER:
+ if (netif_is_l3_master(info->upper_dev))
+ return mlxsw_sp_netdevice_ipip_vrf_event(mlxsw_sp,
+ ol_dev);
+ return 0;
+ }
+ return 0;
+}
+
struct mlxsw_sp_neigh_key {
struct neighbour *n;
};
@@ -1316,7 +1483,7 @@ mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
typeof(*neigh_entry),
rif_list_node);
}
- if (neigh_entry->rif_list_node.next == &rif->neigh_list)
+ if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
return NULL;
return list_next_entry(neigh_entry, rif_list_node);
}
@@ -1664,7 +1831,7 @@ __mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
rauhtd_pl);
if (err) {
- dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour talbe\n");
+ dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
break;
}
num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
@@ -2028,6 +2195,7 @@ struct mlxsw_sp_nexthop_key {
struct mlxsw_sp_nexthop {
struct list_head neigh_list_node; /* member of neigh entry list */
struct list_head rif_list_node;
+ struct list_head router_list_node;
struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
* this belongs to
*/
@@ -2050,6 +2218,8 @@ struct mlxsw_sp_nexthop {
struct mlxsw_sp_neigh_entry *neigh_entry;
struct mlxsw_sp_ipip_entry *ipip_entry;
};
+ unsigned int counter_index;
+ bool counter_valid;
};
struct mlxsw_sp_nexthop_group {
@@ -2066,6 +2236,112 @@ struct mlxsw_sp_nexthop_group {
#define nh_rif nexthops[0].rif
};
+void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ struct devlink *devlink;
+
+ devlink = priv_to_devlink(mlxsw_sp->core);
+ if (!devlink_dpipe_table_counter_enabled(devlink,
+ MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
+ return;
+
+ if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
+ return;
+
+ nh->counter_valid = true;
+}
+
+void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh)
+{
+ if (!nh->counter_valid)
+ return;
+ mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
+ nh->counter_valid = false;
+}
+
+int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh, u64 *p_counter)
+{
+ if (!nh->counter_valid)
+ return -EINVAL;
+
+ return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
+ p_counter, NULL);
+}
+
+struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
+ struct mlxsw_sp_nexthop *nh)
+{
+ if (!nh) {
+ if (list_empty(&router->nexthop_list))
+ return NULL;
+ else
+ return list_first_entry(&router->nexthop_list,
+ typeof(*nh), router_list_node);
+ }
+ if (list_is_last(&nh->router_list_node, &router->nexthop_list))
+ return NULL;
+ return list_next_entry(nh, router_list_node);
+}
+
+bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
+{
+ return nh->offloaded;
+}
+
+unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
+{
+ if (!nh->offloaded)
+ return NULL;
+ return nh->neigh_entry->ha;
+}
+
+int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
+ u32 *p_adj_hash_index)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
+ u32 adj_hash_index = 0;
+ int i;
+
+ if (!nh->offloaded || !nh_grp->adj_index_valid)
+ return -EINVAL;
+
+ *p_adj_index = nh_grp->adj_index;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
+
+ if (nh_iter == nh)
+ break;
+ if (nh_iter->offloaded)
+ adj_hash_index++;
+ }
+
+ *p_adj_hash_index = adj_hash_index;
+ return 0;
+}
+
+struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
+{
+ return nh->rif;
+}
+
+bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
+{
+ struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
+ int i;
+
+ for (i = 0; i < nh_grp->count; i++) {
+ struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
+
+ if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
+ return true;
+ }
+ return false;
+}
+
static struct fib_info *
mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
{
@@ -2323,8 +2599,8 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
return 0;
}
-static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
- struct mlxsw_sp_nexthop *nh)
+int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_nexthop *nh)
{
struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
char ratr_pl[MLXSW_REG_RATR_LEN];
@@ -2333,6 +2609,11 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
true, MLXSW_REG_RATR_TYPE_ETHERNET,
adj_index, neigh_entry->rif);
mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
+ if (nh->counter_valid)
+ mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
+ else
+ mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
+
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
}
@@ -2367,7 +2648,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
if (nh->update || reallocate) {
switch (nh->type) {
case MLXSW_SP_NEXTHOP_TYPE_ETH:
- err = mlxsw_sp_nexthop_mac_update
+ err = mlxsw_sp_nexthop_update
(mlxsw_sp, adj_index, nh);
break;
case MLXSW_SP_NEXTHOP_TYPE_IPIP:
@@ -2655,36 +2936,16 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
neigh_release(n);
}
-static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
- const struct net_device *dev,
- enum mlxsw_sp_ipip_type *p_type)
-{
- struct mlxsw_sp_router *router = mlxsw_sp->router;
- const struct mlxsw_sp_ipip_ops *ipip_ops;
- enum mlxsw_sp_ipip_type ipipt;
-
- for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
- ipip_ops = router->ipip_ops_arr[ipipt];
- if (dev->type == ipip_ops->dev_type) {
- if (p_type)
- *p_type = ipipt;
- return true;
- }
- }
- return false;
-}
-
static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
- enum mlxsw_sp_ipip_type ipipt,
struct mlxsw_sp_nexthop *nh,
struct net_device *ol_dev)
{
if (!nh->nh_grp->gateway || nh->ipip_entry)
return 0;
- nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev);
- if (IS_ERR(nh->ipip_entry))
- return PTR_ERR(nh->ipip_entry);
+ nh->ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
+ if (!nh->ipip_entry)
+ return -ENOENT;
__mlxsw_sp_nexthop_neigh_update(nh, false);
return 0;
@@ -2699,7 +2960,6 @@ static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
return;
__mlxsw_sp_nexthop_neigh_update(nh, true);
- mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry);
nh->ipip_entry = NULL;
}
@@ -2743,7 +3003,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
MLXSW_SP_L3_PROTO_IPV4)) {
nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
- err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
+ err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
if (err)
return err;
mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
@@ -2789,6 +3049,9 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
if (err)
return err;
+ mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+ list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
+
if (!dev)
return 0;
@@ -2812,6 +3075,8 @@ static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
+ list_del(&nh->router_list_node);
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
}
@@ -3121,7 +3386,7 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry,
return;
if (mlxsw_sp_fib_entry_should_offload(fib_entry))
mlxsw_sp_fib_entry_offload_set(fib_entry);
- else if (!mlxsw_sp_fib_entry_should_offload(fib_entry))
+ else
mlxsw_sp_fib_entry_offload_unset(fib_entry);
return;
default:
@@ -3576,7 +3841,7 @@ mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
struct mlxsw_sp_vr *vr;
int err;
- vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id);
+ vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
if (IS_ERR(vr))
return ERR_CAST(vr);
fib = mlxsw_sp_vr_fib(vr, proto);
@@ -4000,7 +4265,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev,
MLXSW_SP_L3_PROTO_IPV6)) {
nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
- err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev);
+ err = mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, dev);
if (err)
return err;
mlxsw_sp_nexthop_rif_init(nh, &nh->ipip_entry->ol_lb->common);
@@ -4039,6 +4304,9 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
nh->nh_grp = nh_grp;
memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr));
+ mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
+
+ list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
if (!dev)
return 0;
@@ -4051,6 +4319,8 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh)
{
mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
+ list_del(&nh->router_list_node);
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
}
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
@@ -4601,6 +4871,75 @@ static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
+ struct mfc_entry_notifier_info *men_info,
+ bool replace)
+{
+ struct mlxsw_sp_vr *vr;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
+ if (IS_ERR(vr))
+ return PTR_ERR(vr);
+
+ return mlxsw_sp_mr_route4_add(vr->mr4_table, men_info->mfc, replace);
+}
+
+static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
+ struct mfc_entry_notifier_info *men_info)
+{
+ struct mlxsw_sp_vr *vr;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
+ if (WARN_ON(!vr))
+ return;
+
+ mlxsw_sp_mr_route4_del(vr->mr4_table, men_info->mfc);
+ mlxsw_sp_vr_put(vr);
+}
+
+static int
+mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
+ struct vif_entry_notifier_info *ven_info)
+{
+ struct mlxsw_sp_rif *rif;
+ struct mlxsw_sp_vr *vr;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
+ if (IS_ERR(vr))
+ return PTR_ERR(vr);
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
+ return mlxsw_sp_mr_vif_add(vr->mr4_table, ven_info->dev,
+ ven_info->vif_index,
+ ven_info->vif_flags, rif);
+}
+
+static void
+mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
+ struct vif_entry_notifier_info *ven_info)
+{
+ struct mlxsw_sp_vr *vr;
+
+ if (mlxsw_sp->router->aborted)
+ return;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
+ if (WARN_ON(!vr))
+ return;
+
+ mlxsw_sp_mr_vif_del(vr->mr4_table, ven_info->vif_index);
+ mlxsw_sp_vr_put(vr);
+}
+
static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
{
enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
@@ -4611,6 +4950,10 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
if (err)
return err;
+ /* The multicast router code does not need an abort trap as by default,
+ * packets that don't match any routes are trapped to the CPU.
+ */
+
proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
MLXSW_SP_LPM_TREE_MIN + 1);
@@ -4692,6 +5035,8 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
if (!mlxsw_sp_vr_is_used(vr))
continue;
+
+ mlxsw_sp_mr_table_flush(vr->mr4_table);
mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
/* If virtual router was only used for IPv4, then it's no
@@ -4724,6 +5069,8 @@ struct mlxsw_sp_fib_event_work {
struct fib_entry_notifier_info fen_info;
struct fib_rule_notifier_info fr_info;
struct fib_nh_notifier_info fnh_info;
+ struct mfc_entry_notifier_info men_info;
+ struct vif_entry_notifier_info ven_info;
};
struct mlxsw_sp *mlxsw_sp;
unsigned long event;
@@ -4810,28 +5157,87 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
kfree(fib_work);
}
+static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_fib_event_work *fib_work =
+ container_of(work, struct mlxsw_sp_fib_event_work, work);
+ struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
+ struct fib_rule *rule;
+ bool replace;
+ int err;
+
+ rtnl_lock();
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD:
+ replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
+
+ err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
+ replace);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ ipmr_cache_put(fib_work->men_info.mfc);
+ break;
+ case FIB_EVENT_ENTRY_DEL:
+ mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
+ ipmr_cache_put(fib_work->men_info.mfc);
+ break;
+ case FIB_EVENT_VIF_ADD:
+ err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
+ &fib_work->ven_info);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ dev_put(fib_work->ven_info.dev);
+ break;
+ case FIB_EVENT_VIF_DEL:
+ mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
+ &fib_work->ven_info);
+ dev_put(fib_work->ven_info.dev);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
+ rule = fib_work->fr_info.rule;
+ if (!ipmr_rule_default(rule) && !rule->l3mdev)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ fib_rule_put(rule);
+ break;
+ }
+ rtnl_unlock();
+ kfree(fib_work);
+}
+
static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
struct fib_notifier_info *info)
{
+ struct fib_entry_notifier_info *fen_info;
+ struct fib_rule_notifier_info *fr_info;
+ struct fib_nh_notifier_info *fnh_info;
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
- memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info));
- /* Take referece on fib_info to prevent it from being
+ fen_info = container_of(info, struct fib_entry_notifier_info,
+ info);
+ fib_work->fen_info = *fen_info;
+ /* Take reference on fib_info to prevent it from being
* freed while work is queued. Release it afterwards.
*/
fib_info_hold(fib_work->fen_info.fi);
break;
case FIB_EVENT_RULE_ADD: /* fall through */
case FIB_EVENT_RULE_DEL:
- memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
+ fr_info = container_of(info, struct fib_rule_notifier_info,
+ info);
+ fib_work->fr_info = *fr_info;
fib_rule_get(fib_work->fr_info.rule);
break;
case FIB_EVENT_NH_ADD: /* fall through */
case FIB_EVENT_NH_DEL:
- memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info));
+ fnh_info = container_of(info, struct fib_nh_notifier_info,
+ info);
+ fib_work->fnh_info = *fnh_info;
fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
break;
}
@@ -4840,15 +5246,46 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
struct fib_notifier_info *info)
{
+ struct fib6_entry_notifier_info *fen6_info;
+ struct fib_rule_notifier_info *fr_info;
+
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
- memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info));
+ fen6_info = container_of(info, struct fib6_entry_notifier_info,
+ info);
+ fib_work->fen6_info = *fen6_info;
rt6_hold(fib_work->fen6_info.rt);
break;
case FIB_EVENT_RULE_ADD: /* fall through */
case FIB_EVENT_RULE_DEL:
+ fr_info = container_of(info, struct fib_rule_notifier_info,
+ info);
+ fib_work->fr_info = *fr_info;
+ fib_rule_get(fib_work->fr_info.rule);
+ break;
+ }
+}
+
+static void
+mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
+ struct fib_notifier_info *info)
+{
+ switch (fib_work->event) {
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_DEL:
+ memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
+ ipmr_cache_hold(fib_work->men_info.mfc);
+ break;
+ case FIB_EVENT_VIF_ADD: /* fall through */
+ case FIB_EVENT_VIF_DEL:
+ memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
+ dev_hold(fib_work->ven_info.dev);
+ break;
+ case FIB_EVENT_RULE_ADD: /* fall through */
+ case FIB_EVENT_RULE_DEL:
memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info));
fib_rule_get(fib_work->fr_info.rule);
break;
@@ -4864,7 +5301,8 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
struct mlxsw_sp_router *router;
if (!net_eq(info->net, &init_net) ||
- (info->family != AF_INET && info->family != AF_INET6))
+ (info->family != AF_INET && info->family != AF_INET6 &&
+ info->family != RTNL_FAMILY_IPMR))
return NOTIFY_DONE;
fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
@@ -4884,6 +5322,10 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
mlxsw_sp_router_fib6_event(fib_work, info);
break;
+ case RTNL_FAMILY_IPMR:
+ INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
+ mlxsw_sp_router_fibmr_event(fib_work, info);
+ break;
}
mlxsw_core_schedule_work(&fib_work->work);
@@ -5044,9 +5486,15 @@ int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
return rif->dev->ifindex;
}
+const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
+{
+ return rif->dev;
+}
+
static struct mlxsw_sp_rif *
mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
- const struct mlxsw_sp_rif_params *params)
+ const struct mlxsw_sp_rif_params *params,
+ struct netlink_ext_ack *extack)
{
u32 tb_id = l3mdev_fib_table(params->dev);
const struct mlxsw_sp_rif_ops *ops;
@@ -5060,14 +5508,16 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
ops = mlxsw_sp->router->rif_ops_arr[type];
- vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN);
+ vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
if (IS_ERR(vr))
return ERR_CAST(vr);
vr->rif_count++;
err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
- if (err)
+ if (err) {
+ NL_SET_ERR_MSG(extack, "spectrum: Exceeded number of supported router interfaces");
goto err_rif_index_alloc;
+ }
rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
if (!rif) {
@@ -5093,11 +5543,17 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
if (err)
goto err_configure;
+ err = mlxsw_sp_mr_rif_add(vr->mr4_table, rif);
+ if (err)
+ goto err_mr_rif_add;
+
mlxsw_sp_rif_counters_alloc(rif);
mlxsw_sp->router->rifs[rif_index] = rif;
return rif;
+err_mr_rif_add:
+ ops->deconfigure(rif);
err_configure:
if (fid)
mlxsw_sp_fid_put(fid);
@@ -5122,6 +5578,7 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
mlxsw_sp->router->rifs[rif->rif_index] = NULL;
mlxsw_sp_rif_counters_free(rif);
+ mlxsw_sp_mr_rif_del(vr->mr4_table, rif);
ops->deconfigure(rif);
if (fid)
/* Loopback RIFs are not associated with a FID. */
@@ -5147,7 +5604,8 @@ mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
static int
mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
- struct net_device *l3_dev)
+ struct net_device *l3_dev,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
@@ -5163,7 +5621,7 @@ mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
};
mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
- rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
+ rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
if (IS_ERR(rif))
return PTR_ERR(rif);
}
@@ -5218,7 +5676,8 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
struct net_device *port_dev,
- unsigned long event, u16 vid)
+ unsigned long event, u16 vid,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
@@ -5230,7 +5689,7 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
switch (event) {
case NETDEV_UP:
return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
- l3_dev);
+ l3_dev, extack);
case NETDEV_DOWN:
mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
break;
@@ -5240,19 +5699,22 @@ static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
}
static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
- unsigned long event)
+ unsigned long event,
+ struct netlink_ext_ack *extack)
{
if (netif_is_bridge_port(port_dev) ||
netif_is_lag_port(port_dev) ||
netif_is_ovs_port(port_dev))
return 0;
- return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1);
+ return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event, 1,
+ extack);
}
static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
struct net_device *lag_dev,
- unsigned long event, u16 vid)
+ unsigned long event, u16 vid,
+ struct netlink_ext_ack *extack)
{
struct net_device *port_dev;
struct list_head *iter;
@@ -5262,7 +5724,8 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
if (mlxsw_sp_port_dev_check(port_dev)) {
err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
port_dev,
- event, vid);
+ event, vid,
+ extack);
if (err)
return err;
}
@@ -5272,16 +5735,19 @@ static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
}
static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
- unsigned long event)
+ unsigned long event,
+ struct netlink_ext_ack *extack)
{
if (netif_is_bridge_port(lag_dev))
return 0;
- return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1);
+ return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1,
+ extack);
}
static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
- unsigned long event)
+ unsigned long event,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
struct mlxsw_sp_rif_params params = {
@@ -5291,7 +5757,7 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
switch (event) {
case NETDEV_UP:
- rif = mlxsw_sp_rif_create(mlxsw_sp, &params);
+ rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
if (IS_ERR(rif))
return PTR_ERR(rif);
break;
@@ -5305,7 +5771,8 @@ static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev,
}
static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
- unsigned long event)
+ unsigned long event,
+ struct netlink_ext_ack *extack)
{
struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
u16 vid = vlan_dev_vlan_id(vlan_dev);
@@ -5315,27 +5782,28 @@ static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev,
if (mlxsw_sp_port_dev_check(real_dev))
return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
- event, vid);
+ event, vid, extack);
else if (netif_is_lag_master(real_dev))
return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
- vid);
+ vid, extack);
else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
- return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event);
+ return mlxsw_sp_inetaddr_bridge_event(vlan_dev, event, extack);
return 0;
}
static int __mlxsw_sp_inetaddr_event(struct net_device *dev,
- unsigned long event)
+ unsigned long event,
+ struct netlink_ext_ack *extack)
{
if (mlxsw_sp_port_dev_check(dev))
- return mlxsw_sp_inetaddr_port_event(dev, event);
+ return mlxsw_sp_inetaddr_port_event(dev, event, extack);
else if (netif_is_lag_master(dev))
- return mlxsw_sp_inetaddr_lag_event(dev, event);
+ return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
else if (netif_is_bridge_master(dev))
- return mlxsw_sp_inetaddr_bridge_event(dev, event);
+ return mlxsw_sp_inetaddr_bridge_event(dev, event, extack);
else if (is_vlan_dev(dev))
- return mlxsw_sp_inetaddr_vlan_event(dev, event);
+ return mlxsw_sp_inetaddr_vlan_event(dev, event, extack);
else
return 0;
}
@@ -5349,6 +5817,32 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
struct mlxsw_sp_rif *rif;
int err = 0;
+ /* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
+ if (event == NETDEV_UP)
+ goto out;
+
+ mlxsw_sp = mlxsw_sp_lower_get(dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!mlxsw_sp_rif_should_config(rif, dev, event))
+ goto out;
+
+ err = __mlxsw_sp_inetaddr_event(dev, event, NULL);
+out:
+ return notifier_from_errno(err);
+}
+
+int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct in_validator_info *ivi = (struct in_validator_info *) ptr;
+ struct net_device *dev = ivi->ivi_dev->dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+ int err = 0;
+
mlxsw_sp = mlxsw_sp_lower_get(dev);
if (!mlxsw_sp)
goto out;
@@ -5357,7 +5851,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused,
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
- err = __mlxsw_sp_inetaddr_event(dev, event);
+ err = __mlxsw_sp_inetaddr_event(dev, event, ivi->extack);
out:
return notifier_from_errno(err);
}
@@ -5386,7 +5880,7 @@ static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
if (!mlxsw_sp_rif_should_config(rif, dev, event))
goto out;
- __mlxsw_sp_inetaddr_event(dev, event);
+ __mlxsw_sp_inetaddr_event(dev, event, NULL);
out:
rtnl_unlock();
dev_put(dev);
@@ -5401,6 +5895,10 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
struct net_device *dev = if6->idev->dev;
+ /* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
+ if (event == NETDEV_UP)
+ return NOTIFY_DONE;
+
if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
return NOTIFY_DONE;
@@ -5417,6 +5915,28 @@ int mlxsw_sp_inet6addr_event(struct notifier_block *unused,
return NOTIFY_DONE;
}
+int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
+ struct net_device *dev = i6vi->i6vi_dev->dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_rif *rif;
+ int err = 0;
+
+ mlxsw_sp = mlxsw_sp_lower_get(dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
+ if (!mlxsw_sp_rif_should_config(rif, dev, event))
+ goto out;
+
+ err = __mlxsw_sp_inetaddr_event(dev, event, i6vi->extack);
+out:
+ return notifier_from_errno(err);
+}
+
static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
const char *mac, int mtu)
{
@@ -5463,6 +5983,17 @@ int mlxsw_sp_netdevice_router_port_event(struct net_device *dev)
if (err)
goto err_rif_fdb_op;
+ if (rif->mtu != dev->mtu) {
+ struct mlxsw_sp_vr *vr;
+
+ /* The RIF is relevant only to its mr_table instance, as unlike
+ * unicast routing, in multicast routing a RIF cannot be shared
+ * between several multicast routing tables.
+ */
+ vr = &mlxsw_sp->router->vrs[rif->vr_id];
+ mlxsw_sp_mr_rif_mtu_update(vr->mr4_table, rif, dev->mtu);
+ }
+
ether_addr_copy(rif->addr, dev->dev_addr);
rif->mtu = dev->mtu;
@@ -5478,7 +6009,8 @@ err_rif_edit:
}
static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
- struct net_device *l3_dev)
+ struct net_device *l3_dev,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_rif *rif;
@@ -5487,9 +6019,9 @@ static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
*/
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
if (rif)
- __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
+ __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, extack);
- return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP);
+ return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP, extack);
}
static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
@@ -5500,7 +6032,7 @@ static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
if (!rif)
return;
- __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN);
+ __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN, NULL);
}
int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
@@ -5516,10 +6048,14 @@ int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
case NETDEV_PRECHANGEUPPER:
return 0;
case NETDEV_CHANGEUPPER:
- if (info->linking)
- err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev);
- else
+ if (info->linking) {
+ struct netlink_ext_ack *extack;
+
+ extack = netdev_notifier_info_to_extack(&info->info);
+ err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
+ } else {
mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
+ }
break;
}
@@ -5625,7 +6161,7 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
}
-static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
{
return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
}
@@ -5826,7 +6362,7 @@ mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
struct mlxsw_sp_vr *ul_vr;
int err;
- ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id);
+ ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
if (IS_ERR(ul_vr))
return PTR_ERR(ul_vr);
@@ -5990,10 +6526,15 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
if (err)
goto err_nexthop_group_ht_init;
+ INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
err = mlxsw_sp_lpm_init(mlxsw_sp);
if (err)
goto err_lpm_init;
+ err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
+ if (err)
+ goto err_mr_init;
+
err = mlxsw_sp_vrs_init(mlxsw_sp);
if (err)
goto err_vrs_init;
@@ -6015,6 +6556,8 @@ err_register_fib_notifier:
err_neigh_init:
mlxsw_sp_vrs_fini(mlxsw_sp);
err_vrs_init:
+ mlxsw_sp_mr_fini(mlxsw_sp);
+err_mr_init:
mlxsw_sp_lpm_fini(mlxsw_sp);
err_lpm_init:
rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
@@ -6036,6 +6579,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
unregister_fib_notifier(&mlxsw_sp->router->fib_nb);
mlxsw_sp_neigh_fini(mlxsw_sp);
mlxsw_sp_vrs_fini(mlxsw_sp);
+ mlxsw_sp_mr_fini(mlxsw_sp);
mlxsw_sp_lpm_fini(mlxsw_sp);
rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 345fcc4f38e9..3f2d840cb285 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -62,6 +62,7 @@ enum mlxsw_sp_rif_counter_dir {
};
struct mlxsw_sp_neigh_entry;
+struct mlxsw_sp_nexthop;
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
@@ -69,6 +70,8 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
+const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir,
@@ -107,5 +110,24 @@ union mlxsw_sp_l3addr
mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto,
const struct net_device *ol_dev);
__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev);
+struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
+ struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh);
+unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh);
+int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
+ u32 *p_adj_hash_index);
+struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh);
+bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh);
+#define mlxsw_sp_nexthop_for_each(nh, router) \
+ for (nh = mlxsw_sp_nexthop_next(router, NULL); nh; \
+ nh = mlxsw_sp_nexthop_next(router, nh))
+int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh, u64 *p_counter);
+int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
+ struct mlxsw_sp_nexthop *nh);
+void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh);
+void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nexthop *nh);
#endif /* _MLXSW_ROUTER_H_*/
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index d39ffbfcc436..7b8548e25ae7 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -46,8 +46,10 @@
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/rtnetlink.h>
+#include <linux/netlink.h>
#include <net/switchdev.h>
+#include "spectrum_router.h"
#include "spectrum.h"
#include "core.h"
#include "reg.h"
@@ -67,7 +69,6 @@ struct mlxsw_sp_bridge {
u32 ageing_time;
bool vlan_enabled_exists;
struct list_head bridges_list;
- struct list_head mids_list;
DECLARE_BITMAP(mids_bitmap, MLXSW_SP_MID_MAX);
const struct mlxsw_sp_bridge_ops *bridge_8021q_ops;
const struct mlxsw_sp_bridge_ops *bridge_8021d_ops;
@@ -77,8 +78,10 @@ struct mlxsw_sp_bridge_device {
struct net_device *dev;
struct list_head list;
struct list_head ports_list;
+ struct list_head mids_list;
u8 vlan_enabled:1,
- multicast_enabled:1;
+ multicast_enabled:1,
+ mrouter:1;
const struct mlxsw_sp_bridge_ops *ops;
};
@@ -107,7 +110,8 @@ struct mlxsw_sp_bridge_vlan {
struct mlxsw_sp_bridge_ops {
int (*port_join)(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_bridge_port *bridge_port,
- struct mlxsw_sp_port *mlxsw_sp_port);
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netlink_ext_ack *extack);
void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_bridge_port *bridge_port,
struct mlxsw_sp_port *mlxsw_sp_port);
@@ -121,6 +125,20 @@ mlxsw_sp_bridge_port_fdb_flush(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_bridge_port *bridge_port,
u16 fid_index);
+static void
+mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port);
+
+static void
+mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_device
+ *bridge_device);
+
+static void
+mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ bool add);
+
static struct mlxsw_sp_bridge_device *
mlxsw_sp_bridge_device_find(const struct mlxsw_sp_bridge *bridge,
const struct net_device *br_dev)
@@ -154,6 +172,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
bridge_device->dev = br_dev;
bridge_device->vlan_enabled = vlan_enabled;
bridge_device->multicast_enabled = br_multicast_enabled(br_dev);
+ bridge_device->mrouter = br_multicast_router(br_dev);
INIT_LIST_HEAD(&bridge_device->ports_list);
if (vlan_enabled) {
bridge->vlan_enabled_exists = true;
@@ -161,6 +180,7 @@ mlxsw_sp_bridge_device_create(struct mlxsw_sp_bridge *bridge,
} else {
bridge_device->ops = bridge->bridge_8021d_ops;
}
+ INIT_LIST_HEAD(&bridge_device->mids_list);
list_add(&bridge_device->list, &bridge->bridges_list);
return bridge_device;
@@ -174,6 +194,7 @@ mlxsw_sp_bridge_device_destroy(struct mlxsw_sp_bridge *bridge,
if (bridge_device->vlan_enabled)
bridge->vlan_enabled_exists = false;
WARN_ON(!list_empty(&bridge_device->ports_list));
+ WARN_ON(!list_empty(&bridge_device->mids_list));
kfree(bridge_device);
}
@@ -249,7 +270,8 @@ mlxsw_sp_bridge_port_create(struct mlxsw_sp_bridge_device *bridge_device,
bridge_port->dev = brport_dev;
bridge_port->bridge_device = bridge_device;
bridge_port->stp_state = BR_STATE_DISABLED;
- bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC;
+ bridge_port->flags = BR_LEARNING | BR_FLOOD | BR_LEARNING_SYNC |
+ BR_MCAST_FLOOD;
INIT_LIST_HEAD(&bridge_port->vlans_list);
list_add(&bridge_port->list, &bridge_device->ports_list);
bridge_port->ref_count = 1;
@@ -455,7 +477,8 @@ static int mlxsw_sp_port_attr_get(struct net_device *dev,
&attr->u.brport_flags);
break;
case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT:
- attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD;
+ attr->u.brport_flags_support = BR_LEARNING | BR_FLOOD |
+ BR_MCAST_FLOOD;
break;
default:
return -EOPNOTSUPP;
@@ -640,8 +663,18 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port,
if (err)
return err;
- memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags));
+ if (bridge_port->bridge_device->multicast_enabled)
+ goto out;
+ err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port,
+ MLXSW_SP_FLOOD_TYPE_MC,
+ brport_flags &
+ BR_MCAST_FLOOD);
+ if (err)
+ return err;
+
+out:
+ memcpy(&bridge_port->flags, &brport_flags, sizeof(brport_flags));
return 0;
}
@@ -699,10 +732,10 @@ static int mlxsw_sp_port_attr_br_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port,
return -EINVAL;
}
-static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port,
- struct switchdev_trans *trans,
- struct net_device *orig_dev,
- bool is_port_mc_router)
+static int mlxsw_sp_port_attr_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ struct net_device *orig_dev,
+ bool is_port_mrouter)
{
struct mlxsw_sp_bridge_port *bridge_port;
int err;
@@ -720,15 +753,26 @@ static int mlxsw_sp_port_attr_mc_router_set(struct mlxsw_sp_port *mlxsw_sp_port,
err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port, bridge_port,
MLXSW_SP_FLOOD_TYPE_MC,
- is_port_mc_router);
+ is_port_mrouter);
if (err)
return err;
+ mlxsw_sp_port_mrouter_update_mdb(mlxsw_sp_port, bridge_port,
+ is_port_mrouter);
out:
- bridge_port->mrouter = is_port_mc_router;
+ bridge_port->mrouter = is_port_mrouter;
return 0;
}
+static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port)
+{
+ const struct mlxsw_sp_bridge_device *bridge_device;
+
+ bridge_device = bridge_port->bridge_device;
+ return bridge_device->multicast_enabled ? bridge_port->mrouter :
+ bridge_port->flags & BR_MCAST_FLOOD;
+}
+
static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
struct switchdev_trans *trans,
struct net_device *orig_dev,
@@ -749,9 +793,15 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
if (!bridge_device)
return 0;
+ if (bridge_device->multicast_enabled != !mc_disabled) {
+ bridge_device->multicast_enabled = !mc_disabled;
+ mlxsw_sp_bridge_mdb_mc_enable_sync(mlxsw_sp_port,
+ bridge_device);
+ }
+
list_for_each_entry(bridge_port, &bridge_device->ports_list, list) {
enum mlxsw_sp_flood_type packet_type = MLXSW_SP_FLOOD_TYPE_MC;
- bool member = mc_disabled ? true : bridge_port->mrouter;
+ bool member = mlxsw_sp_mc_flood(bridge_port);
err = mlxsw_sp_bridge_port_flood_table_set(mlxsw_sp_port,
bridge_port,
@@ -765,6 +815,60 @@ static int mlxsw_sp_port_mc_disabled_set(struct mlxsw_sp_port *mlxsw_sp_port,
return 0;
}
+static int mlxsw_sp_smid_router_port_set(struct mlxsw_sp *mlxsw_sp,
+ u16 mid_idx, bool add)
+{
+ char *smid_pl;
+ int err;
+
+ smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL);
+ if (!smid_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_smid_pack(smid_pl, mid_idx,
+ mlxsw_sp_router_port(mlxsw_sp), add);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
+ kfree(smid_pl);
+ return err;
+}
+
+static void
+mlxsw_sp_bridge_mrouter_update_mdb(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_device *bridge_device,
+ bool add)
+{
+ struct mlxsw_sp_mid *mid;
+
+ list_for_each_entry(mid, &bridge_device->mids_list, list)
+ mlxsw_sp_smid_router_port_set(mlxsw_sp, mid->mid, add);
+}
+
+static int
+mlxsw_sp_port_attr_br_mrouter_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct switchdev_trans *trans,
+ struct net_device *orig_dev,
+ bool is_mrouter)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ if (switchdev_trans_ph_prepare(trans))
+ return 0;
+
+ /* It's possible we failed to enslave the port, yet this
+ * operation is executed due to it being deferred.
+ */
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, orig_dev);
+ if (!bridge_device)
+ return 0;
+
+ if (bridge_device->mrouter != is_mrouter)
+ mlxsw_sp_bridge_mrouter_update_mdb(mlxsw_sp, bridge_device,
+ is_mrouter);
+ bridge_device->mrouter = is_mrouter;
+ return 0;
+}
+
static int mlxsw_sp_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
@@ -793,15 +897,20 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
attr->u.vlan_filtering);
break;
case SWITCHDEV_ATTR_ID_PORT_MROUTER:
- err = mlxsw_sp_port_attr_mc_router_set(mlxsw_sp_port, trans,
- attr->orig_dev,
- attr->u.mrouter);
+ err = mlxsw_sp_port_attr_mrouter_set(mlxsw_sp_port, trans,
+ attr->orig_dev,
+ attr->u.mrouter);
break;
case SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED:
err = mlxsw_sp_port_mc_disabled_set(mlxsw_sp_port, trans,
attr->orig_dev,
attr->u.mc_disabled);
break;
+ case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+ err = mlxsw_sp_port_attr_br_mrouter_set(mlxsw_sp_port, trans,
+ attr->orig_dev,
+ attr->u.mrouter);
+ break;
default:
err = -EOPNOTSUPP;
break;
@@ -810,14 +919,6 @@ static int mlxsw_sp_port_attr_set(struct net_device *dev,
return err;
}
-static bool mlxsw_sp_mc_flood(const struct mlxsw_sp_bridge_port *bridge_port)
-{
- const struct mlxsw_sp_bridge_device *bridge_device;
-
- bridge_device = bridge_port->bridge_device;
- return !bridge_device->multicast_enabled ? true : bridge_port->mrouter;
-}
-
static int
mlxsw_sp_port_vlan_fid_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
struct mlxsw_sp_bridge_port *bridge_port)
@@ -955,24 +1056,28 @@ mlxsw_sp_port_vlan_bridge_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
struct mlxsw_sp_bridge_vlan *bridge_vlan;
struct mlxsw_sp_bridge_port *bridge_port;
u16 vid = mlxsw_sp_port_vlan->vid;
- bool last;
+ bool last_port, last_vlan;
if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021Q &&
mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_8021D))
return;
bridge_port = mlxsw_sp_port_vlan->bridge_port;
+ last_vlan = list_is_singular(&bridge_port->vlans_list);
bridge_vlan = mlxsw_sp_bridge_vlan_find(bridge_port, vid);
- last = list_is_singular(&bridge_vlan->port_vlan_list);
+ last_port = list_is_singular(&bridge_vlan->port_vlan_list);
list_del(&mlxsw_sp_port_vlan->bridge_vlan_node);
mlxsw_sp_bridge_vlan_put(bridge_vlan);
mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_DISABLED);
mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
- if (last)
+ if (last_port)
mlxsw_sp_bridge_port_fdb_flush(mlxsw_sp_port->mlxsw_sp,
bridge_port,
mlxsw_sp_fid_index(fid));
+ if (last_vlan)
+ mlxsw_sp_bridge_port_mdb_flush(mlxsw_sp_port, bridge_port);
+
mlxsw_sp_port_vlan_fid_leave(mlxsw_sp_port_vlan);
mlxsw_sp_bridge_port_put(mlxsw_sp_port->mlxsw_sp->bridge, bridge_port);
@@ -1182,7 +1287,7 @@ mlxsw_sp_port_fdb_set(struct mlxsw_sp_port *mlxsw_sp_port,
}
static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
- u16 fid, u16 mid, bool adding)
+ u16 fid, u16 mid_idx, bool adding)
{
char *sfd_pl;
int err;
@@ -1193,16 +1298,16 @@ static int mlxsw_sp_port_mdb_op(struct mlxsw_sp *mlxsw_sp, const char *addr,
mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
mlxsw_reg_sfd_mc_pack(sfd_pl, 0, addr, fid,
- MLXSW_REG_SFD_REC_ACTION_NOP, mid);
+ MLXSW_REG_SFD_REC_ACTION_NOP, mid_idx);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
kfree(sfd_pl);
return err;
}
-static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid,
- bool add, bool clear_all_ports)
+static int mlxsw_sp_port_smid_full_entry(struct mlxsw_sp *mlxsw_sp, u16 mid_idx,
+ long *ports_bitmap,
+ bool set_router_port)
{
- struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
char *smid_pl;
int err, i;
@@ -1210,66 +1315,208 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid,
if (!smid_pl)
return -ENOMEM;
- mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add);
- if (clear_all_ports) {
- for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++)
- if (mlxsw_sp->ports[i])
- mlxsw_reg_smid_port_mask_set(smid_pl, i, 1);
+ mlxsw_reg_smid_pack(smid_pl, mid_idx, 0, false);
+ for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) {
+ if (mlxsw_sp->ports[i])
+ mlxsw_reg_smid_port_mask_set(smid_pl, i, 1);
}
+
+ mlxsw_reg_smid_port_mask_set(smid_pl,
+ mlxsw_sp_router_port(mlxsw_sp), 1);
+
+ for_each_set_bit(i, ports_bitmap, mlxsw_core_max_ports(mlxsw_sp->core))
+ mlxsw_reg_smid_port_set(smid_pl, i, 1);
+
+ mlxsw_reg_smid_port_set(smid_pl, mlxsw_sp_router_port(mlxsw_sp),
+ set_router_port);
+
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
+ kfree(smid_pl);
+ return err;
+}
+
+static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port,
+ u16 mid_idx, bool add)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char *smid_pl;
+ int err;
+
+ smid_pl = kmalloc(MLXSW_REG_SMID_LEN, GFP_KERNEL);
+ if (!smid_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_smid_pack(smid_pl, mid_idx, mlxsw_sp_port->local_port, add);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(smid), smid_pl);
kfree(smid_pl);
return err;
}
-static struct mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp *mlxsw_sp,
- const unsigned char *addr,
- u16 fid)
+static struct
+mlxsw_sp_mid *__mlxsw_sp_mc_get(struct mlxsw_sp_bridge_device *bridge_device,
+ const unsigned char *addr,
+ u16 fid)
{
struct mlxsw_sp_mid *mid;
- list_for_each_entry(mid, &mlxsw_sp->bridge->mids_list, list) {
+ list_for_each_entry(mid, &bridge_device->mids_list, list) {
if (ether_addr_equal(mid->addr, addr) && mid->fid == fid)
return mid;
}
return NULL;
}
-static struct mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
- const unsigned char *addr,
- u16 fid)
+static void
+mlxsw_sp_bridge_port_get_ports_bitmap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ unsigned long *ports_bitmap)
+{
+ struct mlxsw_sp_port *mlxsw_sp_port;
+ u64 max_lag_members, i;
+ int lag_id;
+
+ if (!bridge_port->lagged) {
+ set_bit(bridge_port->system_port, ports_bitmap);
+ } else {
+ max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_LAG_MEMBERS);
+ lag_id = bridge_port->lag_id;
+ for (i = 0; i < max_lag_members; i++) {
+ mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp,
+ lag_id, i);
+ if (mlxsw_sp_port)
+ set_bit(mlxsw_sp_port->local_port,
+ ports_bitmap);
+ }
+ }
+}
+
+static void
+mlxsw_sp_mc_get_mrouters_bitmap(unsigned long *flood_bitmap,
+ struct mlxsw_sp_bridge_device *bridge_device,
+ struct mlxsw_sp *mlxsw_sp)
{
- struct mlxsw_sp_mid *mid;
+ struct mlxsw_sp_bridge_port *bridge_port;
+
+ list_for_each_entry(bridge_port, &bridge_device->ports_list, list) {
+ if (bridge_port->mrouter) {
+ mlxsw_sp_bridge_port_get_ports_bitmap(mlxsw_sp,
+ bridge_port,
+ flood_bitmap);
+ }
+ }
+}
+
+static bool
+mlxsw_sp_mc_write_mdb_entry(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mid *mid,
+ struct mlxsw_sp_bridge_device *bridge_device)
+{
+ long *flood_bitmap;
+ int num_of_ports;
+ int alloc_size;
u16 mid_idx;
+ int err;
mid_idx = find_first_zero_bit(mlxsw_sp->bridge->mids_bitmap,
MLXSW_SP_MID_MAX);
if (mid_idx == MLXSW_SP_MID_MAX)
- return NULL;
+ return false;
+
+ num_of_ports = mlxsw_core_max_ports(mlxsw_sp->core);
+ alloc_size = sizeof(long) * BITS_TO_LONGS(num_of_ports);
+ flood_bitmap = kzalloc(alloc_size, GFP_KERNEL);
+ if (!flood_bitmap)
+ return false;
+
+ bitmap_copy(flood_bitmap, mid->ports_in_mid, num_of_ports);
+ mlxsw_sp_mc_get_mrouters_bitmap(flood_bitmap, bridge_device, mlxsw_sp);
+
+ mid->mid = mid_idx;
+ err = mlxsw_sp_port_smid_full_entry(mlxsw_sp, mid_idx, flood_bitmap,
+ bridge_device->mrouter);
+ kfree(flood_bitmap);
+ if (err)
+ return false;
+
+ err = mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid_idx,
+ true);
+ if (err)
+ return false;
+
+ set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap);
+ mid->in_hw = true;
+ return true;
+}
+
+static int mlxsw_sp_mc_remove_mdb_entry(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_mid *mid)
+{
+ if (!mid->in_hw)
+ return 0;
+
+ clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap);
+ mid->in_hw = false;
+ return mlxsw_sp_port_mdb_op(mlxsw_sp, mid->addr, mid->fid, mid->mid,
+ false);
+}
+
+static struct
+mlxsw_sp_mid *__mlxsw_sp_mc_alloc(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_bridge_device *bridge_device,
+ const unsigned char *addr,
+ u16 fid)
+{
+ struct mlxsw_sp_mid *mid;
+ size_t alloc_size;
mid = kzalloc(sizeof(*mid), GFP_KERNEL);
if (!mid)
return NULL;
- set_bit(mid_idx, mlxsw_sp->bridge->mids_bitmap);
+ alloc_size = sizeof(unsigned long) *
+ BITS_TO_LONGS(mlxsw_core_max_ports(mlxsw_sp->core));
+
+ mid->ports_in_mid = kzalloc(alloc_size, GFP_KERNEL);
+ if (!mid->ports_in_mid)
+ goto err_ports_in_mid_alloc;
+
ether_addr_copy(mid->addr, addr);
mid->fid = fid;
- mid->mid = mid_idx;
- mid->ref_count = 0;
- list_add_tail(&mid->list, &mlxsw_sp->bridge->mids_list);
+ mid->in_hw = false;
+
+ if (!bridge_device->multicast_enabled)
+ goto out;
+ if (!mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid, bridge_device))
+ goto err_write_mdb_entry;
+
+out:
+ list_add_tail(&mid->list, &bridge_device->mids_list);
return mid;
+
+err_write_mdb_entry:
+ kfree(mid->ports_in_mid);
+err_ports_in_mid_alloc:
+ kfree(mid);
+ return NULL;
}
-static int __mlxsw_sp_mc_dec_ref(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_mid *mid)
+static int mlxsw_sp_port_remove_from_mid(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_mid *mid)
{
- if (--mid->ref_count == 0) {
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ int err = 0;
+
+ clear_bit(mlxsw_sp_port->local_port, mid->ports_in_mid);
+ if (bitmap_empty(mid->ports_in_mid,
+ mlxsw_core_max_ports(mlxsw_sp->core))) {
+ err = mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid);
list_del(&mid->list);
- clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap);
+ kfree(mid->ports_in_mid);
kfree(mid);
- return 1;
}
- return 0;
+ return err;
}
static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -1302,39 +1549,72 @@ static int mlxsw_sp_port_mdb_add(struct mlxsw_sp_port *mlxsw_sp_port,
fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
- mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index);
+ mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index);
if (!mid) {
- mid = __mlxsw_sp_mc_alloc(mlxsw_sp, mdb->addr, fid_index);
+ mid = __mlxsw_sp_mc_alloc(mlxsw_sp, bridge_device, mdb->addr,
+ fid_index);
if (!mid) {
netdev_err(dev, "Unable to allocate MC group\n");
return -ENOMEM;
}
}
- mid->ref_count++;
+ set_bit(mlxsw_sp_port->local_port, mid->ports_in_mid);
+
+ if (!bridge_device->multicast_enabled)
+ return 0;
- err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true,
- mid->ref_count == 1);
+ if (bridge_port->mrouter)
+ return 0;
+
+ err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, true);
if (err) {
netdev_err(dev, "Unable to set SMID\n");
goto err_out;
}
- if (mid->ref_count == 1) {
- err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index,
- mid->mid, true);
- if (err) {
- netdev_err(dev, "Unable to set MC SFD\n");
- goto err_out;
- }
- }
-
return 0;
err_out:
- __mlxsw_sp_mc_dec_ref(mlxsw_sp, mid);
+ mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid);
return err;
}
+static void
+mlxsw_sp_bridge_mdb_mc_enable_sync(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_device
+ *bridge_device)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ struct mlxsw_sp_mid *mid;
+ bool mc_enabled;
+
+ mc_enabled = bridge_device->multicast_enabled;
+
+ list_for_each_entry(mid, &bridge_device->mids_list, list) {
+ if (mc_enabled)
+ mlxsw_sp_mc_write_mdb_entry(mlxsw_sp, mid,
+ bridge_device);
+ else
+ mlxsw_sp_mc_remove_mdb_entry(mlxsw_sp, mid);
+ }
+}
+
+static void
+mlxsw_sp_port_mrouter_update_mdb(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ bool add)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_mid *mid;
+
+ bridge_device = bridge_port->bridge_device;
+
+ list_for_each_entry(mid, &bridge_device->mids_list, list) {
+ if (!test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid))
+ mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, add);
+ }
+}
+
static int mlxsw_sp_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
@@ -1399,6 +1679,30 @@ static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port,
return 0;
}
+static int
+__mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port,
+ struct mlxsw_sp_mid *mid)
+{
+ struct net_device *dev = mlxsw_sp_port->dev;
+ int err;
+
+ if (bridge_port->bridge_device->multicast_enabled) {
+ if (bridge_port->bridge_device->multicast_enabled) {
+ err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid,
+ false);
+ if (err)
+ netdev_err(dev, "Unable to remove port from SMID\n");
+ }
+ }
+
+ err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid);
+ if (err)
+ netdev_err(dev, "Unable to remove MC SFD\n");
+
+ return err;
+}
+
static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
const struct switchdev_obj_port_mdb *mdb)
{
@@ -1410,8 +1714,6 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_bridge_port *bridge_port;
struct mlxsw_sp_mid *mid;
u16 fid_index;
- u16 mid_idx;
- int err = 0;
bridge_port = mlxsw_sp_bridge_port_find(mlxsw_sp->bridge, orig_dev);
if (!bridge_port)
@@ -1426,25 +1728,33 @@ static int mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port,
fid_index = mlxsw_sp_fid_index(mlxsw_sp_port_vlan->fid);
- mid = __mlxsw_sp_mc_get(mlxsw_sp, mdb->addr, fid_index);
+ mid = __mlxsw_sp_mc_get(bridge_device, mdb->addr, fid_index);
if (!mid) {
netdev_err(dev, "Unable to remove port from MC DB\n");
return -EINVAL;
}
- err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false, false);
- if (err)
- netdev_err(dev, "Unable to remove port from SMID\n");
+ return __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port, mid);
+}
- mid_idx = mid->mid;
- if (__mlxsw_sp_mc_dec_ref(mlxsw_sp, mid)) {
- err = mlxsw_sp_port_mdb_op(mlxsw_sp, mdb->addr, fid_index,
- mid_idx, false);
- if (err)
- netdev_err(dev, "Unable to remove MC SFD\n");
- }
+static void
+mlxsw_sp_bridge_port_mdb_flush(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_bridge_port *bridge_port)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct mlxsw_sp_mid *mid, *tmp;
- return err;
+ bridge_device = bridge_port->bridge_device;
+
+ list_for_each_entry_safe(mid, tmp, &bridge_device->mids_list, list) {
+ if (test_bit(mlxsw_sp_port->local_port, mid->ports_in_mid)) {
+ __mlxsw_sp_port_mdb_del(mlxsw_sp_port, bridge_port,
+ mid);
+ } else if (bridge_device->multicast_enabled &&
+ bridge_port->mrouter) {
+ mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false);
+ }
+ }
}
static int mlxsw_sp_port_obj_del(struct net_device *dev,
@@ -1497,12 +1807,15 @@ static const struct switchdev_ops mlxsw_sp_port_switchdev_ops = {
static int
mlxsw_sp_bridge_8021q_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_bridge_port *bridge_port,
- struct mlxsw_sp_port *mlxsw_sp_port)
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
- if (is_vlan_dev(bridge_port->dev))
+ if (is_vlan_dev(bridge_port->dev)) {
+ NL_SET_ERR_MSG(extack, "spectrum: Can not enslave a VLAN device to a VLAN-aware bridge");
return -EINVAL;
+ }
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, 1);
if (WARN_ON(!mlxsw_sp_port_vlan))
@@ -1559,13 +1872,16 @@ mlxsw_sp_port_is_br_member(const struct mlxsw_sp_port *mlxsw_sp_port,
static int
mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_bridge_port *bridge_port,
- struct mlxsw_sp_port *mlxsw_sp_port)
+ struct mlxsw_sp_port *mlxsw_sp_port,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
u16 vid;
- if (!is_vlan_dev(bridge_port->dev))
+ if (!is_vlan_dev(bridge_port->dev)) {
+ NL_SET_ERR_MSG(extack, "spectrum: Only VLAN devices can be enslaved to a VLAN-unaware bridge");
return -EINVAL;
+ }
vid = vlan_dev_vlan_id(bridge_port->dev);
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
@@ -1573,7 +1889,7 @@ mlxsw_sp_bridge_8021d_port_join(struct mlxsw_sp_bridge_device *bridge_device,
return -EINVAL;
if (mlxsw_sp_port_is_br_member(mlxsw_sp_port, bridge_device->dev)) {
- netdev_err(mlxsw_sp_port->dev, "Can't bridge VLAN uppers of the same port\n");
+ NL_SET_ERR_MSG(extack, "spectrum: Can not bridge VLAN uppers of the same port");
return -EINVAL;
}
@@ -1616,7 +1932,8 @@ static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = {
int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *brport_dev,
- struct net_device *br_dev)
+ struct net_device *br_dev,
+ struct netlink_ext_ack *extack)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
struct mlxsw_sp_bridge_device *bridge_device;
@@ -1629,7 +1946,7 @@ int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
bridge_device = bridge_port->bridge_device;
err = bridge_device->ops->port_join(bridge_device, bridge_port,
- mlxsw_sp_port);
+ mlxsw_sp_port, extack);
if (err)
goto err_port_join;
@@ -1981,17 +2298,6 @@ static void mlxsw_sp_fdb_fini(struct mlxsw_sp *mlxsw_sp)
}
-static void mlxsw_sp_mids_fini(struct mlxsw_sp *mlxsw_sp)
-{
- struct mlxsw_sp_mid *mid, *tmp;
-
- list_for_each_entry_safe(mid, tmp, &mlxsw_sp->bridge->mids_list, list) {
- list_del(&mid->list);
- clear_bit(mid->mid, mlxsw_sp->bridge->mids_bitmap);
- kfree(mid);
- }
-}
-
int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
{
struct mlxsw_sp_bridge *bridge;
@@ -2003,7 +2309,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
bridge->mlxsw_sp = mlxsw_sp;
INIT_LIST_HEAD(&mlxsw_sp->bridge->bridges_list);
- INIT_LIST_HEAD(&mlxsw_sp->bridge->mids_list);
bridge->bridge_8021q_ops = &mlxsw_sp_bridge_8021q_ops;
bridge->bridge_8021d_ops = &mlxsw_sp_bridge_8021d_ops;
@@ -2014,7 +2319,6 @@ int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp)
void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp)
{
mlxsw_sp_fdb_fini(mlxsw_sp);
- mlxsw_sp_mids_fini(mlxsw_sp);
WARN_ON(!list_empty(&mlxsw_sp->bridge->bridges_list));
kfree(mlxsw_sp->bridge);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index f396a1fef633..ec6cef8267ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -62,6 +62,8 @@ enum {
MLXSW_TRAP_ID_TTLERROR = 0x53,
MLXSW_TRAP_ID_LBERROR = 0x54,
MLXSW_TRAP_ID_IPV4_OSPF = 0x55,
+ MLXSW_TRAP_ID_IPV4_PIM = 0x58,
+ MLXSW_TRAP_ID_RPF = 0x5C,
MLXSW_TRAP_ID_IP2ME = 0x5F,
MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60,
MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61,
@@ -89,6 +91,10 @@ enum {
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
MLXSW_TRAP_ID_ACL0 = 0x1C0,
+ /* Multicast trap used for routes with trap action */
+ MLXSW_TRAP_ID_ACL1 = 0x1C1,
+ /* Multicast trap used for routes with trap-and-forward action */
+ MLXSW_TRAP_ID_ACL2 = 0x1C2,
MLXSW_TRAP_ID_MAX = 0x1FF
};
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index e798fbe08600..52207508744c 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -4338,11 +4338,11 @@ static void ksz_stop_timer(struct ksz_timer_info *info)
}
static void ksz_init_timer(struct ksz_timer_info *info, int period,
- void (*function)(unsigned long), void *data)
+ void (*function)(struct timer_list *))
{
info->max = 0;
info->period = period;
- setup_timer(&info->timer, function, (unsigned long)data);
+ timer_setup(&info->timer, function, 0);
}
static void ksz_update_timer(struct ksz_timer_info *info)
@@ -6689,9 +6689,9 @@ static void mib_read_work(struct work_struct *work)
}
}
-static void mib_monitor(unsigned long ptr)
+static void mib_monitor(struct timer_list *t)
{
- struct dev_info *hw_priv = (struct dev_info *) ptr;
+ struct dev_info *hw_priv = from_timer(hw_priv, t, mib_timer_info.timer);
mib_read_work(&hw_priv->mib_read);
@@ -6716,10 +6716,10 @@ static void mib_monitor(unsigned long ptr)
*
* This routine is run in a kernel timer to monitor the network device.
*/
-static void dev_monitor(unsigned long ptr)
+static void dev_monitor(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) ptr;
- struct dev_priv *priv = netdev_priv(dev);
+ struct dev_priv *priv = from_timer(priv, t, monitor_timer_info.timer);
+ struct net_device *dev = priv->mii_if.dev;
struct dev_info *hw_priv = priv->adapter;
struct ksz_hw *hw = &hw_priv->hw;
struct ksz_port *port = &priv->port;
@@ -6789,7 +6789,7 @@ static int __init netdev_init(struct net_device *dev)
/* 500 ms timeout */
ksz_init_timer(&priv->monitor_timer_info, 500 * HZ / 1000,
- dev_monitor, dev);
+ dev_monitor);
/* 500 ms timeout */
dev->watchdog_timeo = HZ / 2;
@@ -7065,7 +7065,7 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id)
/* 500 ms timeout */
ksz_init_timer(&hw_priv->mib_timer_info, 500 * HZ / 1000,
- mib_monitor, hw_priv);
+ mib_monitor);
for (i = 0; i < hw->dev_count; i++) {
dev = alloc_etherdev(sizeof(struct dev_priv));
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 18af2a23a933..dedeacd0bbca 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -1571,10 +1571,8 @@ static int netdev_open(struct net_device *dev)
dev->name, (int)readl(ioaddr + ChipCmd));
/* Set the timer to check for link beat. */
- init_timer(&np->timer);
+ setup_timer(&np->timer, netdev_timer, (unsigned long)dev);
np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ);
- np->timer.data = (unsigned long)dev;
- np->timer.function = netdev_timer; /* timer handler */
add_timer(&np->timer);
return 0;
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 729095db3e08..99d3c7884a4a 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -1652,9 +1652,7 @@ static int ns83820_open(struct net_device *ndev)
writel(0, dev->base + TXDP_HI);
writel(desc, dev->base + TXDP);
- init_timer(&dev->tx_watchdog);
- dev->tx_watchdog.data = (unsigned long)ndev;
- dev->tx_watchdog.function = ns83820_tx_watch;
+ setup_timer(&dev->tx_watchdog, ns83820_tx_watch, (unsigned long)ndev);
mod_timer(&dev->tx_watchdog, jiffies + 2*HZ);
netif_start_queue(ndev); /* FIXME: wait for phy to come up */
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index 462eda926b1c..b8983e73265a 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -337,12 +337,6 @@ static const char ethtool_driver_stats_keys[][ETH_GSTRING_LEN] = {
#define S2IO_TEST_LEN ARRAY_SIZE(s2io_gstrings)
#define S2IO_STRINGS_LEN (S2IO_TEST_LEN * ETH_GSTRING_LEN)
-#define S2IO_TIMER_CONF(timer, handle, arg, exp) \
- init_timer(&timer); \
- timer.function = handle; \
- timer.data = (unsigned long)arg; \
- mod_timer(&timer, (jiffies + exp)) \
-
/* copy mac addr to def_mac_addr array */
static void do_s2io_copy_mac_addr(struct s2io_nic *sp, int offset, u64 mac_addr)
{
@@ -4193,9 +4187,9 @@ pci_map_failed:
}
static void
-s2io_alarm_handle(unsigned long data)
+s2io_alarm_handle(struct timer_list *t)
{
- struct s2io_nic *sp = (struct s2io_nic *)data;
+ struct s2io_nic *sp = from_timer(sp, t, alarm_timer);
struct net_device *dev = sp->dev;
s2io_handle_errors(dev);
@@ -7186,7 +7180,8 @@ static int s2io_card_up(struct s2io_nic *sp)
return -ENODEV;
}
- S2IO_TIMER_CONF(sp->alarm_timer, s2io_alarm_handle, sp, (HZ/2));
+ timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0);
+ mod_timer(&sp->alarm_timer, jiffies + HZ / 2);
set_bit(__S2IO_STATE_CARD_UP, &sp->state);
diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h
index 6c5997dc8afc..1a24a7218794 100644
--- a/drivers/net/ethernet/neterion/s2io.h
+++ b/drivers/net/ethernet/neterion/s2io.h
@@ -1094,7 +1094,7 @@ static int s2io_poll_msix(struct napi_struct *napi, int budget);
static int s2io_poll_inta(struct napi_struct *napi, int budget);
static void s2io_init_pci(struct s2io_nic * sp);
static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr);
-static void s2io_alarm_handle(unsigned long data);
+static void s2io_alarm_handle(struct timer_list *t);
static irqreturn_t
s2io_msix_ring_handle(int irq, void *dev_id);
static irqreturn_t
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 50ea69d88480..5d5b9855e24e 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -2597,9 +2597,9 @@ INTA_MODE:
return VXGE_HW_OK;
}
-static void vxge_poll_vp_reset(unsigned long data)
+static void vxge_poll_vp_reset(struct timer_list *t)
{
- struct vxgedev *vdev = (struct vxgedev *)data;
+ struct vxgedev *vdev = from_timer(vdev, t, vp_reset_timer);
int i, j = 0;
for (i = 0; i < vdev->no_of_vpath; i++) {
@@ -2616,9 +2616,9 @@ static void vxge_poll_vp_reset(unsigned long data)
mod_timer(&vdev->vp_reset_timer, jiffies + HZ / 2);
}
-static void vxge_poll_vp_lockup(unsigned long data)
+static void vxge_poll_vp_lockup(struct timer_list *t)
{
- struct vxgedev *vdev = (struct vxgedev *)data;
+ struct vxgedev *vdev = from_timer(vdev, t, vp_lockup_timer);
enum vxge_hw_status status = VXGE_HW_OK;
struct vxge_vpath *vpath;
struct vxge_ring *ring;
@@ -2858,12 +2858,12 @@ static int vxge_open(struct net_device *dev)
vdev->config.rx_pause_enable);
if (vdev->vp_reset_timer.function == NULL)
- vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset, vdev,
+ vxge_os_timer(&vdev->vp_reset_timer, vxge_poll_vp_reset,
HZ / 2);
/* There is no need to check for RxD leak and RxD lookup on Titan1A */
if (vdev->titan1 && vdev->vp_lockup_timer.function == NULL)
- vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup, vdev,
+ vxge_os_timer(&vdev->vp_lockup_timer, vxge_poll_vp_lockup,
HZ / 2);
set_bit(__VXGE_STATE_CARD_UP, &vdev->state);
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.h b/drivers/net/ethernet/neterion/vxge/vxge-main.h
index 3a79d93b8445..59a57ff5e96a 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.h
@@ -417,12 +417,10 @@ struct vxge_tx_priv {
module_param(p, int, 0)
static inline
-void vxge_os_timer(struct timer_list *timer, void (*func)(unsigned long data),
- struct vxgedev *vdev, unsigned long timeout)
+void vxge_os_timer(struct timer_list *timer, void (*func)(struct timer_list *),
+ unsigned long timeout)
{
- init_timer(timer);
- timer->function = func;
- timer->data = (unsigned long)vdev;
+ timer_setup(timer, func, 0);
mod_timer(timer, jiffies + timeout);
}
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
index 5f630a24e491..0c3b5dea2858 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-traffic.c
@@ -1209,9 +1209,6 @@ void vxge_hw_ring_rxd_pre_post(struct __vxge_hw_ring *ring, void *rxdh)
void vxge_hw_ring_rxd_post_post(struct __vxge_hw_ring *ring, void *rxdh)
{
struct vxge_hw_ring_rxd_1 *rxdp = (struct vxge_hw_ring_rxd_1 *)rxdh;
- struct __vxge_hw_channel *channel;
-
- channel = &ring->channel;
rxdp->control_0 = VXGE_HW_RING_RXD_LIST_OWN_ADAPTER;
@@ -1359,11 +1356,8 @@ exit:
enum vxge_hw_status vxge_hw_ring_handle_tcode(
struct __vxge_hw_ring *ring, void *rxdh, u8 t_code)
{
- struct __vxge_hw_channel *channel;
enum vxge_hw_status status = VXGE_HW_OK;
- channel = &ring->channel;
-
/* If the t_code is not supported and if the
* t_code is other than 0x5 (unparseable packet
* such as unknown UPV6 header), Drop it !!!
@@ -1399,10 +1393,6 @@ exit:
static void __vxge_hw_non_offload_db_post(struct __vxge_hw_fifo *fifo,
u64 txdl_ptr, u32 num_txds, u32 no_snoop)
{
- struct __vxge_hw_channel *channel;
-
- channel = &fifo->channel;
-
writeq(VXGE_HW_NODBW_TYPE(VXGE_HW_NODBW_TYPE_NODBW) |
VXGE_HW_NODBW_LAST_TXD_NUMBER(num_txds) |
VXGE_HW_NODBW_GET_NO_SNOOP(no_snoop),
@@ -1506,9 +1496,6 @@ void vxge_hw_fifo_txdl_buffer_set(struct __vxge_hw_fifo *fifo,
{
struct __vxge_hw_fifo_txdl_priv *txdl_priv;
struct vxge_hw_fifo_txd *txdp, *txdp_last;
- struct __vxge_hw_channel *channel;
-
- channel = &fifo->channel;
txdl_priv = __vxge_hw_fifo_txdl_priv(fifo, txdlh);
txdp = (struct vxge_hw_fifo_txd *)txdlh + txdl_priv->frags;
@@ -1554,9 +1541,6 @@ void vxge_hw_fifo_txdl_post(struct __vxge_hw_fifo *fifo, void *txdlh)
struct __vxge_hw_fifo_txdl_priv *txdl_priv;
struct vxge_hw_fifo_txd *txdp_last;
struct vxge_hw_fifo_txd *txdp_first;
- struct __vxge_hw_channel *channel;
-
- channel = &fifo->channel;
txdl_priv = __vxge_hw_fifo_txdl_priv(fifo, txdlh);
txdp_first = txdlh;
@@ -1672,10 +1656,7 @@ enum vxge_hw_status vxge_hw_fifo_handle_tcode(struct __vxge_hw_fifo *fifo,
void *txdlh,
enum vxge_hw_fifo_tcode t_code)
{
- struct __vxge_hw_channel *channel;
-
enum vxge_hw_status status = VXGE_HW_OK;
- channel = &fifo->channel;
if (((t_code & 0x7) < 0) || ((t_code & 0x7) > 0x4)) {
status = VXGE_HW_ERR_INVALID_TCODE;
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 96e579a15cbe..bd3b2bd408bc 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -14,6 +14,7 @@ nfp-objs := \
nfpcore/nfp_resource.o \
nfpcore/nfp_rtsym.o \
nfpcore/nfp_target.o \
+ nfp_asm.o \
nfp_app.o \
nfp_app_nic.o \
nfp_devlink.o \
@@ -37,7 +38,8 @@ nfp-objs += \
flower/main.o \
flower/match.o \
flower/metadata.o \
- flower/offload.o
+ flower/offload.o \
+ flower/tunnel_conf.o
endif
ifeq ($(CONFIG_BPF_SYSCALL),y)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 239dfbe8a0a1..23fb11a41cc4 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -110,150 +110,7 @@ nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset)
return offset - nfp_prog->start_off;
}
-/* --- SW reg --- */
-struct nfp_insn_ur_regs {
- enum alu_dst_ab dst_ab;
- u16 dst;
- u16 areg, breg;
- bool swap;
- bool wr_both;
-};
-
-struct nfp_insn_re_regs {
- enum alu_dst_ab dst_ab;
- u8 dst;
- u8 areg, breg;
- bool swap;
- bool wr_both;
- bool i8;
-};
-
-static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst)
-{
- u16 val = FIELD_GET(NN_REG_VAL, swreg);
-
- switch (FIELD_GET(NN_REG_TYPE, swreg)) {
- case NN_REG_GPR_A:
- case NN_REG_GPR_B:
- case NN_REG_GPR_BOTH:
- return val;
- case NN_REG_NNR:
- return UR_REG_NN | val;
- case NN_REG_XFER:
- return UR_REG_XFR | val;
- case NN_REG_IMM:
- if (val & ~0xff) {
- pr_err("immediate too large\n");
- return 0;
- }
- return UR_REG_IMM_encode(val);
- case NN_REG_NONE:
- return is_dst ? UR_REG_NO_DST : REG_NONE;
- default:
- pr_err("unrecognized reg encoding %08x\n", swreg);
- return 0;
- }
-}
-
-static int
-swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg)
-{
- memset(reg, 0, sizeof(*reg));
-
- /* Decode destination */
- if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
- return -EFAULT;
-
- if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
- reg->dst_ab = ALU_DST_B;
- if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
- reg->wr_both = true;
- reg->dst = nfp_swreg_to_unreg(dst, true);
-
- /* Decode source operands */
- if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
- return -EFAULT;
-
- if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
- FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
- reg->areg = nfp_swreg_to_unreg(rreg, false);
- reg->breg = nfp_swreg_to_unreg(lreg, false);
- reg->swap = true;
- } else {
- reg->areg = nfp_swreg_to_unreg(lreg, false);
- reg->breg = nfp_swreg_to_unreg(rreg, false);
- }
-
- return 0;
-}
-
-static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8)
-{
- u16 val = FIELD_GET(NN_REG_VAL, swreg);
-
- switch (FIELD_GET(NN_REG_TYPE, swreg)) {
- case NN_REG_GPR_A:
- case NN_REG_GPR_B:
- case NN_REG_GPR_BOTH:
- return val;
- case NN_REG_XFER:
- return RE_REG_XFR | val;
- case NN_REG_IMM:
- if (val & ~(0x7f | has_imm8 << 7)) {
- pr_err("immediate too large\n");
- return 0;
- }
- *i8 = val & 0x80;
- return RE_REG_IMM_encode(val & 0x7f);
- case NN_REG_NONE:
- return is_dst ? RE_REG_NO_DST : REG_NONE;
- default:
- pr_err("unrecognized reg encoding\n");
- return 0;
- }
-}
-
-static int
-swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg,
- bool has_imm8)
-{
- memset(reg, 0, sizeof(*reg));
-
- /* Decode destination */
- if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM)
- return -EFAULT;
-
- if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B)
- reg->dst_ab = ALU_DST_B;
- if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH)
- reg->wr_both = true;
- reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
-
- /* Decode source operands */
- if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg))
- return -EFAULT;
-
- if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B ||
- FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) {
- reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
- reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
- reg->swap = true;
- } else {
- reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
- reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
- }
-
- return 0;
-}
-
/* --- Emitters --- */
-static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
- [CMD_TGT_WRITE8] = { 0x00, 0x42 },
- [CMD_TGT_READ8] = { 0x01, 0x43 },
- [CMD_TGT_READ_LE] = { 0x01, 0x40 },
- [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
-};
-
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync)
@@ -281,7 +138,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
static void
emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
- u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync)
+ u8 mode, u8 xfer, swreg lreg, swreg rreg, u8 size, bool sync)
{
struct nfp_insn_re_regs reg;
int err;
@@ -296,6 +153,11 @@ emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
nfp_prog->error = -EFAULT;
return;
}
+ if (reg.dst_lmextn || reg.src_lmextn) {
+ pr_err("cmd can't use LMextn\n");
+ nfp_prog->error = -EFAULT;
+ return;
+ }
__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync);
}
@@ -341,7 +203,7 @@ emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
static void
__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8,
- u8 byte, bool equal, u16 addr, u8 defer)
+ u8 byte, bool equal, u16 addr, u8 defer, bool src_lmextn)
{
u16 addr_lo, addr_hi;
u64 insn;
@@ -357,32 +219,34 @@ __emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8,
FIELD_PREP(OP_BB_EQ, equal) |
FIELD_PREP(OP_BB_DEFBR, defer) |
FIELD_PREP(OP_BB_ADDR_LO, addr_lo) |
- FIELD_PREP(OP_BB_ADDR_HI, addr_hi);
+ FIELD_PREP(OP_BB_ADDR_HI, addr_hi) |
+ FIELD_PREP(OP_BB_SRC_LMEXTN, src_lmextn);
nfp_prog_push(nfp_prog, insn);
}
static void
emit_br_byte_neq(struct nfp_prog *nfp_prog,
- u32 dst, u8 imm, u8 byte, u16 addr, u8 defer)
+ swreg src, u8 imm, u8 byte, u16 addr, u8 defer)
{
struct nfp_insn_re_regs reg;
int err;
- err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), &reg, true);
+ err = swreg_to_restricted(reg_none(), src, reg_imm(imm), &reg, true);
if (err) {
nfp_prog->error = err;
return;
}
__emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr,
- defer);
+ defer, reg.src_lmextn);
}
static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
enum immed_width width, bool invert,
- enum immed_shift shift, bool wr_both)
+ enum immed_shift shift, bool wr_both,
+ bool dst_lmextn, bool src_lmextn)
{
u64 insn;
@@ -393,19 +257,21 @@ __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
FIELD_PREP(OP_IMMED_WIDTH, width) |
FIELD_PREP(OP_IMMED_INV, invert) |
FIELD_PREP(OP_IMMED_SHIFT, shift) |
- FIELD_PREP(OP_IMMED_WR_AB, wr_both);
+ FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
+ FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
+ FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
nfp_prog_push(nfp_prog, insn);
}
static void
-emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm,
+emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
enum immed_width width, bool invert, enum immed_shift shift)
{
struct nfp_insn_ur_regs reg;
int err;
- if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) {
+ if (swreg_type(dst) == NN_REG_IMM) {
nfp_prog->error = -EFAULT;
return;
}
@@ -417,13 +283,15 @@ emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm,
}
__emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width,
- invert, shift, reg.wr_both);
+ invert, shift, reg.wr_both,
+ reg.dst_lmextn, reg.src_lmextn);
}
static void
__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
enum shf_sc sc, u8 shift,
- u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both)
+ u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
+ bool dst_lmextn, bool src_lmextn)
{
u64 insn;
@@ -445,14 +313,16 @@ __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
FIELD_PREP(OP_SHF_SHIFT, shift) |
FIELD_PREP(OP_SHF_OP, op) |
FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
- FIELD_PREP(OP_SHF_WR_AB, wr_both);
+ FIELD_PREP(OP_SHF_WR_AB, wr_both) |
+ FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
+ FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
nfp_prog_push(nfp_prog, insn);
}
static void
-emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg,
- enum shf_sc sc, u8 shift)
+emit_shf(struct nfp_prog *nfp_prog, swreg dst,
+ swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
{
struct nfp_insn_re_regs reg;
int err;
@@ -464,12 +334,14 @@ emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg,
}
__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
- reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both);
+ reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
+ reg.dst_lmextn, reg.src_lmextn);
}
static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
- u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both)
+ u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
+ bool dst_lmextn, bool src_lmextn)
{
u64 insn;
@@ -480,13 +352,16 @@ __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
FIELD_PREP(OP_ALU_SW, swap) |
FIELD_PREP(OP_ALU_OP, op) |
FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
- FIELD_PREP(OP_ALU_WR_AB, wr_both);
+ FIELD_PREP(OP_ALU_WR_AB, wr_both) |
+ FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
+ FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
nfp_prog_push(nfp_prog, insn);
}
static void
-emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg)
+emit_alu(struct nfp_prog *nfp_prog, swreg dst,
+ swreg lreg, enum alu_op op, swreg rreg)
{
struct nfp_insn_ur_regs reg;
int err;
@@ -498,13 +373,15 @@ emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg)
}
__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
- reg.areg, op, reg.breg, reg.swap, reg.wr_both);
+ reg.areg, op, reg.breg, reg.swap, reg.wr_both,
+ reg.dst_lmextn, reg.src_lmextn);
}
static void
__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
- bool zero, bool swap, bool wr_both)
+ bool zero, bool swap, bool wr_both,
+ bool dst_lmextn, bool src_lmextn)
{
u64 insn;
@@ -517,33 +394,42 @@ __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
FIELD_PREP(OP_LDF_ZF, zero) |
FIELD_PREP(OP_LDF_BMASK, bmask) |
FIELD_PREP(OP_LDF_SHF, shift) |
- FIELD_PREP(OP_LDF_WR_AB, wr_both);
+ FIELD_PREP(OP_LDF_WR_AB, wr_both) |
+ FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
+ FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
nfp_prog_push(nfp_prog, insn);
}
static void
-emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift,
- u32 dst, u8 bmask, u32 src, bool zero)
+emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
+ enum shf_sc sc, u8 shift, bool zero)
{
struct nfp_insn_re_regs reg;
int err;
- err = swreg_to_restricted(reg_none(), dst, src, &reg, true);
+ /* Note: ld_field is special as it uses one of the src regs as dst */
+ err = swreg_to_restricted(dst, dst, src, &reg, true);
if (err) {
nfp_prog->error = err;
return;
}
__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
- reg.i8, zero, reg.swap, reg.wr_both);
+ reg.i8, zero, reg.swap, reg.wr_both,
+ reg.dst_lmextn, reg.src_lmextn);
}
static void
-emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src,
+emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
enum shf_sc sc, u8 shift)
{
- emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false);
+ emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
+}
+
+static void emit_nop(struct nfp_prog *nfp_prog)
+{
+ __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
}
/* --- Wrappers --- */
@@ -565,7 +451,7 @@ static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
return true;
}
-static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm)
+static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
{
enum immed_shift shift;
u16 val;
@@ -586,7 +472,7 @@ static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm)
* If the @imm is small enough encode it directly in operand and return
* otherwise load @imm to a spare register and return its encoding.
*/
-static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
if (FIELD_FIT(UR_REG_IMM_MAX, imm))
return reg_imm(imm);
@@ -599,7 +485,7 @@ static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
* If the @imm is small enough encode it directly in operand and return
* otherwise load @imm to a spare register and return its encoding.
*/
-static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg)
+static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
if (FIELD_FIT(RE_REG_IMM_MAX, imm))
return reg_imm(imm);
@@ -618,78 +504,134 @@ wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask,
FIELD_PREP(OP_BR_SPECIAL, special);
}
+static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
+{
+ emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
+}
+
static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
{
- emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src));
+ wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}
static int
-construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset,
- u16 src, bool src_valid, u8 size)
+data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
{
unsigned int i;
u16 shift, sz;
- u32 tmp_reg;
/* We load the value from the address indicated in @offset and then
* shift out the data we don't need. Note: this is big endian!
*/
- sz = size < 4 ? 4 : size;
+ sz = max(size, 4);
shift = size < 4 ? 4 - size : 0;
- if (src_valid) {
- /* Calculate the true offset (src_reg + imm) */
- tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
- emit_alu(nfp_prog, imm_both(nfp_prog),
- reg_a(src), ALU_OP_ADD, tmp_reg);
- /* Check packet length (size guaranteed to fit b/c it's u8) */
- emit_alu(nfp_prog, imm_a(nfp_prog),
- imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
- emit_alu(nfp_prog, reg_none(),
- NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog));
- wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
- /* Load data */
- emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
- pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true);
- } else {
- /* Check packet length */
- tmp_reg = ur_load_imm_any(nfp_prog, offset + size,
- imm_a(nfp_prog));
- emit_alu(nfp_prog, reg_none(),
- NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg);
- wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
- /* Load data */
- tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
- emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
- pkt_reg(nfp_prog), tmp_reg, sz - 1, true);
- }
+ emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
+ pptr_reg(nfp_prog), offset, sz - 1, true);
i = 0;
if (shift)
- emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE,
+ emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
reg_xfer(0), SHF_SC_R_SHF, shift * 8);
else
for (; i * 4 < size; i++)
- emit_alu(nfp_prog, reg_both(i),
- reg_none(), ALU_OP_NONE, reg_xfer(i));
+ wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
+
+ if (i < 2)
+ wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
+
+ return 0;
+}
+
+static int
+data_ld_host_order(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
+ u8 dst_gpr, int size)
+{
+ unsigned int i;
+ u8 mask, sz;
+
+ /* We load the value from the address indicated in @offset and then
+ * mask out the data we don't need. Note: this is little endian!
+ */
+ sz = max(size, 4);
+ mask = size < 4 ? GENMASK(size - 1, 0) : 0;
+
+ emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0,
+ reg_a(src_gpr), offset, sz / 4 - 1, true);
+
+ i = 0;
+ if (mask)
+ emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
+ reg_xfer(0), SHF_SC_NONE, 0, true);
+ else
+ for (; i * 4 < size; i++)
+ wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
if (i < 2)
- wrp_immed(nfp_prog, reg_both(1), 0);
+ wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
return 0;
}
+static int
+construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
+{
+ swreg tmp_reg;
+
+ /* Calculate the true offset (src_reg + imm) */
+ tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+ emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
+
+ /* Check packet length (size guaranteed to fit b/c it's u8) */
+ emit_alu(nfp_prog, imm_a(nfp_prog),
+ imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
+ emit_alu(nfp_prog, reg_none(),
+ plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
+ wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+
+ /* Load data */
+ return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
+}
+
static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
{
- return construct_data_ind_ld(nfp_prog, offset, 0, false, size);
+ swreg tmp_reg;
+
+ /* Check packet length */
+ tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
+ emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
+ wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT);
+
+ /* Load data */
+ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
+ return data_ld(nfp_prog, tmp_reg, 0, size);
+}
+
+static int
+data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
+ u8 src_gpr, u8 size)
+{
+ unsigned int i;
+
+ for (i = 0; i * 4 < size; i++)
+ wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
+
+ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
+ reg_a(dst_gpr), offset, size - 1, true);
+
+ return 0;
}
-static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src)
+static int
+data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
+ u64 imm, u8 size)
{
- emit_alu(nfp_prog, NFP_BPF_ABI_MARK,
- reg_none(), ALU_OP_NONE, reg_b(src));
- emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS,
- NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK));
+ wrp_immed(nfp_prog, reg_xfer(0), imm);
+ if (size == 8)
+ wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
+
+ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
+ reg_a(dst_gpr), offset, size - 1, true);
return 0;
}
@@ -697,7 +639,7 @@ static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src)
static void
wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
{
- u32 tmp_reg;
+ swreg tmp_reg;
if (alu_op == ALU_OP_AND) {
if (!imm)
@@ -815,7 +757,7 @@ wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
const struct bpf_insn *insn = &meta->insn;
u64 imm = insn->imm; /* sign extend */
u8 reg = insn->dst_reg * 2;
- u32 tmp_reg;
+ swreg tmp_reg;
if (insn->off < 0) /* TODO */
return -EOPNOTSUPP;
@@ -844,7 +786,10 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
enum br_mask br_mask, bool swap)
{
const struct bpf_insn *insn = &meta->insn;
- u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2;
+ u8 areg, breg;
+
+ areg = insn->dst_reg * 2;
+ breg = insn->src_reg * 2;
if (insn->off < 0) /* TODO */
return -EOPNOTSUPP;
@@ -863,6 +808,14 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
return 0;
}
+static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
+{
+ emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
+ SHF_SC_R_ROT, 8);
+ emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
+ SHF_SC_R_ROT, 16);
+}
+
/* --- Callbacks --- */
static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
@@ -967,12 +920,24 @@ static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
-
- if (insn->imm != 32)
- return 1; /* TODO */
-
- wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2);
- wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0);
+ u8 dst = insn->dst_reg * 2;
+
+ if (insn->imm < 32) {
+ emit_shf(nfp_prog, reg_both(dst + 1),
+ reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
+ SHF_SC_R_DSHF, 32 - insn->imm);
+ emit_shf(nfp_prog, reg_both(dst),
+ reg_none(), SHF_OP_NONE, reg_b(dst),
+ SHF_SC_L_SHF, insn->imm);
+ } else if (insn->imm == 32) {
+ wrp_reg_mov(nfp_prog, dst + 1, dst);
+ wrp_immed(nfp_prog, reg_both(dst), 0);
+ } else if (insn->imm > 32) {
+ emit_shf(nfp_prog, reg_both(dst + 1),
+ reg_none(), SHF_OP_NONE, reg_b(dst),
+ SHF_SC_L_SHF, insn->imm - 32);
+ wrp_immed(nfp_prog, reg_both(dst), 0);
+ }
return 0;
}
@@ -980,12 +945,24 @@ static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
-
- if (insn->imm != 32)
- return 1; /* TODO */
-
- wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1);
- wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
+ u8 dst = insn->dst_reg * 2;
+
+ if (insn->imm < 32) {
+ emit_shf(nfp_prog, reg_both(dst),
+ reg_a(dst + 1), SHF_OP_NONE, reg_b(dst),
+ SHF_SC_R_DSHF, insn->imm);
+ emit_shf(nfp_prog, reg_both(dst + 1),
+ reg_none(), SHF_OP_NONE, reg_b(dst + 1),
+ SHF_SC_R_SHF, insn->imm);
+ } else if (insn->imm == 32) {
+ wrp_reg_mov(nfp_prog, dst, dst + 1);
+ wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+ } else if (insn->imm > 32) {
+ emit_shf(nfp_prog, reg_both(dst),
+ reg_none(), SHF_OP_NONE, reg_b(dst + 1),
+ SHF_SC_R_SHF, insn->imm - 32);
+ wrp_immed(nfp_prog, reg_both(dst + 1), 0);
+ }
return 0;
}
@@ -1075,6 +1052,35 @@ static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
return 0;
}
+static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ const struct bpf_insn *insn = &meta->insn;
+ u8 gpr = insn->dst_reg * 2;
+
+ switch (insn->imm) {
+ case 16:
+ emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
+ SHF_SC_R_ROT, 8);
+ emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
+ SHF_SC_R_SHF, 16);
+
+ wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
+ break;
+ case 32:
+ wrp_end32(nfp_prog, reg_a(gpr), gpr);
+ wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
+ break;
+ case 64:
+ wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
+
+ wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
+ wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
+ break;
+ }
+
+ return 0;
+}
+
static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1),
@@ -1111,82 +1117,209 @@ static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ind_ld(nfp_prog, meta->insn.imm,
- meta->insn.src_reg * 2, true, 1);
+ meta->insn.src_reg * 2, 1);
}
static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ind_ld(nfp_prog, meta->insn.imm,
- meta->insn.src_reg * 2, true, 2);
+ meta->insn.src_reg * 2, 2);
}
static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
return construct_data_ind_ld(nfp_prog, meta->insn.imm,
- meta->insn.src_reg * 2, true, 4);
+ meta->insn.src_reg * 2, 4);
}
-static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ u8 size)
{
- if (meta->insn.off == offsetof(struct sk_buff, len))
- emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
- reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN);
- else
+ swreg dst = reg_both(meta->insn.dst_reg * 2);
+
+ switch (meta->insn.off) {
+ case offsetof(struct sk_buff, len):
+ if (size != FIELD_SIZEOF(struct sk_buff, len))
+ return -EOPNOTSUPP;
+ wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
+ break;
+ case offsetof(struct sk_buff, data):
+ if (size != sizeof(void *))
+ return -EOPNOTSUPP;
+ wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
+ break;
+ case offsetof(struct sk_buff, cb) +
+ offsetof(struct bpf_skb_data_end, data_end):
+ if (size != sizeof(void *))
+ return -EOPNOTSUPP;
+ emit_alu(nfp_prog, dst,
+ plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
+ break;
+ default:
return -EOPNOTSUPP;
+ }
+
+ wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
return 0;
}
-static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ u8 size)
{
- u32 dst = reg_both(meta->insn.dst_reg * 2);
+ swreg dst = reg_both(meta->insn.dst_reg * 2);
- if (meta->insn.off != offsetof(struct xdp_md, data) &&
- meta->insn.off != offsetof(struct xdp_md, data_end))
+ if (size != sizeof(void *))
+ return -EINVAL;
+
+ switch (meta->insn.off) {
+ case offsetof(struct xdp_buff, data):
+ wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
+ break;
+ case offsetof(struct xdp_buff, data_end):
+ emit_alu(nfp_prog, dst,
+ plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
+ break;
+ default:
return -EOPNOTSUPP;
+ }
- emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+ wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
- if (meta->insn.off == offsetof(struct xdp_md, data))
- return 0;
+ return 0;
+}
- emit_alu(nfp_prog, dst, dst, ALU_OP_ADD, NFP_BPF_ABI_LEN);
+static int
+mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int size)
+{
+ swreg tmp_reg;
- return 0;
+ tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+ return data_ld_host_order(nfp_prog, meta->insn.src_reg * 2, tmp_reg,
+ meta->insn.dst_reg * 2, size);
+}
+
+static int
+mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int size)
+{
+ if (meta->ptr.type == PTR_TO_CTX) {
+ if (nfp_prog->act == NN_ACT_XDP)
+ return mem_ldx_xdp(nfp_prog, meta, size);
+ else
+ return mem_ldx_skb(nfp_prog, meta, size);
+ }
+
+ if (meta->ptr.type == PTR_TO_PACKET)
+ return mem_ldx_data(nfp_prog, meta, size);
+
+ return -EOPNOTSUPP;
+}
+
+static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_ldx(nfp_prog, meta, 1);
+}
+
+static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_ldx(nfp_prog, meta, 2);
}
static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- int ret;
+ return mem_ldx(nfp_prog, meta, 4);
+}
- if (nfp_prog->act == NN_ACT_XDP)
- ret = mem_ldx4_xdp(nfp_prog, meta);
- else
- ret = mem_ldx4_skb(nfp_prog, meta);
+static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_ldx(nfp_prog, meta, 8);
+}
- wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+static int
+mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int size)
+{
+ u64 imm = meta->insn.imm; /* sign extend */
+ swreg off_reg;
- return ret;
+ off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+ return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
+ imm, size);
}
-static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int size)
{
- if (meta->insn.off == offsetof(struct sk_buff, mark))
- return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
+ if (meta->ptr.type == PTR_TO_PACKET)
+ return mem_st_data(nfp_prog, meta, size);
return -EOPNOTSUPP;
}
-static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_st(nfp_prog, meta, 1);
+}
+
+static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
+ return mem_st(nfp_prog, meta, 2);
+}
+
+static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_st(nfp_prog, meta, 4);
+}
+
+static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_st(nfp_prog, meta, 8);
+}
+
+static int
+mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int size)
+{
+ swreg off_reg;
+
+ off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+
+ return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
+ meta->insn.src_reg * 2, size);
+}
+
+static int
+mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ unsigned int size)
+{
+ if (meta->ptr.type == PTR_TO_PACKET)
+ return mem_stx_data(nfp_prog, meta, size);
+
return -EOPNOTSUPP;
}
+static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_stx(nfp_prog, meta, 1);
+}
+
+static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_stx(nfp_prog, meta, 2);
+}
+
static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- if (nfp_prog->act == NN_ACT_XDP)
- return mem_stx4_xdp(nfp_prog, meta);
- return mem_stx4_skb(nfp_prog, meta);
+ return mem_stx(nfp_prog, meta, 4);
+}
+
+static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ return mem_stx(nfp_prog, meta, 8);
}
static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -1202,8 +1335,10 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 imm = insn->imm; /* sign extend */
- u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1);
- u32 tmp_reg;
+ swreg or1, or2, tmp_reg;
+
+ or1 = reg_a(insn->dst_reg * 2);
+ or2 = reg_b(insn->dst_reg * 2 + 1);
if (insn->off < 0) /* TODO */
return -EOPNOTSUPP;
@@ -1230,29 +1365,29 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
+ return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
}
static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
+ return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
}
static int jlt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_cmp_imm(nfp_prog, meta, BR_BHS, false);
+ return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false);
}
static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_cmp_imm(nfp_prog, meta, BR_BLO, true);
+ return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true);
}
static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 imm = insn->imm; /* sign extend */
- u32 tmp_reg;
+ swreg tmp_reg;
if (insn->off < 0) /* TODO */
return -EOPNOTSUPP;
@@ -1283,7 +1418,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
const struct bpf_insn *insn = &meta->insn;
u64 imm = insn->imm; /* sign extend */
- u32 tmp_reg;
+ swreg tmp_reg;
if (insn->off < 0) /* TODO */
return -EOPNOTSUPP;
@@ -1292,6 +1427,7 @@ static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
emit_br(nfp_prog, BR_BNE, insn->off, 0);
+ return 0;
}
tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
@@ -1327,22 +1463,22 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
+ return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
}
static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
+ return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
}
static int jlt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_cmp_reg(nfp_prog, meta, BR_BHS, false);
+ return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false);
}
static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
- return wrp_cmp_reg(nfp_prog, meta, BR_BLO, true);
+ return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true);
}
static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
@@ -1390,6 +1526,7 @@ static const instr_cb_t instr_cb[256] = {
[BPF_ALU | BPF_SUB | BPF_X] = sub_reg,
[BPF_ALU | BPF_SUB | BPF_K] = sub_imm,
[BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
+ [BPF_ALU | BPF_END | BPF_X] = end_reg32,
[BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
[BPF_LD | BPF_ABS | BPF_B] = data_ld1,
[BPF_LD | BPF_ABS | BPF_H] = data_ld2,
@@ -1397,8 +1534,18 @@ static const instr_cb_t instr_cb[256] = {
[BPF_LD | BPF_IND | BPF_B] = data_ind_ld1,
[BPF_LD | BPF_IND | BPF_H] = data_ind_ld2,
[BPF_LD | BPF_IND | BPF_W] = data_ind_ld4,
+ [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1,
+ [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2,
[BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4,
+ [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8,
+ [BPF_STX | BPF_MEM | BPF_B] = mem_stx1,
+ [BPF_STX | BPF_MEM | BPF_H] = mem_stx2,
[BPF_STX | BPF_MEM | BPF_W] = mem_stx4,
+ [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8,
+ [BPF_ST | BPF_MEM | BPF_B] = mem_st1,
+ [BPF_ST | BPF_MEM | BPF_H] = mem_st2,
+ [BPF_ST | BPF_MEM | BPF_W] = mem_st4,
+ [BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
[BPF_JMP | BPF_JA | BPF_K] = jump,
[BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
[BPF_JMP | BPF_JGT | BPF_K] = jgt_imm,
@@ -1510,8 +1657,9 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
static void nfp_intro(struct nfp_prog *nfp_prog)
{
- emit_alu(nfp_prog, pkt_reg(nfp_prog),
- reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+ wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
+ emit_alu(nfp_prog, plen_reg(nfp_prog),
+ plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
}
static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog)
@@ -1534,8 +1682,7 @@ static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog)
* ife + tx 0x24 -> redir, count as stat1
*/
emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2);
- emit_alu(nfp_prog, reg_a(0),
- reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+ wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1);
@@ -1562,8 +1709,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
- emit_alu(nfp_prog, reg_a(0),
- reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+ wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
/* Target for normal exits */
@@ -1572,8 +1718,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
/* if R0 > 7 jump to abort */
emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
- emit_alu(nfp_prog, reg_a(0),
- reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+ wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
wrp_immed(nfp_prog, reg_b(2), 0x41221211);
wrp_immed(nfp_prog, reg_b(3), 0x41001211);
@@ -1610,8 +1755,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
- emit_alu(nfp_prog, reg_a(0),
- reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+ wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
/* Target for normal exits */
@@ -1632,8 +1776,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
- emit_alu(nfp_prog, reg_a(0),
- reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+ wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
}
@@ -1656,7 +1799,7 @@ static void nfp_outro(struct nfp_prog *nfp_prog)
static int nfp_translate(struct nfp_prog *nfp_prog)
{
struct nfp_insn_meta *meta;
- int err;
+ int i, err;
nfp_intro(nfp_prog);
if (nfp_prog->error)
@@ -1688,6 +1831,11 @@ static int nfp_translate(struct nfp_prog *nfp_prog)
if (nfp_prog->error)
return nfp_prog->error;
+ for (i = 0; i < NFP_USTORE_PREFETCH_WINDOW; i++)
+ emit_nop(nfp_prog);
+ if (nfp_prog->error)
+ return nfp_prog->error;
+
return nfp_fixup_branches(nfp_prog);
}
@@ -1737,38 +1885,6 @@ static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
}
}
-/* Try to rename registers so that program uses only low ones */
-static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog)
-{
- bool reg_used[MAX_BPF_REG] = {};
- u8 tgt_reg[MAX_BPF_REG] = {};
- struct nfp_insn_meta *meta;
- unsigned int i, j;
-
- list_for_each_entry(meta, &nfp_prog->insns, l) {
- if (meta->skip)
- continue;
-
- reg_used[meta->insn.src_reg] = true;
- reg_used[meta->insn.dst_reg] = true;
- }
-
- for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) {
- if (!reg_used[i])
- continue;
-
- tgt_reg[i] = j++;
- }
- nfp_prog->num_regs = j;
-
- list_for_each_entry(meta, &nfp_prog->insns, l) {
- meta->insn.src_reg = tgt_reg[meta->insn.src_reg];
- meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg];
- }
-
- return 0;
-}
-
/* Remove masking after load since our load guarantees this is not needed */
static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
{
@@ -1845,20 +1961,33 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
- int ret;
-
nfp_bpf_opt_reg_init(nfp_prog);
- ret = nfp_bpf_opt_reg_rename(nfp_prog);
- if (ret)
- return ret;
-
nfp_bpf_opt_ld_mask(nfp_prog);
nfp_bpf_opt_ld_shift(nfp_prog);
return 0;
}
+static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore)
+{
+ int i;
+
+ for (i = 0; i < nfp_prog->prog_len; i++) {
+ int err;
+
+ err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]);
+ if (err)
+ return err;
+
+ nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]);
+
+ ustore[i] = cpu_to_le64(nfp_prog->prog[i]);
+ }
+
+ return 0;
+}
+
/**
* nfp_bpf_jit() - translate BPF code into NFP assembly
* @filter: kernel BPF filter struct
@@ -1899,10 +2028,8 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem,
if (ret)
goto out;
- if (nfp_prog->num_regs <= 7)
- nfp_prog->regs_per_thread = 16;
- else
- nfp_prog->regs_per_thread = 32;
+ nfp_prog->num_regs = MAX_BPF_REG;
+ nfp_prog->regs_per_thread = 32;
nfp_prog->prog = prog_mem;
nfp_prog->__prog_alloc_len = prog_sz;
@@ -1912,10 +2039,13 @@ nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem,
pr_err("Translation failed with error %d (translated: %u)\n",
ret, nfp_prog->n_translated);
ret = -EINVAL;
+ goto out;
}
+ ret = nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)prog_mem);
+
res->n_instr = nfp_prog->prog_len;
- res->dense_mode = nfp_prog->num_regs <= 7;
+ res->dense_mode = false;
out:
nfp_prog_free(nfp_prog);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index be2cf10a2cd7..fa0ac90ed956 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -42,9 +42,11 @@
static bool nfp_net_ebpf_capable(struct nfp_net *nn)
{
+#ifdef __LITTLE_ENDIAN
if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
return true;
+#endif
return false;
}
@@ -89,14 +91,6 @@ nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
struct nfp_net_bpf_priv *priv;
int ret;
- /* Limit to single port, otherwise it's just a NIC */
- if (id > 0) {
- nfp_warn(app->cpp,
- "BPF NIC doesn't support more than one port right now\n");
- nn->port = nfp_port_alloc(app, NFP_PORT_INVALID, nn->dp.netdev);
- return PTR_ERR_OR_ZERO(nn->port);
- }
-
priv = kmalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
@@ -120,22 +114,56 @@ static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
kfree(nn->app_priv);
}
-static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
- enum tc_setup_type type, void *type_data)
+static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
{
struct tc_cls_bpf_offload *cls_bpf = type_data;
+ struct nfp_net *nn = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSBPF:
+ if (!nfp_net_ebpf_capable(nn) ||
+ cls_bpf->common.protocol != htons(ETH_P_ALL) ||
+ cls_bpf->common.chain_index)
+ return -EOPNOTSUPP;
+ return nfp_net_bpf_offload(nn, cls_bpf);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int nfp_bpf_setup_tc_block(struct net_device *netdev,
+ struct tc_block_offload *f)
+{
struct nfp_net *nn = netdev_priv(netdev);
- if (type != TC_SETUP_CLSBPF || !nfp_net_ebpf_capable(nn) ||
- !is_classid_clsact_ingress(cls_bpf->common.classid) ||
- cls_bpf->common.protocol != htons(ETH_P_ALL) ||
- cls_bpf->common.chain_index)
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
return -EOPNOTSUPP;
- if (nn->dp.bpf_offload_xdp)
- return -EBUSY;
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block,
+ nfp_bpf_setup_tc_block_cb,
+ nn, nn);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block,
+ nfp_bpf_setup_tc_block_cb,
+ nn);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
- return nfp_net_bpf_offload(nn, cls_bpf);
+static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev,
+ enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return nfp_bpf_setup_tc_block(netdev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
}
static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn)
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 4051e943f363..d77e88a45409 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -36,9 +36,11 @@
#include <linux/bitfield.h>
#include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
#include <linux/list.h>
#include <linux/types.h>
+#include "../nfp_asm.h"
#include "../nfp_net.h"
/* For branch fixup logic use up-most byte of branch instruction as scratch
@@ -53,9 +55,13 @@ enum br_special {
};
enum static_regs {
- STATIC_REG_PKT = 1,
-#define REG_PKT_BANK ALU_DST_A
- STATIC_REG_IMM = 2, /* Bank AB */
+ STATIC_REG_IMM = 21, /* Bank AB */
+ STATIC_REG_PKT_LEN = 22, /* Bank B */
+};
+
+enum pkt_vec {
+ PKT_VEC_PKT_LEN = 0,
+ PKT_VEC_PKT_PTR = 2,
};
enum nfp_bpf_action_type {
@@ -65,39 +71,17 @@ enum nfp_bpf_action_type {
NN_ACT_XDP,
};
-/* Software register representation, hardware encoding in asm.h */
-#define NN_REG_TYPE GENMASK(31, 24)
-#define NN_REG_VAL GENMASK(7, 0)
-
-enum nfp_bpf_reg_type {
- NN_REG_GPR_A = BIT(0),
- NN_REG_GPR_B = BIT(1),
- NN_REG_NNR = BIT(2),
- NN_REG_XFER = BIT(3),
- NN_REG_IMM = BIT(4),
- NN_REG_NONE = BIT(5),
-};
-
-#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B)
-
-#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH))
-#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A))
-#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B))
-#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR))
-#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER))
-#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM))
-#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE))
+#define pv_len(np) reg_lm(1, PKT_VEC_PKT_LEN)
+#define pv_ctm_ptr(np) reg_lm(1, PKT_VEC_PKT_PTR)
-#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT)
-#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM)
-#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM)
-#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM)
+#define plen_reg(np) reg_b(STATIC_REG_PKT_LEN)
+#define pptr_reg(np) pv_ctm_ptr(np)
+#define imm_a(np) reg_a(STATIC_REG_IMM)
+#define imm_b(np) reg_b(STATIC_REG_IMM)
+#define imm_both(np) reg_both(STATIC_REG_IMM)
-#define NFP_BPF_ABI_FLAGS reg_nnr(0)
+#define NFP_BPF_ABI_FLAGS reg_imm(0)
#define NFP_BPF_ABI_FLAG_MARK 1
-#define NFP_BPF_ABI_MARK reg_nnr(1)
-#define NFP_BPF_ABI_PKT reg_nnr(2)
-#define NFP_BPF_ABI_LEN reg_nnr(3)
struct nfp_prog;
struct nfp_insn_meta;
@@ -113,6 +97,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
/**
* struct nfp_insn_meta - BPF instruction wrapper
* @insn: BPF instruction
+ * @ptr: pointer type for memory operations
* @off: index of first generated machine instruction (in nfp_prog.prog)
* @n: eBPF instruction number
* @skip: skip this instruction (optimized out)
@@ -121,6 +106,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
*/
struct nfp_insn_meta {
struct bpf_insn insn;
+ struct bpf_reg_state ptr;
unsigned int off;
unsigned short n;
bool skip;
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
index 5b783a91b115..e361c0e3b788 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c
@@ -112,12 +112,19 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
}
static int
-nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog,
- const struct bpf_verifier_env *env, u8 reg)
+nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ const struct bpf_verifier_env *env, u8 reg)
{
- if (env->cur_state.regs[reg].type != PTR_TO_CTX)
+ if (env->cur_state.regs[reg].type != PTR_TO_CTX &&
+ env->cur_state.regs[reg].type != PTR_TO_PACKET)
return -EINVAL;
+ if (meta->ptr.type != NOT_INIT &&
+ meta->ptr.type != env->cur_state.regs[reg].type)
+ return -EINVAL;
+
+ meta->ptr = env->cur_state.regs[reg];
+
return 0;
}
@@ -145,11 +152,11 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx)
return nfp_bpf_check_exit(priv->prog, env);
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM))
- return nfp_bpf_check_ctx_ptr(priv->prog, env,
- meta->insn.src_reg);
+ return nfp_bpf_check_ptr(priv->prog, meta, env,
+ meta->insn.src_reg);
if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM))
- return nfp_bpf_check_ctx_ptr(priv->prog, env,
- meta->insn.dst_reg);
+ return nfp_bpf_check_ptr(priv->prog, meta, env,
+ meta->insn.dst_reg);
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index db9750695dc7..0a5fc9f8545f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -36,7 +36,9 @@
#include <net/switchdev.h>
#include <net/tc_act/tc_gact.h>
#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_pedit.h>
#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
#include "cmsg.h"
#include "main.h"
@@ -45,13 +47,9 @@
static void nfp_fl_pop_vlan(struct nfp_fl_pop_vlan *pop_vlan)
{
size_t act_size = sizeof(struct nfp_fl_pop_vlan);
- u16 tmp_pop_vlan_op;
- tmp_pop_vlan_op =
- FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
- FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_POP_VLAN);
-
- pop_vlan->a_op = cpu_to_be16(tmp_pop_vlan_op);
+ pop_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_POP_VLAN;
+ pop_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ;
pop_vlan->reserved = 0;
}
@@ -62,14 +60,9 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
size_t act_size = sizeof(struct nfp_fl_push_vlan);
struct tcf_vlan *vlan = to_vlan(action);
u16 tmp_push_vlan_tci;
- u16 tmp_push_vlan_op;
-
- tmp_push_vlan_op =
- FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
- FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_PUSH_VLAN);
- push_vlan->a_op = cpu_to_be16(tmp_push_vlan_op);
- /* Set action push vlan parameters. */
+ push_vlan->head.jump_id = NFP_FL_ACTION_OPCODE_PUSH_VLAN;
+ push_vlan->head.len_lw = act_size >> NFP_FL_LW_SIZ;
push_vlan->reserved = 0;
push_vlan->vlan_tpid = tcf_vlan_push_proto(action);
@@ -80,42 +73,357 @@ nfp_fl_push_vlan(struct nfp_fl_push_vlan *push_vlan,
push_vlan->vlan_tci = cpu_to_be16(tmp_push_vlan_tci);
}
+static bool nfp_fl_netdev_is_tunnel_type(struct net_device *out_dev,
+ enum nfp_flower_tun_type tun_type)
+{
+ if (!out_dev->rtnl_link_ops)
+ return false;
+
+ if (!strcmp(out_dev->rtnl_link_ops->kind, "vxlan"))
+ return tun_type == NFP_FL_TUNNEL_VXLAN;
+
+ return false;
+}
+
static int
nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
struct nfp_fl_payload *nfp_flow, bool last,
- struct net_device *in_dev)
+ struct net_device *in_dev, enum nfp_flower_tun_type tun_type,
+ int *tun_out_cnt)
{
size_t act_size = sizeof(struct nfp_fl_output);
struct net_device *out_dev;
- u16 tmp_output_op;
+ u16 tmp_flags;
int ifindex;
- /* Set action opcode to output action. */
- tmp_output_op =
- FIELD_PREP(NFP_FL_ACT_LEN_LW, act_size >> NFP_FL_LW_SIZ) |
- FIELD_PREP(NFP_FL_ACT_JMP_ID, NFP_FL_ACTION_OPCODE_OUTPUT);
-
- output->a_op = cpu_to_be16(tmp_output_op);
-
- /* Set action output parameters. */
- output->flags = cpu_to_be16(last ? NFP_FL_OUT_FLAGS_LAST : 0);
+ output->head.jump_id = NFP_FL_ACTION_OPCODE_OUTPUT;
+ output->head.len_lw = act_size >> NFP_FL_LW_SIZ;
ifindex = tcf_mirred_ifindex(action);
out_dev = __dev_get_by_index(dev_net(in_dev), ifindex);
if (!out_dev)
return -EOPNOTSUPP;
- /* Only offload egress ports are on the same device as the ingress
- * port.
+ tmp_flags = last ? NFP_FL_OUT_FLAGS_LAST : 0;
+
+ if (tun_type) {
+ /* Verify the egress netdev matches the tunnel type. */
+ if (!nfp_fl_netdev_is_tunnel_type(out_dev, tun_type))
+ return -EOPNOTSUPP;
+
+ if (*tun_out_cnt)
+ return -EOPNOTSUPP;
+ (*tun_out_cnt)++;
+
+ output->flags = cpu_to_be16(tmp_flags |
+ NFP_FL_OUT_FLAGS_USE_TUN);
+ output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
+ } else {
+ /* Set action output parameters. */
+ output->flags = cpu_to_be16(tmp_flags);
+
+ /* Only offload if egress ports are on the same device as the
+ * ingress port.
+ */
+ if (!switchdev_port_same_parent_id(in_dev, out_dev))
+ return -EOPNOTSUPP;
+
+ output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
+ if (!output->port)
+ return -EOPNOTSUPP;
+ }
+ nfp_flow->meta.shortcut = output->port;
+
+ return 0;
+}
+
+static bool nfp_fl_supported_tun_port(const struct tc_action *action)
+{
+ struct ip_tunnel_info *tun = tcf_tunnel_info(action);
+
+ return tun->key.tp_dst == htons(NFP_FL_VXLAN_PORT);
+}
+
+static struct nfp_fl_pre_tunnel *nfp_fl_pre_tunnel(char *act_data, int act_len)
+{
+ size_t act_size = sizeof(struct nfp_fl_pre_tunnel);
+ struct nfp_fl_pre_tunnel *pre_tun_act;
+
+ /* Pre_tunnel action must be first on action list.
+ * If other actions already exist they need pushed forward.
*/
- if (!switchdev_port_same_parent_id(in_dev, out_dev))
+ if (act_len)
+ memmove(act_data + act_size, act_data, act_len);
+
+ pre_tun_act = (struct nfp_fl_pre_tunnel *)act_data;
+
+ memset(pre_tun_act, 0, act_size);
+
+ pre_tun_act->head.jump_id = NFP_FL_ACTION_OPCODE_PRE_TUNNEL;
+ pre_tun_act->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+
+ return pre_tun_act;
+}
+
+static int
+nfp_fl_set_vxlan(struct nfp_fl_set_vxlan *set_vxlan,
+ const struct tc_action *action,
+ struct nfp_fl_pre_tunnel *pre_tun)
+{
+ struct ip_tunnel_info *vxlan = tcf_tunnel_info(action);
+ size_t act_size = sizeof(struct nfp_fl_set_vxlan);
+ u32 tmp_set_vxlan_type_index = 0;
+ /* Currently support one pre-tunnel so index is always 0. */
+ int pretun_idx = 0;
+
+ if (vxlan->options_len) {
+ /* Do not support options e.g. vxlan gpe. */
return -EOPNOTSUPP;
+ }
+
+ set_vxlan->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
+ set_vxlan->head.len_lw = act_size >> NFP_FL_LW_SIZ;
+
+ /* Set tunnel type and pre-tunnel index. */
+ tmp_set_vxlan_type_index |=
+ FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, NFP_FL_TUNNEL_VXLAN) |
+ FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx);
+
+ set_vxlan->tun_type_index = cpu_to_be32(tmp_set_vxlan_type_index);
+
+ set_vxlan->tun_id = vxlan->key.tun_id;
+ set_vxlan->tun_flags = vxlan->key.tun_flags;
+ set_vxlan->ipv4_ttl = vxlan->key.ttl;
+ set_vxlan->ipv4_tos = vxlan->key.tos;
+
+ /* Complete pre_tunnel action. */
+ pre_tun->ipv4_dst = vxlan->key.u.ipv4.dst;
+
+ return 0;
+}
- output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
- if (!output->port)
+static void nfp_fl_set_helper32(u32 value, u32 mask, u8 *p_exact, u8 *p_mask)
+{
+ u32 oldvalue = get_unaligned((u32 *)p_exact);
+ u32 oldmask = get_unaligned((u32 *)p_mask);
+
+ value &= mask;
+ value |= oldvalue & ~mask;
+
+ put_unaligned(oldmask | mask, (u32 *)p_mask);
+ put_unaligned(value, (u32 *)p_exact);
+}
+
+static int
+nfp_fl_set_eth(const struct tc_action *action, int idx, u32 off,
+ struct nfp_fl_set_eth *set_eth)
+{
+ u32 exact, mask;
+
+ if (off + 4 > ETH_ALEN * 2)
return -EOPNOTSUPP;
- nfp_flow->meta.shortcut = output->port;
+ mask = ~tcf_pedit_mask(action, idx);
+ exact = tcf_pedit_val(action, idx);
+
+ if (exact & ~mask)
+ return -EOPNOTSUPP;
+
+ nfp_fl_set_helper32(exact, mask, &set_eth->eth_addr_val[off],
+ &set_eth->eth_addr_mask[off]);
+
+ set_eth->reserved = cpu_to_be16(0);
+ set_eth->head.jump_id = NFP_FL_ACTION_OPCODE_SET_ETHERNET;
+ set_eth->head.len_lw = sizeof(*set_eth) >> NFP_FL_LW_SIZ;
+
+ return 0;
+}
+
+static int
+nfp_fl_set_ip4(const struct tc_action *action, int idx, u32 off,
+ struct nfp_fl_set_ip4_addrs *set_ip_addr)
+{
+ __be32 exact, mask;
+
+ /* We are expecting tcf_pedit to return a big endian value */
+ mask = (__force __be32)~tcf_pedit_mask(action, idx);
+ exact = (__force __be32)tcf_pedit_val(action, idx);
+
+ if (exact & ~mask)
+ return -EOPNOTSUPP;
+
+ switch (off) {
+ case offsetof(struct iphdr, daddr):
+ set_ip_addr->ipv4_dst_mask = mask;
+ set_ip_addr->ipv4_dst = exact;
+ break;
+ case offsetof(struct iphdr, saddr):
+ set_ip_addr->ipv4_src_mask = mask;
+ set_ip_addr->ipv4_src = exact;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ set_ip_addr->reserved = cpu_to_be16(0);
+ set_ip_addr->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS;
+ set_ip_addr->head.len_lw = sizeof(*set_ip_addr) >> NFP_FL_LW_SIZ;
+
+ return 0;
+}
+
+static void
+nfp_fl_set_ip6_helper(int opcode_tag, int idx, __be32 exact, __be32 mask,
+ struct nfp_fl_set_ipv6_addr *ip6)
+{
+ ip6->ipv6[idx % 4].mask = mask;
+ ip6->ipv6[idx % 4].exact = exact;
+
+ ip6->reserved = cpu_to_be16(0);
+ ip6->head.jump_id = opcode_tag;
+ ip6->head.len_lw = sizeof(*ip6) >> NFP_FL_LW_SIZ;
+}
+
+static int
+nfp_fl_set_ip6(const struct tc_action *action, int idx, u32 off,
+ struct nfp_fl_set_ipv6_addr *ip_dst,
+ struct nfp_fl_set_ipv6_addr *ip_src)
+{
+ __be32 exact, mask;
+
+ /* We are expecting tcf_pedit to return a big endian value */
+ mask = (__force __be32)~tcf_pedit_mask(action, idx);
+ exact = (__force __be32)tcf_pedit_val(action, idx);
+
+ if (exact & ~mask)
+ return -EOPNOTSUPP;
+
+ if (off < offsetof(struct ipv6hdr, saddr))
+ return -EOPNOTSUPP;
+ else if (off < offsetof(struct ipv6hdr, daddr))
+ nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_SRC, idx,
+ exact, mask, ip_src);
+ else if (off < offsetof(struct ipv6hdr, daddr) +
+ sizeof(struct in6_addr))
+ nfp_fl_set_ip6_helper(NFP_FL_ACTION_OPCODE_SET_IPV6_DST, idx,
+ exact, mask, ip_dst);
+ else
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+static int
+nfp_fl_set_tport(const struct tc_action *action, int idx, u32 off,
+ struct nfp_fl_set_tport *set_tport, int opcode)
+{
+ u32 exact, mask;
+
+ if (off)
+ return -EOPNOTSUPP;
+
+ mask = ~tcf_pedit_mask(action, idx);
+ exact = tcf_pedit_val(action, idx);
+
+ if (exact & ~mask)
+ return -EOPNOTSUPP;
+
+ nfp_fl_set_helper32(exact, mask, set_tport->tp_port_val,
+ set_tport->tp_port_mask);
+
+ set_tport->reserved = cpu_to_be16(0);
+ set_tport->head.jump_id = opcode;
+ set_tport->head.len_lw = sizeof(*set_tport) >> NFP_FL_LW_SIZ;
+
+ return 0;
+}
+
+static int
+nfp_fl_pedit(const struct tc_action *action, char *nfp_action, int *a_len)
+{
+ struct nfp_fl_set_ipv6_addr set_ip6_dst, set_ip6_src;
+ struct nfp_fl_set_ip4_addrs set_ip_addr;
+ struct nfp_fl_set_tport set_tport;
+ struct nfp_fl_set_eth set_eth;
+ enum pedit_header_type htype;
+ int idx, nkeys, err;
+ size_t act_size;
+ u32 offset, cmd;
+
+ memset(&set_ip6_dst, 0, sizeof(set_ip6_dst));
+ memset(&set_ip6_src, 0, sizeof(set_ip6_src));
+ memset(&set_ip_addr, 0, sizeof(set_ip_addr));
+ memset(&set_tport, 0, sizeof(set_tport));
+ memset(&set_eth, 0, sizeof(set_eth));
+ nkeys = tcf_pedit_nkeys(action);
+
+ for (idx = 0; idx < nkeys; idx++) {
+ cmd = tcf_pedit_cmd(action, idx);
+ htype = tcf_pedit_htype(action, idx);
+ offset = tcf_pedit_offset(action, idx);
+
+ if (cmd != TCA_PEDIT_KEY_EX_CMD_SET)
+ return -EOPNOTSUPP;
+
+ switch (htype) {
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
+ err = nfp_fl_set_eth(action, idx, offset, &set_eth);
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
+ err = nfp_fl_set_ip4(action, idx, offset, &set_ip_addr);
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
+ err = nfp_fl_set_ip6(action, idx, offset, &set_ip6_dst,
+ &set_ip6_src);
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
+ err = nfp_fl_set_tport(action, idx, offset, &set_tport,
+ NFP_FL_ACTION_OPCODE_SET_TCP);
+ break;
+ case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
+ err = nfp_fl_set_tport(action, idx, offset, &set_tport,
+ NFP_FL_ACTION_OPCODE_SET_UDP);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ if (err)
+ return err;
+ }
+
+ if (set_eth.head.len_lw) {
+ act_size = sizeof(set_eth);
+ memcpy(nfp_action, &set_eth, act_size);
+ *a_len += act_size;
+ } else if (set_ip_addr.head.len_lw) {
+ act_size = sizeof(set_ip_addr);
+ memcpy(nfp_action, &set_ip_addr, act_size);
+ *a_len += act_size;
+ } else if (set_ip6_dst.head.len_lw && set_ip6_src.head.len_lw) {
+ /* TC compiles set src and dst IPv6 address as a single action,
+ * the hardware requires this to be 2 separate actions.
+ */
+ act_size = sizeof(set_ip6_src);
+ memcpy(nfp_action, &set_ip6_src, act_size);
+ *a_len += act_size;
+
+ act_size = sizeof(set_ip6_dst);
+ memcpy(&nfp_action[sizeof(set_ip6_src)], &set_ip6_dst,
+ act_size);
+ *a_len += act_size;
+ } else if (set_ip6_dst.head.len_lw) {
+ act_size = sizeof(set_ip6_dst);
+ memcpy(nfp_action, &set_ip6_dst, act_size);
+ *a_len += act_size;
+ } else if (set_ip6_src.head.len_lw) {
+ act_size = sizeof(set_ip6_src);
+ memcpy(nfp_action, &set_ip6_src, act_size);
+ *a_len += act_size;
+ } else if (set_tport.head.len_lw) {
+ act_size = sizeof(set_tport);
+ memcpy(nfp_action, &set_tport, act_size);
+ *a_len += act_size;
+ }
return 0;
}
@@ -123,8 +431,11 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
static int
nfp_flower_loop_action(const struct tc_action *a,
struct nfp_fl_payload *nfp_fl, int *a_len,
- struct net_device *netdev)
+ struct net_device *netdev,
+ enum nfp_flower_tun_type *tun_type, int *tun_out_cnt)
{
+ struct nfp_fl_pre_tunnel *pre_tun;
+ struct nfp_fl_set_vxlan *s_vxl;
struct nfp_fl_push_vlan *psh_v;
struct nfp_fl_pop_vlan *pop_v;
struct nfp_fl_output *output;
@@ -137,7 +448,8 @@ nfp_flower_loop_action(const struct tc_action *a,
return -EOPNOTSUPP;
output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_output(output, a, nfp_fl, true, netdev);
+ err = nfp_fl_output(output, a, nfp_fl, true, netdev, *tun_type,
+ tun_out_cnt);
if (err)
return err;
@@ -147,7 +459,8 @@ nfp_flower_loop_action(const struct tc_action *a,
return -EOPNOTSUPP;
output = (struct nfp_fl_output *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_output(output, a, nfp_fl, false, netdev);
+ err = nfp_fl_output(output, a, nfp_fl, false, netdev, *tun_type,
+ tun_out_cnt);
if (err)
return err;
@@ -170,6 +483,32 @@ nfp_flower_loop_action(const struct tc_action *a,
nfp_fl_push_vlan(psh_v, a);
*a_len += sizeof(struct nfp_fl_push_vlan);
+ } else if (is_tcf_tunnel_set(a) && nfp_fl_supported_tun_port(a)) {
+ /* Pre-tunnel action is required for tunnel encap.
+ * This checks for next hop entries on NFP.
+ * If none, the packet falls back before applying other actions.
+ */
+ if (*a_len + sizeof(struct nfp_fl_pre_tunnel) +
+ sizeof(struct nfp_fl_set_vxlan) > NFP_FL_MAX_A_SIZ)
+ return -EOPNOTSUPP;
+
+ *tun_type = NFP_FL_TUNNEL_VXLAN;
+ pre_tun = nfp_fl_pre_tunnel(nfp_fl->action_data, *a_len);
+ nfp_fl->meta.shortcut = cpu_to_be32(NFP_FL_SC_ACT_NULL);
+ *a_len += sizeof(struct nfp_fl_pre_tunnel);
+
+ s_vxl = (struct nfp_fl_set_vxlan *)&nfp_fl->action_data[*a_len];
+ err = nfp_fl_set_vxlan(s_vxl, a, pre_tun);
+ if (err)
+ return err;
+
+ *a_len += sizeof(struct nfp_fl_set_vxlan);
+ } else if (is_tcf_tunnel_release(a)) {
+ /* Tunnel decap is handled by default so accept action. */
+ return 0;
+ } else if (is_tcf_pedit(a)) {
+ if (nfp_fl_pedit(a, &nfp_fl->action_data[*a_len], a_len))
+ return -EOPNOTSUPP;
} else {
/* Currently we do not handle any other actions. */
return -EOPNOTSUPP;
@@ -182,18 +521,22 @@ int nfp_flower_compile_action(struct tc_cls_flower_offload *flow,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow)
{
- int act_len, act_cnt, err;
+ int act_len, act_cnt, err, tun_out_cnt;
+ enum nfp_flower_tun_type tun_type;
const struct tc_action *a;
LIST_HEAD(actions);
memset(nfp_flow->action_data, 0, NFP_FL_MAX_A_SIZ);
nfp_flow->meta.act_len = 0;
+ tun_type = NFP_FL_TUNNEL_NONE;
act_len = 0;
act_cnt = 0;
+ tun_out_cnt = 0;
tcf_exts_to_list(flow->exts, &actions);
list_for_each_entry(a, &actions, list) {
- err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev);
+ err = nfp_flower_loop_action(a, nfp_flow, &act_len, netdev,
+ &tun_type, &tun_out_cnt);
if (err)
return err;
act_cnt++;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index c3ca05d10fe1..6b71c719deba 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -38,17 +38,10 @@
#include <net/dst_metadata.h>
#include "main.h"
-#include "../nfpcore/nfp_cpp.h"
#include "../nfp_net.h"
#include "../nfp_net_repr.h"
#include "./cmsg.h"
-#define nfp_flower_cmsg_warn(app, fmt, args...) \
- do { \
- if (net_ratelimit()) \
- nfp_warn((app)->cpp, fmt, ## args); \
- } while (0)
-
static struct nfp_flower_cmsg_hdr *
nfp_flower_cmsg_get_hdr(struct sk_buff *skb)
{
@@ -188,6 +181,15 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
case NFP_FLOWER_CMSG_TYPE_FLOW_STATS:
nfp_flower_rx_flow_stats(app, skb);
break;
+ case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
+ nfp_tunnel_request_route(app, skb);
+ break;
+ case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
+ nfp_tunnel_keep_alive(app, skb);
+ break;
+ case NFP_FLOWER_CMSG_TYPE_TUN_NEIGH:
+ /* Acks from the NFP that the route is added - ignore. */
+ break;
default:
nfp_flower_cmsg_warn(app, "Cannot handle invalid repr control type %u\n",
type);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index a2ec60344236..64e87f8e7089 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -39,6 +39,7 @@
#include <linux/types.h>
#include "../nfp_app.h"
+#include "../nfpcore/nfp_cpp.h"
#define NFP_FLOWER_LAYER_META BIT(0)
#define NFP_FLOWER_LAYER_PORT BIT(1)
@@ -56,6 +57,11 @@
#define NFP_FLOWER_MASK_VLAN_CFI BIT(12)
#define NFP_FLOWER_MASK_VLAN_VID GENMASK(11, 0)
+#define NFP_FLOWER_MASK_MPLS_LB GENMASK(31, 12)
+#define NFP_FLOWER_MASK_MPLS_TC GENMASK(11, 9)
+#define NFP_FLOWER_MASK_MPLS_BOS BIT(8)
+#define NFP_FLOWER_MASK_MPLS_Q BIT(0)
+
#define NFP_FL_SC_ACT_DROP 0x80000000
#define NFP_FL_SC_ACT_USER 0x7D000000
#define NFP_FL_SC_ACT_POPV 0x6A000000
@@ -67,13 +73,18 @@
#define NFP_FL_LW_SIZ 2
/* Action opcodes */
-#define NFP_FL_ACTION_OPCODE_OUTPUT 0
-#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1
-#define NFP_FL_ACTION_OPCODE_POP_VLAN 2
-#define NFP_FL_ACTION_OPCODE_NUM 32
-
-#define NFP_FL_ACT_JMP_ID GENMASK(15, 8)
-#define NFP_FL_ACT_LEN_LW GENMASK(7, 0)
+#define NFP_FL_ACTION_OPCODE_OUTPUT 0
+#define NFP_FL_ACTION_OPCODE_PUSH_VLAN 1
+#define NFP_FL_ACTION_OPCODE_POP_VLAN 2
+#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6
+#define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7
+#define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9
+#define NFP_FL_ACTION_OPCODE_SET_IPV6_SRC 11
+#define NFP_FL_ACTION_OPCODE_SET_IPV6_DST 12
+#define NFP_FL_ACTION_OPCODE_SET_UDP 14
+#define NFP_FL_ACTION_OPCODE_SET_TCP 15
+#define NFP_FL_ACTION_OPCODE_PRE_TUNNEL 17
+#define NFP_FL_ACTION_OPCODE_NUM 32
#define NFP_FL_OUT_FLAGS_LAST BIT(15)
#define NFP_FL_OUT_FLAGS_USE_TUN BIT(4)
@@ -83,21 +94,74 @@
#define NFP_FL_PUSH_VLAN_CFI BIT(12)
#define NFP_FL_PUSH_VLAN_VID GENMASK(11, 0)
+/* Tunnel ports */
+#define NFP_FL_PORT_TYPE_TUN 0x50000000
+#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4)
+#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0)
+
+#define nfp_flower_cmsg_warn(app, fmt, args...) \
+ do { \
+ if (net_ratelimit()) \
+ nfp_warn((app)->cpp, fmt, ## args); \
+ } while (0)
+
+enum nfp_flower_tun_type {
+ NFP_FL_TUNNEL_NONE = 0,
+ NFP_FL_TUNNEL_VXLAN = 2,
+};
+
+struct nfp_fl_act_head {
+ u8 jump_id;
+ u8 len_lw;
+};
+
+struct nfp_fl_set_eth {
+ struct nfp_fl_act_head head;
+ __be16 reserved;
+ u8 eth_addr_mask[ETH_ALEN * 2];
+ u8 eth_addr_val[ETH_ALEN * 2];
+};
+
+struct nfp_fl_set_ip4_addrs {
+ struct nfp_fl_act_head head;
+ __be16 reserved;
+ __be32 ipv4_src_mask;
+ __be32 ipv4_src;
+ __be32 ipv4_dst_mask;
+ __be32 ipv4_dst;
+};
+
+struct nfp_fl_set_ipv6_addr {
+ struct nfp_fl_act_head head;
+ __be16 reserved;
+ struct {
+ __be32 mask;
+ __be32 exact;
+ } ipv6[4];
+};
+
+struct nfp_fl_set_tport {
+ struct nfp_fl_act_head head;
+ __be16 reserved;
+ u8 tp_port_mask[4];
+ u8 tp_port_val[4];
+};
+
struct nfp_fl_output {
- __be16 a_op;
+ struct nfp_fl_act_head head;
__be16 flags;
__be32 port;
};
struct nfp_fl_push_vlan {
- __be16 a_op;
+ struct nfp_fl_act_head head;
__be16 reserved;
__be16 vlan_tpid;
__be16 vlan_tci;
};
struct nfp_fl_pop_vlan {
- __be16 a_op;
+ struct nfp_fl_act_head head;
__be16 reserved;
};
@@ -115,6 +179,25 @@ struct nfp_flower_meta_one {
u16 reserved;
};
+struct nfp_fl_pre_tunnel {
+ struct nfp_fl_act_head head;
+ __be16 reserved;
+ __be32 ipv4_dst;
+ /* reserved for use with IPv6 addresses */
+ __be32 extra[3];
+};
+
+struct nfp_fl_set_vxlan {
+ struct nfp_fl_act_head head;
+ __be16 reserved;
+ __be64 tun_id;
+ __be32 tun_type_index;
+ __be16 tun_flags;
+ u8 ipv4_ttl;
+ u8 ipv4_tos;
+ __be32 extra[2];
+} __packed;
+
/* Metadata with L2 (1W/4B)
* ----------------------------------------------------------------
* 3 2 1
@@ -230,6 +313,36 @@ struct nfp_flower_ipv6 {
struct in6_addr ipv6_dst;
};
+/* Flow Frame VXLAN --> Tunnel details (4W/16B)
+ * -----------------------------------------------------------------
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv4_addr_src |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv4_addr_dst |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | tun_flags | tos | ttl |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | gpe_flags | Reserved | Next Protocol |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VNI | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_vxlan {
+ __be32 ip_src;
+ __be32 ip_dst;
+ __be16 tun_flags;
+ u8 tos;
+ u8 ttl;
+ u8 gpe_flags;
+ u8 reserved[2];
+ u8 nxt_proto;
+ __be32 tun_id;
+};
+
+#define NFP_FL_TUN_VNI_OFFSET 8
+
/* The base header for a control message packet.
* Defines an 8-bit version, and an 8-bit type, padded
* to a 32-bit word. Rest of the packet is type-specific.
@@ -249,6 +362,11 @@ enum nfp_flower_cmsg_type_port {
NFP_FLOWER_CMSG_TYPE_FLOW_DEL = 2,
NFP_FLOWER_CMSG_TYPE_MAC_REPR = 7,
NFP_FLOWER_CMSG_TYPE_PORT_MOD = 8,
+ NFP_FLOWER_CMSG_TYPE_NO_NEIGH = 10,
+ NFP_FLOWER_CMSG_TYPE_TUN_MAC = 11,
+ NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS = 12,
+ NFP_FLOWER_CMSG_TYPE_TUN_NEIGH = 13,
+ NFP_FLOWER_CMSG_TYPE_TUN_IPS = 14,
NFP_FLOWER_CMSG_TYPE_FLOW_STATS = 15,
NFP_FLOWER_CMSG_TYPE_PORT_ECHO = 16,
NFP_FLOWER_CMSG_TYPE_MAX = 32,
@@ -282,6 +400,7 @@ enum nfp_flower_cmsg_port_type {
NFP_FLOWER_CMSG_PORT_TYPE_UNSPEC = 0x0,
NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT = 0x1,
NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT = 0x2,
+ NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT = 0x3,
};
enum nfp_flower_cmsg_port_vnic_type {
@@ -323,6 +442,11 @@ static inline void *nfp_flower_cmsg_get_data(struct sk_buff *skb)
return (unsigned char *)skb->data + NFP_FLOWER_CMSG_HLEN;
}
+static inline int nfp_flower_cmsg_get_data_len(struct sk_buff *skb)
+{
+ return skb->len - NFP_FLOWER_CMSG_HLEN;
+}
+
struct sk_buff *
nfp_flower_cmsg_mac_repr_start(struct nfp_app *app, unsigned int num_ports);
void
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 91fe03617106..e46e7c60d491 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -436,6 +436,16 @@ static void nfp_flower_clean(struct nfp_app *app)
app->priv = NULL;
}
+static int nfp_flower_start(struct nfp_app *app)
+{
+ return nfp_tunnel_config_start(app);
+}
+
+static void nfp_flower_stop(struct nfp_app *app)
+{
+ nfp_tunnel_config_stop(app);
+}
+
const struct nfp_app_type app_flower = {
.id = NFP_APP_FLOWER_NIC,
.name = "flower",
@@ -453,6 +463,9 @@ const struct nfp_app_type app_flower = {
.repr_open = nfp_flower_repr_netdev_open,
.repr_stop = nfp_flower_repr_netdev_stop,
+ .start = nfp_flower_start,
+ .stop = nfp_flower_stop,
+
.ctrl_msg_rx = nfp_flower_cmsg_rx,
.sriov_enable = nfp_flower_sriov_enable,
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index c20dd00a1cae..12c319a219d8 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -58,6 +58,8 @@ struct nfp_app;
#define NFP_FL_MASK_REUSE_TIME_NS 40000
#define NFP_FL_MASK_ID_LOCATION 1
+#define NFP_FL_VXLAN_PORT 4789
+
struct nfp_fl_mask_id {
struct circ_buf mask_id_free_list;
struct timespec64 *last_used;
@@ -82,6 +84,18 @@ struct nfp_fl_stats_id {
* @flow_table: Hash table used to store flower rules
* @cmsg_work: Workqueue for control messages processing
* @cmsg_skbs: List of skbs for control message processing
+ * @nfp_mac_off_list: List of MAC addresses to offload
+ * @nfp_mac_index_list: List of unique 8-bit indexes for non NFP netdevs
+ * @nfp_ipv4_off_list: List of IPv4 addresses to offload
+ * @nfp_neigh_off_list: List of neighbour offloads
+ * @nfp_mac_off_lock: Lock for the MAC address list
+ * @nfp_mac_index_lock: Lock for the MAC index list
+ * @nfp_ipv4_off_lock: Lock for the IPv4 address list
+ * @nfp_neigh_off_lock: Lock for the neighbour address list
+ * @nfp_mac_off_ids: IDA to manage id assignment for offloaded macs
+ * @nfp_mac_off_count: Number of MACs in address list
+ * @nfp_tun_mac_nb: Notifier to monitor link state
+ * @nfp_tun_neigh_nb: Notifier to monitor neighbour state
*/
struct nfp_flower_priv {
struct nfp_app *app;
@@ -94,6 +108,18 @@ struct nfp_flower_priv {
DECLARE_HASHTABLE(flow_table, NFP_FLOWER_HASH_BITS);
struct work_struct cmsg_work;
struct sk_buff_head cmsg_skbs;
+ struct list_head nfp_mac_off_list;
+ struct list_head nfp_mac_index_list;
+ struct list_head nfp_ipv4_off_list;
+ struct list_head nfp_neigh_off_list;
+ struct mutex nfp_mac_off_lock;
+ struct mutex nfp_mac_index_lock;
+ struct mutex nfp_ipv4_off_lock;
+ struct mutex nfp_neigh_off_lock;
+ struct ida nfp_mac_off_ids;
+ int nfp_mac_off_count;
+ struct notifier_block nfp_tun_mac_nb;
+ struct notifier_block nfp_tun_neigh_nb;
};
struct nfp_fl_key_ls {
@@ -126,6 +152,7 @@ struct nfp_fl_payload {
struct rcu_head rcu;
spinlock_t lock; /* lock stats */
struct nfp_fl_stats stats;
+ __be32 nfp_tun_ipv4_addr;
char *unmasked_data;
char *mask_data;
char *action_data;
@@ -163,4 +190,12 @@ nfp_flower_remove_fl_table(struct nfp_app *app, unsigned long tc_flower_cookie);
void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb);
+int nfp_tunnel_config_start(struct nfp_app *app);
+void nfp_tunnel_config_stop(struct nfp_app *app);
+void nfp_tunnel_write_macs(struct nfp_app *app);
+void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
+void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
+void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
+void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb);
+
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index d25b5038c3a2..60614d4f0e22 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -77,14 +77,17 @@ nfp_flower_compile_meta(struct nfp_flower_meta_one *frame, u8 key_type)
static int
nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
- bool mask_version)
+ bool mask_version, enum nfp_flower_tun_type tun_type)
{
if (mask_version) {
frame->in_port = cpu_to_be32(~0);
return 0;
}
- frame->in_port = cpu_to_be32(cmsg_port);
+ if (tun_type)
+ frame->in_port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
+ else
+ frame->in_port = cpu_to_be32(cmsg_port);
return 0;
}
@@ -108,8 +111,21 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
ether_addr_copy(frame->mac_src, &addr->src[0]);
}
- if (mask_version)
- frame->mpls_lse = cpu_to_be32(~0);
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_MPLS)) {
+ struct flow_dissector_key_mpls *mpls;
+ u32 t_mpls;
+
+ mpls = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_MPLS,
+ target);
+
+ t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, mpls->mpls_label) |
+ FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, mpls->mpls_tc) |
+ FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, mpls->mpls_bos) |
+ NFP_FLOWER_MASK_MPLS_Q;
+
+ frame->mpls_lse = cpu_to_be32(t_mpls);
+ }
}
static void
@@ -140,7 +156,6 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
struct flow_dissector_key_ipv4_addrs *addr;
struct flow_dissector_key_basic *basic;
- /* Wildcard TOS/TTL for now. */
memset(frame, 0, sizeof(struct nfp_flower_ipv4));
if (dissector_uses_key(flow->dissector,
@@ -158,6 +173,16 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *frame,
target);
frame->proto = basic->ip_proto;
}
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) {
+ struct flow_dissector_key_ip *flow_ip;
+
+ flow_ip = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ target);
+ frame->tos = flow_ip->tos;
+ frame->ttl = flow_ip->ttl;
+ }
}
static void
@@ -169,7 +194,6 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
struct flow_dissector_key_ipv6_addrs *addr;
struct flow_dissector_key_basic *basic;
- /* Wildcard LABEL/TOS/TTL for now. */
memset(frame, 0, sizeof(struct nfp_flower_ipv6));
if (dissector_uses_key(flow->dissector,
@@ -187,6 +211,51 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *frame,
target);
frame->proto = basic->ip_proto;
}
+
+ if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP)) {
+ struct flow_dissector_key_ip *flow_ip;
+
+ flow_ip = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_IP,
+ target);
+ frame->tos = flow_ip->tos;
+ frame->ttl = flow_ip->ttl;
+ }
+}
+
+static void
+nfp_flower_compile_vxlan(struct nfp_flower_vxlan *frame,
+ struct tc_cls_flower_offload *flow,
+ bool mask_version, __be32 *tun_dst)
+{
+ struct fl_flow_key *target = mask_version ? flow->mask : flow->key;
+ struct flow_dissector_key_ipv4_addrs *vxlan_ips;
+ struct flow_dissector_key_keyid *vni;
+
+ /* Wildcard TOS/TTL/GPE_FLAGS/NXT_PROTO for now. */
+ memset(frame, 0, sizeof(struct nfp_flower_vxlan));
+
+ if (dissector_uses_key(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ u32 temp_vni;
+
+ vni = skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ target);
+ temp_vni = be32_to_cpu(vni->keyid) << NFP_FL_TUN_VNI_OFFSET;
+ frame->tun_id = cpu_to_be32(temp_vni);
+ }
+
+ if (dissector_uses_key(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+ vxlan_ips =
+ skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ target);
+ frame->ip_src = vxlan_ips->src;
+ frame->ip_dst = vxlan_ips->dst;
+ *tun_dst = vxlan_ips->dst;
+ }
}
int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
@@ -194,10 +263,16 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
struct net_device *netdev,
struct nfp_fl_payload *nfp_flow)
{
+ enum nfp_flower_tun_type tun_type = NFP_FL_TUNNEL_NONE;
+ __be32 tun_dst, tun_dst_mask = 0;
+ struct nfp_repr *netdev_repr;
int err;
u8 *ext;
u8 *msk;
+ if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN)
+ tun_type = NFP_FL_TUNNEL_VXLAN;
+
memset(nfp_flow->unmasked_data, 0, key_ls->key_size);
memset(nfp_flow->mask_data, 0, key_ls->key_size);
@@ -216,14 +291,14 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
/* Populate Exact Port data. */
err = nfp_flower_compile_port((struct nfp_flower_in_port *)ext,
nfp_repr_get_port_id(netdev),
- false);
+ false, tun_type);
if (err)
return err;
/* Populate Mask Port Data. */
err = nfp_flower_compile_port((struct nfp_flower_in_port *)msk,
nfp_repr_get_port_id(netdev),
- true);
+ true, tun_type);
if (err)
return err;
@@ -291,5 +366,28 @@ int nfp_flower_compile_flow_match(struct tc_cls_flower_offload *flow,
msk += sizeof(struct nfp_flower_ipv6);
}
+ if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN) {
+ /* Populate Exact VXLAN Data. */
+ nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)ext,
+ flow, false, &tun_dst);
+ /* Populate Mask VXLAN Data. */
+ nfp_flower_compile_vxlan((struct nfp_flower_vxlan *)msk,
+ flow, true, &tun_dst_mask);
+ ext += sizeof(struct nfp_flower_vxlan);
+ msk += sizeof(struct nfp_flower_vxlan);
+
+ /* Configure tunnel end point MAC. */
+ if (nfp_netdev_is_nfp_repr(netdev)) {
+ netdev_repr = netdev_priv(netdev);
+ nfp_tunnel_write_macs(netdev_repr->app);
+
+ /* Store the tunnel destination in the rule data.
+ * This must be present and be an exact match.
+ */
+ nfp_flow->nfp_tun_ipv4_addr = tun_dst;
+ nfp_tunnel_add_ipv4_off(netdev_repr->app, tun_dst);
+ }
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 3226ddc55f99..193520ef23f0 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -140,7 +140,7 @@ exit_rcu_unlock:
void nfp_flower_rx_flow_stats(struct nfp_app *app, struct sk_buff *skb)
{
- unsigned int msg_len = skb->len - NFP_FLOWER_CMSG_HLEN;
+ unsigned int msg_len = nfp_flower_cmsg_get_data_len(skb);
struct nfp_fl_stats_frame *stats_frame;
unsigned char *msg;
int i;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index a18b4d2b1d3e..c47753fdb55b 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -52,8 +52,26 @@
BIT(FLOW_DISSECTOR_KEY_PORTS) | \
BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | \
BIT(FLOW_DISSECTOR_KEY_VLAN) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | \
+ BIT(FLOW_DISSECTOR_KEY_MPLS) | \
BIT(FLOW_DISSECTOR_KEY_IP))
+#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR \
+ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+
+#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R \
+ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))
+
static int
nfp_flower_xmit_flow(struct net_device *netdev,
struct nfp_fl_payload *nfp_flow, u8 mtype)
@@ -117,7 +135,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
{
struct flow_dissector_key_basic *mask_basic = NULL;
struct flow_dissector_key_basic *key_basic = NULL;
- struct flow_dissector_key_ip *mask_ip = NULL;
u32 key_layer_two;
u8 key_layer;
int key_size;
@@ -125,15 +142,58 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
if (flow->dissector->used_keys & ~NFP_FLOWER_WHITELIST_DISSECTOR)
return -EOPNOTSUPP;
+ /* If any tun dissector is used then the required set must be used. */
+ if (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
+ (flow->dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+ != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
+ return -EOPNOTSUPP;
+
+ key_layer_two = 0;
+ key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
+ key_size = sizeof(struct nfp_flower_meta_one) +
+ sizeof(struct nfp_flower_in_port) +
+ sizeof(struct nfp_flower_mac_mpls);
+
if (dissector_uses_key(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+ struct flow_dissector_key_ipv4_addrs *mask_ipv4 = NULL;
+ struct flow_dissector_key_ports *mask_enc_ports = NULL;
+ struct flow_dissector_key_ports *enc_ports = NULL;
struct flow_dissector_key_control *mask_enc_ctl =
skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_ENC_CONTROL,
flow->mask);
- /* We are expecting a tunnel. For now we ignore offloading. */
- if (mask_enc_ctl->addr_type)
+ struct flow_dissector_key_control *enc_ctl =
+ skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ flow->key);
+ if (mask_enc_ctl->addr_type != 0xffff ||
+ enc_ctl->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS)
+ return -EOPNOTSUPP;
+
+ /* These fields are already verified as used. */
+ mask_ipv4 =
+ skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ flow->mask);
+ if (mask_ipv4->dst != cpu_to_be32(~0))
+ return -EOPNOTSUPP;
+
+ mask_enc_ports =
+ skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ flow->mask);
+ enc_ports =
+ skb_flow_dissector_target(flow->dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ flow->key);
+
+ if (mask_enc_ports->dst != cpu_to_be16(~0) ||
+ enc_ports->dst != htons(NFP_FL_VXLAN_PORT))
return -EOPNOTSUPP;
+
+ key_layer |= NFP_FLOWER_LAYER_VXLAN;
+ key_size += sizeof(struct nfp_flower_vxlan);
}
if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
@@ -146,34 +206,15 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
flow->key);
}
- if (dissector_uses_key(flow->dissector, FLOW_DISSECTOR_KEY_IP))
- mask_ip = skb_flow_dissector_target(flow->dissector,
- FLOW_DISSECTOR_KEY_IP,
- flow->mask);
-
- key_layer_two = 0;
- key_layer = NFP_FLOWER_LAYER_PORT | NFP_FLOWER_LAYER_MAC;
- key_size = sizeof(struct nfp_flower_meta_one) +
- sizeof(struct nfp_flower_in_port) +
- sizeof(struct nfp_flower_mac_mpls);
-
if (mask_basic && mask_basic->n_proto) {
/* Ethernet type is present in the key. */
switch (key_basic->n_proto) {
case cpu_to_be16(ETH_P_IP):
- if (mask_ip && mask_ip->tos)
- return -EOPNOTSUPP;
- if (mask_ip && mask_ip->ttl)
- return -EOPNOTSUPP;
key_layer |= NFP_FLOWER_LAYER_IPV4;
key_size += sizeof(struct nfp_flower_ipv4);
break;
case cpu_to_be16(ETH_P_IPV6):
- if (mask_ip && mask_ip->tos)
- return -EOPNOTSUPP;
- if (mask_ip && mask_ip->ttl)
- return -EOPNOTSUPP;
key_layer |= NFP_FLOWER_LAYER_IPV6;
key_size += sizeof(struct nfp_flower_ipv6);
break;
@@ -184,11 +225,6 @@ nfp_flower_calculate_key_layers(struct nfp_fl_key_ls *ret_key_ls,
case cpu_to_be16(ETH_P_ARP):
return -EOPNOTSUPP;
- /* Currently we do not offload MPLS. */
- case cpu_to_be16(ETH_P_MPLS_UC):
- case cpu_to_be16(ETH_P_MPLS_MC):
- return -EOPNOTSUPP;
-
/* Will be included in layer 2. */
case cpu_to_be16(ETH_P_8021Q):
break;
@@ -252,6 +288,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
if (!flow_pay->action_data)
goto err_free_mask;
+ flow_pay->nfp_tun_ipv4_addr = 0;
flow_pay->meta.flags = 0;
spin_lock_init(&flow_pay->lock);
@@ -361,6 +398,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
if (err)
goto err_free_flow;
+ if (nfp_flow->nfp_tun_ipv4_addr)
+ nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr);
+
err = nfp_flower_xmit_flow(netdev, nfp_flow,
NFP_FLOWER_CMSG_TYPE_FLOW_DEL);
if (err)
@@ -409,6 +449,10 @@ static int
nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
struct tc_cls_flower_offload *flower)
{
+ if (!eth_proto_is_802_3(flower->common.protocol) ||
+ flower->common.chain_index)
+ return -EOPNOTSUPP;
+
switch (flower->command) {
case TC_CLSFLOWER_REPLACE:
return nfp_flower_add_offload(app, netdev, flower);
@@ -421,16 +465,50 @@ nfp_flower_repr_offload(struct nfp_app *app, struct net_device *netdev,
return -EOPNOTSUPP;
}
-int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
- enum tc_setup_type type, void *type_data)
+static int nfp_flower_setup_tc_block_cb(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ struct nfp_net *nn = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSFLOWER:
+ return nfp_flower_repr_offload(nn->app, nn->port->netdev,
+ type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int nfp_flower_setup_tc_block(struct net_device *netdev,
+ struct tc_block_offload *f)
{
- struct tc_cls_flower_offload *cls_flower = type_data;
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ return -EOPNOTSUPP;
- if (type != TC_SETUP_CLSFLOWER ||
- !is_classid_clsact_ingress(cls_flower->common.classid) ||
- !eth_proto_is_802_3(cls_flower->common.protocol) ||
- cls_flower->common.chain_index)
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block,
+ nfp_flower_setup_tc_block_cb,
+ nn, nn);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block,
+ nfp_flower_setup_tc_block_cb,
+ nn);
+ return 0;
+ default:
return -EOPNOTSUPP;
+ }
+}
- return nfp_flower_repr_offload(app, netdev, cls_flower);
+int nfp_flower_setup_tc(struct nfp_app *app, struct net_device *netdev,
+ enum tc_setup_type type, void *type_data)
+{
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return nfp_flower_setup_tc_block(netdev, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
new file mode 100644
index 000000000000..c495f8f38506
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <net/netevent.h>
+#include <linux/idr.h>
+#include <net/dst_metadata.h>
+#include <net/arp.h>
+
+#include "cmsg.h"
+#include "main.h"
+#include "../nfp_net_repr.h"
+#include "../nfp_net.h"
+
+#define NFP_FL_MAX_ROUTES 32
+
+/**
+ * struct nfp_tun_active_tuns - periodic message of active tunnels
+ * @seq: sequence number of the message
+ * @count: number of tunnels report in message
+ * @flags: options part of the request
+ * @ipv4: dest IPv4 address of active route
+ * @egress_port: port the encapsulated packet egressed
+ * @extra: reserved for future use
+ * @tun_info: tunnels that have sent traffic in reported period
+ */
+struct nfp_tun_active_tuns {
+ __be32 seq;
+ __be32 count;
+ __be32 flags;
+ struct route_ip_info {
+ __be32 ipv4;
+ __be32 egress_port;
+ __be32 extra[2];
+ } tun_info[];
+};
+
+/**
+ * struct nfp_tun_neigh - neighbour/route entry on the NFP
+ * @dst_ipv4: destination IPv4 address
+ * @src_ipv4: source IPv4 address
+ * @dst_addr: destination MAC address
+ * @src_addr: source MAC address
+ * @port_id: NFP port to output packet on - associated with source IPv4
+ */
+struct nfp_tun_neigh {
+ __be32 dst_ipv4;
+ __be32 src_ipv4;
+ u8 dst_addr[ETH_ALEN];
+ u8 src_addr[ETH_ALEN];
+ __be32 port_id;
+};
+
+/**
+ * struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup
+ * @ingress_port: ingress port of packet that signalled request
+ * @ipv4_addr: destination ipv4 address for route
+ * @reserved: reserved for future use
+ */
+struct nfp_tun_req_route_ipv4 {
+ __be32 ingress_port;
+ __be32 ipv4_addr;
+ __be32 reserved[2];
+};
+
+/**
+ * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP
+ * @ipv4_addr: destination of route
+ * @list: list pointer
+ */
+struct nfp_ipv4_route_entry {
+ __be32 ipv4_addr;
+ struct list_head list;
+};
+
+#define NFP_FL_IPV4_ADDRS_MAX 32
+
+/**
+ * struct nfp_tun_ipv4_addr - set the IP address list on the NFP
+ * @count: number of IPs populated in the array
+ * @ipv4_addr: array of IPV4_ADDRS_MAX 32 bit IPv4 addresses
+ */
+struct nfp_tun_ipv4_addr {
+ __be32 count;
+ __be32 ipv4_addr[NFP_FL_IPV4_ADDRS_MAX];
+};
+
+/**
+ * struct nfp_ipv4_addr_entry - cached IPv4 addresses
+ * @ipv4_addr: IP address
+ * @ref_count: number of rules currently using this IP
+ * @list: list pointer
+ */
+struct nfp_ipv4_addr_entry {
+ __be32 ipv4_addr;
+ int ref_count;
+ struct list_head list;
+};
+
+/**
+ * struct nfp_tun_mac_addr - configure MAC address of tunnel EP on NFP
+ * @reserved: reserved for future use
+ * @count: number of MAC addresses in the message
+ * @index: index of MAC address in the lookup table
+ * @addr: interface MAC address
+ * @addresses: series of MACs to offload
+ */
+struct nfp_tun_mac_addr {
+ __be16 reserved;
+ __be16 count;
+ struct index_mac_addr {
+ __be16 index;
+ u8 addr[ETH_ALEN];
+ } addresses[];
+};
+
+/**
+ * struct nfp_tun_mac_offload_entry - list of MACs to offload
+ * @index: index of MAC address for offloading
+ * @addr: interface MAC address
+ * @list: list pointer
+ */
+struct nfp_tun_mac_offload_entry {
+ __be16 index;
+ u8 addr[ETH_ALEN];
+ struct list_head list;
+};
+
+#define NFP_MAX_MAC_INDEX 0xff
+
+/**
+ * struct nfp_tun_mac_non_nfp_idx - converts non NFP netdev ifindex to 8-bit id
+ * @ifindex: netdev ifindex of the device
+ * @index: index of netdevs mac on NFP
+ * @list: list pointer
+ */
+struct nfp_tun_mac_non_nfp_idx {
+ int ifindex;
+ u8 index;
+ struct list_head list;
+};
+
+void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb)
+{
+ struct nfp_tun_active_tuns *payload;
+ struct net_device *netdev;
+ int count, i, pay_len;
+ struct neighbour *n;
+ __be32 ipv4_addr;
+ u32 port;
+
+ payload = nfp_flower_cmsg_get_data(skb);
+ count = be32_to_cpu(payload->count);
+ if (count > NFP_FL_MAX_ROUTES) {
+ nfp_flower_cmsg_warn(app, "Tunnel keep-alive request exceeds max routes.\n");
+ return;
+ }
+
+ pay_len = nfp_flower_cmsg_get_data_len(skb);
+ if (pay_len != sizeof(struct nfp_tun_active_tuns) +
+ sizeof(struct route_ip_info) * count) {
+ nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n");
+ return;
+ }
+
+ for (i = 0; i < count; i++) {
+ ipv4_addr = payload->tun_info[i].ipv4;
+ port = be32_to_cpu(payload->tun_info[i].egress_port);
+ netdev = nfp_app_repr_get(app, port);
+ if (!netdev)
+ continue;
+
+ n = neigh_lookup(&arp_tbl, &ipv4_addr, netdev);
+ if (!n)
+ continue;
+
+ /* Update the used timestamp of neighbour */
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+}
+
+static bool nfp_tun_is_netdev_to_offload(struct net_device *netdev)
+{
+ if (!netdev->rtnl_link_ops)
+ return false;
+ if (!strcmp(netdev->rtnl_link_ops->kind, "openvswitch"))
+ return true;
+ if (!strcmp(netdev->rtnl_link_ops->kind, "vxlan"))
+ return true;
+
+ return false;
+}
+
+static int
+nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata)
+{
+ struct sk_buff *skb;
+ unsigned char *msg;
+
+ skb = nfp_flower_cmsg_alloc(app, plen, mtype);
+ if (!skb)
+ return -ENOMEM;
+
+ msg = nfp_flower_cmsg_get_data(skb);
+ memcpy(msg, pdata, nfp_flower_cmsg_get_data_len(skb));
+
+ nfp_ctrl_tx(app->ctrl, skb);
+ return 0;
+}
+
+static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv4_route_entry *entry;
+ struct list_head *ptr, *storage;
+
+ mutex_lock(&priv->nfp_neigh_off_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+ entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+ if (entry->ipv4_addr == ipv4_addr) {
+ mutex_unlock(&priv->nfp_neigh_off_lock);
+ return true;
+ }
+ }
+ mutex_unlock(&priv->nfp_neigh_off_lock);
+ return false;
+}
+
+static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv4_route_entry *entry;
+ struct list_head *ptr, *storage;
+
+ mutex_lock(&priv->nfp_neigh_off_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+ entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+ if (entry->ipv4_addr == ipv4_addr) {
+ mutex_unlock(&priv->nfp_neigh_off_lock);
+ return;
+ }
+ }
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ mutex_unlock(&priv->nfp_neigh_off_lock);
+ nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n");
+ return;
+ }
+
+ entry->ipv4_addr = ipv4_addr;
+ list_add_tail(&entry->list, &priv->nfp_neigh_off_list);
+ mutex_unlock(&priv->nfp_neigh_off_lock);
+}
+
+static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv4_route_entry *entry;
+ struct list_head *ptr, *storage;
+
+ mutex_lock(&priv->nfp_neigh_off_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+ entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
+ if (entry->ipv4_addr == ipv4_addr) {
+ list_del(&entry->list);
+ kfree(entry);
+ break;
+ }
+ }
+ mutex_unlock(&priv->nfp_neigh_off_lock);
+}
+
+static void
+nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
+ struct flowi4 *flow, struct neighbour *neigh)
+{
+ struct nfp_tun_neigh payload;
+
+ /* Only offload representor IPv4s for now. */
+ if (!nfp_netdev_is_nfp_repr(netdev))
+ return;
+
+ memset(&payload, 0, sizeof(struct nfp_tun_neigh));
+ payload.dst_ipv4 = flow->daddr;
+
+ /* If entry has expired send dst IP with all other fields 0. */
+ if (!(neigh->nud_state & NUD_VALID)) {
+ nfp_tun_del_route_from_cache(app, payload.dst_ipv4);
+ /* Trigger ARP to verify invalid neighbour state. */
+ neigh_event_send(neigh, NULL);
+ goto send_msg;
+ }
+
+ /* Have a valid neighbour so populate rest of entry. */
+ payload.src_ipv4 = flow->saddr;
+ ether_addr_copy(payload.src_addr, netdev->dev_addr);
+ neigh_ha_snapshot(payload.dst_addr, neigh, netdev);
+ payload.port_id = cpu_to_be32(nfp_repr_get_port_id(netdev));
+ /* Add destination of new route to NFP cache. */
+ nfp_tun_add_route_to_cache(app, payload.dst_ipv4);
+
+send_msg:
+ nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH,
+ sizeof(struct nfp_tun_neigh),
+ (unsigned char *)&payload);
+}
+
+static int
+nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
+ void *ptr)
+{
+ struct nfp_flower_priv *app_priv;
+ struct netevent_redirect *redir;
+ struct flowi4 flow = {};
+ struct neighbour *n;
+ struct nfp_app *app;
+ struct rtable *rt;
+ int err;
+
+ switch (event) {
+ case NETEVENT_REDIRECT:
+ redir = (struct netevent_redirect *)ptr;
+ n = redir->neigh;
+ break;
+ case NETEVENT_NEIGH_UPDATE:
+ n = (struct neighbour *)ptr;
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ flow.daddr = *(__be32 *)n->primary_key;
+
+ /* Only concerned with route changes for representors. */
+ if (!nfp_netdev_is_nfp_repr(n->dev))
+ return NOTIFY_DONE;
+
+ app_priv = container_of(nb, struct nfp_flower_priv, nfp_tun_neigh_nb);
+ app = app_priv->app;
+
+ /* Only concerned with changes to routes already added to NFP. */
+ if (!nfp_tun_has_route(app, flow.daddr))
+ return NOTIFY_DONE;
+
+#if IS_ENABLED(CONFIG_INET)
+ /* Do a route lookup to populate flow data. */
+ rt = ip_route_output_key(dev_net(n->dev), &flow);
+ err = PTR_ERR_OR_ZERO(rt);
+ if (err)
+ return NOTIFY_DONE;
+#else
+ return NOTIFY_DONE;
+#endif
+
+ flow.flowi4_proto = IPPROTO_UDP;
+ nfp_tun_write_neigh(n->dev, app, &flow, n);
+
+ return NOTIFY_OK;
+}
+
+void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb)
+{
+ struct nfp_tun_req_route_ipv4 *payload;
+ struct net_device *netdev;
+ struct flowi4 flow = {};
+ struct neighbour *n;
+ struct rtable *rt;
+ int err;
+
+ payload = nfp_flower_cmsg_get_data(skb);
+
+ netdev = nfp_app_repr_get(app, be32_to_cpu(payload->ingress_port));
+ if (!netdev)
+ goto route_fail_warning;
+
+ flow.daddr = payload->ipv4_addr;
+ flow.flowi4_proto = IPPROTO_UDP;
+
+#if IS_ENABLED(CONFIG_INET)
+ /* Do a route lookup on same namespace as ingress port. */
+ rt = ip_route_output_key(dev_net(netdev), &flow);
+ err = PTR_ERR_OR_ZERO(rt);
+ if (err)
+ goto route_fail_warning;
+#else
+ goto route_fail_warning;
+#endif
+
+ /* Get the neighbour entry for the lookup */
+ n = dst_neigh_lookup(&rt->dst, &flow.daddr);
+ ip_rt_put(rt);
+ if (!n)
+ goto route_fail_warning;
+ nfp_tun_write_neigh(n->dev, app, &flow, n);
+ neigh_release(n);
+ return;
+
+route_fail_warning:
+ nfp_flower_cmsg_warn(app, "Requested route not found.\n");
+}
+
+static void nfp_tun_write_ipv4_list(struct nfp_app *app)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv4_addr_entry *entry;
+ struct nfp_tun_ipv4_addr payload;
+ struct list_head *ptr, *storage;
+ int count;
+
+ memset(&payload, 0, sizeof(struct nfp_tun_ipv4_addr));
+ mutex_lock(&priv->nfp_ipv4_off_lock);
+ count = 0;
+ list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+ if (count >= NFP_FL_IPV4_ADDRS_MAX) {
+ mutex_unlock(&priv->nfp_ipv4_off_lock);
+ nfp_flower_cmsg_warn(app, "IPv4 offload exceeds limit.\n");
+ return;
+ }
+ entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+ payload.ipv4_addr[count++] = entry->ipv4_addr;
+ }
+ payload.count = cpu_to_be32(count);
+ mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+ nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS,
+ sizeof(struct nfp_tun_ipv4_addr),
+ &payload);
+}
+
+void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv4_addr_entry *entry;
+ struct list_head *ptr, *storage;
+
+ mutex_lock(&priv->nfp_ipv4_off_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+ entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+ if (entry->ipv4_addr == ipv4) {
+ entry->ref_count++;
+ mutex_unlock(&priv->nfp_ipv4_off_lock);
+ return;
+ }
+ }
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ mutex_unlock(&priv->nfp_ipv4_off_lock);
+ nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n");
+ return;
+ }
+ entry->ipv4_addr = ipv4;
+ entry->ref_count = 1;
+ list_add_tail(&entry->list, &priv->nfp_ipv4_off_list);
+ mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+ nfp_tun_write_ipv4_list(app);
+}
+
+void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv4_addr_entry *entry;
+ struct list_head *ptr, *storage;
+
+ mutex_lock(&priv->nfp_ipv4_off_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+ entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+ if (entry->ipv4_addr == ipv4) {
+ entry->ref_count--;
+ if (!entry->ref_count) {
+ list_del(&entry->list);
+ kfree(entry);
+ }
+ break;
+ }
+ }
+ mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+ nfp_tun_write_ipv4_list(app);
+}
+
+void nfp_tunnel_write_macs(struct nfp_app *app)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_tun_mac_offload_entry *entry;
+ struct nfp_tun_mac_addr *payload;
+ struct list_head *ptr, *storage;
+ int mac_count, err, pay_size;
+
+ mutex_lock(&priv->nfp_mac_off_lock);
+ if (!priv->nfp_mac_off_count) {
+ mutex_unlock(&priv->nfp_mac_off_lock);
+ return;
+ }
+
+ pay_size = sizeof(struct nfp_tun_mac_addr) +
+ sizeof(struct index_mac_addr) * priv->nfp_mac_off_count;
+
+ payload = kzalloc(pay_size, GFP_KERNEL);
+ if (!payload) {
+ mutex_unlock(&priv->nfp_mac_off_lock);
+ return;
+ }
+
+ payload->count = cpu_to_be16(priv->nfp_mac_off_count);
+
+ mac_count = 0;
+ list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+ entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+ list);
+ payload->addresses[mac_count].index = entry->index;
+ ether_addr_copy(payload->addresses[mac_count].addr,
+ entry->addr);
+ mac_count++;
+ }
+
+ err = nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_MAC,
+ pay_size, payload);
+
+ kfree(payload);
+
+ if (err) {
+ mutex_unlock(&priv->nfp_mac_off_lock);
+ /* Write failed so retain list for future retry. */
+ return;
+ }
+
+ /* If list was successfully offloaded, flush it. */
+ list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+ entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+ list);
+ list_del(&entry->list);
+ kfree(entry);
+ }
+
+ priv->nfp_mac_off_count = 0;
+ mutex_unlock(&priv->nfp_mac_off_lock);
+}
+
+static int nfp_tun_get_mac_idx(struct nfp_app *app, int ifindex)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_tun_mac_non_nfp_idx *entry;
+ struct list_head *ptr, *storage;
+ int idx;
+
+ mutex_lock(&priv->nfp_mac_index_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+ entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list);
+ if (entry->ifindex == ifindex) {
+ idx = entry->index;
+ mutex_unlock(&priv->nfp_mac_index_lock);
+ return idx;
+ }
+ }
+
+ idx = ida_simple_get(&priv->nfp_mac_off_ids, 0,
+ NFP_MAX_MAC_INDEX, GFP_KERNEL);
+ if (idx < 0) {
+ mutex_unlock(&priv->nfp_mac_index_lock);
+ return idx;
+ }
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ mutex_unlock(&priv->nfp_mac_index_lock);
+ return -ENOMEM;
+ }
+ entry->ifindex = ifindex;
+ entry->index = idx;
+ list_add_tail(&entry->list, &priv->nfp_mac_index_list);
+ mutex_unlock(&priv->nfp_mac_index_lock);
+
+ return idx;
+}
+
+static void nfp_tun_del_mac_idx(struct nfp_app *app, int ifindex)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_tun_mac_non_nfp_idx *entry;
+ struct list_head *ptr, *storage;
+
+ mutex_lock(&priv->nfp_mac_index_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+ entry = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx, list);
+ if (entry->ifindex == ifindex) {
+ ida_simple_remove(&priv->nfp_mac_off_ids,
+ entry->index);
+ list_del(&entry->list);
+ kfree(entry);
+ break;
+ }
+ }
+ mutex_unlock(&priv->nfp_mac_index_lock);
+}
+
+static void nfp_tun_add_to_mac_offload_list(struct net_device *netdev,
+ struct nfp_app *app)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_tun_mac_offload_entry *entry;
+ u16 nfp_mac_idx;
+ int port = 0;
+
+ /* Check if MAC should be offloaded. */
+ if (!is_valid_ether_addr(netdev->dev_addr))
+ return;
+
+ if (nfp_netdev_is_nfp_repr(netdev))
+ port = nfp_repr_get_port_id(netdev);
+ else if (!nfp_tun_is_netdev_to_offload(netdev))
+ return;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ nfp_flower_cmsg_warn(app, "Mem fail when offloading MAC.\n");
+ return;
+ }
+
+ if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
+ NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT) {
+ nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PHYS_PORT;
+ } else if (FIELD_GET(NFP_FLOWER_CMSG_PORT_TYPE, port) ==
+ NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT) {
+ port = FIELD_GET(NFP_FLOWER_CMSG_PORT_VNIC, port);
+ nfp_mac_idx = port << 8 | NFP_FLOWER_CMSG_PORT_TYPE_PCIE_PORT;
+ } else {
+ /* Must assign our own unique 8-bit index. */
+ int idx = nfp_tun_get_mac_idx(app, netdev->ifindex);
+
+ if (idx < 0) {
+ nfp_flower_cmsg_warn(app, "Can't assign non-repr MAC index.\n");
+ kfree(entry);
+ return;
+ }
+ nfp_mac_idx = idx << 8 | NFP_FLOWER_CMSG_PORT_TYPE_OTHER_PORT;
+ }
+
+ entry->index = cpu_to_be16(nfp_mac_idx);
+ ether_addr_copy(entry->addr, netdev->dev_addr);
+
+ mutex_lock(&priv->nfp_mac_off_lock);
+ priv->nfp_mac_off_count++;
+ list_add_tail(&entry->list, &priv->nfp_mac_off_list);
+ mutex_unlock(&priv->nfp_mac_off_lock);
+}
+
+static int nfp_tun_mac_event_handler(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct nfp_flower_priv *app_priv;
+ struct net_device *netdev;
+ struct nfp_app *app;
+
+ if (event == NETDEV_DOWN || event == NETDEV_UNREGISTER) {
+ app_priv = container_of(nb, struct nfp_flower_priv,
+ nfp_tun_mac_nb);
+ app = app_priv->app;
+ netdev = netdev_notifier_info_to_dev(ptr);
+
+ /* If non-nfp netdev then free its offload index. */
+ if (nfp_tun_is_netdev_to_offload(netdev))
+ nfp_tun_del_mac_idx(app, netdev->ifindex);
+ } else if (event == NETDEV_UP || event == NETDEV_CHANGEADDR ||
+ event == NETDEV_REGISTER) {
+ app_priv = container_of(nb, struct nfp_flower_priv,
+ nfp_tun_mac_nb);
+ app = app_priv->app;
+ netdev = netdev_notifier_info_to_dev(ptr);
+
+ nfp_tun_add_to_mac_offload_list(netdev, app);
+
+ /* Force a list write to keep NFP up to date. */
+ nfp_tunnel_write_macs(app);
+ }
+ return NOTIFY_OK;
+}
+
+int nfp_tunnel_config_start(struct nfp_app *app)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct net_device *netdev;
+ int err;
+
+ /* Initialise priv data for MAC offloading. */
+ priv->nfp_mac_off_count = 0;
+ mutex_init(&priv->nfp_mac_off_lock);
+ INIT_LIST_HEAD(&priv->nfp_mac_off_list);
+ priv->nfp_tun_mac_nb.notifier_call = nfp_tun_mac_event_handler;
+ mutex_init(&priv->nfp_mac_index_lock);
+ INIT_LIST_HEAD(&priv->nfp_mac_index_list);
+ ida_init(&priv->nfp_mac_off_ids);
+
+ /* Initialise priv data for IPv4 offloading. */
+ mutex_init(&priv->nfp_ipv4_off_lock);
+ INIT_LIST_HEAD(&priv->nfp_ipv4_off_list);
+
+ /* Initialise priv data for neighbour offloading. */
+ mutex_init(&priv->nfp_neigh_off_lock);
+ INIT_LIST_HEAD(&priv->nfp_neigh_off_list);
+ priv->nfp_tun_neigh_nb.notifier_call = nfp_tun_neigh_event_handler;
+
+ err = register_netdevice_notifier(&priv->nfp_tun_mac_nb);
+ if (err)
+ goto err_free_mac_ida;
+
+ err = register_netevent_notifier(&priv->nfp_tun_neigh_nb);
+ if (err)
+ goto err_unreg_mac_nb;
+
+ /* Parse netdevs already registered for MACs that need offloaded. */
+ rtnl_lock();
+ for_each_netdev(&init_net, netdev)
+ nfp_tun_add_to_mac_offload_list(netdev, app);
+ rtnl_unlock();
+
+ return 0;
+
+err_unreg_mac_nb:
+ unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
+err_free_mac_ida:
+ ida_destroy(&priv->nfp_mac_off_ids);
+ return err;
+}
+
+void nfp_tunnel_config_stop(struct nfp_app *app)
+{
+ struct nfp_tun_mac_offload_entry *mac_entry;
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv4_route_entry *route_entry;
+ struct nfp_tun_mac_non_nfp_idx *mac_idx;
+ struct nfp_ipv4_addr_entry *ip_entry;
+ struct list_head *ptr, *storage;
+
+ unregister_netdevice_notifier(&priv->nfp_tun_mac_nb);
+ unregister_netevent_notifier(&priv->nfp_tun_neigh_nb);
+
+ /* Free any memory that may be occupied by MAC list. */
+ mutex_lock(&priv->nfp_mac_off_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_mac_off_list) {
+ mac_entry = list_entry(ptr, struct nfp_tun_mac_offload_entry,
+ list);
+ list_del(&mac_entry->list);
+ kfree(mac_entry);
+ }
+ mutex_unlock(&priv->nfp_mac_off_lock);
+
+ /* Free any memory that may be occupied by MAC index list. */
+ mutex_lock(&priv->nfp_mac_index_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_mac_index_list) {
+ mac_idx = list_entry(ptr, struct nfp_tun_mac_non_nfp_idx,
+ list);
+ list_del(&mac_idx->list);
+ kfree(mac_idx);
+ }
+ mutex_unlock(&priv->nfp_mac_index_lock);
+
+ ida_destroy(&priv->nfp_mac_off_ids);
+
+ /* Free any memory that may be occupied by ipv4 list. */
+ mutex_lock(&priv->nfp_ipv4_off_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_ipv4_off_list) {
+ ip_entry = list_entry(ptr, struct nfp_ipv4_addr_entry, list);
+ list_del(&ip_entry->list);
+ kfree(ip_entry);
+ }
+ mutex_unlock(&priv->nfp_ipv4_off_lock);
+
+ /* Free any memory that may be occupied by the route list. */
+ mutex_lock(&priv->nfp_neigh_off_lock);
+ list_for_each_safe(ptr, storage, &priv->nfp_neigh_off_list) {
+ route_entry = list_entry(ptr, struct nfp_ipv4_route_entry,
+ list);
+ list_del(&route_entry->list);
+ kfree(route_entry);
+ }
+ mutex_unlock(&priv->nfp_neigh_off_lock);
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.c b/drivers/net/ethernet/netronome/nfp/nfp_app.c
index 82c290763529..5d9e2eba5b49 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.c
@@ -31,6 +31,7 @@
* SOFTWARE.
*/
+#include <linux/bug.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h
index af640b5c2108..857bb33020ba 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_app.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h
@@ -36,6 +36,8 @@
#include <net/devlink.h>
+#include <trace/events/devlink.h>
+
#include "nfp_net_repr.h"
struct bpf_prog;
@@ -271,11 +273,17 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb)
{
+ trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0,
+ skb->data, skb->len);
+
return nfp_ctrl_tx(app->ctrl, skb);
}
static inline void nfp_app_ctrl_rx(struct nfp_app *app, struct sk_buff *skb)
{
+ trace_devlink_hwmsg(priv_to_devlink(app->pf), true, 0,
+ skb->data, skb->len);
+
app->type->ctrl_msg_rx(app, skb);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
new file mode 100644
index 000000000000..830f6de25f47
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2016-2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include "nfp_asm.h"
+
+const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
+ [CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 },
+ [CMD_TGT_READ8] = { 0x01, 0x43 },
+ [CMD_TGT_READ32] = { 0x00, 0x5c },
+ [CMD_TGT_READ32_LE] = { 0x01, 0x5c },
+ [CMD_TGT_READ32_SWAP] = { 0x02, 0x5c },
+ [CMD_TGT_READ_LE] = { 0x01, 0x40 },
+ [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 },
+};
+
+static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst)
+{
+ bool lm_id, lm_dec = false;
+ u16 val = swreg_value(reg);
+
+ switch (swreg_type(reg)) {
+ case NN_REG_GPR_A:
+ case NN_REG_GPR_B:
+ case NN_REG_GPR_BOTH:
+ return val;
+ case NN_REG_NNR:
+ return UR_REG_NN | val;
+ case NN_REG_XFER:
+ return UR_REG_XFR | val;
+ case NN_REG_LMEM:
+ lm_id = swreg_lm_idx(reg);
+
+ switch (swreg_lm_mode(reg)) {
+ case NN_LM_MOD_NONE:
+ if (val & ~UR_REG_LM_IDX_MAX) {
+ pr_err("LM offset too large\n");
+ return 0;
+ }
+ return UR_REG_LM | FIELD_PREP(UR_REG_LM_IDX, lm_id) |
+ val;
+ case NN_LM_MOD_DEC:
+ lm_dec = true;
+ /* fall through */
+ case NN_LM_MOD_INC:
+ if (val) {
+ pr_err("LM offset in inc/dev mode\n");
+ return 0;
+ }
+ return UR_REG_LM | UR_REG_LM_POST_MOD |
+ FIELD_PREP(UR_REG_LM_IDX, lm_id) |
+ FIELD_PREP(UR_REG_LM_POST_MOD_DEC, lm_dec);
+ default:
+ pr_err("bad LM mode for unrestricted operands %d\n",
+ swreg_lm_mode(reg));
+ return 0;
+ }
+ case NN_REG_IMM:
+ if (val & ~0xff) {
+ pr_err("immediate too large\n");
+ return 0;
+ }
+ return UR_REG_IMM_encode(val);
+ case NN_REG_NONE:
+ return is_dst ? UR_REG_NO_DST : REG_NONE;
+ }
+
+ pr_err("unrecognized reg encoding %08x\n", reg);
+ return 0;
+}
+
+int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg,
+ struct nfp_insn_ur_regs *reg)
+{
+ memset(reg, 0, sizeof(*reg));
+
+ /* Decode destination */
+ if (swreg_type(dst) == NN_REG_IMM)
+ return -EFAULT;
+
+ if (swreg_type(dst) == NN_REG_GPR_B)
+ reg->dst_ab = ALU_DST_B;
+ if (swreg_type(dst) == NN_REG_GPR_BOTH)
+ reg->wr_both = true;
+ reg->dst = nfp_swreg_to_unreg(dst, true);
+
+ /* Decode source operands */
+ if (swreg_type(lreg) == swreg_type(rreg))
+ return -EFAULT;
+
+ if (swreg_type(lreg) == NN_REG_GPR_B ||
+ swreg_type(rreg) == NN_REG_GPR_A) {
+ reg->areg = nfp_swreg_to_unreg(rreg, false);
+ reg->breg = nfp_swreg_to_unreg(lreg, false);
+ reg->swap = true;
+ } else {
+ reg->areg = nfp_swreg_to_unreg(lreg, false);
+ reg->breg = nfp_swreg_to_unreg(rreg, false);
+ }
+
+ reg->dst_lmextn = swreg_lmextn(dst);
+ reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg);
+
+ return 0;
+}
+
+static u16 nfp_swreg_to_rereg(swreg reg, bool is_dst, bool has_imm8, bool *i8)
+{
+ u16 val = swreg_value(reg);
+ bool lm_id;
+
+ switch (swreg_type(reg)) {
+ case NN_REG_GPR_A:
+ case NN_REG_GPR_B:
+ case NN_REG_GPR_BOTH:
+ return val;
+ case NN_REG_XFER:
+ return RE_REG_XFR | val;
+ case NN_REG_LMEM:
+ lm_id = swreg_lm_idx(reg);
+
+ if (swreg_lm_mode(reg) != NN_LM_MOD_NONE) {
+ pr_err("bad LM mode for restricted operands %d\n",
+ swreg_lm_mode(reg));
+ return 0;
+ }
+
+ if (val & ~RE_REG_LM_IDX_MAX) {
+ pr_err("LM offset too large\n");
+ return 0;
+ }
+
+ return RE_REG_LM | FIELD_PREP(RE_REG_LM_IDX, lm_id) | val;
+ case NN_REG_IMM:
+ if (val & ~(0x7f | has_imm8 << 7)) {
+ pr_err("immediate too large\n");
+ return 0;
+ }
+ *i8 = val & 0x80;
+ return RE_REG_IMM_encode(val & 0x7f);
+ case NN_REG_NONE:
+ return is_dst ? RE_REG_NO_DST : REG_NONE;
+ case NN_REG_NNR:
+ pr_err("NNRs used with restricted encoding\n");
+ return 0;
+ }
+
+ pr_err("unrecognized reg encoding\n");
+ return 0;
+}
+
+int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg,
+ struct nfp_insn_re_regs *reg, bool has_imm8)
+{
+ memset(reg, 0, sizeof(*reg));
+
+ /* Decode destination */
+ if (swreg_type(dst) == NN_REG_IMM)
+ return -EFAULT;
+
+ if (swreg_type(dst) == NN_REG_GPR_B)
+ reg->dst_ab = ALU_DST_B;
+ if (swreg_type(dst) == NN_REG_GPR_BOTH)
+ reg->wr_both = true;
+ reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL);
+
+ /* Decode source operands */
+ if (swreg_type(lreg) == swreg_type(rreg))
+ return -EFAULT;
+
+ if (swreg_type(lreg) == NN_REG_GPR_B ||
+ swreg_type(rreg) == NN_REG_GPR_A) {
+ reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+ reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+ reg->swap = true;
+ } else {
+ reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, &reg->i8);
+ reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, &reg->i8);
+ }
+
+ reg->dst_lmextn = swreg_lmextn(dst);
+ reg->src_lmextn = swreg_lmextn(lreg) | swreg_lmextn(rreg);
+
+ return 0;
+}
+
+#define NFP_USTORE_ECC_POLY_WORDS 7
+#define NFP_USTORE_OP_BITS 45
+
+static const u64 nfp_ustore_ecc_polynomials[NFP_USTORE_ECC_POLY_WORDS] = {
+ 0x0ff800007fffULL,
+ 0x11f801ff801fULL,
+ 0x1e387e0781e1ULL,
+ 0x17cb8e388e22ULL,
+ 0x1af5b2c93244ULL,
+ 0x1f56d5525488ULL,
+ 0x0daf69a46910ULL,
+};
+
+static bool parity(u64 value)
+{
+ return hweight64(value) & 1;
+}
+
+int nfp_ustore_check_valid_no_ecc(u64 insn)
+{
+ if (insn & ~GENMASK_ULL(NFP_USTORE_OP_BITS, 0))
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 nfp_ustore_calc_ecc_insn(u64 insn)
+{
+ u8 ecc = 0;
+ int i;
+
+ for (i = 0; i < NFP_USTORE_ECC_POLY_WORDS; i++)
+ ecc |= parity(nfp_ustore_ecc_polynomials[i] & insn) << i;
+
+ return insn | (u64)ecc << NFP_USTORE_OP_BITS;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index d2b535739d2b..86e7daee6099 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -34,6 +34,8 @@
#ifndef __NFP_ASM_H__
#define __NFP_ASM_H__ 1
+#include <linux/bitfield.h>
+#include <linux/bug.h>
#include <linux/types.h>
#define REG_NONE 0
@@ -43,23 +45,31 @@
#define RE_REG_IMM_encode(x) \
(RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1))
#define RE_REG_IMM_MAX 0x07fULL
+#define RE_REG_LM 0x050
+#define RE_REG_LM_IDX 0x008
+#define RE_REG_LM_IDX_MAX 0x7
#define RE_REG_XFR 0x080
#define UR_REG_XFR 0x180
+#define UR_REG_LM 0x200
+#define UR_REG_LM_IDX 0x020
+#define UR_REG_LM_POST_MOD 0x010
+#define UR_REG_LM_POST_MOD_DEC 0x001
+#define UR_REG_LM_IDX_MAX 0xf
#define UR_REG_NN 0x280
#define UR_REG_NO_DST 0x300
#define UR_REG_IMM UR_REG_NO_DST
#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x))
#define UR_REG_IMM_MAX 0x0ffULL
-#define OP_BR_BASE 0x0d800000020ULL
-#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL
-#define OP_BR_MASK 0x0000000001fULL
-#define OP_BR_EV_PIP 0x00000000300ULL
-#define OP_BR_CSS 0x0000003c000ULL
-#define OP_BR_DEFBR 0x00000300000ULL
-#define OP_BR_ADDR_LO 0x007ffc00000ULL
-#define OP_BR_ADDR_HI 0x10000000000ULL
+#define OP_BR_BASE 0x0d800000020ULL
+#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL
+#define OP_BR_MASK 0x0000000001fULL
+#define OP_BR_EV_PIP 0x00000000300ULL
+#define OP_BR_CSS 0x0000003c000ULL
+#define OP_BR_DEFBR 0x00000300000ULL
+#define OP_BR_ADDR_LO 0x007ffc00000ULL
+#define OP_BR_ADDR_HI 0x10000000000ULL
#define nfp_is_br(_insn) \
(((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE)
@@ -82,30 +92,33 @@ enum br_ctx_signal_state {
BR_CSS_NONE = 2,
};
-#define OP_BBYTE_BASE 0x0c800000000ULL
-#define OP_BB_A_SRC 0x000000000ffULL
-#define OP_BB_BYTE 0x00000000300ULL
-#define OP_BB_B_SRC 0x0000003fc00ULL
-#define OP_BB_I8 0x00000040000ULL
-#define OP_BB_EQ 0x00000080000ULL
-#define OP_BB_DEFBR 0x00000300000ULL
-#define OP_BB_ADDR_LO 0x007ffc00000ULL
-#define OP_BB_ADDR_HI 0x10000000000ULL
-
-#define OP_BALU_BASE 0x0e800000000ULL
-#define OP_BA_A_SRC 0x000000003ffULL
-#define OP_BA_B_SRC 0x000000ffc00ULL
-#define OP_BA_DEFBR 0x00000300000ULL
-#define OP_BA_ADDR_HI 0x0007fc00000ULL
-
-#define OP_IMMED_A_SRC 0x000000003ffULL
-#define OP_IMMED_B_SRC 0x000000ffc00ULL
-#define OP_IMMED_IMM 0x0000ff00000ULL
-#define OP_IMMED_WIDTH 0x00060000000ULL
-#define OP_IMMED_INV 0x00080000000ULL
-#define OP_IMMED_SHIFT 0x00600000000ULL
-#define OP_IMMED_BASE 0x0f000000000ULL
-#define OP_IMMED_WR_AB 0x20000000000ULL
+#define OP_BBYTE_BASE 0x0c800000000ULL
+#define OP_BB_A_SRC 0x000000000ffULL
+#define OP_BB_BYTE 0x00000000300ULL
+#define OP_BB_B_SRC 0x0000003fc00ULL
+#define OP_BB_I8 0x00000040000ULL
+#define OP_BB_EQ 0x00000080000ULL
+#define OP_BB_DEFBR 0x00000300000ULL
+#define OP_BB_ADDR_LO 0x007ffc00000ULL
+#define OP_BB_ADDR_HI 0x10000000000ULL
+#define OP_BB_SRC_LMEXTN 0x40000000000ULL
+
+#define OP_BALU_BASE 0x0e800000000ULL
+#define OP_BA_A_SRC 0x000000003ffULL
+#define OP_BA_B_SRC 0x000000ffc00ULL
+#define OP_BA_DEFBR 0x00000300000ULL
+#define OP_BA_ADDR_HI 0x0007fc00000ULL
+
+#define OP_IMMED_A_SRC 0x000000003ffULL
+#define OP_IMMED_B_SRC 0x000000ffc00ULL
+#define OP_IMMED_IMM 0x0000ff00000ULL
+#define OP_IMMED_WIDTH 0x00060000000ULL
+#define OP_IMMED_INV 0x00080000000ULL
+#define OP_IMMED_SHIFT 0x00600000000ULL
+#define OP_IMMED_BASE 0x0f000000000ULL
+#define OP_IMMED_WR_AB 0x20000000000ULL
+#define OP_IMMED_SRC_LMEXTN 0x40000000000ULL
+#define OP_IMMED_DST_LMEXTN 0x80000000000ULL
enum immed_width {
IMMED_WIDTH_ALL = 0,
@@ -119,17 +132,19 @@ enum immed_shift {
IMMED_SHIFT_2B = 2,
};
-#define OP_SHF_BASE 0x08000000000ULL
-#define OP_SHF_A_SRC 0x000000000ffULL
-#define OP_SHF_SC 0x00000000300ULL
-#define OP_SHF_B_SRC 0x0000003fc00ULL
-#define OP_SHF_I8 0x00000040000ULL
-#define OP_SHF_SW 0x00000080000ULL
-#define OP_SHF_DST 0x0000ff00000ULL
-#define OP_SHF_SHIFT 0x001f0000000ULL
-#define OP_SHF_OP 0x00e00000000ULL
-#define OP_SHF_DST_AB 0x01000000000ULL
-#define OP_SHF_WR_AB 0x20000000000ULL
+#define OP_SHF_BASE 0x08000000000ULL
+#define OP_SHF_A_SRC 0x000000000ffULL
+#define OP_SHF_SC 0x00000000300ULL
+#define OP_SHF_B_SRC 0x0000003fc00ULL
+#define OP_SHF_I8 0x00000040000ULL
+#define OP_SHF_SW 0x00000080000ULL
+#define OP_SHF_DST 0x0000ff00000ULL
+#define OP_SHF_SHIFT 0x001f0000000ULL
+#define OP_SHF_OP 0x00e00000000ULL
+#define OP_SHF_DST_AB 0x01000000000ULL
+#define OP_SHF_WR_AB 0x20000000000ULL
+#define OP_SHF_SRC_LMEXTN 0x40000000000ULL
+#define OP_SHF_DST_LMEXTN 0x80000000000ULL
enum shf_op {
SHF_OP_NONE = 0,
@@ -139,19 +154,22 @@ enum shf_op {
enum shf_sc {
SHF_SC_R_ROT = 0,
+ SHF_SC_NONE = SHF_SC_R_ROT,
SHF_SC_R_SHF = 1,
SHF_SC_L_SHF = 2,
SHF_SC_R_DSHF = 3,
};
-#define OP_ALU_A_SRC 0x000000003ffULL
-#define OP_ALU_B_SRC 0x000000ffc00ULL
-#define OP_ALU_DST 0x0003ff00000ULL
-#define OP_ALU_SW 0x00040000000ULL
-#define OP_ALU_OP 0x00f80000000ULL
-#define OP_ALU_DST_AB 0x01000000000ULL
-#define OP_ALU_BASE 0x0a000000000ULL
-#define OP_ALU_WR_AB 0x20000000000ULL
+#define OP_ALU_A_SRC 0x000000003ffULL
+#define OP_ALU_B_SRC 0x000000ffc00ULL
+#define OP_ALU_DST 0x0003ff00000ULL
+#define OP_ALU_SW 0x00040000000ULL
+#define OP_ALU_OP 0x00f80000000ULL
+#define OP_ALU_DST_AB 0x01000000000ULL
+#define OP_ALU_BASE 0x0a000000000ULL
+#define OP_ALU_WR_AB 0x20000000000ULL
+#define OP_ALU_SRC_LMEXTN 0x40000000000ULL
+#define OP_ALU_DST_LMEXTN 0x80000000000ULL
enum alu_op {
ALU_OP_NONE = 0x00,
@@ -170,26 +188,28 @@ enum alu_dst_ab {
ALU_DST_B = 1,
};
-#define OP_LDF_BASE 0x0c000000000ULL
-#define OP_LDF_A_SRC 0x000000000ffULL
-#define OP_LDF_SC 0x00000000300ULL
-#define OP_LDF_B_SRC 0x0000003fc00ULL
-#define OP_LDF_I8 0x00000040000ULL
-#define OP_LDF_SW 0x00000080000ULL
-#define OP_LDF_ZF 0x00000100000ULL
-#define OP_LDF_BMASK 0x0000f000000ULL
-#define OP_LDF_SHF 0x001f0000000ULL
-#define OP_LDF_WR_AB 0x20000000000ULL
-
-#define OP_CMD_A_SRC 0x000000000ffULL
-#define OP_CMD_CTX 0x00000000300ULL
-#define OP_CMD_B_SRC 0x0000003fc00ULL
-#define OP_CMD_TOKEN 0x000000c0000ULL
-#define OP_CMD_XFER 0x00001f00000ULL
-#define OP_CMD_CNT 0x0000e000000ULL
-#define OP_CMD_SIG 0x000f0000000ULL
-#define OP_CMD_TGT_CMD 0x07f00000000ULL
-#define OP_CMD_MODE 0x1c0000000000ULL
+#define OP_LDF_BASE 0x0c000000000ULL
+#define OP_LDF_A_SRC 0x000000000ffULL
+#define OP_LDF_SC 0x00000000300ULL
+#define OP_LDF_B_SRC 0x0000003fc00ULL
+#define OP_LDF_I8 0x00000040000ULL
+#define OP_LDF_SW 0x00000080000ULL
+#define OP_LDF_ZF 0x00000100000ULL
+#define OP_LDF_BMASK 0x0000f000000ULL
+#define OP_LDF_SHF 0x001f0000000ULL
+#define OP_LDF_WR_AB 0x20000000000ULL
+#define OP_LDF_SRC_LMEXTN 0x40000000000ULL
+#define OP_LDF_DST_LMEXTN 0x80000000000ULL
+
+#define OP_CMD_A_SRC 0x000000000ffULL
+#define OP_CMD_CTX 0x00000000300ULL
+#define OP_CMD_B_SRC 0x0000003fc00ULL
+#define OP_CMD_TOKEN 0x000000c0000ULL
+#define OP_CMD_XFER 0x00001f00000ULL
+#define OP_CMD_CNT 0x0000e000000ULL
+#define OP_CMD_SIG 0x000f0000000ULL
+#define OP_CMD_TGT_CMD 0x07f00000000ULL
+#define OP_CMD_MODE 0x1c0000000000ULL
struct cmd_tgt_act {
u8 token;
@@ -198,12 +218,17 @@ struct cmd_tgt_act {
enum cmd_tgt_map {
CMD_TGT_READ8,
- CMD_TGT_WRITE8,
+ CMD_TGT_WRITE8_SWAP,
+ CMD_TGT_READ32,
+ CMD_TGT_READ32_LE,
+ CMD_TGT_READ32_SWAP,
CMD_TGT_READ_LE,
CMD_TGT_READ_SWAP_LE,
__CMD_TGT_MAP_SIZE,
};
+extern const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE];
+
enum cmd_mode {
CMD_MODE_40b_AB = 0,
CMD_MODE_40b_BA = 1,
@@ -215,11 +240,13 @@ enum cmd_ctx_swap {
CMD_CTX_NO_SWAP = 3,
};
-#define OP_LCSR_BASE 0x0fc00000000ULL
-#define OP_LCSR_A_SRC 0x000000003ffULL
-#define OP_LCSR_B_SRC 0x000000ffc00ULL
-#define OP_LCSR_WRITE 0x00000200000ULL
-#define OP_LCSR_ADDR 0x001ffc00000ULL
+#define OP_LCSR_BASE 0x0fc00000000ULL
+#define OP_LCSR_A_SRC 0x000000003ffULL
+#define OP_LCSR_B_SRC 0x000000ffc00ULL
+#define OP_LCSR_WRITE 0x00000200000ULL
+#define OP_LCSR_ADDR 0x001ffc00000ULL
+#define OP_LCSR_SRC_LMEXTN 0x40000000000ULL
+#define OP_LCSR_DST_LMEXTN 0x80000000000ULL
enum lcsr_wr_src {
LCSR_WR_AREG,
@@ -227,7 +254,122 @@ enum lcsr_wr_src {
LCSR_WR_IMM,
};
-#define OP_CARB_BASE 0x0e000000000ULL
-#define OP_CARB_OR 0x00000010000ULL
+#define OP_CARB_BASE 0x0e000000000ULL
+#define OP_CARB_OR 0x00000010000ULL
+
+/* Software register representation, independent of operand type */
+#define NN_REG_TYPE GENMASK(31, 24)
+#define NN_REG_LM_IDX GENMASK(23, 22)
+#define NN_REG_LM_IDX_HI BIT(23)
+#define NN_REG_LM_IDX_LO BIT(22)
+#define NN_REG_LM_MOD GENMASK(21, 20)
+#define NN_REG_VAL GENMASK(7, 0)
+
+enum nfp_bpf_reg_type {
+ NN_REG_GPR_A = BIT(0),
+ NN_REG_GPR_B = BIT(1),
+ NN_REG_GPR_BOTH = NN_REG_GPR_A | NN_REG_GPR_B,
+ NN_REG_NNR = BIT(2),
+ NN_REG_XFER = BIT(3),
+ NN_REG_IMM = BIT(4),
+ NN_REG_NONE = BIT(5),
+ NN_REG_LMEM = BIT(6),
+};
+
+enum nfp_bpf_lm_mode {
+ NN_LM_MOD_NONE = 0,
+ NN_LM_MOD_INC,
+ NN_LM_MOD_DEC,
+};
+
+#define reg_both(x) __enc_swreg((x), NN_REG_GPR_BOTH)
+#define reg_a(x) __enc_swreg((x), NN_REG_GPR_A)
+#define reg_b(x) __enc_swreg((x), NN_REG_GPR_B)
+#define reg_nnr(x) __enc_swreg((x), NN_REG_NNR)
+#define reg_xfer(x) __enc_swreg((x), NN_REG_XFER)
+#define reg_imm(x) __enc_swreg((x), NN_REG_IMM)
+#define reg_none() __enc_swreg(0, NN_REG_NONE)
+#define reg_lm(x, off) __enc_swreg_lm((x), NN_LM_MOD_NONE, (off))
+#define reg_lm_inc(x) __enc_swreg_lm((x), NN_LM_MOD_INC, 0)
+#define reg_lm_dec(x) __enc_swreg_lm((x), NN_LM_MOD_DEC, 0)
+#define __reg_lm(x, mod, off) __enc_swreg_lm((x), (mod), (off))
+
+typedef __u32 __bitwise swreg;
+
+static inline swreg __enc_swreg(u16 id, u8 type)
+{
+ return (__force swreg)(id | FIELD_PREP(NN_REG_TYPE, type));
+}
+
+static inline swreg __enc_swreg_lm(u8 id, enum nfp_bpf_lm_mode mode, u8 off)
+{
+ WARN_ON(id > 3 || (off && mode != NN_LM_MOD_NONE));
+
+ return (__force swreg)(FIELD_PREP(NN_REG_TYPE, NN_REG_LMEM) |
+ FIELD_PREP(NN_REG_LM_IDX, id) |
+ FIELD_PREP(NN_REG_LM_MOD, mode) |
+ off);
+}
+
+static inline u32 swreg_raw(swreg reg)
+{
+ return (__force u32)reg;
+}
+
+static inline enum nfp_bpf_reg_type swreg_type(swreg reg)
+{
+ return FIELD_GET(NN_REG_TYPE, swreg_raw(reg));
+}
+
+static inline u16 swreg_value(swreg reg)
+{
+ return FIELD_GET(NN_REG_VAL, swreg_raw(reg));
+}
+
+static inline bool swreg_lm_idx(swreg reg)
+{
+ return FIELD_GET(NN_REG_LM_IDX_LO, swreg_raw(reg));
+}
+
+static inline bool swreg_lmextn(swreg reg)
+{
+ return FIELD_GET(NN_REG_LM_IDX_HI, swreg_raw(reg));
+}
+
+static inline enum nfp_bpf_lm_mode swreg_lm_mode(swreg reg)
+{
+ return FIELD_GET(NN_REG_LM_MOD, swreg_raw(reg));
+}
+
+struct nfp_insn_ur_regs {
+ enum alu_dst_ab dst_ab;
+ u16 dst;
+ u16 areg, breg;
+ bool swap;
+ bool wr_both;
+ bool dst_lmextn;
+ bool src_lmextn;
+};
+
+struct nfp_insn_re_regs {
+ enum alu_dst_ab dst_ab;
+ u8 dst;
+ u8 areg, breg;
+ bool swap;
+ bool wr_both;
+ bool i8;
+ bool dst_lmextn;
+ bool src_lmextn;
+};
+
+int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg,
+ struct nfp_insn_ur_regs *reg);
+int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg,
+ struct nfp_insn_re_regs *reg, bool has_imm8);
+
+#define NFP_USTORE_PREFETCH_WINDOW 8
+
+int nfp_ustore_check_valid_no_ecc(u64 insn);
+u64 nfp_ustore_calc_ecc_insn(u64 insn);
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index e118b5f23996..2c9109b09faf 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1582,26 +1582,6 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
return true;
}
-static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, void *hard_start,
- unsigned int *off, unsigned int *len)
-{
- struct xdp_buff xdp;
- void *orig_data;
- int ret;
-
- xdp.data_hard_start = hard_start;
- xdp.data = data + *off;
- xdp.data_end = data + *off + *len;
-
- orig_data = xdp.data;
- ret = bpf_prog_run_xdp(prog, &xdp);
-
- *len -= xdp.data - orig_data;
- *off += xdp.data - orig_data;
-
- return ret;
-}
-
/**
* nfp_net_rx() - receive up to @budget packets on @rx_ring
* @rx_ring: RX ring to receive from
@@ -1637,6 +1617,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
struct nfp_meta_parsed meta;
struct net_device *netdev;
dma_addr_t new_dma_addr;
+ u32 meta_len_xdp = 0;
void *new_frag;
idx = D_IDX(rx_ring, rx_ring->rd_p);
@@ -1715,16 +1696,24 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
dp->bpf_offload_xdp) && !meta.portid) {
+ void *orig_data = rxbuf->frag + pkt_off;
unsigned int dma_off;
- void *hard_start;
+ struct xdp_buff xdp;
int act;
- hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
+ xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM;
+ xdp.data = orig_data;
+ xdp.data_meta = orig_data;
+ xdp.data_end = orig_data + pkt_len;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+ pkt_len -= xdp.data - orig_data;
+ pkt_off += xdp.data - orig_data;
- act = nfp_net_run_xdp(xdp_prog, rxbuf->frag, hard_start,
- &pkt_off, &pkt_len);
switch (act) {
case XDP_PASS:
+ meta_len_xdp = xdp.data - xdp.data_meta;
break;
case XDP_TX:
dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
@@ -1792,6 +1781,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
if (rxd->rxd.flags & PCIE_DESC_RX_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
le16_to_cpu(rxd->rxd.vlan));
+ if (meta_len_xdp)
+ skb_metadata_set(skb, meta_len_xdp);
napi_gro_receive(&rx_ring->r_vec->napi, skb);
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index b0a452ba9039..782d452e0fc2 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -255,7 +255,7 @@
* @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code
*/
#define NFP_NET_CFG_BPF_ABI 0x0080
-#define NFP_NET_BPF_ABI 1
+#define NFP_NET_BPF_ABI 2
#define NFP_NET_CFG_BPF_CAP 0x0081
#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */
#define NFP_NET_CFG_BPF_MAX_LEN 0x0082
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 994a83a1f0a5..88128ce61471 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -1813,12 +1813,12 @@ static int nv_alloc_rx(struct net_device *dev)
struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD);
if (skb) {
np->put_rx_ctx->skb = skb;
- np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+ np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev,
skb->data,
skb_tailroom(skb),
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(np->pci_dev,
- np->put_rx_ctx->dma)) {
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+ np->put_rx_ctx->dma))) {
kfree_skb(skb);
goto packet_dropped;
}
@@ -1854,12 +1854,12 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
struct sk_buff *skb = netdev_alloc_skb(dev, np->rx_buf_sz + NV_RX_ALLOC_PAD);
if (skb) {
np->put_rx_ctx->skb = skb;
- np->put_rx_ctx->dma = pci_map_single(np->pci_dev,
+ np->put_rx_ctx->dma = dma_map_single(&np->pci_dev->dev,
skb->data,
skb_tailroom(skb),
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(np->pci_dev,
- np->put_rx_ctx->dma)) {
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+ np->put_rx_ctx->dma))) {
kfree_skb(skb);
goto packet_dropped;
}
@@ -1884,10 +1884,9 @@ packet_dropped:
}
/* If rx bufs are exhausted called after 50ms to attempt to refresh */
-static void nv_do_rx_refill(unsigned long data)
+static void nv_do_rx_refill(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) data;
- struct fe_priv *np = netdev_priv(dev);
+ struct fe_priv *np = from_timer(np, t, oom_kick);
/* Just reschedule NAPI rx processing */
napi_schedule(&np->napi);
@@ -1977,9 +1976,9 @@ static void nv_unmap_txskb(struct fe_priv *np, struct nv_skb_map *tx_skb)
{
if (tx_skb->dma) {
if (tx_skb->dma_single)
- pci_unmap_single(np->pci_dev, tx_skb->dma,
+ dma_unmap_single(&np->pci_dev->dev, tx_skb->dma,
tx_skb->dma_len,
- PCI_DMA_TODEVICE);
+ DMA_TO_DEVICE);
else
pci_unmap_page(np->pci_dev, tx_skb->dma,
tx_skb->dma_len,
@@ -2047,10 +2046,10 @@ static void nv_drain_rx(struct net_device *dev)
}
wmb();
if (np->rx_skb[i].skb) {
- pci_unmap_single(np->pci_dev, np->rx_skb[i].dma,
+ dma_unmap_single(&np->pci_dev->dev, np->rx_skb[i].dma,
(skb_end_pointer(np->rx_skb[i].skb) -
- np->rx_skb[i].skb->data),
- PCI_DMA_FROMDEVICE);
+ np->rx_skb[i].skb->data),
+ DMA_FROM_DEVICE);
dev_kfree_skb(np->rx_skb[i].skb);
np->rx_skb[i].skb = NULL;
}
@@ -2224,10 +2223,11 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
prev_tx = put_tx;
prev_tx_ctx = np->put_tx_ctx;
bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
- np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(np->pci_dev,
- np->put_tx_ctx->dma)) {
+ np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev,
+ skb->data + offset, bcnt,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+ np->put_tx_ctx->dma))) {
/* on DMA mapping error - drop the packet */
dev_kfree_skb_any(skb);
u64_stats_update_begin(&np->swstats_tx_syncp);
@@ -2267,7 +2267,8 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
frag, offset,
bcnt,
DMA_TO_DEVICE);
- if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) {
+ if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+ np->put_tx_ctx->dma))) {
/* Unwind the mapped fragments */
do {
@@ -2373,10 +2374,11 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
prev_tx = put_tx;
prev_tx_ctx = np->put_tx_ctx;
bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
- np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
- PCI_DMA_TODEVICE);
- if (pci_dma_mapping_error(np->pci_dev,
- np->put_tx_ctx->dma)) {
+ np->put_tx_ctx->dma = dma_map_single(&np->pci_dev->dev,
+ skb->data + offset, bcnt,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+ np->put_tx_ctx->dma))) {
/* on DMA mapping error - drop the packet */
dev_kfree_skb_any(skb);
u64_stats_update_begin(&np->swstats_tx_syncp);
@@ -2417,7 +2419,8 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb,
bcnt,
DMA_TO_DEVICE);
- if (dma_mapping_error(&np->pci_dev->dev, np->put_tx_ctx->dma)) {
+ if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+ np->put_tx_ctx->dma))) {
/* Unwind the mapped fragments */
do {
@@ -2810,9 +2813,9 @@ static int nv_rx_process(struct net_device *dev, int limit)
* TODO: check if a prefetch of the first cacheline improves
* the performance.
*/
- pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
- np->get_rx_ctx->dma_len,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma,
+ np->get_rx_ctx->dma_len,
+ DMA_FROM_DEVICE);
skb = np->get_rx_ctx->skb;
np->get_rx_ctx->skb = NULL;
@@ -2916,9 +2919,9 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
* TODO: check if a prefetch of the first cacheline improves
* the performance.
*/
- pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
- np->get_rx_ctx->dma_len,
- PCI_DMA_FROMDEVICE);
+ dma_unmap_single(&np->pci_dev->dev, np->get_rx_ctx->dma,
+ np->get_rx_ctx->dma_len,
+ DMA_FROM_DEVICE);
skb = np->get_rx_ctx->skb;
np->get_rx_ctx->skb = NULL;
@@ -4061,10 +4064,10 @@ static void nv_free_irq(struct net_device *dev)
}
}
-static void nv_do_nic_poll(unsigned long data)
+static void nv_do_nic_poll(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) data;
- struct fe_priv *np = netdev_priv(dev);
+ struct fe_priv *np = from_timer(np, t, nic_poll);
+ struct net_device *dev = np->dev;
u8 __iomem *base = get_hwbase(dev);
u32 mask = 0;
unsigned long flags;
@@ -4172,16 +4175,18 @@ static void nv_do_nic_poll(unsigned long data)
#ifdef CONFIG_NET_POLL_CONTROLLER
static void nv_poll_controller(struct net_device *dev)
{
- nv_do_nic_poll((unsigned long) dev);
+ struct fe_priv *np = netdev_priv(dev);
+
+ nv_do_nic_poll(&np->nic_poll);
}
#endif
-static void nv_do_stats_poll(unsigned long data)
+static void nv_do_stats_poll(struct timer_list *t)
__acquires(&netdev_priv(dev)->hwstats_lock)
__releases(&netdev_priv(dev)->hwstats_lock)
{
- struct net_device *dev = (struct net_device *) data;
- struct fe_priv *np = netdev_priv(dev);
+ struct fe_priv *np = from_timer(np, t, stats_poll);
+ struct net_device *dev = np->dev;
/* If lock is currently taken, the stats are being refreshed
* and hence fresh enough */
@@ -5070,11 +5075,11 @@ static int nv_loopback_test(struct net_device *dev)
ret = 0;
goto out;
}
- test_dma_addr = pci_map_single(np->pci_dev, tx_skb->data,
+ test_dma_addr = dma_map_single(&np->pci_dev->dev, tx_skb->data,
skb_tailroom(tx_skb),
- PCI_DMA_FROMDEVICE);
- if (pci_dma_mapping_error(np->pci_dev,
- test_dma_addr)) {
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&np->pci_dev->dev,
+ test_dma_addr))) {
dev_kfree_skb_any(tx_skb);
goto out;
}
@@ -5129,9 +5134,9 @@ static int nv_loopback_test(struct net_device *dev)
}
}
- pci_unmap_single(np->pci_dev, test_dma_addr,
- (skb_end_pointer(tx_skb) - tx_skb->data),
- PCI_DMA_TODEVICE);
+ dma_unmap_single(&np->pci_dev->dev, test_dma_addr,
+ (skb_end_pointer(tx_skb) - tx_skb->data),
+ DMA_TO_DEVICE);
dev_kfree_skb_any(tx_skb);
out:
/* stop engines */
@@ -5627,10 +5632,9 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
u64_stats_init(&np->swstats_rx_syncp);
u64_stats_init(&np->swstats_tx_syncp);
- setup_timer(&np->oom_kick, nv_do_rx_refill, (unsigned long)dev);
- setup_timer(&np->nic_poll, nv_do_nic_poll, (unsigned long)dev);
- setup_deferrable_timer(&np->stats_poll, nv_do_stats_poll,
- (unsigned long)dev);
+ timer_setup(&np->oom_kick, nv_do_rx_refill, 0);
+ timer_setup(&np->nic_poll, nv_do_nic_poll, 0);
+ timer_setup(&np->stats_poll, nv_do_stats_poll, TIMER_DEFERRABLE);
err = pci_enable_device(pci_dev);
if (err)
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
index 8d710a3b4db0..697e29dd4bd3 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe.h
@@ -613,7 +613,6 @@ struct pch_gbe_privdata {
* @rx_ring: Pointer of Rx descriptor ring structure
* @rx_buffer_len: Receive buffer length
* @tx_queue_len: Transmit queue length
- * @have_msi: PCI MSI mode flag
* @pch_gbe_privdata: PCI Device ID driver_data
*/
@@ -623,6 +622,7 @@ struct pch_gbe_adapter {
atomic_t irq_sem;
struct net_device *netdev;
struct pci_dev *pdev;
+ int irq;
struct net_device *polling_netdev;
struct napi_struct napi;
struct pch_gbe_hw hw;
@@ -637,7 +637,6 @@ struct pch_gbe_adapter {
struct pch_gbe_rx_ring *rx_ring;
unsigned long rx_buffer_len;
unsigned long tx_queue_len;
- bool have_msi;
bool rx_stop_flag;
int hwts_tx_en;
int hwts_rx_en;
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 5ae9681a2da7..457ee80307ea 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -781,11 +781,8 @@ static void pch_gbe_free_irq(struct pch_gbe_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
- free_irq(adapter->pdev->irq, netdev);
- if (adapter->have_msi) {
- pci_disable_msi(adapter->pdev);
- netdev_dbg(netdev, "call pci_disable_msi\n");
- }
+ free_irq(adapter->irq, netdev);
+ pci_free_irq_vectors(adapter->pdev);
}
/**
@@ -799,7 +796,7 @@ static void pch_gbe_irq_disable(struct pch_gbe_adapter *adapter)
atomic_inc(&adapter->irq_sem);
iowrite32(0, &hw->reg->INT_EN);
ioread32(&hw->reg->INT_ST);
- synchronize_irq(adapter->pdev->irq);
+ synchronize_irq(adapter->irq);
netdev_dbg(adapter->netdev, "INT_EN reg : 0x%08x\n",
ioread32(&hw->reg->INT_EN));
@@ -1903,30 +1900,23 @@ static int pch_gbe_request_irq(struct pch_gbe_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
int err;
- int flags;
- flags = IRQF_SHARED;
- adapter->have_msi = false;
- err = pci_enable_msi(adapter->pdev);
- netdev_dbg(netdev, "call pci_enable_msi\n");
- if (err) {
- netdev_dbg(netdev, "call pci_enable_msi - Error: %d\n", err);
- } else {
- flags = 0;
- adapter->have_msi = true;
- }
- err = request_irq(adapter->pdev->irq, &pch_gbe_intr,
- flags, netdev->name, netdev);
+ err = pci_alloc_irq_vectors(adapter->pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (err < 0)
+ return err;
+
+ adapter->irq = pci_irq_vector(adapter->pdev, 0);
+
+ err = request_irq(adapter->irq, &pch_gbe_intr, IRQF_SHARED,
+ netdev->name, netdev);
if (err)
netdev_err(netdev, "Unable to allocate interrupt Error: %d\n",
err);
- netdev_dbg(netdev,
- "adapter->have_msi : %d flags : 0x%04x return : 0x%04x\n",
- adapter->have_msi, flags, err);
+ netdev_dbg(netdev, "have_msi : %d return : 0x%04x\n",
+ pci_dev_msi_enabled(adapter->pdev), err);
return err;
}
-
/**
* pch_gbe_up - Up GbE network device
* @adapter: Board private structure
@@ -2399,9 +2389,9 @@ static void pch_gbe_netpoll(struct net_device *netdev)
{
struct pch_gbe_adapter *adapter = netdev_priv(netdev);
- disable_irq(adapter->pdev->irq);
- pch_gbe_intr(adapter->pdev->irq, netdev);
- enable_irq(adapter->pdev->irq);
+ disable_irq(adapter->irq);
+ pch_gbe_intr(adapter->irq, netdev);
+ enable_irq(adapter->irq);
}
#endif
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index 482b85e4d665..77bc7cca8980 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -979,10 +979,8 @@ static int hamachi_open(struct net_device *dev)
dev->name, readw(ioaddr + RxStatus), readw(ioaddr + TxStatus));
}
/* Set the timer to check for link beat. */
- init_timer(&hmp->timer);
+ setup_timer(&hmp->timer, hamachi_timer, (unsigned long)dev);
hmp->timer.expires = RUN_AT((24*HZ)/10); /* 2.4 sec. */
- hmp->timer.data = (unsigned long)dev;
- hmp->timer.function = hamachi_timer; /* timer handler */
add_timer(&hmp->timer);
return 0;
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index fa7770da6ef8..33c241f52a71 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -632,10 +632,8 @@ static int yellowfin_open(struct net_device *dev)
}
/* Set the timer to check for link beat. */
- init_timer(&yp->timer);
+ setup_timer(&yp->timer, yellowfin_timer, (unsigned long)dev);
yp->timer.expires = jiffies + 3*HZ;
- yp->timer.data = (unsigned long)dev;
- yp->timer.function = yellowfin_timer; /* timer handler */
add_timer(&yp->timer);
out:
return rc;
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index c2e24afbaeb2..26ddf092e3ec 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -117,4 +117,7 @@ config QED_ISCSI
config QED_FCOE
bool
+config QED_OOO
+ bool
+
endif # NET_VENDOR_QLOGIC
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 82dd47068e18..c3c599950574 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -6,5 +6,6 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
qed-$(CONFIG_QED_LL2) += qed_ll2.o
qed-$(CONFIG_QED_RDMA) += qed_roce.o qed_rdma.o qed_iwarp.o
-qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o
+qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o
qed-$(CONFIG_QED_FCOE) += qed_fcoe.o
+qed-$(CONFIG_QED_OOO) += qed_ooo.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index af106be8cc08..afd07ad91631 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2069,6 +2069,12 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
num_srqs = min_t(u32, 32 * 1024, p_params->num_srqs);
+ if (p_hwfn->mcp_info->func_info.protocol == QED_PCI_ETH_RDMA) {
+ DP_NOTICE(p_hwfn,
+ "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n");
+ p_hwfn->hw_info.personality = QED_PCI_ETH_ROCE;
+ }
+
switch (p_hwfn->hw_info.personality) {
case QED_PCI_ETH_IWARP:
/* Each QP requires one connection */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
index 8f6ccc0c39e5..6e15d3c10ebf 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c
@@ -2308,7 +2308,7 @@ static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app)
DP_VERBOSE(hwfn, QED_MSG_DCB, "selector = %d protocol = %d pri = %d\n",
app->selector, app->protocol, app->priority);
- if (app->priority < 0 || app->priority >= QED_MAX_PFC_PRIORITIES) {
+ if (app->priority >= QED_MAX_PFC_PRIORITIES) {
DP_INFO(hwfn, "Invalid priority %d\n", app->priority);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index 9d989c96278c..409041eab189 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -41,6 +41,7 @@
#include "qed_rdma.h"
#include "qed_reg_addr.h"
#include "qed_sp.h"
+#include "qed_ooo.h"
#define QED_IWARP_ORD_DEFAULT 32
#define QED_IWARP_IRD_DEFAULT 32
@@ -119,6 +120,13 @@ static void qed_iwarp_cid_cleaned(struct qed_hwfn *p_hwfn, u32 cid)
spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
}
+void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
+ struct iwarp_init_func_params *p_ramrod)
+{
+ p_ramrod->ll2_ooo_q_index = RESC_START(p_hwfn, QED_LL2_QUEUE) +
+ p_hwfn->p_rdma_info->iwarp.ll2_ooo_handle;
+}
+
static int qed_iwarp_alloc_cid(struct qed_hwfn *p_hwfn, u32 *cid)
{
int rc;
@@ -1402,12 +1410,22 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn)
INIT_LIST_HEAD(&p_hwfn->p_rdma_info->iwarp.ep_free_list);
spin_lock_init(&p_hwfn->p_rdma_info->iwarp.iw_lock);
- return qed_iwarp_prealloc_ep(p_hwfn, true);
+ rc = qed_iwarp_prealloc_ep(p_hwfn, true);
+ if (rc)
+ return rc;
+
+ return qed_ooo_alloc(p_hwfn);
}
void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn)
{
+ struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+
+ qed_ooo_free(p_hwfn);
qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tcp_cid_map, 1);
+ kfree(iwarp_info->mpa_bufs);
+ kfree(iwarp_info->partial_fpdus);
+ kfree(iwarp_info->mpa_intermediate_buf);
}
int qed_iwarp_accept(void *rdma_cxt, struct qed_iwarp_accept_in *iparams)
@@ -1705,6 +1723,569 @@ qed_iwarp_parse_rx_pkt(struct qed_hwfn *p_hwfn,
return 0;
}
+static struct qed_iwarp_fpdu *qed_iwarp_get_curr_fpdu(struct qed_hwfn *p_hwfn,
+ u16 cid)
+{
+ struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+ struct qed_iwarp_fpdu *partial_fpdu;
+ u32 idx;
+
+ idx = cid - qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_IWARP);
+ if (idx >= iwarp_info->max_num_partial_fpdus) {
+ DP_ERR(p_hwfn, "Invalid cid %x max_num_partial_fpdus=%x\n", cid,
+ iwarp_info->max_num_partial_fpdus);
+ return NULL;
+ }
+
+ partial_fpdu = &iwarp_info->partial_fpdus[idx];
+
+ return partial_fpdu;
+}
+
+enum qed_iwarp_mpa_pkt_type {
+ QED_IWARP_MPA_PKT_PACKED,
+ QED_IWARP_MPA_PKT_PARTIAL,
+ QED_IWARP_MPA_PKT_UNALIGNED
+};
+
+#define QED_IWARP_INVALID_FPDU_LENGTH 0xffff
+#define QED_IWARP_MPA_FPDU_LENGTH_SIZE (2)
+#define QED_IWARP_MPA_CRC32_DIGEST_SIZE (4)
+
+/* Pad to multiple of 4 */
+#define QED_IWARP_PDU_DATA_LEN_WITH_PAD(data_len) ALIGN(data_len, 4)
+#define QED_IWARP_FPDU_LEN_WITH_PAD(_mpa_len) \
+ (QED_IWARP_PDU_DATA_LEN_WITH_PAD((_mpa_len) + \
+ QED_IWARP_MPA_FPDU_LENGTH_SIZE) + \
+ QED_IWARP_MPA_CRC32_DIGEST_SIZE)
+
+/* fpdu can be fragmented over maximum 3 bds: header, partial mpa, unaligned */
+#define QED_IWARP_MAX_BDS_PER_FPDU 3
+
+char *pkt_type_str[] = {
+ "QED_IWARP_MPA_PKT_PACKED",
+ "QED_IWARP_MPA_PKT_PARTIAL",
+ "QED_IWARP_MPA_PKT_UNALIGNED"
+};
+
+static int
+qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn,
+ struct qed_iwarp_fpdu *fpdu,
+ struct qed_iwarp_ll2_buff *buf);
+
+static enum qed_iwarp_mpa_pkt_type
+qed_iwarp_mpa_classify(struct qed_hwfn *p_hwfn,
+ struct qed_iwarp_fpdu *fpdu,
+ u16 tcp_payload_len, u8 *mpa_data)
+{
+ enum qed_iwarp_mpa_pkt_type pkt_type;
+ u16 mpa_len;
+
+ if (fpdu->incomplete_bytes) {
+ pkt_type = QED_IWARP_MPA_PKT_UNALIGNED;
+ goto out;
+ }
+
+ /* special case of one byte remaining...
+ * lower byte will be read next packet
+ */
+ if (tcp_payload_len == 1) {
+ fpdu->fpdu_length = *mpa_data << BITS_PER_BYTE;
+ pkt_type = QED_IWARP_MPA_PKT_PARTIAL;
+ goto out;
+ }
+
+ mpa_len = ntohs(*((u16 *)(mpa_data)));
+ fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
+
+ if (fpdu->fpdu_length <= tcp_payload_len)
+ pkt_type = QED_IWARP_MPA_PKT_PACKED;
+ else
+ pkt_type = QED_IWARP_MPA_PKT_PARTIAL;
+
+out:
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "MPA_ALIGN: %s: fpdu_length=0x%x tcp_payload_len:0x%x\n",
+ pkt_type_str[pkt_type], fpdu->fpdu_length, tcp_payload_len);
+
+ return pkt_type;
+}
+
+static void
+qed_iwarp_init_fpdu(struct qed_iwarp_ll2_buff *buf,
+ struct qed_iwarp_fpdu *fpdu,
+ struct unaligned_opaque_data *pkt_data,
+ u16 tcp_payload_size, u8 placement_offset)
+{
+ fpdu->mpa_buf = buf;
+ fpdu->pkt_hdr = buf->data_phys_addr + placement_offset;
+ fpdu->pkt_hdr_size = pkt_data->tcp_payload_offset;
+ fpdu->mpa_frag = buf->data_phys_addr + pkt_data->first_mpa_offset;
+ fpdu->mpa_frag_virt = (u8 *)(buf->data) + pkt_data->first_mpa_offset;
+
+ if (tcp_payload_size == 1)
+ fpdu->incomplete_bytes = QED_IWARP_INVALID_FPDU_LENGTH;
+ else if (tcp_payload_size < fpdu->fpdu_length)
+ fpdu->incomplete_bytes = fpdu->fpdu_length - tcp_payload_size;
+ else
+ fpdu->incomplete_bytes = 0; /* complete fpdu */
+
+ fpdu->mpa_frag_len = fpdu->fpdu_length - fpdu->incomplete_bytes;
+}
+
+static int
+qed_iwarp_cp_pkt(struct qed_hwfn *p_hwfn,
+ struct qed_iwarp_fpdu *fpdu,
+ struct unaligned_opaque_data *pkt_data,
+ struct qed_iwarp_ll2_buff *buf, u16 tcp_payload_size)
+{
+ u8 *tmp_buf = p_hwfn->p_rdma_info->iwarp.mpa_intermediate_buf;
+ int rc;
+
+ /* need to copy the data from the partial packet stored in fpdu
+ * to the new buf, for this we also need to move the data currently
+ * placed on the buf. The assumption is that the buffer is big enough
+ * since fpdu_length <= mss, we use an intermediate buffer since
+ * we may need to copy the new data to an overlapping location
+ */
+ if ((fpdu->mpa_frag_len + tcp_payload_size) > (u16)buf->buff_size) {
+ DP_ERR(p_hwfn,
+ "MPA ALIGN: Unexpected: buffer is not large enough for split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n",
+ buf->buff_size, fpdu->mpa_frag_len,
+ tcp_payload_size, fpdu->incomplete_bytes);
+ return -EINVAL;
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "MPA ALIGN Copying fpdu: [%p, %d] [%p, %d]\n",
+ fpdu->mpa_frag_virt, fpdu->mpa_frag_len,
+ (u8 *)(buf->data) + pkt_data->first_mpa_offset,
+ tcp_payload_size);
+
+ memcpy(tmp_buf, fpdu->mpa_frag_virt, fpdu->mpa_frag_len);
+ memcpy(tmp_buf + fpdu->mpa_frag_len,
+ (u8 *)(buf->data) + pkt_data->first_mpa_offset,
+ tcp_payload_size);
+
+ rc = qed_iwarp_recycle_pkt(p_hwfn, fpdu, fpdu->mpa_buf);
+ if (rc)
+ return rc;
+
+ /* If we managed to post the buffer copy the data to the new buffer
+ * o/w this will occur in the next round...
+ */
+ memcpy((u8 *)(buf->data), tmp_buf,
+ fpdu->mpa_frag_len + tcp_payload_size);
+
+ fpdu->mpa_buf = buf;
+ /* fpdu->pkt_hdr remains as is */
+ /* fpdu->mpa_frag is overridden with new buf */
+ fpdu->mpa_frag = buf->data_phys_addr;
+ fpdu->mpa_frag_virt = buf->data;
+ fpdu->mpa_frag_len += tcp_payload_size;
+
+ fpdu->incomplete_bytes -= tcp_payload_size;
+
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "MPA ALIGN: split fpdu buff_size = %d mpa_frag_len = %d, tcp_payload_size = %d, incomplete_bytes = %d\n",
+ buf->buff_size, fpdu->mpa_frag_len, tcp_payload_size,
+ fpdu->incomplete_bytes);
+
+ return 0;
+}
+
+static void
+qed_iwarp_update_fpdu_length(struct qed_hwfn *p_hwfn,
+ struct qed_iwarp_fpdu *fpdu, u8 *mpa_data)
+{
+ u16 mpa_len;
+
+ /* Update incomplete packets if needed */
+ if (fpdu->incomplete_bytes == QED_IWARP_INVALID_FPDU_LENGTH) {
+ /* Missing lower byte is now available */
+ mpa_len = fpdu->fpdu_length | *mpa_data;
+ fpdu->fpdu_length = QED_IWARP_FPDU_LEN_WITH_PAD(mpa_len);
+ fpdu->mpa_frag_len = fpdu->fpdu_length;
+ /* one byte of hdr */
+ fpdu->incomplete_bytes = fpdu->fpdu_length - 1;
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "MPA_ALIGN: Partial header mpa_len=%x fpdu_length=%x incomplete_bytes=%x\n",
+ mpa_len, fpdu->fpdu_length, fpdu->incomplete_bytes);
+ }
+}
+
+#define QED_IWARP_IS_RIGHT_EDGE(_curr_pkt) \
+ (GET_FIELD((_curr_pkt)->flags, \
+ UNALIGNED_OPAQUE_DATA_PKT_REACHED_WIN_RIGHT_EDGE))
+
+/* This function is used to recycle a buffer using the ll2 drop option. It
+ * uses the mechanism to ensure that all buffers posted to tx before this one
+ * were completed. The buffer sent here will be sent as a cookie in the tx
+ * completion function and can then be reposted to rx chain when done. The flow
+ * that requires this is the flow where a FPDU splits over more than 3 tcp
+ * segments. In this case the driver needs to re-post a rx buffer instead of
+ * the one received, but driver can't simply repost a buffer it copied from
+ * as there is a case where the buffer was originally a packed FPDU, and is
+ * partially posted to FW. Driver needs to ensure FW is done with it.
+ */
+static int
+qed_iwarp_recycle_pkt(struct qed_hwfn *p_hwfn,
+ struct qed_iwarp_fpdu *fpdu,
+ struct qed_iwarp_ll2_buff *buf)
+{
+ struct qed_ll2_tx_pkt_info tx_pkt;
+ u8 ll2_handle;
+ int rc;
+
+ memset(&tx_pkt, 0, sizeof(tx_pkt));
+ tx_pkt.num_of_bds = 1;
+ tx_pkt.tx_dest = QED_LL2_TX_DEST_DROP;
+ tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2;
+ tx_pkt.first_frag = fpdu->pkt_hdr;
+ tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
+ buf->piggy_buf = NULL;
+ tx_pkt.cookie = buf;
+
+ ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;
+
+ rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
+ if (rc)
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Can't drop packet rc=%d\n", rc);
+
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "MPA_ALIGN: send drop tx packet [%lx, 0x%x], buf=%p, rc=%d\n",
+ (unsigned long int)tx_pkt.first_frag,
+ tx_pkt.first_frag_len, buf, rc);
+
+ return rc;
+}
+
+static int
+qed_iwarp_win_right_edge(struct qed_hwfn *p_hwfn, struct qed_iwarp_fpdu *fpdu)
+{
+ struct qed_ll2_tx_pkt_info tx_pkt;
+ u8 ll2_handle;
+ int rc;
+
+ memset(&tx_pkt, 0, sizeof(tx_pkt));
+ tx_pkt.num_of_bds = 1;
+ tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+ tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2;
+
+ tx_pkt.first_frag = fpdu->pkt_hdr;
+ tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
+ tx_pkt.enable_ip_cksum = true;
+ tx_pkt.enable_l4_cksum = true;
+ tx_pkt.calc_ip_len = true;
+ /* vlan overload with enum iwarp_ll2_tx_queues */
+ tx_pkt.vlan = IWARP_LL2_ALIGNED_RIGHT_TRIMMED_TX_QUEUE;
+
+ ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;
+
+ rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
+ if (rc)
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Can't send right edge rc=%d\n", rc);
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "MPA_ALIGN: Sent right edge FPDU num_bds=%d [%lx, 0x%x], rc=%d\n",
+ tx_pkt.num_of_bds,
+ (unsigned long int)tx_pkt.first_frag,
+ tx_pkt.first_frag_len, rc);
+
+ return rc;
+}
+
+static int
+qed_iwarp_send_fpdu(struct qed_hwfn *p_hwfn,
+ struct qed_iwarp_fpdu *fpdu,
+ struct unaligned_opaque_data *curr_pkt,
+ struct qed_iwarp_ll2_buff *buf,
+ u16 tcp_payload_size, enum qed_iwarp_mpa_pkt_type pkt_type)
+{
+ struct qed_ll2_tx_pkt_info tx_pkt;
+ u8 ll2_handle;
+ int rc;
+
+ memset(&tx_pkt, 0, sizeof(tx_pkt));
+
+ /* An unaligned packet means it's split over two tcp segments. So the
+ * complete packet requires 3 bds, one for the header, one for the
+ * part of the fpdu of the first tcp segment, and the last fragment
+ * will point to the remainder of the fpdu. A packed pdu, requires only
+ * two bds, one for the header and one for the data.
+ */
+ tx_pkt.num_of_bds = (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED) ? 3 : 2;
+ tx_pkt.tx_dest = QED_LL2_TX_DEST_LB;
+ tx_pkt.l4_hdr_offset_w = fpdu->pkt_hdr_size >> 2; /* offset in words */
+
+ /* Send the mpa_buf only with the last fpdu (in case of packed) */
+ if (pkt_type == QED_IWARP_MPA_PKT_UNALIGNED ||
+ tcp_payload_size <= fpdu->fpdu_length)
+ tx_pkt.cookie = fpdu->mpa_buf;
+
+ tx_pkt.first_frag = fpdu->pkt_hdr;
+ tx_pkt.first_frag_len = fpdu->pkt_hdr_size;
+ tx_pkt.enable_ip_cksum = true;
+ tx_pkt.enable_l4_cksum = true;
+ tx_pkt.calc_ip_len = true;
+ /* vlan overload with enum iwarp_ll2_tx_queues */
+ tx_pkt.vlan = IWARP_LL2_ALIGNED_TX_QUEUE;
+
+ /* special case of unaligned packet and not packed, need to send
+ * both buffers as cookie to release.
+ */
+ if (tcp_payload_size == fpdu->incomplete_bytes)
+ fpdu->mpa_buf->piggy_buf = buf;
+
+ ll2_handle = p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle;
+
+ /* Set first fragment to header */
+ rc = qed_ll2_prepare_tx_packet(p_hwfn, ll2_handle, &tx_pkt, true);
+ if (rc)
+ goto out;
+
+ /* Set second fragment to first part of packet */
+ rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn, ll2_handle,
+ fpdu->mpa_frag,
+ fpdu->mpa_frag_len);
+ if (rc)
+ goto out;
+
+ if (!fpdu->incomplete_bytes)
+ goto out;
+
+ /* Set third fragment to second part of the packet */
+ rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn,
+ ll2_handle,
+ buf->data_phys_addr +
+ curr_pkt->first_mpa_offset,
+ fpdu->incomplete_bytes);
+out:
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "MPA_ALIGN: Sent FPDU num_bds=%d first_frag_len=%x, mpa_frag_len=0x%x, incomplete_bytes:0x%x rc=%d\n",
+ tx_pkt.num_of_bds,
+ tx_pkt.first_frag_len,
+ fpdu->mpa_frag_len,
+ fpdu->incomplete_bytes, rc);
+
+ return rc;
+}
+
+static void
+qed_iwarp_mpa_get_data(struct qed_hwfn *p_hwfn,
+ struct unaligned_opaque_data *curr_pkt,
+ u32 opaque_data0, u32 opaque_data1)
+{
+ u64 opaque_data;
+
+ opaque_data = HILO_64(opaque_data1, opaque_data0);
+ *curr_pkt = *((struct unaligned_opaque_data *)&opaque_data);
+
+ curr_pkt->first_mpa_offset = curr_pkt->tcp_payload_offset +
+ le16_to_cpu(curr_pkt->first_mpa_offset);
+ curr_pkt->cid = le32_to_cpu(curr_pkt->cid);
+}
+
+/* This function is called when an unaligned or incomplete MPA packet arrives
+ * driver needs to align the packet, perhaps using previous data and send
+ * it down to FW once it is aligned.
+ */
+static int
+qed_iwarp_process_mpa_pkt(struct qed_hwfn *p_hwfn,
+ struct qed_iwarp_ll2_mpa_buf *mpa_buf)
+{
+ struct unaligned_opaque_data *curr_pkt = &mpa_buf->data;
+ struct qed_iwarp_ll2_buff *buf = mpa_buf->ll2_buf;
+ enum qed_iwarp_mpa_pkt_type pkt_type;
+ struct qed_iwarp_fpdu *fpdu;
+ int rc = -EINVAL;
+ u8 *mpa_data;
+
+ fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, curr_pkt->cid & 0xffff);
+ if (!fpdu) { /* something corrupt with cid, post rx back */
+ DP_ERR(p_hwfn, "Invalid cid, drop and post back to rx cid=%x\n",
+ curr_pkt->cid);
+ goto err;
+ }
+
+ do {
+ mpa_data = ((u8 *)(buf->data) + curr_pkt->first_mpa_offset);
+
+ pkt_type = qed_iwarp_mpa_classify(p_hwfn, fpdu,
+ mpa_buf->tcp_payload_len,
+ mpa_data);
+
+ switch (pkt_type) {
+ case QED_IWARP_MPA_PKT_PARTIAL:
+ qed_iwarp_init_fpdu(buf, fpdu,
+ curr_pkt,
+ mpa_buf->tcp_payload_len,
+ mpa_buf->placement_offset);
+
+ if (!QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) {
+ mpa_buf->tcp_payload_len = 0;
+ break;
+ }
+
+ rc = qed_iwarp_win_right_edge(p_hwfn, fpdu);
+
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Can't send FPDU:reset rc=%d\n", rc);
+ memset(fpdu, 0, sizeof(*fpdu));
+ break;
+ }
+
+ mpa_buf->tcp_payload_len = 0;
+ break;
+ case QED_IWARP_MPA_PKT_PACKED:
+ qed_iwarp_init_fpdu(buf, fpdu,
+ curr_pkt,
+ mpa_buf->tcp_payload_len,
+ mpa_buf->placement_offset);
+
+ rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf,
+ mpa_buf->tcp_payload_len,
+ pkt_type);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Can't send FPDU:reset rc=%d\n", rc);
+ memset(fpdu, 0, sizeof(*fpdu));
+ break;
+ }
+
+ mpa_buf->tcp_payload_len -= fpdu->fpdu_length;
+ curr_pkt->first_mpa_offset += fpdu->fpdu_length;
+ break;
+ case QED_IWARP_MPA_PKT_UNALIGNED:
+ qed_iwarp_update_fpdu_length(p_hwfn, fpdu, mpa_data);
+ if (mpa_buf->tcp_payload_len < fpdu->incomplete_bytes) {
+ /* special handling of fpdu split over more
+ * than 2 segments
+ */
+ if (QED_IWARP_IS_RIGHT_EDGE(curr_pkt)) {
+ rc = qed_iwarp_win_right_edge(p_hwfn,
+ fpdu);
+ /* packet will be re-processed later */
+ if (rc)
+ return rc;
+ }
+
+ rc = qed_iwarp_cp_pkt(p_hwfn, fpdu, curr_pkt,
+ buf,
+ mpa_buf->tcp_payload_len);
+ if (rc) /* packet will be re-processed later */
+ return rc;
+
+ mpa_buf->tcp_payload_len = 0;
+ break;
+ }
+
+ rc = qed_iwarp_send_fpdu(p_hwfn, fpdu, curr_pkt, buf,
+ mpa_buf->tcp_payload_len,
+ pkt_type);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Can't send FPDU:delay rc=%d\n", rc);
+ /* don't reset fpdu -> we need it for next
+ * classify
+ */
+ break;
+ }
+
+ mpa_buf->tcp_payload_len -= fpdu->incomplete_bytes;
+ curr_pkt->first_mpa_offset += fpdu->incomplete_bytes;
+ /* The framed PDU was sent - no more incomplete bytes */
+ fpdu->incomplete_bytes = 0;
+ break;
+ }
+ } while (mpa_buf->tcp_payload_len && !rc);
+
+ return rc;
+
+err:
+ qed_iwarp_ll2_post_rx(p_hwfn,
+ buf,
+ p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle);
+ return rc;
+}
+
+static void qed_iwarp_process_pending_pkts(struct qed_hwfn *p_hwfn)
+{
+ struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+ struct qed_iwarp_ll2_mpa_buf *mpa_buf = NULL;
+ int rc;
+
+ while (!list_empty(&iwarp_info->mpa_buf_pending_list)) {
+ mpa_buf = list_first_entry(&iwarp_info->mpa_buf_pending_list,
+ struct qed_iwarp_ll2_mpa_buf,
+ list_entry);
+
+ rc = qed_iwarp_process_mpa_pkt(p_hwfn, mpa_buf);
+
+ /* busy means break and continue processing later, don't
+ * remove the buf from the pending list.
+ */
+ if (rc == -EBUSY)
+ break;
+
+ list_del(&mpa_buf->list_entry);
+ list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_list);
+
+ if (rc) { /* different error, don't continue */
+ DP_NOTICE(p_hwfn, "process pkts failed rc=%d\n", rc);
+ break;
+ }
+ }
+}
+
+static void
+qed_iwarp_ll2_comp_mpa_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
+{
+ struct qed_iwarp_ll2_mpa_buf *mpa_buf;
+ struct qed_iwarp_info *iwarp_info;
+ struct qed_hwfn *p_hwfn = cxt;
+
+ iwarp_info = &p_hwfn->p_rdma_info->iwarp;
+ mpa_buf = list_first_entry(&iwarp_info->mpa_buf_list,
+ struct qed_iwarp_ll2_mpa_buf, list_entry);
+ if (!mpa_buf) {
+ DP_ERR(p_hwfn, "No free mpa buf\n");
+ goto err;
+ }
+
+ list_del(&mpa_buf->list_entry);
+ qed_iwarp_mpa_get_data(p_hwfn, &mpa_buf->data,
+ data->opaque_data_0, data->opaque_data_1);
+
+ DP_VERBOSE(p_hwfn,
+ QED_MSG_RDMA,
+ "LL2 MPA CompRx payload_len:0x%x\tfirst_mpa_offset:0x%x\ttcp_payload_offset:0x%x\tflags:0x%x\tcid:0x%x\n",
+ data->length.packet_length, mpa_buf->data.first_mpa_offset,
+ mpa_buf->data.tcp_payload_offset, mpa_buf->data.flags,
+ mpa_buf->data.cid);
+
+ mpa_buf->ll2_buf = data->cookie;
+ mpa_buf->tcp_payload_len = data->length.packet_length -
+ mpa_buf->data.first_mpa_offset;
+ mpa_buf->data.first_mpa_offset += data->u.placement_offset;
+ mpa_buf->placement_offset = data->u.placement_offset;
+
+ list_add_tail(&mpa_buf->list_entry, &iwarp_info->mpa_buf_pending_list);
+
+ qed_iwarp_process_pending_pkts(p_hwfn);
+ return;
+err:
+ qed_iwarp_ll2_post_rx(p_hwfn, data->cookie,
+ iwarp_info->ll2_mpa_handle);
+}
+
static void
qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
{
@@ -1725,6 +2306,14 @@ qed_iwarp_ll2_comp_syn_pkt(void *cxt, struct qed_ll2_comp_rx_data *data)
memset(&cm_info, 0, sizeof(cm_info));
ll2_syn_handle = p_hwfn->p_rdma_info->iwarp.ll2_syn_handle;
+
+ /* Check if packet was received with errors... */
+ if (data->err_flags) {
+ DP_NOTICE(p_hwfn, "Error received on SYN packet: 0x%x\n",
+ data->err_flags);
+ goto err;
+ }
+
if (GET_FIELD(data->parse_flags,
PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED) &&
GET_FIELD(data->parse_flags, PARSING_AND_ERR_FLAGS_L4CHKSMERROR)) {
@@ -1839,10 +2428,25 @@ static void qed_iwarp_ll2_comp_tx_pkt(void *cxt, u8 connection_handle,
bool b_last_fragment, bool b_last_packet)
{
struct qed_iwarp_ll2_buff *buffer = cookie;
+ struct qed_iwarp_ll2_buff *piggy;
struct qed_hwfn *p_hwfn = cxt;
+ if (!buffer) /* can happen in packed mpa unaligned... */
+ return;
+
/* this was originally an rx packet, post it back */
+ piggy = buffer->piggy_buf;
+ if (piggy) {
+ buffer->piggy_buf = NULL;
+ qed_iwarp_ll2_post_rx(p_hwfn, piggy, connection_handle);
+ }
+
qed_iwarp_ll2_post_rx(p_hwfn, buffer, connection_handle);
+
+ if (connection_handle == p_hwfn->p_rdma_info->iwarp.ll2_mpa_handle)
+ qed_iwarp_process_pending_pkts(p_hwfn);
+
+ return;
}
static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
@@ -1855,12 +2459,44 @@ static void qed_iwarp_ll2_rel_tx_pkt(void *cxt, u8 connection_handle,
if (!buffer)
return;
+ if (buffer->piggy_buf) {
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ buffer->piggy_buf->buff_size,
+ buffer->piggy_buf->data,
+ buffer->piggy_buf->data_phys_addr);
+
+ kfree(buffer->piggy_buf);
+ }
+
dma_free_coherent(&p_hwfn->cdev->pdev->dev, buffer->buff_size,
buffer->data, buffer->data_phys_addr);
kfree(buffer);
}
+/* The only slowpath for iwarp ll2 is unalign flush. When this completion
+ * is received, need to reset the FPDU.
+ */
+void
+qed_iwarp_ll2_slowpath(void *cxt,
+ u8 connection_handle,
+ u32 opaque_data_0, u32 opaque_data_1)
+{
+ struct unaligned_opaque_data unalign_data;
+ struct qed_hwfn *p_hwfn = cxt;
+ struct qed_iwarp_fpdu *fpdu;
+
+ qed_iwarp_mpa_get_data(p_hwfn, &unalign_data,
+ opaque_data_0, opaque_data_1);
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "(0x%x) Flush fpdu\n",
+ unalign_data.cid);
+
+ fpdu = qed_iwarp_get_curr_fpdu(p_hwfn, (u16)unalign_data.cid);
+ if (fpdu)
+ memset(fpdu, 0, sizeof(*fpdu));
+}
+
static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
@@ -1876,6 +2512,26 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
}
+ if (iwarp_info->ll2_ooo_handle != QED_IWARP_HANDLE_INVAL) {
+ rc = qed_ll2_terminate_connection(p_hwfn,
+ iwarp_info->ll2_ooo_handle);
+ if (rc)
+ DP_INFO(p_hwfn, "Failed to terminate ooo connection\n");
+
+ qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_ooo_handle);
+ iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL;
+ }
+
+ if (iwarp_info->ll2_mpa_handle != QED_IWARP_HANDLE_INVAL) {
+ rc = qed_ll2_terminate_connection(p_hwfn,
+ iwarp_info->ll2_mpa_handle);
+ if (rc)
+ DP_INFO(p_hwfn, "Failed to terminate mpa connection\n");
+
+ qed_ll2_release_connection(p_hwfn, iwarp_info->ll2_mpa_handle);
+ iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
+ }
+
qed_llh_remove_mac_filter(p_hwfn,
p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
return rc;
@@ -1927,10 +2583,15 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
struct qed_iwarp_info *iwarp_info;
struct qed_ll2_acquire_data data;
struct qed_ll2_cbs cbs;
+ u32 mpa_buff_size;
+ u16 n_ooo_bufs;
int rc = 0;
+ int i;
iwarp_info = &p_hwfn->p_rdma_info->iwarp;
iwarp_info->ll2_syn_handle = QED_IWARP_HANDLE_INVAL;
+ iwarp_info->ll2_ooo_handle = QED_IWARP_HANDLE_INVAL;
+ iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
iwarp_info->max_mtu = params->max_mtu;
@@ -1978,6 +2639,91 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
if (rc)
goto err;
+ /* Start OOO connection */
+ data.input.conn_type = QED_LL2_TYPE_OOO;
+ data.input.mtu = params->max_mtu;
+
+ n_ooo_bufs = (QED_IWARP_MAX_OOO * QED_IWARP_RCV_WND_SIZE_DEF) /
+ iwarp_info->max_mtu;
+ n_ooo_bufs = min_t(u32, n_ooo_bufs, QED_IWARP_LL2_OOO_MAX_RX_SIZE);
+
+ data.input.rx_num_desc = n_ooo_bufs;
+ data.input.rx_num_ooo_buffers = n_ooo_bufs;
+
+ data.input.tx_max_bds_per_packet = 1; /* will never be fragmented */
+ data.input.tx_num_desc = QED_IWARP_LL2_OOO_DEF_TX_SIZE;
+ data.p_connection_handle = &iwarp_info->ll2_ooo_handle;
+
+ rc = qed_ll2_acquire_connection(p_hwfn, &data);
+ if (rc)
+ goto err;
+
+ rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_ooo_handle);
+ if (rc)
+ goto err;
+
+ /* Start Unaligned MPA connection */
+ cbs.rx_comp_cb = qed_iwarp_ll2_comp_mpa_pkt;
+ cbs.slowpath_cb = qed_iwarp_ll2_slowpath;
+
+ memset(&data, 0, sizeof(data));
+ data.input.conn_type = QED_LL2_TYPE_IWARP;
+ data.input.mtu = params->max_mtu;
+ /* FW requires that once a packet arrives OOO, it must have at
+ * least 2 rx buffers available on the unaligned connection
+ * for handling the case that it is a partial fpdu.
+ */
+ data.input.rx_num_desc = n_ooo_bufs * 2;
+ data.input.tx_num_desc = data.input.rx_num_desc;
+ data.input.tx_max_bds_per_packet = QED_IWARP_MAX_BDS_PER_FPDU;
+ data.p_connection_handle = &iwarp_info->ll2_mpa_handle;
+ data.input.secondary_queue = true;
+ data.cbs = &cbs;
+
+ rc = qed_ll2_acquire_connection(p_hwfn, &data);
+ if (rc)
+ goto err;
+
+ rc = qed_ll2_establish_connection(p_hwfn, iwarp_info->ll2_mpa_handle);
+ if (rc)
+ goto err;
+
+ mpa_buff_size = QED_IWARP_MAX_BUF_SIZE(params->max_mtu);
+ rc = qed_iwarp_ll2_alloc_buffers(p_hwfn,
+ data.input.rx_num_desc,
+ mpa_buff_size,
+ iwarp_info->ll2_mpa_handle);
+ if (rc)
+ goto err;
+
+ iwarp_info->partial_fpdus = kcalloc((u16)p_hwfn->p_rdma_info->num_qps,
+ sizeof(*iwarp_info->partial_fpdus),
+ GFP_KERNEL);
+ if (!iwarp_info->partial_fpdus)
+ goto err;
+
+ iwarp_info->max_num_partial_fpdus = (u16)p_hwfn->p_rdma_info->num_qps;
+
+ iwarp_info->mpa_intermediate_buf = kzalloc(mpa_buff_size, GFP_KERNEL);
+ if (!iwarp_info->mpa_intermediate_buf)
+ goto err;
+
+ /* The mpa_bufs array serves for pending RX packets received on the
+ * mpa ll2 that don't have place on the tx ring and require later
+ * processing. We can't fail on allocation of such a struct therefore
+ * we allocate enough to take care of all rx packets
+ */
+ iwarp_info->mpa_bufs = kcalloc(data.input.rx_num_desc,
+ sizeof(*iwarp_info->mpa_bufs),
+ GFP_KERNEL);
+ if (!iwarp_info->mpa_bufs)
+ goto err;
+
+ INIT_LIST_HEAD(&iwarp_info->mpa_buf_pending_list);
+ INIT_LIST_HEAD(&iwarp_info->mpa_buf_list);
+ for (i = 0; i < data.input.rx_num_desc; i++)
+ list_add_tail(&iwarp_info->mpa_bufs[i].list_entry,
+ &iwarp_info->mpa_buf_list);
return rc;
err:
qed_iwarp_ll2_stop(p_hwfn, p_ptt);
@@ -2014,6 +2760,7 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
qed_spq_register_async_cb(p_hwfn, PROTOCOLID_IWARP,
qed_iwarp_async_event);
+ qed_ooo_setup(p_hwfn);
return qed_iwarp_ll2_start(p_hwfn, params, p_ptt);
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
index 148ef3c33a5d..c1ecd743305f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -47,18 +47,51 @@ enum qed_iwarp_qp_state qed_roce2iwarp_state(enum qed_roce_qp_state state);
#define QED_IWARP_LL2_SYN_TX_SIZE (128)
#define QED_IWARP_LL2_SYN_RX_SIZE (256)
#define QED_IWARP_MAX_SYN_PKT_SIZE (128)
-#define QED_IWARP_HANDLE_INVAL (0xff)
+
+#define QED_IWARP_LL2_OOO_DEF_TX_SIZE (256)
+#define QED_IWARP_MAX_OOO (16)
+#define QED_IWARP_LL2_OOO_MAX_RX_SIZE (16384)
+
+#define QED_IWARP_HANDLE_INVAL (0xff)
struct qed_iwarp_ll2_buff {
+ struct qed_iwarp_ll2_buff *piggy_buf;
void *data;
dma_addr_t data_phys_addr;
u32 buff_size;
};
+struct qed_iwarp_ll2_mpa_buf {
+ struct list_head list_entry;
+ struct qed_iwarp_ll2_buff *ll2_buf;
+ struct unaligned_opaque_data data;
+ u16 tcp_payload_len;
+ u8 placement_offset;
+};
+
+/* In some cases a fpdu will arrive with only one byte of the header, in this
+ * case the fpdu_length will be partial (contain only higher byte and
+ * incomplete bytes will contain the invalid value
+ */
+#define QED_IWARP_INVALID_INCOMPLETE_BYTES 0xffff
+
+struct qed_iwarp_fpdu {
+ struct qed_iwarp_ll2_buff *mpa_buf;
+ void *mpa_frag_virt;
+ dma_addr_t mpa_frag;
+ dma_addr_t pkt_hdr;
+ u16 mpa_frag_len;
+ u16 fpdu_length;
+ u16 incomplete_bytes;
+ u8 pkt_hdr_size;
+};
+
struct qed_iwarp_info {
struct list_head listen_list; /* qed_iwarp_listener */
struct list_head ep_list; /* qed_iwarp_ep */
struct list_head ep_free_list; /* pre-allocated ep's */
+ struct list_head mpa_buf_list; /* list of mpa_bufs */
+ struct list_head mpa_buf_pending_list;
spinlock_t iw_lock; /* for iwarp resources */
spinlock_t qp_lock; /* for teardown races */
u32 rcv_wnd_scale;
@@ -67,9 +100,15 @@ struct qed_iwarp_info {
u8 crc_needed;
u8 tcp_flags;
u8 ll2_syn_handle;
+ u8 ll2_ooo_handle;
+ u8 ll2_mpa_handle;
u8 peer2peer;
enum mpa_negotiation_mode mpa_rev;
enum mpa_rtr_type rtr_type;
+ struct qed_iwarp_fpdu *partial_fpdus;
+ struct qed_iwarp_ll2_mpa_buf *mpa_bufs;
+ u8 *mpa_intermediate_buf;
+ u16 max_num_partial_fpdus;
};
enum qed_iwarp_ep_state {
@@ -147,6 +186,9 @@ int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
struct qed_rdma_start_in_params *params);
+void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
+ struct iwarp_init_func_params *p_ramrod);
+
int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index c06ad4f0758e..047f556ca62e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -413,6 +413,7 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn,
struct qed_ll2_comp_rx_data *data)
{
data->parse_flags = le16_to_cpu(p_cqe->rx_cqe_fp.parse_flags.flags);
+ data->err_flags = le16_to_cpu(p_cqe->rx_cqe_fp.err_flags.flags);
data->length.packet_length =
le16_to_cpu(p_cqe->rx_cqe_fp.packet_length);
data->vlan = le16_to_cpu(p_cqe->rx_cqe_fp.vlan);
@@ -422,6 +423,41 @@ static void qed_ll2_rxq_parse_reg(struct qed_hwfn *p_hwfn,
}
static int
+qed_ll2_handle_slowpath(struct qed_hwfn *p_hwfn,
+ struct qed_ll2_info *p_ll2_conn,
+ union core_rx_cqe_union *p_cqe,
+ unsigned long *p_lock_flags)
+{
+ struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
+ struct core_rx_slow_path_cqe *sp_cqe;
+
+ sp_cqe = &p_cqe->rx_cqe_sp;
+ if (sp_cqe->ramrod_cmd_id != CORE_RAMROD_RX_QUEUE_FLUSH) {
+ DP_NOTICE(p_hwfn,
+ "LL2 - unexpected Rx CQE slowpath ramrod_cmd_id:%d\n",
+ sp_cqe->ramrod_cmd_id);
+ return -EINVAL;
+ }
+
+ if (!p_ll2_conn->cbs.slowpath_cb) {
+ DP_NOTICE(p_hwfn,
+ "LL2 - received RX_QUEUE_FLUSH but no callback was provided\n");
+ return -EINVAL;
+ }
+
+ spin_unlock_irqrestore(&p_rx->lock, *p_lock_flags);
+
+ p_ll2_conn->cbs.slowpath_cb(p_ll2_conn->cbs.cookie,
+ p_ll2_conn->my_id,
+ le32_to_cpu(sp_cqe->opaque_data.data[0]),
+ le32_to_cpu(sp_cqe->opaque_data.data[1]));
+
+ spin_lock_irqsave(&p_rx->lock, *p_lock_flags);
+
+ return 0;
+}
+
+static int
qed_ll2_rxq_handle_completion(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_conn,
union core_rx_cqe_union *p_cqe,
@@ -494,8 +530,8 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie)
switch (cqe->rx_cqe_sp.type) {
case CORE_RX_CQE_TYPE_SLOW_PATH:
- DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n");
- rc = -EINVAL;
+ rc = qed_ll2_handle_slowpath(p_hwfn, p_ll2_conn,
+ cqe, &flags);
break;
case CORE_RX_CQE_TYPE_GSI_OFFLOAD:
case CORE_RX_CQE_TYPE_REGULAR:
@@ -893,7 +929,7 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
p_ramrod->drop_ttl0_flg = p_ll2_conn->input.rx_drop_ttl0_flg;
p_ramrod->inner_vlan_removal_en = p_ll2_conn->input.rx_vlan_removal_en;
p_ramrod->queue_id = p_ll2_conn->queue_id;
- p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_OOO) ? 0 : 1;
+ p_ramrod->main_func_queue = p_ll2_conn->main_func_queue ? 1 : 0;
if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE) &&
@@ -1104,6 +1140,7 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
struct qed_ll2_info *p_ll2_info)
{
struct qed_ll2_tx_packet *p_descq;
+ u32 desc_size;
u32 capacity;
int rc = 0;
@@ -1121,13 +1158,17 @@ static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn,
goto out;
capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain);
- p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet),
- GFP_KERNEL);
+ /* First element is part of the packet, rest are flexibly added */
+ desc_size = (sizeof(*p_descq) +
+ (p_ll2_info->input.tx_max_bds_per_packet - 1) *
+ sizeof(p_descq->bds_set));
+
+ p_descq = kcalloc(capacity, desc_size, GFP_KERNEL);
if (!p_descq) {
rc = -ENOMEM;
goto out;
}
- p_ll2_info->tx_queue.descq_array = p_descq;
+ p_ll2_info->tx_queue.descq_mem = p_descq;
DP_VERBOSE(p_hwfn, QED_MSG_LL2,
"Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n",
@@ -1208,6 +1249,7 @@ qed_ll2_set_cbs(struct qed_ll2_info *p_ll2_info, const struct qed_ll2_cbs *cbs)
p_ll2_info->cbs.rx_release_cb = cbs->rx_release_cb;
p_ll2_info->cbs.tx_comp_cb = cbs->tx_comp_cb;
p_ll2_info->cbs.tx_release_cb = cbs->tx_release_cb;
+ p_ll2_info->cbs.slowpath_cb = cbs->slowpath_cb;
p_ll2_info->cbs.cookie = cbs->cookie;
return 0;
@@ -1259,6 +1301,11 @@ int qed_ll2_acquire_connection(void *cxt, struct qed_ll2_acquire_data *data)
p_ll2_info->tx_dest = (data->input.tx_dest == QED_LL2_TX_DEST_NW) ?
CORE_TX_DEST_NW : CORE_TX_DEST_LB;
+ if (data->input.conn_type == QED_LL2_TYPE_OOO ||
+ data->input.secondary_queue)
+ p_ll2_info->main_func_queue = false;
+ else
+ p_ll2_info->main_func_queue = true;
/* Correct maximum number of Tx BDs */
p_tx_max = &p_ll2_info->input.tx_max_bds_per_packet;
@@ -1358,11 +1405,13 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
{
struct qed_hwfn *p_hwfn = cxt;
struct qed_ll2_info *p_ll2_conn;
+ struct qed_ll2_tx_packet *p_pkt;
struct qed_ll2_rx_queue *p_rx;
struct qed_ll2_tx_queue *p_tx;
struct qed_ptt *p_ptt;
int rc = -EINVAL;
u32 i, capacity;
+ u32 desc_size;
u8 qid;
p_ptt = qed_ptt_acquire(p_hwfn);
@@ -1396,9 +1445,15 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
INIT_LIST_HEAD(&p_tx->sending_descq);
spin_lock_init(&p_tx->lock);
capacity = qed_chain_get_capacity(&p_tx->txq_chain);
- for (i = 0; i < capacity; i++)
- list_add_tail(&p_tx->descq_array[i].list_entry,
- &p_tx->free_descq);
+ /* First element is part of the packet, rest are flexibly added */
+ desc_size = (sizeof(*p_pkt) +
+ (p_ll2_conn->input.tx_max_bds_per_packet - 1) *
+ sizeof(p_pkt->bds_set));
+
+ for (i = 0; i < capacity; i++) {
+ p_pkt = p_tx->descq_mem + desc_size * i;
+ list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
+ }
p_tx->cur_completing_bd_idx = 0;
p_tx->bds_idx = 0;
p_tx->b_completing_packet = false;
@@ -1578,11 +1633,28 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
roce_flavor = (pkt->qed_roce_flavor == QED_LL2_ROCE) ? CORE_ROCE
: CORE_RROCE;
- tx_dest = (pkt->tx_dest == QED_LL2_TX_DEST_NW) ? CORE_TX_DEST_NW
- : CORE_TX_DEST_LB;
+ switch (pkt->tx_dest) {
+ case QED_LL2_TX_DEST_NW:
+ tx_dest = CORE_TX_DEST_NW;
+ break;
+ case QED_LL2_TX_DEST_LB:
+ tx_dest = CORE_TX_DEST_LB;
+ break;
+ case QED_LL2_TX_DEST_DROP:
+ tx_dest = CORE_TX_DEST_DROP;
+ break;
+ default:
+ tx_dest = CORE_TX_DEST_LB;
+ break;
+ }
start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
- start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan);
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn) &&
+ p_ll2->input.conn_type == QED_LL2_TYPE_OOO)
+ start_bd->nw_vlan_or_lb_echo =
+ cpu_to_le16(IWARP_LL2_IN_ORDER_TX_QUEUE);
+ else
+ start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan);
SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W,
cpu_to_le16(pkt->l4_hdr_offset_w));
SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest);
@@ -1590,6 +1662,9 @@ qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1);
SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds);
SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor);
+ SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_CSUM, !!(pkt->enable_ip_cksum));
+ SET_FIELD(bd_data, CORE_TX_BD_DATA_L4_CSUM, !!(pkt->enable_l4_cksum));
+ SET_FIELD(bd_data, CORE_TX_BD_DATA_IP_LEN, !!(pkt->calc_ip_len));
start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data);
DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag);
start_bd->nbytes = cpu_to_le16(pkt->first_frag_len);
@@ -1697,7 +1772,7 @@ int qed_ll2_prepare_tx_packet(void *cxt,
p_tx = &p_ll2_conn->tx_queue;
p_tx_chain = &p_tx->txq_chain;
- if (pkt->num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET)
+ if (pkt->num_of_bds > p_ll2_conn->input.tx_max_bds_per_packet)
return -EIO;
spin_lock_irqsave(&p_tx->lock, flags);
@@ -1857,7 +1932,7 @@ void qed_ll2_release_connection(void *cxt, u8 connection_handle)
qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index);
}
- kfree(p_ll2_conn->tx_queue.descq_array);
+ kfree(p_ll2_conn->tx_queue.descq_mem);
qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain);
kfree(p_ll2_conn->rx_queue.descq_array);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index a822528e9c63..f65817012e97 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -63,17 +63,14 @@ struct qed_ll2_rx_packet {
struct qed_ll2_tx_packet {
struct list_head list_entry;
u16 bd_used;
- u16 vlan;
- u16 l4_hdr_offset_w;
- u8 bd_flags;
bool notify_fw;
void *cookie;
-
+ /* Flexible Array of bds_set determined by max_bds_per_packet */
struct {
struct core_tx_bd *txq_bd;
dma_addr_t tx_frag;
u16 frag_len;
- } bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET];
+ } bds_set[1];
};
struct qed_ll2_rx_queue {
@@ -101,7 +98,7 @@ struct qed_ll2_tx_queue {
struct list_head active_descq;
struct list_head free_descq;
struct list_head sending_descq;
- struct qed_ll2_tx_packet *descq_array;
+ void *descq_mem; /* memory for variable sized qed_ll2_tx_packet*/
struct qed_ll2_tx_packet *cur_send_packet;
struct qed_ll2_tx_packet cur_completing_packet;
u16 cur_completing_bd_idx;
@@ -124,6 +121,7 @@ struct qed_ll2_info {
bool b_active;
enum core_tx_dest tx_dest;
u8 tx_stats_en;
+ bool main_func_queue;
struct qed_ll2_rx_queue rx_queue;
struct qed_ll2_tx_queue tx_queue;
struct qed_ll2_cbs cbs;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 376485d99357..8b99c7d26f34 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1691,12 +1691,12 @@ qed_mcp_get_shmem_proto_mfw(struct qed_hwfn *p_hwfn,
case FW_MB_PARAM_GET_PF_RDMA_ROCE:
*p_proto = QED_PCI_ETH_ROCE;
break;
+ case FW_MB_PARAM_GET_PF_RDMA_IWARP:
+ *p_proto = QED_PCI_ETH_IWARP;
+ break;
case FW_MB_PARAM_GET_PF_RDMA_BOTH:
- DP_NOTICE(p_hwfn,
- "Current day drivers don't support RoCE & iWARP. Default to RoCE-only\n");
- *p_proto = QED_PCI_ETH_ROCE;
+ *p_proto = QED_PCI_ETH_RDMA;
break;
- case FW_MB_PARAM_GET_PF_RDMA_IWARP:
default:
DP_NOTICE(p_hwfn,
"MFW answers GET_PF_RDMA_PROTOCOL but param is %08x\n",
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
index 000636530111..6172354b451c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -103,18 +103,28 @@ int qed_ooo_alloc(struct qed_hwfn *p_hwfn)
{
u16 max_num_archipelagos = 0, cid_base;
struct qed_ooo_info *p_ooo_info;
+ enum protocol_type proto;
u16 max_num_isles = 0;
u32 i;
- if (p_hwfn->hw_info.personality != QED_PCI_ISCSI) {
+ switch (p_hwfn->hw_info.personality) {
+ case QED_PCI_ISCSI:
+ proto = PROTOCOLID_ISCSI;
+ break;
+ case QED_PCI_ETH_RDMA:
+ case QED_PCI_ETH_IWARP:
+ proto = PROTOCOLID_IWARP;
+ break;
+ default:
DP_NOTICE(p_hwfn,
"Failed to allocate qed_ooo_info: unknown personality\n");
return -EINVAL;
}
- max_num_archipelagos = p_hwfn->pf_params.iscsi_pf_params.num_cons;
+ max_num_archipelagos = (u16)qed_cxt_get_proto_cid_count(p_hwfn, proto,
+ NULL);
max_num_isles = QED_MAX_NUM_ISLES + max_num_archipelagos;
- cid_base = (u16)qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ISCSI);
+ cid_base = (u16)qed_cxt_get_proto_cid_start(p_hwfn, proto);
if (!max_num_archipelagos) {
DP_NOTICE(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.h b/drivers/net/ethernet/qlogic/qed/qed_ooo.h
index e8ed40b848f5..49c4e75b15b1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ooo.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.h
@@ -83,7 +83,7 @@ struct qed_ooo_info {
u16 cid_base;
};
-#if IS_ENABLED(CONFIG_QED_ISCSI)
+#if IS_ENABLED(CONFIG_QED_OOO)
void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn,
struct qed_ooo_info *p_ooo_info,
struct ooo_opaque *p_cqe);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 6fb99518a61f..c8c4b3940564 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -156,7 +156,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
return rc;
p_hwfn->p_rdma_info = p_rdma_info;
- p_rdma_info->proto = PROTOCOLID_ROCE;
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ p_rdma_info->proto = PROTOCOLID_IWARP;
+ else
+ p_rdma_info->proto = PROTOCOLID_ROCE;
num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto,
NULL);
@@ -206,11 +209,11 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
goto free_pd_map;
}
- /* Allocate bitmap for cq's. The maximum number of CQs is bounded to
- * twice the number of QPs.
+ /* Allocate bitmap for cq's. The maximum number of CQs is bound to
+ * the number of connections we support. (num_qps in iWARP or
+ * num_qps/2 in RoCE).
*/
- rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map,
- p_rdma_info->num_qps * 2, "CQ");
+ rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, num_cons, "CQ");
if (rc) {
DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
"Failed to allocate cq bitmap, rc = %d\n", rc);
@@ -219,10 +222,10 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
/* Allocate bitmap for toggle bit for cq icids
* We toggle the bit every time we create or resize cq for a given icid.
- * The maximum number of CQs is bounded to twice the number of QPs.
+ * Size needs to equal the size of the cq bmap.
*/
rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits,
- p_rdma_info->num_qps * 2, "Toggle");
+ num_cons, "Toggle");
if (rc) {
DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
"Failed to allocate toogle bits, rc = %d\n", rc);
@@ -548,10 +551,13 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
if (rc)
return rc;
- if (QED_IS_IWARP_PERSONALITY(p_hwfn))
+ if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
+ qed_iwarp_init_fw_ramrod(p_hwfn,
+ &p_ent->ramrod.iwarp_init_func.iwarp);
p_ramrod = &p_ent->ramrod.iwarp_init_func.rdma;
- else
+ } else {
p_ramrod = &p_ent->ramrod.roce_init_func.rdma;
+ }
p_params_header = &p_ramrod->params_header;
p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 46d0c3cb83a5..a1d33f35aad3 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -377,6 +377,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
p_ramrod->personality = PERSONALITY_ISCSI;
break;
case QED_PCI_ETH_ROCE:
+ case QED_PCI_ETH_IWARP:
p_ramrod->personality = PERSONALITY_RDMA_AND_ETH;
break;
default:
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index 6fc854b120b0..48ec4c56cddf 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1004,6 +1004,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
xdp.data_hard_start = page_address(bd->data);
xdp.data = xdp.data_hard_start + *data_offset;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + *len;
/* Queues always have a full reset currently, so for the time
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 2991179c2fd0..05479d435469 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -3891,10 +3891,8 @@ static int ql3xxx_probe(struct pci_dev *pdev,
INIT_DELAYED_WORK(&qdev->tx_timeout_work, ql_tx_timeout_work);
INIT_DELAYED_WORK(&qdev->link_state_work, ql_link_state_machine_work);
- init_timer(&qdev->adapter_timer);
- qdev->adapter_timer.function = ql3xxx_timer;
+ setup_timer(&qdev->adapter_timer, ql3xxx_timer, (unsigned long)qdev);
qdev->adapter_timer.expires = jiffies + HZ * 2; /* two second delay */
- qdev->adapter_timer.data = (unsigned long)qdev;
if (!cards_found) {
pr_alert("%s\n", DRV_STRING);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 3ed9033e56db..9cbb27263742 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -309,22 +309,12 @@ void emac_mac_mode_config(struct emac_adapter *adpt)
/* Config descriptor rings */
static void emac_mac_dma_rings_config(struct emac_adapter *adpt)
{
- static const unsigned short tpd_q_offset[] = {
- EMAC_DESC_CTRL_8, EMAC_H1TPD_BASE_ADDR_LO,
- EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO};
- static const unsigned short rfd_q_offset[] = {
- EMAC_DESC_CTRL_2, EMAC_DESC_CTRL_10,
- EMAC_DESC_CTRL_12, EMAC_DESC_CTRL_13};
- static const unsigned short rrd_q_offset[] = {
- EMAC_DESC_CTRL_5, EMAC_DESC_CTRL_14,
- EMAC_DESC_CTRL_15, EMAC_DESC_CTRL_16};
-
/* TPD (Transmit Packet Descriptor) */
writel(upper_32_bits(adpt->tx_q.tpd.dma_addr),
adpt->base + EMAC_DESC_CTRL_1);
writel(lower_32_bits(adpt->tx_q.tpd.dma_addr),
- adpt->base + tpd_q_offset[0]);
+ adpt->base + EMAC_DESC_CTRL_8);
writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK,
adpt->base + EMAC_DESC_CTRL_9);
@@ -334,9 +324,9 @@ static void emac_mac_dma_rings_config(struct emac_adapter *adpt)
adpt->base + EMAC_DESC_CTRL_0);
writel(lower_32_bits(adpt->rx_q.rfd.dma_addr),
- adpt->base + rfd_q_offset[0]);
+ adpt->base + EMAC_DESC_CTRL_2);
writel(lower_32_bits(adpt->rx_q.rrd.dma_addr),
- adpt->base + rrd_q_offset[0]);
+ adpt->base + EMAC_DESC_CTRL_5);
writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK,
adpt->base + EMAC_DESC_CTRL_3);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index 29ba37a08372..e8ab512ee7e3 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -68,10 +68,10 @@ static void emac_sgmii_link_init(struct emac_adapter *adpt)
writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2);
}
-static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits)
+static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u8 irq_bits)
{
struct emac_sgmii *phy = &adpt->phy;
- u32 status;
+ u8 status;
writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR);
writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD);
@@ -86,9 +86,8 @@ static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits)
EMAC_SGMII_PHY_INTERRUPT_STATUS,
status, !(status & irq_bits), 1,
SGMII_PHY_IRQ_CLR_WAIT_TIME)) {
- netdev_err(adpt->netdev,
- "error: failed clear SGMII irq: status:0x%x bits:0x%x\n",
- status, irq_bits);
+ net_err_ratelimited("%s: failed to clear SGMII irq: status:0x%x bits:0x%x\n",
+ adpt->netdev->name, status, irq_bits);
return -EIO;
}
@@ -109,7 +108,7 @@ static irqreturn_t emac_sgmii_interrupt(int irq, void *data)
{
struct emac_adapter *adpt = data;
struct emac_sgmii *phy = &adpt->phy;
- u32 status;
+ u8 status;
status = readl(phy->base + EMAC_SGMII_PHY_INTERRUPT_STATUS);
status &= SGMII_ISR_MASK;
@@ -139,10 +138,8 @@ static irqreturn_t emac_sgmii_interrupt(int irq, void *data)
atomic_set(&phy->decode_error_count, 0);
}
- if (emac_sgmii_irq_clear(adpt, status)) {
- netdev_warn(adpt->netdev, "failed to clear SGMII interrupt\n");
+ if (emac_sgmii_irq_clear(adpt, status))
schedule_work(&adpt->work_thread);
- }
return IRQ_HANDLED;
}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 759543512117..70c92b649b29 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -130,7 +130,7 @@ static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev)
return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb);
}
-irqreturn_t emac_isr(int _irq, void *data)
+static irqreturn_t emac_isr(int _irq, void *data)
{
struct emac_irq *irq = data;
struct emac_adapter *adpt =
@@ -148,9 +148,8 @@ irqreturn_t emac_isr(int _irq, void *data)
goto exit;
if (status & ISR_ERROR) {
- netif_warn(adpt, intr, adpt->netdev,
- "warning: error irq status 0x%lx\n",
- status & ISR_ERROR);
+ net_err_ratelimited("%s: error interrupt 0x%lx\n",
+ adpt->netdev->name, status & ISR_ERROR);
/* reset MAC */
schedule_work(&adpt->work_thread);
}
@@ -169,7 +168,8 @@ irqreturn_t emac_isr(int _irq, void *data)
emac_mac_tx_process(adpt, &adpt->tx_q);
if (status & ISR_OVER)
- net_warn_ratelimited("warning: TX/RX overflow\n");
+ net_warn_ratelimited("%s: TX/RX overflow interrupt\n",
+ adpt->netdev->name);
exit:
/* enable the interrupt */
@@ -615,20 +615,11 @@ static int emac_probe(struct platform_device *pdev)
u32 reg;
int ret;
- /* The EMAC itself is capable of 64-bit DMA, so try that first. */
- ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ /* The TPD buffer address is limited to 45 bits. */
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(45));
if (ret) {
- /* Some platforms may restrict the EMAC's address bus to less
- * then the size of DDR. In this case, we need to try a
- * smaller mask. We could try every possible smaller mask,
- * but that's overkill. Instead, just fall to 32-bit, which
- * should always work.
- */
- ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
- if (ret) {
- dev_err(&pdev->dev, "could not set DMA mask\n");
- return ret;
- }
+ dev_err(&pdev->dev, "could not set DMA mask\n");
+ return ret;
}
netdev = alloc_etherdev(sizeof(struct emac_adapter));
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index 1e33aea59f50..71bee1af71ef 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -61,23 +61,6 @@ rmnet_get_port_rtnl(const struct net_device *real_dev)
return rtnl_dereference(real_dev->rx_handler_data);
}
-static struct rmnet_endpoint*
-rmnet_get_endpoint(struct net_device *dev, int config_id)
-{
- struct rmnet_endpoint *ep;
- struct rmnet_port *port;
-
- if (!rmnet_is_real_dev_registered(dev)) {
- ep = rmnet_vnd_get_endpoint(dev);
- } else {
- port = rmnet_get_port_rtnl(dev);
-
- ep = &port->muxed_ep[config_id];
- }
-
- return ep;
-}
-
static int rmnet_unregister_real_device(struct net_device *real_dev,
struct rmnet_port *port)
{
@@ -98,7 +81,7 @@ static int rmnet_unregister_real_device(struct net_device *real_dev,
static int rmnet_register_real_device(struct net_device *real_dev)
{
struct rmnet_port *port;
- int rc;
+ int rc, entry;
ASSERT_RTNL();
@@ -119,27 +102,41 @@ static int rmnet_register_real_device(struct net_device *real_dev)
/* hold on to real dev for MAP data */
dev_hold(real_dev);
+ for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++)
+ INIT_HLIST_HEAD(&port->muxed_ep[entry]);
+
netdev_dbg(real_dev, "registered with rmnet\n");
return 0;
}
-static void rmnet_set_endpoint_config(struct net_device *dev,
- u8 mux_id, u8 rmnet_mode,
- struct net_device *egress_dev)
+static void rmnet_unregister_bridge(struct net_device *dev,
+ struct rmnet_port *port)
{
- struct rmnet_endpoint *ep;
+ struct net_device *rmnet_dev, *bridge_dev;
+ struct rmnet_port *bridge_port;
+
+ if (port->rmnet_mode != RMNET_EPMODE_BRIDGE)
+ return;
- netdev_dbg(dev, "id %d mode %d dev %s\n",
- mux_id, rmnet_mode, egress_dev->name);
+ /* bridge slave handling */
+ if (!port->nr_rmnet_devs) {
+ rmnet_dev = netdev_master_upper_dev_get_rcu(dev);
+ netdev_upper_dev_unlink(dev, rmnet_dev);
- ep = rmnet_get_endpoint(dev, mux_id);
- /* This config is cleared on every set, so its ok to not
- * clear it on a device delete.
- */
- memset(ep, 0, sizeof(struct rmnet_endpoint));
- ep->rmnet_mode = rmnet_mode;
- ep->egress_dev = egress_dev;
- ep->mux_id = mux_id;
+ bridge_dev = port->bridge_ep;
+
+ bridge_port = rmnet_get_port_rtnl(bridge_dev);
+ bridge_port->bridge_ep = NULL;
+ bridge_port->rmnet_mode = RMNET_EPMODE_VND;
+ } else {
+ bridge_dev = port->bridge_ep;
+
+ bridge_port = rmnet_get_port_rtnl(bridge_dev);
+ rmnet_dev = netdev_master_upper_dev_get_rcu(bridge_dev);
+ netdev_upper_dev_unlink(bridge_dev, rmnet_dev);
+
+ rmnet_unregister_real_device(bridge_dev, bridge_port);
+ }
}
static int rmnet_newlink(struct net *src_net, struct net_device *dev,
@@ -153,6 +150,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
RMNET_EGRESS_FORMAT_MAP;
struct net_device *real_dev;
int mode = RMNET_EPMODE_VND;
+ struct rmnet_endpoint *ep;
struct rmnet_port *port;
int err = 0;
u16 mux_id;
@@ -164,6 +162,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
if (!data[IFLA_VLAN_ID])
return -EINVAL;
+ ep = kzalloc(sizeof(*ep), GFP_ATOMIC);
+ if (!ep)
+ return -ENOMEM;
+
mux_id = nla_get_u16(data[IFLA_VLAN_ID]);
err = rmnet_register_real_device(real_dev);
@@ -171,11 +173,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
goto err0;
port = rmnet_get_port_rtnl(real_dev);
- err = rmnet_vnd_newlink(mux_id, dev, port, real_dev);
+ err = rmnet_vnd_newlink(mux_id, dev, port, real_dev, ep);
if (err)
goto err1;
- err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL);
+ err = netdev_master_upper_dev_link(dev, real_dev, NULL, NULL, extack);
if (err)
goto err2;
@@ -183,13 +185,13 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
ingress_format, egress_format);
port->egress_data_format = egress_format;
port->ingress_data_format = ingress_format;
+ port->rmnet_mode = mode;
- rmnet_set_endpoint_config(real_dev, mux_id, mode, dev);
- rmnet_set_endpoint_config(dev, mux_id, mode, real_dev);
+ hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
return 0;
err2:
- rmnet_vnd_dellink(mux_id, port);
+ rmnet_vnd_dellink(mux_id, port, ep);
err1:
rmnet_unregister_real_device(real_dev, port);
err0:
@@ -199,6 +201,7 @@ err0:
static void rmnet_dellink(struct net_device *dev, struct list_head *head)
{
struct net_device *real_dev;
+ struct rmnet_endpoint *ep;
struct rmnet_port *port;
u8 mux_id;
@@ -212,8 +215,15 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
port = rmnet_get_port_rtnl(real_dev);
mux_id = rmnet_vnd_get_mux(dev);
- rmnet_vnd_dellink(mux_id, port);
netdev_upper_dev_unlink(dev, real_dev);
+
+ ep = rmnet_get_endpoint(port, mux_id);
+ if (ep) {
+ hlist_del_init_rcu(&ep->hlnode);
+ rmnet_unregister_bridge(dev, port);
+ rmnet_vnd_dellink(mux_id, port, ep);
+ kfree(ep);
+ }
rmnet_unregister_real_device(real_dev, port);
unregister_netdevice_queue(dev, head);
@@ -222,11 +232,16 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head)
static int rmnet_dev_walk_unreg(struct net_device *rmnet_dev, void *data)
{
struct rmnet_walk_data *d = data;
+ struct rmnet_endpoint *ep;
u8 mux_id;
mux_id = rmnet_vnd_get_mux(rmnet_dev);
-
- rmnet_vnd_dellink(mux_id, d->port);
+ ep = rmnet_get_endpoint(d->port, mux_id);
+ if (ep) {
+ hlist_del_init_rcu(&ep->hlnode);
+ rmnet_vnd_dellink(mux_id, d->port, ep);
+ kfree(ep);
+ }
netdev_upper_dev_unlink(rmnet_dev, d->real_dev);
unregister_netdevice_queue(rmnet_dev, d->head);
@@ -252,6 +267,8 @@ static void rmnet_force_unassociate_device(struct net_device *dev)
d.port = port;
rcu_read_lock();
+ rmnet_unregister_bridge(dev, port);
+
netdev_walk_all_lower_dev_rcu(real_dev, rmnet_dev_walk_unreg, &d);
rcu_read_unlock();
unregister_netdevice_many(&list);
@@ -324,6 +341,77 @@ struct rmnet_port *rmnet_get_port(struct net_device *real_dev)
return NULL;
}
+struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id)
+{
+ struct rmnet_endpoint *ep;
+
+ hlist_for_each_entry_rcu(ep, &port->muxed_ep[mux_id], hlnode) {
+ if (ep->mux_id == mux_id)
+ return ep;
+ }
+
+ return NULL;
+}
+
+int rmnet_add_bridge(struct net_device *rmnet_dev,
+ struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct rmnet_priv *priv = netdev_priv(rmnet_dev);
+ struct net_device *real_dev = priv->real_dev;
+ struct rmnet_port *port, *slave_port;
+ int err;
+
+ port = rmnet_get_port(real_dev);
+
+ /* If there is more than one rmnet dev attached, its probably being
+ * used for muxing. Skip the briding in that case
+ */
+ if (port->nr_rmnet_devs > 1)
+ return -EINVAL;
+
+ if (rmnet_is_real_dev_registered(slave_dev))
+ return -EBUSY;
+
+ err = rmnet_register_real_device(slave_dev);
+ if (err)
+ return -EBUSY;
+
+ err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL,
+ extack);
+ if (err)
+ return -EINVAL;
+
+ slave_port = rmnet_get_port(slave_dev);
+ slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE;
+ slave_port->bridge_ep = real_dev;
+
+ port->rmnet_mode = RMNET_EPMODE_BRIDGE;
+ port->bridge_ep = slave_dev;
+
+ netdev_dbg(slave_dev, "registered with rmnet as slave\n");
+ return 0;
+}
+
+int rmnet_del_bridge(struct net_device *rmnet_dev,
+ struct net_device *slave_dev)
+{
+ struct rmnet_priv *priv = netdev_priv(rmnet_dev);
+ struct net_device *real_dev = priv->real_dev;
+ struct rmnet_port *port, *slave_port;
+
+ port = rmnet_get_port(real_dev);
+ port->rmnet_mode = RMNET_EPMODE_VND;
+ port->bridge_ep = NULL;
+
+ netdev_upper_dev_unlink(slave_dev, rmnet_dev);
+ slave_port = rmnet_get_port(slave_dev);
+ rmnet_unregister_real_device(slave_dev, slave_port);
+
+ netdev_dbg(slave_dev, "removed from rmnet as slave\n");
+ return 0;
+}
+
/* Startup/Shutdown */
static int __init rmnet_init(void)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index dde4e9f14f4a..60115e69e415 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -20,13 +20,10 @@
#define RMNET_MAX_LOGICAL_EP 255
-/* Information about the next device to deliver the packet to.
- * Exact usage of this parameter depends on the rmnet_mode.
- */
struct rmnet_endpoint {
- u8 rmnet_mode;
u8 mux_id;
struct net_device *egress_dev;
+ struct hlist_node hlnode;
};
/* One instance of this structure is instantiated for each real_dev associated
@@ -34,22 +31,26 @@ struct rmnet_endpoint {
*/
struct rmnet_port {
struct net_device *dev;
- struct rmnet_endpoint local_ep;
- struct rmnet_endpoint muxed_ep[RMNET_MAX_LOGICAL_EP];
u32 ingress_data_format;
u32 egress_data_format;
- struct net_device *rmnet_devices[RMNET_MAX_LOGICAL_EP];
u8 nr_rmnet_devs;
+ u8 rmnet_mode;
+ struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
+ struct net_device *bridge_ep;
};
extern struct rtnl_link_ops rmnet_link_ops;
struct rmnet_priv {
- struct rmnet_endpoint local_ep;
u8 mux_id;
struct net_device *real_dev;
};
struct rmnet_port *rmnet_get_port(struct net_device *real_dev);
-
+struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id);
+int rmnet_add_bridge(struct net_device *rmnet_dev,
+ struct net_device *slave_dev,
+ struct netlink_ext_ack *extack);
+int rmnet_del_bridge(struct net_device *rmnet_dev,
+ struct net_device *slave_dev);
#endif /* _RMNET_CONFIG_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 540c7622dcb1..df3d2d16ce55 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -44,56 +44,18 @@ static void rmnet_set_skb_proto(struct sk_buff *skb)
/* Generic handler */
static rx_handler_result_t
-rmnet_bridge_handler(struct sk_buff *skb, struct rmnet_endpoint *ep)
+rmnet_deliver_skb(struct sk_buff *skb)
{
- if (!ep->egress_dev)
- kfree_skb(skb);
- else
- rmnet_egress_handler(skb, ep);
+ skb_reset_transport_header(skb);
+ skb_reset_network_header(skb);
+ rmnet_vnd_rx_fixup(skb, skb->dev);
+ skb->pkt_type = PACKET_HOST;
+ skb_set_mac_header(skb, 0);
+ netif_receive_skb(skb);
return RX_HANDLER_CONSUMED;
}
-static rx_handler_result_t
-rmnet_deliver_skb(struct sk_buff *skb, struct rmnet_endpoint *ep)
-{
- switch (ep->rmnet_mode) {
- case RMNET_EPMODE_NONE:
- return RX_HANDLER_PASS;
-
- case RMNET_EPMODE_BRIDGE:
- return rmnet_bridge_handler(skb, ep);
-
- case RMNET_EPMODE_VND:
- skb_reset_transport_header(skb);
- skb_reset_network_header(skb);
- rmnet_vnd_rx_fixup(skb, skb->dev);
-
- skb->pkt_type = PACKET_HOST;
- skb_set_mac_header(skb, 0);
- netif_receive_skb(skb);
- return RX_HANDLER_CONSUMED;
-
- default:
- kfree_skb(skb);
- return RX_HANDLER_CONSUMED;
- }
-}
-
-static rx_handler_result_t
-rmnet_ingress_deliver_packet(struct sk_buff *skb,
- struct rmnet_port *port)
-{
- if (!port) {
- kfree_skb(skb);
- return RX_HANDLER_CONSUMED;
- }
-
- skb->dev = port->local_ep.egress_dev;
-
- return rmnet_deliver_skb(skb, &port->local_ep);
-}
-
/* MAP handler */
static rx_handler_result_t
@@ -109,19 +71,18 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
& RMNET_INGRESS_FORMAT_MAP_COMMANDS)
return rmnet_map_command(skb, port);
- kfree_skb(skb);
- return RX_HANDLER_CONSUMED;
+ goto free_skb;
}
mux_id = RMNET_MAP_GET_MUX_ID(skb);
len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb);
- if (mux_id >= RMNET_MAX_LOGICAL_EP) {
- kfree_skb(skb);
- return RX_HANDLER_CONSUMED;
- }
+ if (mux_id >= RMNET_MAX_LOGICAL_EP)
+ goto free_skb;
- ep = &port->muxed_ep[mux_id];
+ ep = rmnet_get_endpoint(port, mux_id);
+ if (!ep)
+ goto free_skb;
if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEMUXING)
skb->dev = ep->egress_dev;
@@ -130,7 +91,11 @@ __rmnet_map_ingress_handler(struct sk_buff *skb,
skb_pull(skb, sizeof(struct rmnet_map_header));
skb_trim(skb, len);
rmnet_set_skb_proto(skb);
- return rmnet_deliver_skb(skb, ep);
+ return rmnet_deliver_skb(skb);
+
+free_skb:
+ kfree_skb(skb);
+ return RX_HANDLER_CONSUMED;
}
static rx_handler_result_t
@@ -154,8 +119,7 @@ rmnet_map_ingress_handler(struct sk_buff *skb,
}
static int rmnet_map_egress_handler(struct sk_buff *skb,
- struct rmnet_port *port,
- struct rmnet_endpoint *ep,
+ struct rmnet_port *port, u8 mux_id,
struct net_device *orig_dev)
{
int required_headroom, additional_header_len;
@@ -174,10 +138,10 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
return RMNET_MAP_CONSUMED;
if (port->egress_data_format & RMNET_EGRESS_FORMAT_MUXING) {
- if (ep->mux_id == 0xff)
+ if (mux_id == 0xff)
map_header->mux_id = 0;
else
- map_header->mux_id = ep->mux_id;
+ map_header->mux_id = mux_id;
}
skb->protocol = htons(ETH_P_MAP);
@@ -185,6 +149,17 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
return RMNET_MAP_SUCCESS;
}
+static rx_handler_result_t
+rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev)
+{
+ if (bridge_dev) {
+ skb->dev = bridge_dev;
+ dev_queue_xmit(skb);
+ }
+
+ return RX_HANDLER_CONSUMED;
+}
+
/* Ingress / Egress Entry Points */
/* Processes packet as per ingress data format for receiving device. Logical
@@ -193,10 +168,10 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
*/
rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
{
- struct rmnet_port *port;
+ int rc = RX_HANDLER_CONSUMED;
struct sk_buff *skb = *pskb;
+ struct rmnet_port *port;
struct net_device *dev;
- int rc;
if (!skb)
return RX_HANDLER_CONSUMED;
@@ -204,28 +179,14 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
dev = skb->dev;
port = rmnet_get_port(dev);
- if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP) {
- rc = rmnet_map_ingress_handler(skb, port);
- } else {
- switch (ntohs(skb->protocol)) {
- case ETH_P_MAP:
- if (port->local_ep.rmnet_mode ==
- RMNET_EPMODE_BRIDGE) {
- rc = rmnet_ingress_deliver_packet(skb, port);
- } else {
- kfree_skb(skb);
- rc = RX_HANDLER_CONSUMED;
- }
- break;
-
- case ETH_P_IP:
- case ETH_P_IPV6:
- rc = rmnet_ingress_deliver_packet(skb, port);
- break;
-
- default:
- rc = RX_HANDLER_PASS;
- }
+ switch (port->rmnet_mode) {
+ case RMNET_EPMODE_VND:
+ if (port->ingress_data_format & RMNET_INGRESS_FORMAT_MAP)
+ rc = rmnet_map_ingress_handler(skb, port);
+ break;
+ case RMNET_EPMODE_BRIDGE:
+ rc = rmnet_bridge_handler(skb, port->bridge_ep);
+ break;
}
return rc;
@@ -235,14 +196,17 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb)
* for egress device configured in logical endpoint. Packet is then transmitted
* on the egress device.
*/
-void rmnet_egress_handler(struct sk_buff *skb,
- struct rmnet_endpoint *ep)
+void rmnet_egress_handler(struct sk_buff *skb)
{
struct net_device *orig_dev;
struct rmnet_port *port;
+ struct rmnet_priv *priv;
+ u8 mux_id;
orig_dev = skb->dev;
- skb->dev = ep->egress_dev;
+ priv = netdev_priv(orig_dev);
+ skb->dev = priv->real_dev;
+ mux_id = priv->mux_id;
port = rmnet_get_port(skb->dev);
if (!port) {
@@ -251,7 +215,7 @@ void rmnet_egress_handler(struct sk_buff *skb,
}
if (port->egress_data_format & RMNET_EGRESS_FORMAT_MAP) {
- switch (rmnet_map_egress_handler(skb, port, ep, orig_dev)) {
+ switch (rmnet_map_egress_handler(skb, port, mux_id, orig_dev)) {
case RMNET_MAP_CONSUMED:
return;
@@ -264,8 +228,7 @@ void rmnet_egress_handler(struct sk_buff *skb,
}
}
- if (ep->rmnet_mode == RMNET_EPMODE_VND)
- rmnet_vnd_tx_fixup(skb, orig_dev);
+ rmnet_vnd_tx_fixup(skb, orig_dev);
dev_queue_xmit(skb);
}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
index f2638cf5693c..3537e4ceedb3 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.h
@@ -18,8 +18,7 @@
#include "rmnet_config.h"
-void rmnet_egress_handler(struct sk_buff *skb,
- struct rmnet_endpoint *ep);
+void rmnet_egress_handler(struct sk_buff *skb);
rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index d1ea5e21b982..74d362f71cce 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -17,7 +17,7 @@
#include "rmnet_vnd.h"
static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
- struct rmnet_port *rdinfo,
+ struct rmnet_port *port,
int enable)
{
struct rmnet_map_control_command *cmd;
@@ -37,7 +37,7 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
return RX_HANDLER_CONSUMED;
}
- ep = &rdinfo->muxed_ep[mux_id];
+ ep = rmnet_get_endpoint(port, mux_id);
vnd = ep->egress_dev;
ip_family = cmd->flow_control.ip_family;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index 7967198fdd90..49102f922b31 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -19,23 +19,15 @@
#define RMNET_TX_QUEUE_LEN 1000
/* Constants */
-#define RMNET_EGRESS_FORMAT__RESERVED__ BIT(0)
#define RMNET_EGRESS_FORMAT_MAP BIT(1)
#define RMNET_EGRESS_FORMAT_AGGREGATION BIT(2)
#define RMNET_EGRESS_FORMAT_MUXING BIT(3)
-#define RMNET_EGRESS_FORMAT_MAP_CKSUMV3 BIT(4)
-#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4 BIT(5)
-#define RMNET_INGRESS_FIX_ETHERNET BIT(0)
#define RMNET_INGRESS_FORMAT_MAP BIT(1)
#define RMNET_INGRESS_FORMAT_DEAGGREGATION BIT(2)
#define RMNET_INGRESS_FORMAT_DEMUXING BIT(3)
#define RMNET_INGRESS_FORMAT_MAP_COMMANDS BIT(4)
-#define RMNET_INGRESS_FORMAT_MAP_CKSUMV3 BIT(5)
-#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4 BIT(6)
-/* Pass the frame up the stack with no modifications to skb->dev */
-#define RMNET_EPMODE_NONE (0)
/* Replace skb->dev to a virtual rmnet device and pass up the stack */
#define RMNET_EPMODE_VND (1)
/* Pass the frame directly to another device with dev_queue_xmit() */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 7f90d5587653..12bd0bbd5235 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -45,8 +45,8 @@ static netdev_tx_t rmnet_vnd_start_xmit(struct sk_buff *skb,
struct rmnet_priv *priv;
priv = netdev_priv(dev);
- if (priv->local_ep.egress_dev) {
- rmnet_egress_handler(skb, &priv->local_ep);
+ if (priv->real_dev) {
+ rmnet_egress_handler(skb);
} else {
dev->stats.tx_dropped++;
kfree_skb(skb);
@@ -74,6 +74,8 @@ static const struct net_device_ops rmnet_vnd_ops = {
.ndo_start_xmit = rmnet_vnd_start_xmit,
.ndo_change_mtu = rmnet_vnd_change_mtu,
.ndo_get_iflink = rmnet_vnd_get_iflink,
+ .ndo_add_slave = rmnet_add_bridge,
+ .ndo_del_slave = rmnet_del_bridge,
};
/* Called by kernel whenever a new rmnet<n> device is created. Sets MTU,
@@ -100,17 +102,19 @@ void rmnet_vnd_setup(struct net_device *rmnet_dev)
int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
struct rmnet_port *port,
- struct net_device *real_dev)
+ struct net_device *real_dev,
+ struct rmnet_endpoint *ep)
{
struct rmnet_priv *priv;
int rc;
- if (port->rmnet_devices[id])
+ if (ep->egress_dev)
return -EINVAL;
rc = register_netdevice(rmnet_dev);
if (!rc) {
- port->rmnet_devices[id] = rmnet_dev;
+ ep->egress_dev = rmnet_dev;
+ ep->mux_id = id;
port->nr_rmnet_devs++;
rmnet_dev->rtnl_link_ops = &rmnet_link_ops;
@@ -125,12 +129,13 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
return rc;
}
-int rmnet_vnd_dellink(u8 id, struct rmnet_port *port)
+int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
+ struct rmnet_endpoint *ep)
{
- if (id >= RMNET_MAX_LOGICAL_EP || !port->rmnet_devices[id])
+ if (id >= RMNET_MAX_LOGICAL_EP || !ep->egress_dev)
return -EINVAL;
- port->rmnet_devices[id] = NULL;
+ ep->egress_dev = NULL;
port->nr_rmnet_devs--;
return 0;
}
@@ -143,21 +148,6 @@ u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev)
return priv->mux_id;
}
-/* Gets the logical endpoint configuration for a RmNet virtual network device
- * node. Caller should confirm that devices is a RmNet VND before calling.
- */
-struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *rmnet_dev)
-{
- struct rmnet_priv *priv;
-
- if (!rmnet_dev)
- return NULL;
-
- priv = netdev_priv(rmnet_dev);
-
- return &priv->local_ep;
-}
-
int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable)
{
netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
index 8a4042f0f6bf..71e4c3286951 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h
@@ -17,11 +17,12 @@
#define _RMNET_VND_H_
int rmnet_vnd_do_flow_control(struct net_device *dev, int enable);
-struct rmnet_endpoint *rmnet_vnd_get_endpoint(struct net_device *dev);
int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
struct rmnet_port *port,
- struct net_device *real_dev);
-int rmnet_vnd_dellink(u8 id, struct rmnet_port *port);
+ struct net_device *real_dev,
+ struct rmnet_endpoint *ep);
+int rmnet_vnd_dellink(u8 id, struct rmnet_port *port,
+ struct rmnet_endpoint *ep);
void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev);
void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev);
u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev);
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index bed34684994f..bdc3833fab7e 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -438,10 +438,8 @@ static int net_open(struct net_device *dev)
hardware_init(dev);
- init_timer(&lp->timer);
+ setup_timer(&lp->timer, atp_timed_checker, (unsigned long)dev);
lp->timer.expires = jiffies + TIMED_CHECKER;
- lp->timer.data = (unsigned long)dev;
- lp->timer.function = atp_timed_checker; /* timer handler */
add_timer(&lp->timer);
netif_start_queue(dev);
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index fdf30bfa403b..2b962d349f5f 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -403,8 +403,9 @@ static void ravb_emac_init(struct net_device *ndev)
/* Receive frame limit set register */
ravb_write(ndev, ndev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, RFLR);
- /* PAUSE prohibition */
+ /* EMAC Mode: PAUSE prohibition; Duplex; RX Checksum; TX; RX */
ravb_write(ndev, ECMR_ZPF | (priv->duplex ? ECMR_DM : 0) |
+ (ndev->features & NETIF_F_RXCSUM ? ECMR_RCSC : 0) |
ECMR_TE | ECMR_RE, ECMR);
ravb_set_rate(ndev);
@@ -520,6 +521,19 @@ static void ravb_get_tx_tstamp(struct net_device *ndev)
}
}
+static void ravb_rx_csum(struct sk_buff *skb)
+{
+ u8 *hw_csum;
+
+ /* The hardware checksum is 2 bytes appended to packet data */
+ if (unlikely(skb->len < 2))
+ return;
+ hw_csum = skb_tail_pointer(skb) - 2;
+ skb->csum = csum_unfold((__force __sum16)get_unaligned_le16(hw_csum));
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb_trim(skb, skb->len - 2);
+}
+
/* Packet receive function for Ethernet AVB */
static bool ravb_rx(struct net_device *ndev, int *quota, int q)
{
@@ -587,8 +601,11 @@ static bool ravb_rx(struct net_device *ndev, int *quota, int q)
ts.tv_nsec = le32_to_cpu(desc->ts_n);
shhwtstamps->hwtstamp = timespec64_to_ktime(ts);
}
+
skb_put(skb, pkt_len);
skb->protocol = eth_type_trans(skb, ndev);
+ if (ndev->features & NETIF_F_RXCSUM)
+ ravb_rx_csum(skb);
napi_gro_receive(&priv->napi[q], skb);
stats->rx_packets++;
stats->rx_bytes += pkt_len;
@@ -1337,20 +1354,15 @@ static void ravb_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
{
struct ravb_private *priv = netdev_priv(ndev);
- wol->supported = 0;
- wol->wolopts = 0;
-
- if (priv->clk) {
- wol->supported = WAKE_MAGIC;
- wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0;
- }
+ wol->supported = WAKE_MAGIC;
+ wol->wolopts = priv->wol_enabled ? WAKE_MAGIC : 0;
}
static int ravb_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
{
struct ravb_private *priv = netdev_priv(ndev);
- if (!priv->clk || wol->wolopts & ~WAKE_MAGIC)
+ if (wol->wolopts & ~WAKE_MAGIC)
return -EOPNOTSUPP;
priv->wol_enabled = !!(wol->wolopts & WAKE_MAGIC);
@@ -1842,6 +1854,38 @@ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd)
return phy_mii_ioctl(phydev, req, cmd);
}
+static void ravb_set_rx_csum(struct net_device *ndev, bool enable)
+{
+ struct ravb_private *priv = netdev_priv(ndev);
+ unsigned long flags;
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ /* Disable TX and RX */
+ ravb_rcv_snd_disable(ndev);
+
+ /* Modify RX Checksum setting */
+ ravb_modify(ndev, ECMR, ECMR_RCSC, enable ? ECMR_RCSC : 0);
+
+ /* Enable TX and RX */
+ ravb_rcv_snd_enable(ndev);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+}
+
+static int ravb_set_features(struct net_device *ndev,
+ netdev_features_t features)
+{
+ netdev_features_t changed = ndev->features ^ features;
+
+ if (changed & NETIF_F_RXCSUM)
+ ravb_set_rx_csum(ndev, features & NETIF_F_RXCSUM);
+
+ ndev->features = features;
+
+ return 0;
+}
+
static const struct net_device_ops ravb_netdev_ops = {
.ndo_open = ravb_open,
.ndo_stop = ravb_close,
@@ -1853,6 +1897,7 @@ static const struct net_device_ops ravb_netdev_ops = {
.ndo_do_ioctl = ravb_do_ioctl,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
+ .ndo_set_features = ravb_set_features,
};
/* MDIO bus init function */
@@ -1912,22 +1957,12 @@ MODULE_DEVICE_TABLE(of, ravb_match_table);
static int ravb_set_gti(struct net_device *ndev)
{
-
+ struct ravb_private *priv = netdev_priv(ndev);
struct device *dev = ndev->dev.parent;
- struct device_node *np = dev->of_node;
unsigned long rate;
- struct clk *clk;
uint64_t inc;
- clk = of_clk_get(np, 0);
- if (IS_ERR(clk)) {
- dev_err(dev, "could not get clock\n");
- return PTR_ERR(clk);
- }
-
- rate = clk_get_rate(clk);
- clk_put(clk);
-
+ rate = clk_get_rate(priv->clk);
if (!rate)
return -EINVAL;
@@ -2004,6 +2039,9 @@ static int ravb_probe(struct platform_device *pdev)
if (!ndev)
return -ENOMEM;
+ ndev->features = NETIF_F_RXCSUM;
+ ndev->hw_features = NETIF_F_RXCSUM;
+
pm_runtime_enable(&pdev->dev);
pm_runtime_get_sync(&pdev->dev);
@@ -2073,10 +2111,11 @@ static int ravb_probe(struct platform_device *pdev)
priv->chip_id = chip_id;
- /* Get clock, if not found that's OK but Wake-On-Lan is unavailable */
priv->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(priv->clk))
- priv->clk = NULL;
+ if (IS_ERR(priv->clk)) {
+ error = PTR_ERR(priv->clk);
+ goto out_release;
+ }
/* Set function */
ndev->netdev_ops = &ravb_netdev_ops;
@@ -2144,8 +2183,7 @@ static int ravb_probe(struct platform_device *pdev)
if (error)
goto out_napi_del;
- if (priv->clk)
- device_set_wakeup_capable(&pdev->dev, 1);
+ device_set_wakeup_capable(&pdev->dev, 1);
/* Print device information */
netdev_info(ndev, "Base address at %#x, %pM, IRQ %d.\n",
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index d2e88a30f57b..7e060aa9fbed 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -594,7 +594,7 @@ static struct sh_eth_cpu_data r8a7740_data = {
};
/* There is CPU dependent code */
-static void sh_eth_set_rate_r8a777x(struct net_device *ndev)
+static void sh_eth_set_rate_rcar(struct net_device *ndev)
{
struct sh_eth_private *mdp = netdev_priv(ndev);
@@ -608,10 +608,10 @@ static void sh_eth_set_rate_r8a777x(struct net_device *ndev)
}
}
-/* R8A7778/9 */
-static struct sh_eth_cpu_data r8a777x_data = {
+/* R-Car Gen1 */
+static struct sh_eth_cpu_data rcar_gen1_data = {
.set_duplex = sh_eth_set_duplex,
- .set_rate = sh_eth_set_rate_r8a777x,
+ .set_rate = sh_eth_set_rate_rcar,
.register_type = SH_ETH_REG_FAST_RCAR,
@@ -635,10 +635,10 @@ static struct sh_eth_cpu_data r8a777x_data = {
.hw_swap = 1,
};
-/* R8A7790/1 */
-static struct sh_eth_cpu_data r8a779x_data = {
+/* R-Car Gen2 and RZ/G1 */
+static struct sh_eth_cpu_data rcar_gen2_data = {
.set_duplex = sh_eth_set_duplex,
- .set_rate = sh_eth_set_rate_r8a777x,
+ .set_rate = sh_eth_set_rate_rcar,
.register_type = SH_ETH_REG_FAST_RCAR,
@@ -3086,15 +3086,17 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev)
static const struct of_device_id sh_eth_match_table[] = {
{ .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data },
- { .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data },
- { .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data },
- { .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data },
- { .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data },
- { .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data },
- { .compatible = "renesas,ether-r8a7791", .data = &r8a779x_data },
- { .compatible = "renesas,ether-r8a7793", .data = &r8a779x_data },
- { .compatible = "renesas,ether-r8a7794", .data = &r8a779x_data },
+ { .compatible = "renesas,ether-r8a7743", .data = &rcar_gen2_data },
+ { .compatible = "renesas,ether-r8a7745", .data = &rcar_gen2_data },
+ { .compatible = "renesas,ether-r8a7778", .data = &rcar_gen1_data },
+ { .compatible = "renesas,ether-r8a7779", .data = &rcar_gen1_data },
+ { .compatible = "renesas,ether-r8a7790", .data = &rcar_gen2_data },
+ { .compatible = "renesas,ether-r8a7791", .data = &rcar_gen2_data },
+ { .compatible = "renesas,ether-r8a7793", .data = &rcar_gen2_data },
+ { .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data },
{ .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data },
+ { .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data },
+ { .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data },
{ }
};
MODULE_DEVICE_TABLE(of, sh_eth_match_table);
diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index 244c1e171017..da4807723a06 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -170,9 +170,11 @@ ether3_setbuffer(struct net_device *dev, buffer_rw_t read, int start)
/*
* Switch LED off...
*/
-static void ether3_ledoff(unsigned long data)
+static void ether3_ledoff(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)data;
+ struct dev_priv *private = from_timer(priv, t, timer);
+ struct net_device *dev = private->dev;
+
ether3_outw(priv(dev)->regs.config2 |= CFG2_CTRLO, REG_CONFIG2);
}
@@ -183,8 +185,6 @@ static inline void ether3_ledon(struct net_device *dev)
{
del_timer(&priv(dev)->timer);
priv(dev)->timer.expires = jiffies + HZ / 50; /* leave on for 1/50th second */
- priv(dev)->timer.data = (unsigned long)dev;
- priv(dev)->timer.function = ether3_ledoff;
add_timer(&priv(dev)->timer);
if (priv(dev)->regs.config2 & CFG2_CTRLO)
ether3_outw(priv(dev)->regs.config2 &= ~CFG2_CTRLO, REG_CONFIG2);
@@ -783,7 +783,8 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id)
ether3_addr(dev->dev_addr, ec);
- init_timer(&priv(dev)->timer);
+ priv(dev)->dev = dev;
+ timer_setup(&priv(dev)->timer, ether3_ledoff, 0);
/* Reset card...
*/
diff --git a/drivers/net/ethernet/seeq/ether3.h b/drivers/net/ethernet/seeq/ether3.h
index 2db63b08bdf3..ea2ba286e665 100644
--- a/drivers/net/ethernet/seeq/ether3.h
+++ b/drivers/net/ethernet/seeq/ether3.h
@@ -165,6 +165,7 @@ struct dev_priv {
unsigned char tx_tail; /* buffer nr of transmitting packet */
unsigned int rx_head; /* address to fetch next packet from */
struct timer_list timer;
+ net_device *dev;
int broken; /* 0 = ok, 1 = something went wrong */
};
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 9c0488e0f08e..18d533fdf14c 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -764,9 +764,9 @@ static inline void ioc3_setup_duplex(struct ioc3_private *ip)
ioc3_w_emcr(ip->emcr);
}
-static void ioc3_timer(unsigned long data)
+static void ioc3_timer(struct timer_list *t)
{
- struct ioc3_private *ip = (struct ioc3_private *) data;
+ struct ioc3_private *ip = from_timer(ip, t, ioc3_timer);
/* Print the link status if it has changed */
mii_check_media(&ip->mii, 1, 0);
@@ -818,8 +818,6 @@ out:
static void ioc3_mii_start(struct ioc3_private *ip)
{
ip->ioc3_timer.expires = jiffies + (12 * HZ)/10; /* 1.2 sec. */
- ip->ioc3_timer.data = (unsigned long) ip;
- ip->ioc3_timer.function = ioc3_timer;
add_timer(&ip->ioc3_timer);
}
@@ -1291,7 +1289,7 @@ static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
#endif
spin_lock_init(&ip->ioc3_lock);
- init_timer(&ip->ioc3_timer);
+ timer_setup(&ip->ioc3_timer, ioc3_timer, 0);
ioc3_stop(ip);
ioc3_init(dev);
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 40bd88362e3d..cb61247b0526 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -1065,10 +1065,8 @@ sis900_open(struct net_device *net_dev)
/* Set the timer to switch to check for link beat and perhaps switch
to an alternate media type. */
- init_timer(&sis_priv->timer);
+ setup_timer(&sis_priv->timer, sis900_timer, (unsigned long)net_dev);
sis_priv->timer.expires = jiffies + HZ;
- sis_priv->timer.data = (unsigned long)net_dev;
- sis_priv->timer.function = sis900_timer;
add_timer(&sis_priv->timer);
return 0;
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index 6a0e1d4b597c..2a9724898fcf 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -739,10 +739,8 @@ static int epic_open(struct net_device *dev)
/* Set the timer to switch to check for link beat and perhaps switch
to an alternate media type. */
- init_timer(&ep->timer);
+ setup_timer(&ep->timer, epic_timer, (unsigned long)dev);
ep->timer.expires = jiffies + 3*HZ;
- ep->timer.data = (unsigned long)dev;
- ep->timer.function = epic_timer; /* timer handler */
add_timer(&ep->timer);
return rc;
diff --git a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
index 6a9c954492f2..8b50afcdb52d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/altr_tse_pcs.c
@@ -118,10 +118,9 @@ int tse_pcs_init(void __iomem *base, struct tse_pcs *pcs)
return ret;
}
-static void pcs_link_timer_callback(unsigned long data)
+static void pcs_link_timer_callback(struct tse_pcs *pcs)
{
u16 val = 0;
- struct tse_pcs *pcs = (struct tse_pcs *)data;
void __iomem *tse_pcs_base = pcs->tse_pcs_base;
void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
@@ -138,12 +137,11 @@ static void pcs_link_timer_callback(unsigned long data)
}
}
-static void auto_nego_timer_callback(unsigned long data)
+static void auto_nego_timer_callback(struct tse_pcs *pcs)
{
u16 val = 0;
u16 speed = 0;
u16 duplex = 0;
- struct tse_pcs *pcs = (struct tse_pcs *)data;
void __iomem *tse_pcs_base = pcs->tse_pcs_base;
void __iomem *sgmii_adapter_base = pcs->sgmii_adapter_base;
@@ -201,14 +199,14 @@ static void auto_nego_timer_callback(unsigned long data)
}
}
-static void aneg_link_timer_callback(unsigned long data)
+static void aneg_link_timer_callback(struct timer_list *t)
{
- struct tse_pcs *pcs = (struct tse_pcs *)data;
+ struct tse_pcs *pcs = from_timer(pcs, t, aneg_link_timer);
if (pcs->autoneg == AUTONEG_ENABLE)
- auto_nego_timer_callback(data);
+ auto_nego_timer_callback(pcs);
else if (pcs->autoneg == AUTONEG_DISABLE)
- pcs_link_timer_callback(data);
+ pcs_link_timer_callback(pcs);
}
void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
@@ -237,8 +235,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
tse_pcs_reset(tse_pcs_base, pcs);
- setup_timer(&pcs->aneg_link_timer,
- aneg_link_timer_callback, (unsigned long)pcs);
+ timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback,
+ 0);
mod_timer(&pcs->aneg_link_timer, jiffies +
msecs_to_jiffies(AUTONEGO_LINK_TIMER));
} else if (phy_dev->autoneg == AUTONEG_DISABLE) {
@@ -270,8 +268,8 @@ void tse_pcs_fix_mac_speed(struct tse_pcs *pcs, struct phy_device *phy_dev,
tse_pcs_reset(tse_pcs_base, pcs);
- setup_timer(&pcs->aneg_link_timer,
- aneg_link_timer_callback, (unsigned long)pcs);
+ timer_setup(&pcs->aneg_link_timer, aneg_link_timer_callback,
+ 0);
mod_timer(&pcs->aneg_link_timer, jiffies +
msecs_to_jiffies(AUTONEGO_LINK_TIMER));
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index e82b4b70b7be..e1e5ac053760 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -442,8 +442,9 @@ struct stmmac_dma_ops {
void (*dma_mode)(void __iomem *ioaddr, int txmode, int rxmode,
int rxfifosz);
void (*dma_rx_mode)(void __iomem *ioaddr, int mode, u32 channel,
- int fifosz);
- void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel);
+ int fifosz, u8 qmode);
+ void (*dma_tx_mode)(void __iomem *ioaddr, int mode, u32 channel,
+ int fifosz, u8 qmode);
/* To track extra statistic (if supported) */
void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
void __iomem *ioaddr);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 866444b6c82f..2c6d7c69c8f7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -51,15 +51,11 @@
#define NSS_COMMON_CLK_SRC_CTRL_RGMII(x) 1
#define NSS_COMMON_CLK_SRC_CTRL_SGMII(x) ((x >= 2) ? 1 : 0)
-#define NSS_COMMON_MACSEC_CTL 0x28
-#define NSS_COMMON_MACSEC_CTL_EXT_BYPASS_EN(x) (1 << x)
-
#define NSS_COMMON_GMAC_CTL(x) (0x30 + (x * 4))
#define NSS_COMMON_GMAC_CTL_CSYS_REQ BIT(19)
#define NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL BIT(16)
#define NSS_COMMON_GMAC_CTL_IFG_LIMIT_OFFSET 8
#define NSS_COMMON_GMAC_CTL_IFG_OFFSET 0
-#define NSS_COMMON_GMAC_CTL_IFG_MASK 0x3f
#define NSS_COMMON_CLK_DIV_RGMII_1000 1
#define NSS_COMMON_CLK_DIV_RGMII_100 9
@@ -68,9 +64,6 @@
#define NSS_COMMON_CLK_DIV_SGMII_100 4
#define NSS_COMMON_CLK_DIV_SGMII_10 49
-#define QSGMII_PCS_MODE_CTL 0x68
-#define QSGMII_PCS_MODE_CTL_AUTONEG_EN(x) BIT((x * 8) + 7)
-
#define QSGMII_PCS_CAL_LCKDT_CTL 0x120
#define QSGMII_PCS_CAL_LCKDT_CTL_RST BIT(19)
@@ -83,15 +76,10 @@
#define QSGMII_PHY_TX_DRIVER_EN BIT(3)
#define QSGMII_PHY_QSGMII_EN BIT(7)
#define QSGMII_PHY_PHASE_LOOP_GAIN_OFFSET 12
-#define QSGMII_PHY_PHASE_LOOP_GAIN_MASK 0x7
#define QSGMII_PHY_RX_DC_BIAS_OFFSET 18
-#define QSGMII_PHY_RX_DC_BIAS_MASK 0x3
#define QSGMII_PHY_RX_INPUT_EQU_OFFSET 20
-#define QSGMII_PHY_RX_INPUT_EQU_MASK 0x3
#define QSGMII_PHY_CDR_PI_SLEW_OFFSET 22
-#define QSGMII_PHY_CDR_PI_SLEW_MASK 0x3
#define QSGMII_PHY_TX_DRV_AMP_OFFSET 28
-#define QSGMII_PHY_TX_DRV_AMP_MASK 0xf
struct ipq806x_gmac {
struct platform_device *pdev;
@@ -217,7 +205,7 @@ static int ipq806x_gmac_of_parse(struct ipq806x_gmac *gmac)
* code and keep it consistent with the Linux convention, we'll number
* them from 0 to 3 here.
*/
- if (gmac->id < 0 || gmac->id > 3) {
+ if (gmac->id > 3) {
dev_err(dev, "invalid gmac id\n");
return -EINVAL;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index d74cedf2a397..aeda3ab2d761 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -225,6 +225,8 @@ enum power_event {
#define MTL_CHAN_RX_DEBUG(x) (MTL_CHANX_BASE_ADDR(x) + 0x38)
#define MTL_OP_MODE_RSF BIT(5)
+#define MTL_OP_MODE_TXQEN_MASK GENMASK(3, 2)
+#define MTL_OP_MODE_TXQEN_AV BIT(2)
#define MTL_OP_MODE_TXQEN BIT(3)
#define MTL_OP_MODE_TSF BIT(1)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index e84831e1b63b..c110f6850ffa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -191,7 +191,7 @@ static void dwmac4_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 number_chan)
}
static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
- u32 channel, int fifosz)
+ u32 channel, int fifosz, u8 qmode)
{
unsigned int rqs = fifosz / 256 - 1;
u32 mtl_rx_op, mtl_rx_int;
@@ -218,8 +218,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
mtl_rx_op &= ~MTL_OP_MODE_RQS_MASK;
mtl_rx_op |= rqs << MTL_OP_MODE_RQS_SHIFT;
- /* enable flow control only if each channel gets 4 KiB or more FIFO */
- if (fifosz >= 4096) {
+ /* Enable flow control only if each channel gets 4 KiB or more FIFO and
+ * only if channel is not an AVB channel.
+ */
+ if ((fifosz >= 4096) && (qmode != MTL_QUEUE_AVB)) {
unsigned int rfd, rfa;
mtl_rx_op |= MTL_OP_MODE_EHFC;
@@ -271,9 +273,10 @@ static void dwmac4_dma_rx_chan_op_mode(void __iomem *ioaddr, int mode,
}
static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
- u32 channel)
+ u32 channel, int fifosz, u8 qmode)
{
u32 mtl_tx_op = readl(ioaddr + MTL_CHAN_TX_OP_MODE(channel));
+ unsigned int tqs = fifosz / 256 - 1;
if (mode == SF_DMA_MODE) {
pr_debug("GMAC: enable TX store and forward mode\n");
@@ -306,12 +309,18 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
* For an IP with DWC_EQOS_NUM_TXQ > 1, the fields TXQEN and TQS are R/W
* with reset values: TXQEN off, TQS 256 bytes.
*
- * Write the bits in both cases, since it will have no effect when RO.
- * For DWC_EQOS_NUM_TXQ > 1, the top bits in MTL_OP_MODE_TQS_MASK might
- * be RO, however, writing the whole TQS field will result in a value
- * equal to DWC_EQOS_TXFIFO_SIZE, just like for DWC_EQOS_NUM_TXQ == 1.
+ * TXQEN must be written for multi-channel operation and TQS must
+ * reflect the available fifo size per queue (total fifo size / number
+ * of enabled queues).
*/
- mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK;
+ mtl_tx_op &= ~MTL_OP_MODE_TXQEN_MASK;
+ if (qmode != MTL_QUEUE_AVB)
+ mtl_tx_op |= MTL_OP_MODE_TXQEN;
+ else
+ mtl_tx_op |= MTL_OP_MODE_TXQEN_AV;
+ mtl_tx_op &= ~MTL_OP_MODE_TQS_MASK;
+ mtl_tx_op |= tqs << MTL_OP_MODE_TQS_SHIFT;
+
writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel));
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 16bd50929084..c7a894ead274 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1749,12 +1749,20 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
u32 rx_channels_count = priv->plat->rx_queues_to_use;
u32 tx_channels_count = priv->plat->tx_queues_to_use;
int rxfifosz = priv->plat->rx_fifo_size;
+ int txfifosz = priv->plat->tx_fifo_size;
u32 txmode = 0;
u32 rxmode = 0;
u32 chan = 0;
+ u8 qmode = 0;
if (rxfifosz == 0)
rxfifosz = priv->dma_cap.rx_fifo_size;
+ if (txfifosz == 0)
+ txfifosz = priv->dma_cap.tx_fifo_size;
+
+ /* Adjust for real per queue fifo size */
+ rxfifosz /= rx_channels_count;
+ txfifosz /= tx_channels_count;
if (priv->plat->force_thresh_dma_mode) {
txmode = tc;
@@ -1777,12 +1785,19 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
/* configure all channels */
if (priv->synopsys_id >= DWMAC_CORE_4_00) {
- for (chan = 0; chan < rx_channels_count; chan++)
+ for (chan = 0; chan < rx_channels_count; chan++) {
+ qmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
+
priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
- rxfifosz);
+ rxfifosz, qmode);
+ }
- for (chan = 0; chan < tx_channels_count; chan++)
- priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+ for (chan = 0; chan < tx_channels_count; chan++) {
+ qmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
+
+ priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan,
+ txfifosz, qmode);
+ }
} else {
priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
rxfifosz);
@@ -1946,15 +1961,27 @@ static void stmmac_tx_err(struct stmmac_priv *priv, u32 chan)
static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode,
u32 rxmode, u32 chan)
{
+ u8 rxqmode = priv->plat->rx_queues_cfg[chan].mode_to_use;
+ u8 txqmode = priv->plat->tx_queues_cfg[chan].mode_to_use;
+ u32 rx_channels_count = priv->plat->rx_queues_to_use;
+ u32 tx_channels_count = priv->plat->tx_queues_to_use;
int rxfifosz = priv->plat->rx_fifo_size;
+ int txfifosz = priv->plat->tx_fifo_size;
if (rxfifosz == 0)
rxfifosz = priv->dma_cap.rx_fifo_size;
+ if (txfifosz == 0)
+ txfifosz = priv->dma_cap.tx_fifo_size;
+
+ /* Adjust for real per queue fifo size */
+ rxfifosz /= rx_channels_count;
+ txfifosz /= tx_channels_count;
if (priv->synopsys_id >= DWMAC_CORE_4_00) {
priv->hw->dma->dma_rx_mode(priv->ioaddr, rxmode, chan,
- rxfifosz);
- priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan);
+ rxfifosz, rxqmode);
+ priv->hw->dma->dma_tx_mode(priv->ioaddr, txmode, chan,
+ txfifosz, txqmode);
} else {
priv->hw->dma->dma_mode(priv->ioaddr, txmode, rxmode,
rxfifosz);
@@ -2217,10 +2244,8 @@ static void stmmac_init_tx_coalesce(struct stmmac_priv *priv)
{
priv->tx_coal_frames = STMMAC_TX_FRAMES;
priv->tx_coal_timer = STMMAC_COAL_TX_TIMER;
- init_timer(&priv->txtimer);
+ setup_timer(&priv->txtimer, stmmac_tx_timer, (unsigned long)priv);
priv->txtimer.expires = STMMAC_COAL_TIMER(priv->tx_coal_timer);
- priv->txtimer.data = (unsigned long)priv;
- priv->txtimer.function = stmmac_tx_timer;
add_timer(&priv->txtimer);
}
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 382993c1561c..113bd57e2ea0 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -4079,9 +4079,9 @@ done:
#endif
}
-static void cas_link_timer(unsigned long data)
+static void cas_link_timer(struct timer_list *t)
{
- struct cas *cp = (struct cas *) data;
+ struct cas *cp = from_timer(cp, t, link_timer);
int mask, pending = 0, reset = 0;
unsigned long flags;
@@ -5039,9 +5039,7 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
spin_lock_init(&cp->stat_lock[N_TX_RINGS]);
mutex_init(&cp->pm_mutex);
- init_timer(&cp->link_timer);
- cp->link_timer.function = cas_link_timer;
- cp->link_timer.data = (unsigned long) cp;
+ timer_setup(&cp->link_timer, cas_link_timer, 0);
#if 1
/* Just in case the implementation of atomic operations
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 5b56c24b6ed2..5ea037672e6f 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -307,7 +307,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
/* Get (or create) the vnet associated with this port */
vp = vsw_get_vnet(hp, vdev->mp, &handle);
- if (unlikely(IS_ERR(vp))) {
+ if (IS_ERR(vp)) {
err = PTR_ERR(vp);
pr_err("Failed to get vnet for vsw-port\n");
mdesc_release(hp);
@@ -363,8 +363,7 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
list_add_rcu(&port->list, &vp->port_list);
spin_unlock_irqrestore(&vp->lock, flags);
- setup_timer(&port->clean_timer, sunvnet_clean_timer_expire_common,
- (unsigned long)port);
+ timer_setup(&port->clean_timer, sunvnet_clean_timer_expire_common, 0);
err = register_netdev(dev);
if (err) {
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 6a4e8e1bbd90..ab502ee35fb2 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -2221,9 +2221,9 @@ static int niu_link_status(struct niu *np, int *link_up_p)
return err;
}
-static void niu_timer(unsigned long __opaque)
+static void niu_timer(struct timer_list *t)
{
- struct niu *np = (struct niu *) __opaque;
+ struct niu *np = from_timer(np, t, timer);
unsigned long off;
int err, link_up;
@@ -6123,10 +6123,8 @@ static int niu_open(struct net_device *dev)
err = niu_init_hw(np);
if (!err) {
- init_timer(&np->timer);
+ timer_setup(&np->timer, niu_timer, 0);
np->timer.expires = jiffies + HZ;
- np->timer.data = (unsigned long) np;
- np->timer.function = niu_timer;
err = niu_enable_interrupts(np, 1);
if (err)
@@ -6775,10 +6773,8 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu)
err = niu_init_hw(np);
if (!err) {
- init_timer(&np->timer);
+ timer_setup(&np->timer, niu_timer, 0);
np->timer.expires = jiffies + HZ;
- np->timer.data = (unsigned long) np;
- np->timer.function = niu_timer;
err = niu_enable_interrupts(np, 1);
if (err)
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index 3189722110c2..0b1f41f6bceb 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -523,9 +523,9 @@ static int try_next_permutation(struct bigmac *bp, void __iomem *tregs)
return -1;
}
-static void bigmac_timer(unsigned long data)
+static void bigmac_timer(struct timer_list *t)
{
- struct bigmac *bp = (struct bigmac *) data;
+ struct bigmac *bp = from_timer(bp, t, bigmac_timer);
void __iomem *tregs = bp->tregs;
int restart_timer = 0;
@@ -613,8 +613,6 @@ static void bigmac_begin_auto_negotiation(struct bigmac *bp)
bp->timer_state = ltrywait;
bp->timer_ticks = 0;
bp->bigmac_timer.expires = jiffies + (12 * HZ) / 10;
- bp->bigmac_timer.data = (unsigned long) bp;
- bp->bigmac_timer.function = bigmac_timer;
add_timer(&bp->bigmac_timer);
}
@@ -921,7 +919,7 @@ static int bigmac_open(struct net_device *dev)
printk(KERN_ERR "BIGMAC: Can't order irq %d to go.\n", dev->irq);
return ret;
}
- init_timer(&bp->bigmac_timer);
+ timer_setup(&bp->bigmac_timer, bigmac_timer, 0);
ret = bigmac_init_hw(bp, 0);
if (ret)
free_irq(dev->irq, bp);
@@ -1172,7 +1170,7 @@ static int bigmac_ether_init(struct platform_device *op,
"board-version", 1);
/* Init auto-negotiation timer state. */
- init_timer(&bp->bigmac_timer);
+ timer_setup(&bp->bigmac_timer, bigmac_timer, 0);
bp->timer_state = asleep;
bp->timer_ticks = 0;
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index fa607d062cb3..a7afcee3c5ae 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -1496,9 +1496,9 @@ static int gem_mdio_link_not_up(struct gem *gp)
}
}
-static void gem_link_timer(unsigned long data)
+static void gem_link_timer(struct timer_list *t)
{
- struct gem *gp = (struct gem *) data;
+ struct gem *gp = from_timer(gp, t, link_timer);
struct net_device *dev = gp->dev;
int restart_aneg = 0;
@@ -2910,9 +2910,7 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
gp->msg_enable = DEFAULT_MSG;
- init_timer(&gp->link_timer);
- gp->link_timer.function = gem_link_timer;
- gp->link_timer.data = (unsigned long) gp;
+ timer_setup(&gp->link_timer, gem_link_timer, 0);
INIT_WORK(&gp->reset_task, gem_reset_task);
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index 9e983e1d8249..0431f1e5f511 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -685,9 +685,9 @@ static int is_lucent_phy(struct happy_meal *hp)
return ret;
}
-static void happy_meal_timer(unsigned long data)
+static void happy_meal_timer(struct timer_list *t)
{
- struct happy_meal *hp = (struct happy_meal *) data;
+ struct happy_meal *hp = from_timer(hp, t, happy_timer);
void __iomem *tregs = hp->tcvregs;
int restart_timer = 0;
@@ -1413,8 +1413,6 @@ force_link:
hp->timer_ticks = 0;
hp->happy_timer.expires = jiffies + (12 * HZ)/10; /* 1.2 sec. */
- hp->happy_timer.data = (unsigned long) hp;
- hp->happy_timer.function = happy_meal_timer;
add_timer(&hp->happy_timer);
}
@@ -2819,7 +2817,7 @@ static int happy_meal_sbus_probe_one(struct platform_device *op, int is_qfe)
hp->timer_state = asleep;
hp->timer_ticks = 0;
- init_timer(&hp->happy_timer);
+ timer_setup(&hp->happy_timer, happy_meal_timer, 0);
hp->dev = dev;
dev->netdev_ops = &hme_netdev_ops;
@@ -3133,7 +3131,7 @@ static int happy_meal_pci_probe(struct pci_dev *pdev,
hp->timer_state = asleep;
hp->timer_ticks = 0;
- init_timer(&hp->happy_timer);
+ timer_setup(&hp->happy_timer, happy_meal_timer, 0);
hp->irq = pdev->irq;
hp->dev = dev;
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index 0b95105f7060..27fb22638885 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -492,8 +492,7 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
pr_info("%s: PORT ( remote-mac %pM%s )\n",
vp->dev->name, port->raddr, switch_port ? " switch-port" : "");
- setup_timer(&port->clean_timer, sunvnet_clean_timer_expire_common,
- (unsigned long)port);
+ timer_setup(&port->clean_timer, sunvnet_clean_timer_expire_common, 0);
napi_enable(&port->napi);
vio_port_up(&port->vio);
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index ecf456c7b6d1..8aa3ce46bb81 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -1040,9 +1040,9 @@ static inline void vnet_free_skbs(struct sk_buff *skb)
}
}
-void sunvnet_clean_timer_expire_common(unsigned long port0)
+void sunvnet_clean_timer_expire_common(struct timer_list *t)
{
- struct vnet_port *port = (struct vnet_port *)port0;
+ struct vnet_port *port = from_timer(port, t, clean_timer);
struct sk_buff *freeskbs;
unsigned pending;
diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h
index b20d6fa7ef25..656673c31066 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.h
+++ b/drivers/net/ethernet/sun/sunvnet_common.h
@@ -129,7 +129,7 @@ struct vnet {
((__port)->vsw ? (__port)->dev : (__port)->vp->dev)
/* Common funcs */
-void sunvnet_clean_timer_expire_common(unsigned long port0);
+void sunvnet_clean_timer_expire_common(struct timer_list *t);
int sunvnet_open_common(struct net_device *dev);
int sunvnet_close_common(struct net_device *dev);
void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp);
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index ddd43e09111e..cd1185e66133 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -859,9 +859,7 @@ void cpsw_ale_start(struct cpsw_ale *ale)
cpsw_ale_control_set(ale, 0, ALE_ENABLE, 1);
cpsw_ale_control_set(ale, 0, ALE_CLEAR, 1);
- init_timer(&ale->timer);
- ale->timer.data = (unsigned long)ale;
- ale->timer.function = cpsw_ale_timer;
+ setup_timer(&ale->timer, cpsw_ale_timer, (unsigned long)ale);
if (ale->ageout) {
ale->timer.expires = jiffies + ale->ageout;
add_timer(&ale->timer);
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 28cb38af1a34..4ad821655e51 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -3616,9 +3616,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
}
spin_unlock_bh(&gbe_dev->hw_stats_lock);
- init_timer(&gbe_dev->timer);
- gbe_dev->timer.data = (unsigned long)gbe_dev;
- gbe_dev->timer.function = netcp_ethss_timer;
+ setup_timer(&gbe_dev->timer, netcp_ethss_timer,
+ (unsigned long)gbe_dev);
gbe_dev->timer.expires = jiffies + GBE_TIMER_INTERVAL;
add_timer(&gbe_dev->timer);
*inst_priv = gbe_dev;
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index c8d53d8c83ee..8f53d762fbc4 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -172,7 +172,8 @@ static u32 tlan_handle_tx_eoc(struct net_device *, u16);
static u32 tlan_handle_status_check(struct net_device *, u16);
static u32 tlan_handle_rx_eoc(struct net_device *, u16);
-static void tlan_timer(unsigned long);
+static void tlan_timer(struct timer_list *t);
+static void tlan_phy_monitor(struct timer_list *t);
static void tlan_reset_lists(struct net_device *);
static void tlan_free_lists(struct net_device *);
@@ -190,7 +191,6 @@ static void tlan_phy_power_up(struct net_device *);
static void tlan_phy_reset(struct net_device *);
static void tlan_phy_start_link(struct net_device *);
static void tlan_phy_finish_auto_neg(struct net_device *);
-static void tlan_phy_monitor(unsigned long);
/*
static int tlan_phy_nop(struct net_device *);
@@ -254,11 +254,10 @@ tlan_set_timer(struct net_device *dev, u32 ticks, u32 type)
spin_unlock_irqrestore(&priv->lock, flags);
return;
}
- priv->timer.function = tlan_timer;
+ priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer;
if (!in_irq())
spin_unlock_irqrestore(&priv->lock, flags);
- priv->timer.data = (unsigned long) dev;
priv->timer_set_at = jiffies;
priv->timer_type = type;
mod_timer(&priv->timer, jiffies + ticks);
@@ -926,8 +925,8 @@ static int tlan_open(struct net_device *dev)
return err;
}
- init_timer(&priv->timer);
- init_timer(&priv->media_timer);
+ timer_setup(&priv->timer, NULL, 0);
+ timer_setup(&priv->media_timer, tlan_phy_monitor, 0);
tlan_start(dev);
@@ -1426,8 +1425,7 @@ static u32 tlan_handle_tx_eof(struct net_device *dev, u16 host_int)
tlan_dio_write8(dev->base_addr,
TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT);
if (priv->timer.function == NULL) {
- priv->timer.function = tlan_timer;
- priv->timer.data = (unsigned long) dev;
+ priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer;
priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY;
priv->timer_set_at = jiffies;
priv->timer_type = TLAN_TIMER_ACTIVITY;
@@ -1578,8 +1576,7 @@ drop_and_reuse:
tlan_dio_write8(dev->base_addr,
TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT);
if (priv->timer.function == NULL) {
- priv->timer.function = tlan_timer;
- priv->timer.data = (unsigned long) dev;
+ priv->timer.function = (TIMER_FUNC_TYPE)tlan_timer;
priv->timer.expires = jiffies + TLAN_TIMER_ACT_DELAY;
priv->timer_set_at = jiffies;
priv->timer_type = TLAN_TIMER_ACTIVITY;
@@ -1836,10 +1833,10 @@ ThunderLAN driver timer function
*
**************************************************************/
-static void tlan_timer(unsigned long data)
+static void tlan_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) data;
- struct tlan_priv *priv = netdev_priv(dev);
+ struct tlan_priv *priv = from_timer(priv, t, timer);
+ struct net_device *dev = priv->dev;
u32 elapsed;
unsigned long flags = 0;
@@ -1872,7 +1869,6 @@ static void tlan_timer(unsigned long data)
tlan_dio_write8(dev->base_addr,
TLAN_LED_REG, TLAN_LED_LINK);
} else {
- priv->timer.function = tlan_timer;
priv->timer.expires = priv->timer_set_at
+ TLAN_TIMER_ACT_DELAY;
spin_unlock_irqrestore(&priv->lock, flags);
@@ -2317,8 +2313,6 @@ tlan_finish_reset(struct net_device *dev)
} else
netdev_info(dev, "Link active\n");
/* Enabling link beat monitoring */
- priv->media_timer.function = tlan_phy_monitor;
- priv->media_timer.data = (unsigned long) dev;
priv->media_timer.expires = jiffies + HZ;
add_timer(&priv->media_timer);
}
@@ -2763,10 +2757,10 @@ static void tlan_phy_finish_auto_neg(struct net_device *dev)
*
*******************************************************************/
-static void tlan_phy_monitor(unsigned long data)
+static void tlan_phy_monitor(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *) data;
- struct tlan_priv *priv = netdev_priv(dev);
+ struct tlan_priv *priv = from_timer(priv, t, media_timer);
+ struct net_device *dev = priv->dev;
u16 phy;
u16 phy_status;
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index cec9e70ab995..a913538d3213 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -2256,16 +2256,14 @@ spider_net_setup_netdev(struct spider_net_card *card)
pci_set_drvdata(card->pdev, netdev);
- init_timer(&card->tx_timer);
- card->tx_timer.function =
- (void (*)(unsigned long)) spider_net_cleanup_tx_ring;
- card->tx_timer.data = (unsigned long) card;
+ setup_timer(&card->tx_timer,
+ (void(*)(unsigned long))spider_net_cleanup_tx_ring,
+ (unsigned long)card);
netdev->irq = card->pdev->irq;
card->aneg_count = 0;
- init_timer(&card->aneg_timer);
- card->aneg_timer.function = spider_net_link_phy;
- card->aneg_timer.data = (unsigned long) card;
+ setup_timer(&card->aneg_timer, spider_net_link_phy,
+ (unsigned long)card);
netif_napi_add(netdev, &card->napi,
spider_net_poll, SPIDER_NET_NAPI_WEIGHT);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index ed51018a813e..5ec39f113127 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1134,24 +1134,11 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
return t;
}
-static bool is_all_zero(const u8 *fp, size_t size)
-{
- int i;
-
- for (i = 0; i < size; i++)
- if (fp[i])
- return false;
- return true;
-}
-
static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
{
- if (info->key.tun_id || info->key.tun_flags || info->key.tos ||
- info->key.ttl || info->key.label || info->key.tp_src ||
- !is_all_zero((const u8 *)&info->key.u, sizeof(info->key.u)))
- return false;
- else
- return true;
+ return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
+ info->key.ttl || info->key.label || info->key.tp_src ||
+ memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
}
static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 021a8ec411ab..32f49c4ce457 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -35,7 +35,7 @@
#include <linux/tcp.h>
#include <linux/semaphore.h>
#include <linux/compat.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
#define SIXPACK_VERSION "Revision: 0.3.0"
@@ -120,7 +120,7 @@ struct sixpack {
struct timer_list tx_t;
struct timer_list resync_t;
- atomic_t refcnt;
+ refcount_t refcnt;
struct semaphore dead_sem;
spinlock_t lock;
};
@@ -136,9 +136,9 @@ static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char);
* Note that in case of DAMA operation, the data is not sent here.
*/
-static void sp_xmit_on_air(unsigned long channel)
+static void sp_xmit_on_air(struct timer_list *t)
{
- struct sixpack *sp = (struct sixpack *) channel;
+ struct sixpack *sp = from_timer(sp, t, tx_t);
int actual, when = sp->slottime;
static unsigned char random;
@@ -229,7 +229,7 @@ static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len)
sp->xleft = count;
sp->xhead = sp->xbuff;
sp->status2 = count;
- sp_xmit_on_air((unsigned long)sp);
+ sp_xmit_on_air(&sp->tx_t);
}
return;
@@ -381,7 +381,7 @@ static struct sixpack *sp_get(struct tty_struct *tty)
read_lock(&disc_data_lock);
sp = tty->disc_data;
if (sp)
- atomic_inc(&sp->refcnt);
+ refcount_inc(&sp->refcnt);
read_unlock(&disc_data_lock);
return sp;
@@ -389,7 +389,7 @@ static struct sixpack *sp_get(struct tty_struct *tty)
static void sp_put(struct sixpack *sp)
{
- if (atomic_dec_and_test(&sp->refcnt))
+ if (refcount_dec_and_test(&sp->refcnt))
up(&sp->dead_sem);
}
@@ -500,9 +500,9 @@ static inline void tnc_set_sync_state(struct sixpack *sp, int new_tnc_state)
__tnc_set_sync_state(sp, new_tnc_state);
}
-static void resync_tnc(unsigned long channel)
+static void resync_tnc(struct timer_list *t)
{
- struct sixpack *sp = (struct sixpack *) channel;
+ struct sixpack *sp = from_timer(sp, t, resync_t);
static char resync_cmd = 0xe8;
/* clear any data that might have been received */
@@ -526,8 +526,6 @@ static void resync_tnc(unsigned long channel)
/* Start resync timer again -- the TNC might be still absent */
del_timer(&sp->resync_t);
- sp->resync_t.data = (unsigned long) sp;
- sp->resync_t.function = resync_tnc;
sp->resync_t.expires = jiffies + SIXP_RESYNC_TIMEOUT;
add_timer(&sp->resync_t);
}
@@ -541,8 +539,6 @@ static inline int tnc_init(struct sixpack *sp)
sp->tty->ops->write(sp->tty, &inbyte, 1);
del_timer(&sp->resync_t);
- sp->resync_t.data = (unsigned long) sp;
- sp->resync_t.function = resync_tnc;
sp->resync_t.expires = jiffies + SIXP_RESYNC_TIMEOUT;
add_timer(&sp->resync_t);
@@ -580,7 +576,7 @@ static int sixpack_open(struct tty_struct *tty)
sp->dev = dev;
spin_lock_init(&sp->lock);
- atomic_set(&sp->refcnt, 1);
+ refcount_set(&sp->refcnt, 1);
sema_init(&sp->dead_sem, 0);
/* !!! length of the buffers. MTU is IP MTU, not PACLEN! */
@@ -623,11 +619,9 @@ static int sixpack_open(struct tty_struct *tty)
netif_start_queue(dev);
- init_timer(&sp->tx_t);
- sp->tx_t.function = sp_xmit_on_air;
- sp->tx_t.data = (unsigned long) sp;
+ timer_setup(&sp->tx_t, sp_xmit_on_air, 0);
- init_timer(&sp->resync_t);
+ timer_setup(&sp->resync_t, resync_tnc, 0);
spin_unlock_bh(&sp->lock);
@@ -676,7 +670,7 @@ static void sixpack_close(struct tty_struct *tty)
* We have now ensured that nobody can start using ap from now on, but
* we have to wait for all existing users to finish.
*/
- if (!atomic_dec_and_test(&sp->refcnt))
+ if (!refcount_dec_and_test(&sp->refcnt))
down(&sp->dead_sem);
/* We must stop the queue to avoid potentially scribbling
@@ -928,8 +922,6 @@ static void decode_prio_command(struct sixpack *sp, unsigned char cmd)
if (sp->tnc_state == TNC_IN_SYNC) {
del_timer(&sp->resync_t);
- sp->resync_t.data = (unsigned long) sp;
- sp->resync_t.function = resync_tnc;
sp->resync_t.expires = jiffies + SIXP_INIT_RESYNC_TIMEOUT;
add_timer(&sp->resync_t);
}
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 1503f10122f7..1e62d00732f2 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -840,6 +840,7 @@ static int epp_open(struct net_device *dev)
unsigned char tmp[128];
unsigned char stat;
unsigned long tstart;
+ struct pardev_cb par_cb;
if (!pp) {
printk(KERN_ERR "%s: parport at 0x%lx unknown\n", bc_drvname, dev->base_addr);
@@ -859,8 +860,21 @@ static int epp_open(struct net_device *dev)
return -EIO;
}
memset(&bc->modem, 0, sizeof(bc->modem));
- bc->pdev = parport_register_device(pp, dev->name, NULL, epp_wakeup,
- NULL, PARPORT_DEV_EXCL, dev);
+ memset(&par_cb, 0, sizeof(par_cb));
+ par_cb.wakeup = epp_wakeup;
+ par_cb.private = (void *)dev;
+ par_cb.flags = PARPORT_DEV_EXCL;
+ for (i = 0; i < NR_PORTS; i++)
+ if (baycom_device[i] == dev)
+ break;
+
+ if (i == NR_PORTS) {
+ pr_err("%s: no device found\n", bc_drvname);
+ parport_put_port(pp);
+ return -ENODEV;
+ }
+
+ bc->pdev = parport_register_dev_model(pp, dev->name, &par_cb, i);
parport_put_port(pp);
if (!bc->pdev) {
printk(KERN_ERR "%s: cannot register parport at 0x%lx\n", bc_drvname, pp->base);
@@ -1185,6 +1199,23 @@ MODULE_LICENSE("GPL");
/* --------------------------------------------------------------------- */
+static int baycom_epp_par_probe(struct pardevice *par_dev)
+{
+ struct device_driver *drv = par_dev->dev.driver;
+ int len = strlen(drv->name);
+
+ if (strncmp(par_dev->name, drv->name, len))
+ return -ENODEV;
+
+ return 0;
+}
+
+static struct parport_driver baycom_epp_par_driver = {
+ .name = "bce",
+ .probe = baycom_epp_par_probe,
+ .devmodel = true,
+};
+
static void __init baycom_epp_dev_setup(struct net_device *dev)
{
struct baycom_state *bc = netdev_priv(dev);
@@ -1204,10 +1235,15 @@ static void __init baycom_epp_dev_setup(struct net_device *dev)
static int __init init_baycomepp(void)
{
- int i, found = 0;
+ int i, found = 0, ret;
char set_hw = 1;
printk(bc_drvinfo);
+
+ ret = parport_register_driver(&baycom_epp_par_driver);
+ if (ret)
+ return ret;
+
/*
* register net devices
*/
@@ -1241,7 +1277,12 @@ static int __init init_baycomepp(void)
found++;
}
- return found ? 0 : -ENXIO;
+ if (found == 0) {
+ parport_unregister_driver(&baycom_epp_par_driver);
+ return -ENXIO;
+ }
+
+ return 0;
}
static void __exit cleanup_baycomepp(void)
@@ -1260,6 +1301,7 @@ static void __exit cleanup_baycomepp(void)
printk(paranoia_str, "cleanup_module");
}
}
+ parport_unregister_driver(&baycom_epp_par_driver);
}
module_init(init_baycomepp);
diff --git a/drivers/net/hamradio/baycom_par.c b/drivers/net/hamradio/baycom_par.c
index e1783832d304..1f7ceafd61ff 100644
--- a/drivers/net/hamradio/baycom_par.c
+++ b/drivers/net/hamradio/baycom_par.c
@@ -311,7 +311,9 @@ static void par96_wakeup(void *handle)
static int par96_open(struct net_device *dev)
{
struct baycom_state *bc = netdev_priv(dev);
+ struct pardev_cb par_cb;
struct parport *pp;
+ int i;
if (!dev || !bc)
return -ENXIO;
@@ -332,8 +334,21 @@ static int par96_open(struct net_device *dev)
}
memset(&bc->modem, 0, sizeof(bc->modem));
bc->hdrv.par.bitrate = 9600;
- bc->pdev = parport_register_device(pp, dev->name, NULL, par96_wakeup,
- par96_interrupt, PARPORT_DEV_EXCL, dev);
+ memset(&par_cb, 0, sizeof(par_cb));
+ par_cb.wakeup = par96_wakeup;
+ par_cb.irq_func = par96_interrupt;
+ par_cb.private = (void *)dev;
+ par_cb.flags = PARPORT_DEV_EXCL;
+ for (i = 0; i < NR_PORTS; i++)
+ if (baycom_device[i] == dev)
+ break;
+
+ if (i == NR_PORTS) {
+ pr_err("%s: no device found\n", bc_drvname);
+ parport_put_port(pp);
+ return -ENODEV;
+ }
+ bc->pdev = parport_register_dev_model(pp, dev->name, &par_cb, i);
parport_put_port(pp);
if (!bc->pdev) {
printk(KERN_ERR "baycom_par: cannot register parport at 0x%lx\n", dev->base_addr);
@@ -490,12 +505,34 @@ MODULE_LICENSE("GPL");
/* --------------------------------------------------------------------- */
+static int baycom_par_probe(struct pardevice *par_dev)
+{
+ struct device_driver *drv = par_dev->dev.driver;
+ int len = strlen(drv->name);
+
+ if (strncmp(par_dev->name, drv->name, len))
+ return -ENODEV;
+
+ return 0;
+}
+
+static struct parport_driver baycom_par_driver = {
+ .name = "bcp",
+ .probe = baycom_par_probe,
+ .devmodel = true,
+};
+
static int __init init_baycompar(void)
{
- int i, found = 0;
+ int i, found = 0, ret;
char set_hw = 1;
printk(bc_drvinfo);
+
+ ret = parport_register_driver(&baycom_par_driver);
+ if (ret)
+ return ret;
+
/*
* register net devices
*/
@@ -524,8 +561,10 @@ static int __init init_baycompar(void)
baycom_device[i] = dev;
}
- if (!found)
+ if (!found) {
+ parport_unregister_driver(&baycom_par_driver);
return -ENXIO;
+ }
return 0;
}
@@ -539,6 +578,7 @@ static void __exit cleanup_baycompar(void)
if (dev)
hdlcdrv_unregister(dev);
}
+ parport_unregister_driver(&baycom_par_driver);
}
module_init(init_baycompar);
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index aec6c26563cf..54bf8e6e4a09 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -477,7 +477,8 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
cmd = 0;
}
ax->crcauto = (cmd ? 0 : 1);
- printk(KERN_INFO "mkiss: %s: crc mode %s %d\n", ax->dev->name, (len) ? "set to" : "is", cmd);
+ printk(KERN_INFO "mkiss: %s: crc mode set to %d\n",
+ ax->dev->name, cmd);
}
spin_unlock_bh(&ax->buflock);
netif_start_queue(dev);
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 295f267b73ea..c9f7215c5dc2 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -185,14 +185,15 @@
static const char banner[] __initconst = KERN_INFO \
"AX.25: Z8530 SCC driver version "VERSION".dl1bke\n";
-static void t_dwait(unsigned long);
-static void t_txdelay(unsigned long);
-static void t_tail(unsigned long);
-static void t_busy(unsigned long);
-static void t_maxkeyup(unsigned long);
-static void t_idle(unsigned long);
+static void t_dwait(struct timer_list *t);
+static void t_txdelay(struct timer_list *t);
+static void t_tail(struct timer_list *t);
+static void t_busy(struct timer_list *);
+static void t_maxkeyup(struct timer_list *);
+static void t_idle(struct timer_list *t);
static void scc_tx_done(struct scc_channel *);
-static void scc_start_tx_timer(struct scc_channel *, void (*)(unsigned long), unsigned long);
+static void scc_start_tx_timer(struct scc_channel *,
+ void (*)(struct timer_list *), unsigned long);
static void scc_start_maxkeyup(struct scc_channel *);
static void scc_start_defer(struct scc_channel *);
@@ -992,24 +993,27 @@ static void scc_key_trx(struct scc_channel *scc, char tx)
/* ----> SCC timer interrupt handler and friends. <---- */
-static void __scc_start_tx_timer(struct scc_channel *scc, void (*handler)(unsigned long), unsigned long when)
+static void __scc_start_tx_timer(struct scc_channel *scc,
+ void (*handler)(struct timer_list *t),
+ unsigned long when)
{
del_timer(&scc->tx_t);
if (when == 0)
{
- handler((unsigned long) scc);
+ handler(&scc->tx_t);
} else
if (when != TIMER_OFF)
{
- scc->tx_t.data = (unsigned long) scc;
- scc->tx_t.function = handler;
+ scc->tx_t.function = (TIMER_FUNC_TYPE)handler;
scc->tx_t.expires = jiffies + (when*HZ)/100;
add_timer(&scc->tx_t);
}
}
-static void scc_start_tx_timer(struct scc_channel *scc, void (*handler)(unsigned long), unsigned long when)
+static void scc_start_tx_timer(struct scc_channel *scc,
+ void (*handler)(struct timer_list *t),
+ unsigned long when)
{
unsigned long flags;
@@ -1027,8 +1031,7 @@ static void scc_start_defer(struct scc_channel *scc)
if (scc->kiss.maxdefer != 0 && scc->kiss.maxdefer != TIMER_OFF)
{
- scc->tx_wdog.data = (unsigned long) scc;
- scc->tx_wdog.function = t_busy;
+ scc->tx_wdog.function = (TIMER_FUNC_TYPE)t_busy;
scc->tx_wdog.expires = jiffies + HZ*scc->kiss.maxdefer;
add_timer(&scc->tx_wdog);
}
@@ -1044,8 +1047,7 @@ static void scc_start_maxkeyup(struct scc_channel *scc)
if (scc->kiss.maxkeyup != 0 && scc->kiss.maxkeyup != TIMER_OFF)
{
- scc->tx_wdog.data = (unsigned long) scc;
- scc->tx_wdog.function = t_maxkeyup;
+ scc->tx_wdog.function = (TIMER_FUNC_TYPE)t_maxkeyup;
scc->tx_wdog.expires = jiffies + HZ*scc->kiss.maxkeyup;
add_timer(&scc->tx_wdog);
}
@@ -1121,9 +1123,9 @@ static inline int is_grouped(struct scc_channel *scc)
* fulldup == 2: mintime expired, reset status or key trx and start txdelay
*/
-static void t_dwait(unsigned long channel)
+static void t_dwait(struct timer_list *t)
{
- struct scc_channel *scc = (struct scc_channel *) channel;
+ struct scc_channel *scc = from_timer(scc, t, tx_t);
if (scc->stat.tx_state == TXS_WAIT) /* maxkeyup or idle timeout */
{
@@ -1163,9 +1165,9 @@ static void t_dwait(unsigned long channel)
* kick transmission by a fake scc_txint(scc), start 'maxkeyup' watchdog.
*/
-static void t_txdelay(unsigned long channel)
+static void t_txdelay(struct timer_list *t)
{
- struct scc_channel *scc = (struct scc_channel *) channel;
+ struct scc_channel *scc = from_timer(scc, t, tx_t);
scc_start_maxkeyup(scc);
@@ -1184,9 +1186,9 @@ static void t_txdelay(unsigned long channel)
* transmission after 'mintime' seconds
*/
-static void t_tail(unsigned long channel)
+static void t_tail(struct timer_list *t)
{
- struct scc_channel *scc = (struct scc_channel *) channel;
+ struct scc_channel *scc = from_timer(scc, t, tx_t);
unsigned long flags;
spin_lock_irqsave(&scc->lock, flags);
@@ -1211,9 +1213,9 @@ static void t_tail(unsigned long channel)
* throw away send buffers if DCD remains active too long.
*/
-static void t_busy(unsigned long channel)
+static void t_busy(struct timer_list *t)
{
- struct scc_channel *scc = (struct scc_channel *) channel;
+ struct scc_channel *scc = from_timer(scc, t, tx_wdog);
del_timer(&scc->tx_t);
netif_stop_queue(scc->dev); /* don't pile on the wabbit! */
@@ -1230,9 +1232,9 @@ static void t_busy(unsigned long channel)
* this is our watchdog.
*/
-static void t_maxkeyup(unsigned long channel)
+static void t_maxkeyup(struct timer_list *t)
{
- struct scc_channel *scc = (struct scc_channel *) channel;
+ struct scc_channel *scc = from_timer(scc, t, tx_wdog);
unsigned long flags;
spin_lock_irqsave(&scc->lock, flags);
@@ -1264,9 +1266,9 @@ static void t_maxkeyup(unsigned long channel)
* expires.
*/
-static void t_idle(unsigned long channel)
+static void t_idle(struct timer_list *t)
{
- struct scc_channel *scc = (struct scc_channel *) channel;
+ struct scc_channel *scc = from_timer(scc, t, tx_t);
del_timer(&scc->tx_wdog);
@@ -1397,9 +1399,9 @@ static unsigned long scc_get_param(struct scc_channel *scc, unsigned int cmd)
/* * Send calibration pattern * */
/* ******************************************************************* */
-static void scc_stop_calibrate(unsigned long channel)
+static void scc_stop_calibrate(struct timer_list *t)
{
- struct scc_channel *scc = (struct scc_channel *) channel;
+ struct scc_channel *scc = from_timer(scc, t, tx_wdog);
unsigned long flags;
spin_lock_irqsave(&scc->lock, flags);
@@ -1426,8 +1428,7 @@ scc_start_calibrate(struct scc_channel *scc, int duration, unsigned char pattern
del_timer(&scc->tx_wdog);
- scc->tx_wdog.data = (unsigned long) scc;
- scc->tx_wdog.function = scc_stop_calibrate;
+ scc->tx_wdog.function = (TIMER_FUNC_TYPE)scc_stop_calibrate;
scc->tx_wdog.expires = jiffies + HZ*duration;
add_timer(&scc->tx_wdog);
@@ -1522,8 +1523,8 @@ static int scc_net_alloc(const char *name, struct scc_channel *scc)
dev->ml_priv = scc;
scc->dev = dev;
spin_lock_init(&scc->lock);
- init_timer(&scc->tx_t);
- init_timer(&scc->tx_wdog);
+ timer_setup(&scc->tx_t, NULL, 0);
+ timer_setup(&scc->tx_wdog, NULL, 0);
err = register_netdevice(dev);
if (err) {
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 71ddadbf2368..76cc140774a2 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -1229,10 +1229,8 @@ static int rr_open(struct net_device *dev)
/* Set the timer to switch to check for link beat and perhaps switch
to an alternate media type. */
- init_timer(&rrpriv->timer);
+ setup_timer(&rrpriv->timer, rr_timer, (unsigned long)dev);
rrpriv->timer.expires = RUN_AT(5*HZ); /* 5 sec. watchdog */
- rrpriv->timer.data = (unsigned long)dev;
- rrpriv->timer.function = rr_timer; /* timer handler */
add_timer(&rrpriv->timer);
netif_start_queue(dev);
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 5176be76ca7d..4958bb6b7376 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -179,7 +179,7 @@ struct rndis_device {
u8 hw_mac_adr[ETH_ALEN];
u8 rss_key[NETVSC_HASH_KEYLEN];
- u16 ind_table[ITAB_NUM];
+ u16 rx_table[ITAB_NUM];
};
@@ -686,6 +686,8 @@ struct netvsc_ethtool_stats {
unsigned long tx_busy;
unsigned long tx_send_full;
unsigned long rx_comp_busy;
+ unsigned long stop_queue;
+ unsigned long wake_queue;
};
struct netvsc_vf_pcpu_stats {
@@ -702,6 +704,14 @@ struct netvsc_reconfig {
u32 event;
};
+/* L4 hash bits for different protocols */
+#define HV_TCP4_L4HASH 1
+#define HV_TCP6_L4HASH 2
+#define HV_UDP4_L4HASH 4
+#define HV_UDP6_L4HASH 8
+#define HV_DEFAULT_L4HASH (HV_TCP4_L4HASH | HV_TCP6_L4HASH | HV_UDP4_L4HASH | \
+ HV_UDP6_L4HASH)
+
/* The context of the netvsc device */
struct net_device_context {
/* point back to our device context */
@@ -721,13 +731,12 @@ struct net_device_context {
u32 tx_checksum_mask;
- u32 tx_send_table[VRSS_SEND_TAB_SIZE];
+ u32 tx_table[VRSS_SEND_TAB_SIZE];
/* Ethtool settings */
- bool udp4_l4_hash;
- bool udp6_l4_hash;
u8 duplex;
u32 speed;
+ u32 l4_hash; /* L4 hash settings */
struct netvsc_ethtool_stats eth_stats;
/* State to manage the associated VF interface. */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 8d5077fb0492..5bb6a20072dd 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -484,7 +484,7 @@ static int netvsc_connect_vsp(struct hv_device *device,
struct netvsc_device *net_device,
const struct netvsc_device_info *device_info)
{
- const u32 ver_list[] = {
+ static const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
};
@@ -609,6 +609,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
{
struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
struct net_device *ndev = hv_get_drvdata(device);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct vmbus_channel *channel = device->channel;
u16 q_idx = 0;
int queue_sends;
@@ -643,8 +644,10 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
(hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
- queue_sends < 1))
+ queue_sends < 1)) {
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
+ ndev_ctx->eth_stats.wake_queue++;
+ }
}
static void netvsc_send_completion(struct netvsc_device *net_device,
@@ -749,6 +752,7 @@ static inline int netvsc_send_pkt(
&net_device->chan_table[packet->q_idx];
struct vmbus_channel *out_channel = nvchan->channel;
struct net_device *ndev = hv_get_drvdata(device);
+ struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
@@ -789,12 +793,16 @@ static inline int netvsc_send_pkt(
if (ret == 0) {
atomic_inc_return(&nvchan->queue_sends);
- if (ring_avail < RING_AVAIL_PERCENT_LOWATER)
+ if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
netif_tx_stop_queue(txq);
+ ndev_ctx->eth_stats.stop_queue++;
+ }
} else if (ret == -EAGAIN) {
netif_tx_stop_queue(txq);
+ ndev_ctx->eth_stats.stop_queue++;
if (atomic_read(&nvchan->queue_sends) < 1) {
netif_tx_wake_queue(txq);
+ ndev_ctx->eth_stats.wake_queue++;
ret = -ENOSPC;
}
} else {
@@ -1102,7 +1110,7 @@ static void netvsc_send_table(struct hv_device *hdev,
nvmsg->msg.v5_msg.send_table.offset);
for (i = 0; i < count; i++)
- net_device_ctx->tx_send_table[i] = tab[i];
+ net_device_ctx->tx_table[i] = tab[i];
}
static void netvsc_send_vf(struct net_device_context *net_device_ctx,
@@ -1247,6 +1255,9 @@ struct netvsc_device *netvsc_device_add(struct hv_device *device,
if (!net_device)
return ERR_PTR(-ENOMEM);
+ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
+ net_device_ctx->tx_table[i] = 0;
+
net_device->ring_size = ring_size;
/* Because the device uses NAPI, all the interrupt batching and
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index a32ae02e1b6c..da216ca4f2b2 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -203,7 +203,7 @@ static inline u32 netvsc_get_hash(
const struct net_device_context *ndc)
{
struct flow_keys flow;
- u32 hash;
+ u32 hash, pkt_proto = 0;
static u32 hashrnd __read_mostly;
net_get_random_once(&hashrnd, sizeof(hashrnd));
@@ -211,11 +211,25 @@ static inline u32 netvsc_get_hash(
if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
return 0;
- if (flow.basic.ip_proto == IPPROTO_TCP ||
- (flow.basic.ip_proto == IPPROTO_UDP &&
- ((flow.basic.n_proto == htons(ETH_P_IP) && ndc->udp4_l4_hash) ||
- (flow.basic.n_proto == htons(ETH_P_IPV6) &&
- ndc->udp6_l4_hash)))) {
+ switch (flow.basic.ip_proto) {
+ case IPPROTO_TCP:
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ pkt_proto = HV_TCP4_L4HASH;
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ pkt_proto = HV_TCP6_L4HASH;
+
+ break;
+
+ case IPPROTO_UDP:
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ pkt_proto = HV_UDP4_L4HASH;
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ pkt_proto = HV_UDP6_L4HASH;
+
+ break;
+ }
+
+ if (pkt_proto & ndc->l4_hash) {
return skb_get_hash(skb);
} else {
if (flow.basic.n_proto == htons(ETH_P_IP))
@@ -238,8 +252,8 @@ static inline int netvsc_get_tx_queue(struct net_device *ndev,
struct sock *sk = skb->sk;
int q_idx;
- q_idx = ndc->tx_send_table[netvsc_get_hash(skb, ndc) &
- (VRSS_SEND_TAB_SIZE - 1)];
+ q_idx = ndc->tx_table[netvsc_get_hash(skb, ndc) &
+ (VRSS_SEND_TAB_SIZE - 1)];
/* If queue index changed record the new value */
if (q_idx != old_idx &&
@@ -898,8 +912,7 @@ static void netvsc_init_settings(struct net_device *dev)
{
struct net_device_context *ndc = netdev_priv(dev);
- ndc->udp4_l4_hash = true;
- ndc->udp6_l4_hash = true;
+ ndc->l4_hash = HV_DEFAULT_L4HASH;
ndc->speed = SPEED_UNKNOWN;
ndc->duplex = DUPLEX_FULL;
@@ -1126,6 +1139,8 @@ static const struct {
{ "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) },
{ "tx_send_full", offsetof(struct netvsc_ethtool_stats, tx_send_full) },
{ "rx_comp_busy", offsetof(struct netvsc_ethtool_stats, rx_comp_busy) },
+ { "stop_queue", offsetof(struct netvsc_ethtool_stats, stop_queue) },
+ { "wake_queue", offsetof(struct netvsc_ethtool_stats, wake_queue) },
}, vf_stats[] = {
{ "vf_rx_packets", offsetof(struct netvsc_vf_pcpu_stats, rx_packets) },
{ "vf_rx_bytes", offsetof(struct netvsc_vf_pcpu_stats, rx_bytes) },
@@ -1243,23 +1258,32 @@ static int
netvsc_get_rss_hash_opts(struct net_device_context *ndc,
struct ethtool_rxnfc *info)
{
+ const u32 l4_flag = RXH_L4_B_0_1 | RXH_L4_B_2_3;
+
info->data = RXH_IP_SRC | RXH_IP_DST;
switch (info->flow_type) {
case TCP_V4_FLOW:
+ if (ndc->l4_hash & HV_TCP4_L4HASH)
+ info->data |= l4_flag;
+
+ break;
+
case TCP_V6_FLOW:
- info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ if (ndc->l4_hash & HV_TCP6_L4HASH)
+ info->data |= l4_flag;
+
break;
case UDP_V4_FLOW:
- if (ndc->udp4_l4_hash)
- info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ if (ndc->l4_hash & HV_UDP4_L4HASH)
+ info->data |= l4_flag;
break;
case UDP_V6_FLOW:
- if (ndc->udp6_l4_hash)
- info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ if (ndc->l4_hash & HV_UDP6_L4HASH)
+ info->data |= l4_flag;
break;
@@ -1300,23 +1324,51 @@ static int netvsc_set_rss_hash_opts(struct net_device_context *ndc,
{
if (info->data == (RXH_IP_SRC | RXH_IP_DST |
RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
- if (info->flow_type == UDP_V4_FLOW)
- ndc->udp4_l4_hash = true;
- else if (info->flow_type == UDP_V6_FLOW)
- ndc->udp6_l4_hash = true;
- else
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ ndc->l4_hash |= HV_TCP4_L4HASH;
+ break;
+
+ case TCP_V6_FLOW:
+ ndc->l4_hash |= HV_TCP6_L4HASH;
+ break;
+
+ case UDP_V4_FLOW:
+ ndc->l4_hash |= HV_UDP4_L4HASH;
+ break;
+
+ case UDP_V6_FLOW:
+ ndc->l4_hash |= HV_UDP6_L4HASH;
+ break;
+
+ default:
return -EOPNOTSUPP;
+ }
return 0;
}
if (info->data == (RXH_IP_SRC | RXH_IP_DST)) {
- if (info->flow_type == UDP_V4_FLOW)
- ndc->udp4_l4_hash = false;
- else if (info->flow_type == UDP_V6_FLOW)
- ndc->udp6_l4_hash = false;
- else
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ ndc->l4_hash &= ~HV_TCP4_L4HASH;
+ break;
+
+ case TCP_V6_FLOW:
+ ndc->l4_hash &= ~HV_TCP6_L4HASH;
+ break;
+
+ case UDP_V4_FLOW:
+ ndc->l4_hash &= ~HV_UDP4_L4HASH;
+ break;
+
+ case UDP_V6_FLOW:
+ ndc->l4_hash &= ~HV_UDP6_L4HASH;
+ break;
+
+ default:
return -EOPNOTSUPP;
+ }
return 0;
}
@@ -1382,7 +1434,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
- indir[i] = rndis_dev->ind_table[i];
+ indir[i] = rndis_dev->rx_table[i];
}
if (key)
@@ -1412,7 +1464,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
return -EINVAL;
for (i = 0; i < ITAB_NUM; i++)
- rndis_dev->ind_table[i] = indir[i];
+ rndis_dev->rx_table[i] = indir[i];
}
if (!key) {
@@ -1746,7 +1798,7 @@ static int netvsc_vf_join(struct net_device *vf_netdev,
goto rx_handler_failed;
}
- ret = netdev_upper_dev_link(vf_netdev, ndev);
+ ret = netdev_upper_dev_link(vf_netdev, ndev, NULL);
if (ret != 0) {
netdev_err(vf_netdev,
"can not set master device %s (err = %d)\n",
@@ -1935,6 +1987,12 @@ static int netvsc_probe(struct hv_device *dev,
/* We always need headroom for rndis header */
net->needed_headroom = RNDIS_AND_PPI_SIZE;
+ /* Initialize the number of queues to be 1, we may change it if more
+ * channels are offered later.
+ */
+ netif_set_real_num_tx_queues(net, 1);
+ netif_set_real_num_rx_queues(net, 1);
+
/* Notify the netvsc driver of the new device */
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 065b204d8e17..addf9f69c58c 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -759,7 +759,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev,
/* Set indirection table entries */
itab = (u32 *)(rssp + 1);
for (i = 0; i < ITAB_NUM; i++)
- itab[i] = rdev->ind_table[i];
+ itab[i] = rdev->rx_table[i];
/* Set hask key values */
keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
@@ -1284,8 +1284,8 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
for (i = 0; i < ITAB_NUM; i++)
- rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
- net_device->num_chn);
+ rndis_device->rx_table[i] = ethtool_rxfh_indir_default(
+ i, net_device->num_chn);
atomic_set(&net_device->open_chn, 1);
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c
index ef688518ad77..d5584063049f 100644
--- a/drivers/net/ieee802154/atusb.c
+++ b/drivers/net/ieee802154/atusb.c
@@ -21,6 +21,9 @@
*
* USB initialization is
* Copyright (c) 2013 Alexander Aring <alex.aring@gmail.com>
+ *
+ * Busware HUL support is
+ * Copyright (c) 2017 Josef Filzmaier <j.filzmaier@gmx.at>
*/
#include <linux/kernel.h>
@@ -45,6 +48,7 @@
struct atusb {
struct ieee802154_hw *hw;
struct usb_device *usb_dev;
+ struct atusb_chip_data *data;
int shutdown; /* non-zero if shutting down */
int err; /* set by first error */
@@ -65,6 +69,14 @@ struct atusb {
unsigned char fw_hw_type; /* Firmware hardware type */
};
+struct atusb_chip_data {
+ u16 t_channel_switch;
+ int rssi_base_val;
+
+ int (*set_channel)(struct ieee802154_hw*, u8, u8);
+ int (*set_txpower)(struct ieee802154_hw*, s32);
+};
+
/* ----- USB commands without data ----------------------------------------- */
/* To reduce the number of error checks in the code, we record the first error
@@ -163,6 +175,18 @@ static int atusb_write_subreg(struct atusb *atusb, uint8_t reg, uint8_t mask,
return ret;
}
+static int atusb_read_subreg(struct atusb *lp,
+ unsigned int addr, unsigned int mask,
+ unsigned int shift)
+{
+ int rc;
+
+ rc = atusb_read_reg(lp, addr);
+ rc = (rc & mask) >> shift;
+
+ return rc;
+}
+
static int atusb_get_and_clear_error(struct atusb *atusb)
{
int err = atusb->err;
@@ -379,18 +403,6 @@ static int atusb_xmit(struct ieee802154_hw *hw, struct sk_buff *skb)
return ret;
}
-static int atusb_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
-{
- struct atusb *atusb = hw->priv;
- int ret;
-
- ret = atusb_write_subreg(atusb, SR_CHANNEL, channel);
- if (ret < 0)
- return ret;
- msleep(1); /* @@@ ugly synchronization */
- return 0;
-}
-
static int atusb_ed(struct ieee802154_hw *hw, u8 *level)
{
BUG_ON(!level);
@@ -475,6 +487,17 @@ static const s32 atusb_powers[ATUSB_MAX_TX_POWERS + 1] = {
};
static int
+atusb_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+ struct atusb *atusb = hw->priv;
+
+ if (atusb->data)
+ return atusb->data->set_txpower(hw, mbm);
+ else
+ return -ENOTSUPP;
+}
+
+static int
atusb_set_txpower(struct ieee802154_hw *hw, s32 mbm)
{
struct atusb *atusb = hw->priv;
@@ -488,12 +511,43 @@ atusb_set_txpower(struct ieee802154_hw *hw, s32 mbm)
return -EINVAL;
}
+static int
+hulusb_set_txpower(struct ieee802154_hw *hw, s32 mbm)
+{
+ u32 i;
+
+ for (i = 0; i < hw->phy->supported.tx_powers_size; i++) {
+ if (hw->phy->supported.tx_powers[i] == mbm)
+ return atusb_write_subreg(hw->priv, SR_TX_PWR_212, i);
+ }
+
+ return -EINVAL;
+}
+
#define ATUSB_MAX_ED_LEVELS 0xF
static const s32 atusb_ed_levels[ATUSB_MAX_ED_LEVELS + 1] = {
-9100, -8900, -8700, -8500, -8300, -8100, -7900, -7700, -7500, -7300,
-7100, -6900, -6700, -6500, -6300, -6100,
};
+#define AT86RF212_MAX_TX_POWERS 0x1F
+static const s32 at86rf212_powers[AT86RF212_MAX_TX_POWERS + 1] = {
+ 500, 400, 300, 200, 100, 0, -100, -200, -300, -400, -500, -600, -700,
+ -800, -900, -1000, -1100, -1200, -1300, -1400, -1500, -1600, -1700,
+ -1800, -1900, -2000, -2100, -2200, -2300, -2400, -2500, -2600,
+};
+
+#define AT86RF2XX_MAX_ED_LEVELS 0xF
+static const s32 at86rf212_ed_levels_100[AT86RF2XX_MAX_ED_LEVELS + 1] = {
+ -10000, -9800, -9600, -9400, -9200, -9000, -8800, -8600, -8400, -8200,
+ -8000, -7800, -7600, -7400, -7200, -7000,
+};
+
+static const s32 at86rf212_ed_levels_98[AT86RF2XX_MAX_ED_LEVELS + 1] = {
+ -9800, -9600, -9400, -9200, -9000, -8800, -8600, -8400, -8200, -8000,
+ -7800, -7600, -7400, -7200, -7000, -6800,
+};
+
static int
atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca)
{
@@ -527,6 +581,30 @@ atusb_set_cca_mode(struct ieee802154_hw *hw, const struct wpan_phy_cca *cca)
return atusb_write_subreg(atusb, SR_CCA_MODE, val);
}
+static int hulusb_set_cca_ed_level(struct atusb *lp, int rssi_base_val)
+{
+ unsigned int cca_ed_thres;
+
+ cca_ed_thres = atusb_read_subreg(lp, SR_CCA_ED_THRES);
+
+ switch (rssi_base_val) {
+ case -98:
+ lp->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_98;
+ lp->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_98);
+ lp->hw->phy->cca_ed_level = at86rf212_ed_levels_98[cca_ed_thres];
+ break;
+ case -100:
+ lp->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_100;
+ lp->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_100);
+ lp->hw->phy->cca_ed_level = at86rf212_ed_levels_100[cca_ed_thres];
+ break;
+ default:
+ WARN_ON(1);
+ }
+
+ return 0;
+}
+
static int
atusb_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
{
@@ -541,6 +619,92 @@ atusb_set_cca_ed_level(struct ieee802154_hw *hw, s32 mbm)
return -EINVAL;
}
+static int atusb_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+ struct atusb *atusb = hw->priv;
+ int ret = -ENOTSUPP;
+
+ if (atusb->data) {
+ ret = atusb->data->set_channel(hw, page, channel);
+ /* @@@ ugly synchronization */
+ msleep(atusb->data->t_channel_switch);
+ }
+
+ return ret;
+}
+
+static int atusb_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+ struct atusb *atusb = hw->priv;
+ int ret;
+
+ ret = atusb_write_subreg(atusb, SR_CHANNEL, channel);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static int hulusb_set_channel(struct ieee802154_hw *hw, u8 page, u8 channel)
+{
+ int rc;
+ int rssi_base_val;
+
+ struct atusb *lp = hw->priv;
+
+ if (channel == 0)
+ rc = atusb_write_subreg(lp, SR_SUB_MODE, 0);
+ else
+ rc = atusb_write_subreg(lp, SR_SUB_MODE, 1);
+ if (rc < 0)
+ return rc;
+
+ if (page == 0) {
+ rc = atusb_write_subreg(lp, SR_BPSK_QPSK, 0);
+ rssi_base_val = -100;
+ } else {
+ rc = atusb_write_subreg(lp, SR_BPSK_QPSK, 1);
+ rssi_base_val = -98;
+ }
+ if (rc < 0)
+ return rc;
+
+ rc = hulusb_set_cca_ed_level(lp, rssi_base_val);
+ if (rc < 0)
+ return rc;
+
+ /* This sets the symbol_duration according frequency on the 212.
+ * TODO move this handling while set channel and page in cfg802154.
+ * We can do that, this timings are according 802.15.4 standard.
+ * If we do that in cfg802154, this is a more generic calculation.
+ *
+ * This should also protected from ifs_timer. Means cancel timer and
+ * init with a new value. For now, this is okay.
+ */
+ if (channel == 0) {
+ if (page == 0) {
+ /* SUB:0 and BPSK:0 -> BPSK-20 */
+ lp->hw->phy->symbol_duration = 50;
+ } else {
+ /* SUB:1 and BPSK:0 -> BPSK-40 */
+ lp->hw->phy->symbol_duration = 25;
+ }
+ } else {
+ if (page == 0)
+ /* SUB:0 and BPSK:1 -> OQPSK-100/200/400 */
+ lp->hw->phy->symbol_duration = 40;
+ else
+ /* SUB:1 and BPSK:1 -> OQPSK-250/500/1000 */
+ lp->hw->phy->symbol_duration = 16;
+ }
+
+ lp->hw->phy->lifs_period = IEEE802154_LIFS_PERIOD *
+ lp->hw->phy->symbol_duration;
+ lp->hw->phy->sifs_period = IEEE802154_SIFS_PERIOD *
+ lp->hw->phy->symbol_duration;
+
+ return atusb_write_subreg(lp, SR_CHANNEL, channel);
+}
+
static int
atusb_set_csma_params(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries)
{
@@ -559,6 +723,14 @@ atusb_set_csma_params(struct ieee802154_hw *hw, u8 min_be, u8 max_be, u8 retries
}
static int
+hulusb_set_lbt(struct ieee802154_hw *hw, bool on)
+{
+ struct atusb *atusb = hw->priv;
+
+ return atusb_write_subreg(atusb, SR_CSMA_LBT_MODE, on);
+}
+
+static int
atusb_set_frame_retries(struct ieee802154_hw *hw, s8 retries)
{
struct atusb *atusb = hw->priv;
@@ -593,6 +765,20 @@ atusb_set_promiscuous_mode(struct ieee802154_hw *hw, const bool on)
return 0;
}
+static struct atusb_chip_data atusb_chip_data = {
+ .t_channel_switch = 1,
+ .rssi_base_val = -91,
+ .set_txpower = atusb_set_txpower,
+ .set_channel = atusb_set_channel,
+};
+
+static struct atusb_chip_data hulusb_chip_data = {
+ .t_channel_switch = 11,
+ .rssi_base_val = -100,
+ .set_txpower = hulusb_set_txpower,
+ .set_channel = hulusb_set_channel,
+};
+
static const struct ieee802154_ops atusb_ops = {
.owner = THIS_MODULE,
.xmit_async = atusb_xmit,
@@ -601,7 +787,8 @@ static const struct ieee802154_ops atusb_ops = {
.start = atusb_start,
.stop = atusb_stop,
.set_hw_addr_filt = atusb_set_hw_addr_filt,
- .set_txpower = atusb_set_txpower,
+ .set_txpower = atusb_txpower,
+ .set_lbt = hulusb_set_lbt,
.set_cca_mode = atusb_set_cca_mode,
.set_cca_ed_level = atusb_set_cca_ed_level,
.set_csma_params = atusb_set_csma_params,
@@ -614,6 +801,7 @@ static const struct ieee802154_ops atusb_ops = {
static int atusb_get_and_show_revision(struct atusb *atusb)
{
struct usb_device *usb_dev = atusb->usb_dev;
+ char *hw_name;
unsigned char *buffer;
int ret;
@@ -630,9 +818,31 @@ static int atusb_get_and_show_revision(struct atusb *atusb)
atusb->fw_ver_min = buffer[1];
atusb->fw_hw_type = buffer[2];
+ switch (atusb->fw_hw_type) {
+ case ATUSB_HW_TYPE_100813:
+ case ATUSB_HW_TYPE_101216:
+ case ATUSB_HW_TYPE_110131:
+ hw_name = "ATUSB";
+ atusb->data = &atusb_chip_data;
+ break;
+ case ATUSB_HW_TYPE_RZUSB:
+ hw_name = "RZUSB";
+ atusb->data = &atusb_chip_data;
+ break;
+ case ATUSB_HW_TYPE_HULUSB:
+ hw_name = "HULUSB";
+ atusb->data = &hulusb_chip_data;
+ break;
+ default:
+ hw_name = "UNKNOWN";
+ atusb->err = -ENOTSUPP;
+ ret = -ENOTSUPP;
+ break;
+ }
+
dev_info(&usb_dev->dev,
- "Firmware: major: %u, minor: %u, hardware type: %u\n",
- atusb->fw_ver_maj, atusb->fw_ver_min, atusb->fw_hw_type);
+ "Firmware: major: %u, minor: %u, hardware type: %s (%d)\n",
+ atusb->fw_ver_maj, atusb->fw_ver_min, hw_name, atusb->fw_hw_type);
}
if (atusb->fw_ver_maj == 0 && atusb->fw_ver_min < 2) {
dev_info(&usb_dev->dev,
@@ -667,11 +877,12 @@ static int atusb_get_and_show_build(struct atusb *atusb)
return ret;
}
-static int atusb_get_and_show_chip(struct atusb *atusb)
+static int atusb_get_and_conf_chip(struct atusb *atusb)
{
struct usb_device *usb_dev = atusb->usb_dev;
uint8_t man_id_0, man_id_1, part_num, version_num;
const char *chip;
+ struct ieee802154_hw *hw = atusb->hw;
man_id_0 = atusb_read_reg(atusb, RG_MAN_ID_0);
man_id_1 = atusb_read_reg(atusb, RG_MAN_ID_1);
@@ -681,6 +892,22 @@ static int atusb_get_and_show_chip(struct atusb *atusb)
if (atusb->err)
return atusb->err;
+ hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT |
+ IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS;
+
+ hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
+ WPAN_PHY_FLAG_CCA_MODE;
+
+ hw->phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
+ BIT(NL802154_CCA_CARRIER) |
+ BIT(NL802154_CCA_ENERGY_CARRIER);
+ hw->phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
+ BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
+
+ hw->phy->cca.mode = NL802154_CCA_ENERGY;
+
+ hw->phy->current_page = 0;
+
if ((man_id_1 << 8 | man_id_0) != ATUSB_JEDEC_ATMEL) {
dev_err(&usb_dev->dev,
"non-Atmel transceiver xxxx%02x%02x\n",
@@ -691,9 +918,36 @@ static int atusb_get_and_show_chip(struct atusb *atusb)
switch (part_num) {
case 2:
chip = "AT86RF230";
+ atusb->hw->phy->supported.channels[0] = 0x7FFF800;
+ atusb->hw->phy->current_channel = 11; /* reset default */
+ atusb->hw->phy->symbol_duration = 16;
+ atusb->hw->phy->supported.tx_powers = atusb_powers;
+ atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers);
+ hw->phy->supported.cca_ed_levels = atusb_ed_levels;
+ hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels);
break;
case 3:
chip = "AT86RF231";
+ atusb->hw->phy->supported.channels[0] = 0x7FFF800;
+ atusb->hw->phy->current_channel = 11; /* reset default */
+ atusb->hw->phy->symbol_duration = 16;
+ atusb->hw->phy->supported.tx_powers = atusb_powers;
+ atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers);
+ hw->phy->supported.cca_ed_levels = atusb_ed_levels;
+ hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels);
+ break;
+ case 7:
+ chip = "AT86RF212";
+ atusb->hw->flags |= IEEE802154_HW_LBT;
+ atusb->hw->phy->supported.channels[0] = 0x00007FF;
+ atusb->hw->phy->supported.channels[2] = 0x00007FF;
+ atusb->hw->phy->current_channel = 5;
+ atusb->hw->phy->symbol_duration = 25;
+ atusb->hw->phy->supported.lbt = NL802154_SUPPORTED_BOOL_BOTH;
+ atusb->hw->phy->supported.tx_powers = at86rf212_powers;
+ atusb->hw->phy->supported.tx_powers_size = ARRAY_SIZE(at86rf212_powers);
+ atusb->hw->phy->supported.cca_ed_levels = at86rf212_ed_levels_100;
+ atusb->hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(at86rf212_ed_levels_100);
break;
default:
dev_err(&usb_dev->dev,
@@ -702,6 +956,9 @@ static int atusb_get_and_show_chip(struct atusb *atusb)
goto fail;
}
+ hw->phy->transmit_power = hw->phy->supported.tx_powers[0];
+ hw->phy->cca_ed_level = hw->phy->supported.cca_ed_levels[7];
+
dev_info(&usb_dev->dev, "ATUSB: %s version %d\n", chip, version_num);
return 0;
@@ -794,37 +1051,14 @@ static int atusb_probe(struct usb_interface *interface,
goto fail;
hw->parent = &usb_dev->dev;
- hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT |
- IEEE802154_HW_PROMISCUOUS | IEEE802154_HW_CSMA_PARAMS;
-
- hw->phy->flags = WPAN_PHY_FLAG_TXPOWER | WPAN_PHY_FLAG_CCA_ED_LEVEL |
- WPAN_PHY_FLAG_CCA_MODE;
-
- hw->phy->supported.cca_modes = BIT(NL802154_CCA_ENERGY) |
- BIT(NL802154_CCA_CARRIER) | BIT(NL802154_CCA_ENERGY_CARRIER);
- hw->phy->supported.cca_opts = BIT(NL802154_CCA_OPT_ENERGY_CARRIER_AND) |
- BIT(NL802154_CCA_OPT_ENERGY_CARRIER_OR);
-
- hw->phy->supported.cca_ed_levels = atusb_ed_levels;
- hw->phy->supported.cca_ed_levels_size = ARRAY_SIZE(atusb_ed_levels);
-
- hw->phy->cca.mode = NL802154_CCA_ENERGY;
-
- hw->phy->current_page = 0;
- hw->phy->current_channel = 11; /* reset default */
- hw->phy->supported.channels[0] = 0x7FFF800;
- hw->phy->supported.tx_powers = atusb_powers;
- hw->phy->supported.tx_powers_size = ARRAY_SIZE(atusb_powers);
- hw->phy->transmit_power = hw->phy->supported.tx_powers[0];
- hw->phy->cca_ed_level = hw->phy->supported.cca_ed_levels[7];
atusb_command(atusb, ATUSB_RF_RESET, 0);
- atusb_get_and_show_chip(atusb);
+ atusb_get_and_conf_chip(atusb);
atusb_get_and_show_revision(atusb);
atusb_get_and_show_build(atusb);
atusb_set_extended_addr(atusb);
- if (atusb->fw_ver_maj >= 0 && atusb->fw_ver_min >= 3)
+ if ((atusb->fw_ver_maj == 0 && atusb->fw_ver_min >= 3) || atusb->fw_ver_maj > 0)
hw->flags |= IEEE802154_HW_FRAME_RETRIES;
ret = atusb_get_and_clear_error(atusb);
@@ -941,5 +1175,6 @@ MODULE_AUTHOR("Alexander Aring <alex.aring@gmail.com>");
MODULE_AUTHOR("Richard Sharpe <realrichardsharpe@gmail.com>");
MODULE_AUTHOR("Stefan Schmidt <stefan@datenfreihafen.org>");
MODULE_AUTHOR("Werner Almesberger <werner@almesberger.net>");
+MODULE_AUTHOR("Josef Filzmaier <j.filzmaier@gmx.at>");
MODULE_DESCRIPTION("ATUSB IEEE 802.15.4 Driver");
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ieee802154/atusb.h b/drivers/net/ieee802154/atusb.h
index b22bbaa77590..555d14bf14a3 100644
--- a/drivers/net/ieee802154/atusb.h
+++ b/drivers/net/ieee802154/atusb.h
@@ -50,6 +50,14 @@ enum atusb_requests {
ATUSB_EUI64_READ,
};
+enum {
+ ATUSB_HW_TYPE_100813, /* 2010-08-13 */
+ ATUSB_HW_TYPE_101216, /* 2010-12-16 */
+ ATUSB_HW_TYPE_110131, /* 2011-01-31, ATmega32U2-based */
+ ATUSB_HW_TYPE_RZUSB, /* Atmel Raven USB dongle with at86rf230 */
+ ATUSB_HW_TYPE_HULUSB, /* Busware HUL USB dongle with at86rf212 */
+};
+
/*
* Direction bRequest wValue wIndex wLength
*
diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c
index 24a1eabbbc9d..e6b8ce81a6c3 100644
--- a/drivers/net/ieee802154/ca8210.c
+++ b/drivers/net/ieee802154/ca8210.c
@@ -1944,7 +1944,7 @@ static int ca8210_skb_tx(
)
{
int status;
- struct ieee802154_hdr header = { 0 };
+ struct ieee802154_hdr header = { };
struct secspec secspec;
unsigned int mac_len;
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 8870bd2a2e8a..0008da7e9d4c 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -231,6 +231,9 @@ static void ifb_setup(struct net_device *dev)
eth_hw_addr_random(dev);
dev->needs_free_netdev = true;
dev->priv_destructor = ifb_dev_free;
+
+ dev->min_mtu = 0;
+ dev->max_mtu = 0;
}
static netdev_tx_t ifb_xmit(struct sk_buff *skb, struct net_device *dev)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index c74893c1e620..f0ab55df57f1 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -407,7 +407,7 @@ static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
* while the packets use the mac-addr on the physical device.
*/
return dev_hard_header(skb, phy_dev, type, daddr,
- saddr ? : dev->dev_addr, len);
+ saddr ? : phy_dev->dev_addr, len);
}
static const struct header_ops ipvlan_header_ops = {
@@ -584,7 +584,7 @@ int ipvlan_link_new(struct net *src_net, struct net_device *dev,
if (err < 0)
goto remove_ida;
- err = netdev_upper_dev_link(phy_dev, dev);
+ err = netdev_upper_dev_link(phy_dev, dev, extack);
if (err) {
goto unregister_netdev;
}
@@ -730,6 +730,11 @@ static int ipvlan_device_event(struct notifier_block *unused,
ipvlan_adjust_mtu(ipvlan, dev);
break;
+ case NETDEV_CHANGEADDR:
+ list_for_each_entry(ipvlan, &port->ipvlans, pnode)
+ ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr);
+ break;
+
case NETDEV_PRE_TYPE_CHANGE:
/* Forbid underlying device to change its type. */
return NOTIFY_BAD;
@@ -803,10 +808,6 @@ static int ipvlan_addr6_event(struct notifier_block *unused,
struct net_device *dev = (struct net_device *)if6->idev->dev;
struct ipvl_dev *ipvlan = netdev_priv(dev);
- /* FIXME IPv6 autoconf calls us from bh without RTNL */
- if (in_softirq())
- return NOTIFY_DONE;
-
if (!netif_is_ipvlan(dev))
return NOTIFY_DONE;
@@ -846,8 +847,11 @@ static int ipvlan_addr6_validator_event(struct notifier_block *unused,
switch (event) {
case NETDEV_UP:
- if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true))
+ if (ipvlan_addr_busy(ipvlan->port, &i6vi->i6vi_addr, true)) {
+ NL_SET_ERR_MSG(i6vi->extack,
+ "Address already assigned to an ipvlan device");
return notifier_from_errno(-EADDRINUSE);
+ }
break;
}
@@ -916,8 +920,11 @@ static int ipvlan_addr4_validator_event(struct notifier_block *unused,
switch (event) {
case NETDEV_UP:
- if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false))
+ if (ipvlan_addr_busy(ipvlan->port, &ivi->ivi_addr, false)) {
+ NL_SET_ERR_MSG(ivi->extack,
+ "Address already assigned to an ipvlan device");
return notifier_from_errno(-EADDRINUSE);
+ }
break;
}
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 5ab1b8849c30..2c98152d1e1b 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -16,6 +16,7 @@
#include <crypto/aead.h>
#include <linux/etherdevice.h>
#include <linux/rtnetlink.h>
+#include <linux/refcount.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include <net/gro_cells.h>
@@ -146,7 +147,7 @@ struct macsec_rx_sa {
struct macsec_key key;
spinlock_t lock;
u32 next_pn;
- atomic_t refcnt;
+ refcount_t refcnt;
bool active;
struct macsec_rx_sa_stats __percpu *stats;
struct macsec_rx_sc *sc;
@@ -171,7 +172,7 @@ struct macsec_rx_sc {
bool active;
struct macsec_rx_sa __rcu *sa[MACSEC_NUM_AN];
struct pcpu_rx_sc_stats __percpu *stats;
- atomic_t refcnt;
+ refcount_t refcnt;
struct rcu_head rcu_head;
};
@@ -187,7 +188,7 @@ struct macsec_tx_sa {
struct macsec_key key;
spinlock_t lock;
u32 next_pn;
- atomic_t refcnt;
+ refcount_t refcnt;
bool active;
struct macsec_tx_sa_stats __percpu *stats;
struct rcu_head rcu;
@@ -314,7 +315,7 @@ static struct macsec_rx_sa *macsec_rxsa_get(struct macsec_rx_sa __rcu *ptr)
if (!sa || !sa->active)
return NULL;
- if (!atomic_inc_not_zero(&sa->refcnt))
+ if (!refcount_inc_not_zero(&sa->refcnt))
return NULL;
return sa;
@@ -330,12 +331,12 @@ static void free_rx_sc_rcu(struct rcu_head *head)
static struct macsec_rx_sc *macsec_rxsc_get(struct macsec_rx_sc *sc)
{
- return atomic_inc_not_zero(&sc->refcnt) ? sc : NULL;
+ return refcount_inc_not_zero(&sc->refcnt) ? sc : NULL;
}
static void macsec_rxsc_put(struct macsec_rx_sc *sc)
{
- if (atomic_dec_and_test(&sc->refcnt))
+ if (refcount_dec_and_test(&sc->refcnt))
call_rcu(&sc->rcu_head, free_rx_sc_rcu);
}
@@ -350,7 +351,7 @@ static void free_rxsa(struct rcu_head *head)
static void macsec_rxsa_put(struct macsec_rx_sa *sa)
{
- if (atomic_dec_and_test(&sa->refcnt))
+ if (refcount_dec_and_test(&sa->refcnt))
call_rcu(&sa->rcu, free_rxsa);
}
@@ -361,7 +362,7 @@ static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr)
if (!sa || !sa->active)
return NULL;
- if (!atomic_inc_not_zero(&sa->refcnt))
+ if (!refcount_inc_not_zero(&sa->refcnt))
return NULL;
return sa;
@@ -378,7 +379,7 @@ static void free_txsa(struct rcu_head *head)
static void macsec_txsa_put(struct macsec_tx_sa *sa)
{
- if (atomic_dec_and_test(&sa->refcnt))
+ if (refcount_dec_and_test(&sa->refcnt))
call_rcu(&sa->rcu, free_txsa);
}
@@ -1341,7 +1342,7 @@ static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len,
rx_sa->active = false;
rx_sa->next_pn = 1;
- atomic_set(&rx_sa->refcnt, 1);
+ refcount_set(&rx_sa->refcnt, 1);
spin_lock_init(&rx_sa->lock);
return 0;
@@ -1412,7 +1413,7 @@ static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci)
rx_sc->sci = sci;
rx_sc->active = true;
- atomic_set(&rx_sc->refcnt, 1);
+ refcount_set(&rx_sc->refcnt, 1);
secy = &macsec_priv(dev)->secy;
rcu_assign_pointer(rx_sc->next, secy->rx_sc);
@@ -1438,7 +1439,7 @@ static int init_tx_sa(struct macsec_tx_sa *tx_sa, char *sak, int key_len,
}
tx_sa->active = false;
- atomic_set(&tx_sa->refcnt, 1);
+ refcount_set(&tx_sa->refcnt, 1);
spin_lock_init(&tx_sa->lock);
return 0;
@@ -3246,7 +3247,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
&macsec_netdev_addr_lock_key,
macsec_get_nest_level(dev));
- err = netdev_upper_dev_link(real_dev, dev);
+ err = netdev_upper_dev_link(real_dev, dev, extack);
if (err < 0)
goto unregister;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d2aea961e0f4..a178c5efd33e 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -413,7 +413,9 @@ static void macvlan_forward_source_one(struct sk_buff *skb,
len = nskb->len + ETH_HLEN;
nskb->dev = dev;
- nskb->pkt_type = PACKET_HOST;
+
+ if (ether_addr_equal_64bits(eth_hdr(skb)->h_dest, dev->dev_addr))
+ nskb->pkt_type = PACKET_HOST;
ret = netif_rx(nskb);
macvlan_count_rx(vlan, len, ret == NET_RX_SUCCESS, false);
@@ -480,7 +482,7 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
struct macvlan_dev, list);
else
vlan = macvlan_hash_lookup(port, eth->h_dest);
- if (vlan == NULL)
+ if (!vlan || vlan->mode == MACVLAN_MODE_SOURCE)
return RX_HANDLER_PASS;
dev = vlan->dev;
@@ -596,8 +598,6 @@ static const struct header_ops macvlan_hard_header_ops = {
.cache_update = eth_header_cache_update,
};
-static struct rtnl_link_ops macvlan_link_ops;
-
static int macvlan_open(struct net_device *dev)
{
struct macvlan_dev *vlan = netdev_priv(dev);
@@ -613,8 +613,7 @@ static int macvlan_open(struct net_device *dev)
goto hash_add;
}
- if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD &&
- dev->rtnl_link_ops == &macvlan_link_ops) {
+ if (lowerdev->features & NETIF_F_HW_L2FW_DOFFLOAD) {
vlan->fwd_priv =
lowerdev->netdev_ops->ndo_dfwd_add_station(lowerdev, dev);
@@ -1231,11 +1230,14 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[],
return -EADDRNOTAVAIL;
}
- if (data && data[IFLA_MACVLAN_FLAGS] &&
+ if (!data)
+ return 0;
+
+ if (data[IFLA_MACVLAN_FLAGS] &&
nla_get_u16(data[IFLA_MACVLAN_FLAGS]) & ~MACVLAN_FLAG_NOPROMISC)
return -EINVAL;
- if (data && data[IFLA_MACVLAN_MODE]) {
+ if (data[IFLA_MACVLAN_MODE]) {
switch (nla_get_u32(data[IFLA_MACVLAN_MODE])) {
case MACVLAN_MODE_PRIVATE:
case MACVLAN_MODE_VEPA:
@@ -1248,7 +1250,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[],
}
}
- if (data && data[IFLA_MACVLAN_MACADDR_MODE]) {
+ if (data[IFLA_MACVLAN_MACADDR_MODE]) {
switch (nla_get_u32(data[IFLA_MACVLAN_MACADDR_MODE])) {
case MACVLAN_MACADDR_ADD:
case MACVLAN_MACADDR_DEL:
@@ -1260,7 +1262,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[],
}
}
- if (data && data[IFLA_MACVLAN_MACADDR]) {
+ if (data[IFLA_MACVLAN_MACADDR]) {
if (nla_len(data[IFLA_MACVLAN_MACADDR]) != ETH_ALEN)
return -EINVAL;
@@ -1268,7 +1270,7 @@ static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[],
return -EADDRNOTAVAIL;
}
- if (data && data[IFLA_MACVLAN_MACADDR_COUNT])
+ if (data[IFLA_MACVLAN_MACADDR_COUNT])
return -EINVAL;
return 0;
@@ -1341,7 +1343,8 @@ static int macvlan_changelink_sources(struct macvlan_dev *vlan, u32 mode,
}
int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
struct macvlan_dev *vlan = netdev_priv(dev);
struct macvlan_port *port;
@@ -1430,7 +1433,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
goto destroy_macvlan_port;
dev->priv_flags |= IFF_MACVLAN;
- err = netdev_upper_dev_link(lowerdev, dev);
+ err = netdev_upper_dev_link(lowerdev, dev, extack);
if (err)
goto unregister_netdev;
@@ -1453,7 +1456,7 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- return macvlan_common_newlink(src_net, dev, tb, data);
+ return macvlan_common_newlink(src_net, dev, tb, data, extack);
}
void macvlan_dellink(struct net_device *dev, struct list_head *head)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index c2d0ea2fb019..f62aea2fcfa9 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -105,7 +105,7 @@ static int macvtap_newlink(struct net *src_net, struct net_device *dev,
/* Don't put anything that may fail after macvlan_common_newlink
* because we can't undo what it does.
*/
- err = macvlan_common_newlink(src_net, dev, tb, data);
+ err = macvlan_common_newlink(src_net, dev, tb, data, extack);
if (err) {
netdev_rx_handler_unregister(dev);
return err;
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index cd931cf9dcc2..8125412c8814 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -277,6 +277,11 @@ config DAVICOM_PHY
---help---
Currently supports dm9161e and dm9131
+config DP83822_PHY
+ tristate "Texas Instruments DP83822 PHY"
+ ---help---
+ Supports the DP83822 PHY.
+
config DP83848_PHY
tristate "Texas Instruments DP83848 PHY"
---help---
@@ -366,6 +371,11 @@ config REALTEK_PHY
---help---
Supports the Realtek 821x PHY.
+config RENESAS_PHY
+ tristate "Driver for Renesas PHYs"
+ ---help---
+ Supports the Renesas PHYs uPD60620 and uPD60620A.
+
config ROCKCHIP_PHY
tristate "Driver for Rockchip Ethernet PHYs"
---help---
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 416df92fbf4f..4b983be83566 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -55,6 +55,7 @@ obj-$(CONFIG_CICADA_PHY) += cicada.o
obj-$(CONFIG_CORTINA_PHY) += cortina.o
obj-$(CONFIG_DAVICOM_PHY) += davicom.o
obj-$(CONFIG_DP83640_PHY) += dp83640.o
+obj-$(CONFIG_DP83822_PHY) += dp83822.o
obj-$(CONFIG_DP83848_PHY) += dp83848.o
obj-$(CONFIG_DP83867_PHY) += dp83867.o
obj-$(CONFIG_FIXED_PHY) += fixed_phy.o
@@ -72,6 +73,7 @@ obj-$(CONFIG_MICROSEMI_PHY) += mscc.o
obj-$(CONFIG_NATIONAL_PHY) += national.o
obj-$(CONFIG_QSEMI_PHY) += qsemi.o
obj-$(CONFIG_REALTEK_PHY) += realtek.o
+obj-$(CONFIG_RENESAS_PHY) += uPD60620.o
obj-$(CONFIG_ROCKCHIP_PHY) += rockchip.o
obj-$(CONFIG_SMSC_PHY) += smsc.o
obj-$(CONFIG_STE10XP) += ste10Xp.o
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index c1e52b9dc58d..5f93e6add563 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -167,7 +167,7 @@ static int at803x_set_wol(struct phy_device *phydev,
mac = (const u8 *) ndev->dev_addr;
if (!is_valid_ether_addr(mac))
- return -EFAULT;
+ return -EINVAL;
for (i = 0; i < 3; i++) {
phy_write(phydev, AT803X_MMD_ACCESS_CONTROL,
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 1e9ad30a35c8..d7ed69deabfb 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -43,6 +43,12 @@ static int bcm54210e_config_init(struct phy_device *phydev)
val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN;
bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val);
+ if (phydev->dev_flags & PHY_BRCM_EN_MASTER_MODE) {
+ val = phy_read(phydev, MII_CTRL1000);
+ val |= CTL1000_AS_MASTER | CTL1000_ENABLE_MASTER;
+ phy_write(phydev, MII_CTRL1000, val);
+ }
+
return 0;
}
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
new file mode 100644
index 000000000000..14335d14e9e4
--- /dev/null
+++ b/drivers/net/phy/dp83822.c
@@ -0,0 +1,344 @@
+/*
+ * Driver for the Texas Instruments DP83822 PHY
+ *
+ * Copyright (C) 2017 Texas Instruments Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/ethtool.h>
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/phy.h>
+#include <linux/netdevice.h>
+
+#define DP83822_PHY_ID 0x2000a240
+#define DP83822_DEVADDR 0x1f
+
+#define MII_DP83822_PHYSCR 0x11
+#define MII_DP83822_MISR1 0x12
+#define MII_DP83822_MISR2 0x13
+#define MII_DP83822_RESET_CTRL 0x1f
+
+#define DP83822_HW_RESET BIT(15)
+#define DP83822_SW_RESET BIT(14)
+
+/* PHYSCR Register Fields */
+#define DP83822_PHYSCR_INT_OE BIT(0) /* Interrupt Output Enable */
+#define DP83822_PHYSCR_INTEN BIT(1) /* Interrupt Enable */
+
+/* MISR1 bits */
+#define DP83822_RX_ERR_HF_INT_EN BIT(0)
+#define DP83822_FALSE_CARRIER_HF_INT_EN BIT(1)
+#define DP83822_ANEG_COMPLETE_INT_EN BIT(2)
+#define DP83822_DUP_MODE_CHANGE_INT_EN BIT(3)
+#define DP83822_SPEED_CHANGED_INT_EN BIT(4)
+#define DP83822_LINK_STAT_INT_EN BIT(5)
+#define DP83822_ENERGY_DET_INT_EN BIT(6)
+#define DP83822_LINK_QUAL_INT_EN BIT(7)
+
+/* MISR2 bits */
+#define DP83822_JABBER_DET_INT_EN BIT(0)
+#define DP83822_WOL_PKT_INT_EN BIT(1)
+#define DP83822_SLEEP_MODE_INT_EN BIT(2)
+#define DP83822_MDI_XOVER_INT_EN BIT(3)
+#define DP83822_LB_FIFO_INT_EN BIT(4)
+#define DP83822_PAGE_RX_INT_EN BIT(5)
+#define DP83822_ANEG_ERR_INT_EN BIT(6)
+#define DP83822_EEE_ERROR_CHANGE_INT_EN BIT(7)
+
+/* INT_STAT1 bits */
+#define DP83822_WOL_INT_EN BIT(4)
+#define DP83822_WOL_INT_STAT BIT(12)
+
+#define MII_DP83822_RXSOP1 0x04a5
+#define MII_DP83822_RXSOP2 0x04a6
+#define MII_DP83822_RXSOP3 0x04a7
+
+/* WoL Registers */
+#define MII_DP83822_WOL_CFG 0x04a0
+#define MII_DP83822_WOL_STAT 0x04a1
+#define MII_DP83822_WOL_DA1 0x04a2
+#define MII_DP83822_WOL_DA2 0x04a3
+#define MII_DP83822_WOL_DA3 0x04a4
+
+/* WoL bits */
+#define DP83822_WOL_MAGIC_EN BIT(0)
+#define DP83822_WOL_SECURE_ON BIT(5)
+#define DP83822_WOL_EN BIT(7)
+#define DP83822_WOL_INDICATION_SEL BIT(8)
+#define DP83822_WOL_CLR_INDICATION BIT(11)
+
+static int dp83822_ack_interrupt(struct phy_device *phydev)
+{
+ int err;
+
+ err = phy_read(phydev, MII_DP83822_MISR1);
+ if (err < 0)
+ return err;
+
+ err = phy_read(phydev, MII_DP83822_MISR2);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int dp83822_set_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ struct net_device *ndev = phydev->attached_dev;
+ u16 value;
+ const u8 *mac;
+
+ if (wol->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) {
+ mac = (const u8 *)ndev->dev_addr;
+
+ if (!is_valid_ether_addr(mac))
+ return -EINVAL;
+
+ /* MAC addresses start with byte 5, but stored in mac[0].
+ * 822 PHYs store bytes 4|5, 2|3, 0|1
+ */
+ phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA1,
+ (mac[1] << 8) | mac[0]);
+ phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA2,
+ (mac[3] << 8) | mac[2]);
+ phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA3,
+ (mac[5] << 8) | mac[4]);
+
+ value = phy_read_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_WOL_CFG);
+ if (wol->wolopts & WAKE_MAGIC)
+ value |= DP83822_WOL_MAGIC_EN;
+ else
+ value &= ~DP83822_WOL_MAGIC_EN;
+
+ if (wol->wolopts & WAKE_MAGICSECURE) {
+ phy_write_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_RXSOP1,
+ (wol->sopass[1] << 8) | wol->sopass[0]);
+ phy_write_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_RXSOP2,
+ (wol->sopass[3] << 8) | wol->sopass[2]);
+ phy_write_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_RXSOP3,
+ (wol->sopass[5] << 8) | wol->sopass[4]);
+ value |= DP83822_WOL_SECURE_ON;
+ } else {
+ value &= ~DP83822_WOL_SECURE_ON;
+ }
+
+ value |= (DP83822_WOL_EN | DP83822_WOL_INDICATION_SEL |
+ DP83822_WOL_CLR_INDICATION);
+ phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
+ value);
+ } else {
+ value = phy_read_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_WOL_CFG);
+ value &= ~DP83822_WOL_EN;
+ phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
+ value);
+ }
+
+ return 0;
+}
+
+static void dp83822_get_wol(struct phy_device *phydev,
+ struct ethtool_wolinfo *wol)
+{
+ int value;
+ u16 sopass_val;
+
+ wol->supported = (WAKE_MAGIC | WAKE_MAGICSECURE);
+ wol->wolopts = 0;
+
+ value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
+
+ if (value & DP83822_WOL_MAGIC_EN)
+ wol->wolopts |= WAKE_MAGIC;
+
+ if (value & DP83822_WOL_SECURE_ON) {
+ sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_RXSOP1);
+ wol->sopass[0] = (sopass_val & 0xff);
+ wol->sopass[1] = (sopass_val >> 8);
+
+ sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_RXSOP2);
+ wol->sopass[2] = (sopass_val & 0xff);
+ wol->sopass[3] = (sopass_val >> 8);
+
+ sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR,
+ MII_DP83822_RXSOP3);
+ wol->sopass[4] = (sopass_val & 0xff);
+ wol->sopass[5] = (sopass_val >> 8);
+
+ wol->wolopts |= WAKE_MAGICSECURE;
+ }
+
+ /* WoL is not enabled so set wolopts to 0 */
+ if (!(value & DP83822_WOL_EN))
+ wol->wolopts = 0;
+}
+
+static int dp83822_config_intr(struct phy_device *phydev)
+{
+ int misr_status;
+ int physcr_status;
+ int err;
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED) {
+ misr_status = phy_read(phydev, MII_DP83822_MISR1);
+ if (misr_status < 0)
+ return misr_status;
+
+ misr_status |= (DP83822_RX_ERR_HF_INT_EN |
+ DP83822_FALSE_CARRIER_HF_INT_EN |
+ DP83822_ANEG_COMPLETE_INT_EN |
+ DP83822_DUP_MODE_CHANGE_INT_EN |
+ DP83822_SPEED_CHANGED_INT_EN |
+ DP83822_LINK_STAT_INT_EN |
+ DP83822_ENERGY_DET_INT_EN |
+ DP83822_LINK_QUAL_INT_EN);
+
+ err = phy_write(phydev, MII_DP83822_MISR1, misr_status);
+ if (err < 0)
+ return err;
+
+ misr_status = phy_read(phydev, MII_DP83822_MISR2);
+ if (misr_status < 0)
+ return misr_status;
+
+ misr_status |= (DP83822_JABBER_DET_INT_EN |
+ DP83822_WOL_PKT_INT_EN |
+ DP83822_SLEEP_MODE_INT_EN |
+ DP83822_MDI_XOVER_INT_EN |
+ DP83822_LB_FIFO_INT_EN |
+ DP83822_PAGE_RX_INT_EN |
+ DP83822_ANEG_ERR_INT_EN |
+ DP83822_EEE_ERROR_CHANGE_INT_EN);
+
+ err = phy_write(phydev, MII_DP83822_MISR2, misr_status);
+ if (err < 0)
+ return err;
+
+ physcr_status = phy_read(phydev, MII_DP83822_PHYSCR);
+ if (physcr_status < 0)
+ return physcr_status;
+
+ physcr_status |= DP83822_PHYSCR_INT_OE | DP83822_PHYSCR_INTEN;
+
+ } else {
+ err = phy_write(phydev, MII_DP83822_MISR1, 0);
+ if (err < 0)
+ return err;
+
+ err = phy_write(phydev, MII_DP83822_MISR1, 0);
+ if (err < 0)
+ return err;
+
+ physcr_status = phy_read(phydev, MII_DP83822_PHYSCR);
+ if (physcr_status < 0)
+ return physcr_status;
+
+ physcr_status &= ~DP83822_PHYSCR_INTEN;
+ }
+
+ return phy_write(phydev, MII_DP83822_PHYSCR, physcr_status);
+}
+
+static int dp83822_config_init(struct phy_device *phydev)
+{
+ int err;
+ int value;
+
+ err = genphy_config_init(phydev);
+ if (err < 0)
+ return err;
+
+ value = DP83822_WOL_MAGIC_EN | DP83822_WOL_SECURE_ON | DP83822_WOL_EN;
+
+ return phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
+ value);
+}
+
+static int dp83822_phy_reset(struct phy_device *phydev)
+{
+ int err;
+
+ err = phy_write(phydev, MII_DP83822_RESET_CTRL, DP83822_HW_RESET);
+ if (err < 0)
+ return err;
+
+ dp83822_config_init(phydev);
+
+ return 0;
+}
+
+static int dp83822_suspend(struct phy_device *phydev)
+{
+ int value;
+
+ value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
+
+ if (!(value & DP83822_WOL_EN))
+ genphy_suspend(phydev);
+
+ return 0;
+}
+
+static int dp83822_resume(struct phy_device *phydev)
+{
+ int value;
+
+ genphy_resume(phydev);
+
+ value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
+
+ phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, value |
+ DP83822_WOL_CLR_INDICATION);
+
+ return 0;
+}
+
+static struct phy_driver dp83822_driver[] = {
+ {
+ .phy_id = DP83822_PHY_ID,
+ .phy_id_mask = 0xfffffff0,
+ .name = "TI DP83822",
+ .features = PHY_BASIC_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_init = dp83822_config_init,
+ .soft_reset = dp83822_phy_reset,
+ .get_wol = dp83822_get_wol,
+ .set_wol = dp83822_set_wol,
+ .ack_interrupt = dp83822_ack_interrupt,
+ .config_intr = dp83822_config_intr,
+ .config_aneg = genphy_config_aneg,
+ .read_status = genphy_read_status,
+ .suspend = dp83822_suspend,
+ .resume = dp83822_resume,
+ },
+};
+module_phy_driver(dp83822_driver);
+
+static struct mdio_device_id __maybe_unused dp83822_tbl[] = {
+ { DP83822_PHY_ID, 0xfffffff0 },
+ { },
+};
+MODULE_DEVICE_TABLE(mdio, dp83822_tbl);
+
+MODULE_DESCRIPTION("Texas Instruments DP83822 PHY driver");
+MODULE_AUTHOR("Dan Murphy <dmurphy@ti.com");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/dp83848.c b/drivers/net/phy/dp83848.c
index 3de4fe4dda77..3966d43c5146 100644
--- a/drivers/net/phy/dp83848.c
+++ b/drivers/net/phy/dp83848.c
@@ -20,7 +20,6 @@
#define TI_DP83620_PHY_ID 0x20005ce0
#define NS_DP83848C_PHY_ID 0x20005c90
#define TLK10X_PHY_ID 0x2000a210
-#define TI_DP83822_PHY_ID 0x2000a240
/* Registers */
#define DP83848_MICR 0x11 /* MII Interrupt Control Register */
@@ -80,7 +79,6 @@ static struct mdio_device_id __maybe_unused dp83848_tbl[] = {
{ NS_DP83848C_PHY_ID, 0xfffffff0 },
{ TI_DP83620_PHY_ID, 0xfffffff0 },
{ TLK10X_PHY_ID, 0xfffffff0 },
- { TI_DP83822_PHY_ID, 0xfffffff0 },
{ }
};
MODULE_DEVICE_TABLE(mdio, dp83848_tbl);
@@ -110,7 +108,6 @@ static struct phy_driver dp83848_driver[] = {
DP83848_PHY_DRIVER(NS_DP83848C_PHY_ID, "NS DP83848C 10/100 Mbps PHY"),
DP83848_PHY_DRIVER(TI_DP83620_PHY_ID, "TI DP83620 10/100 Mbps PHY"),
DP83848_PHY_DRIVER(TLK10X_PHY_ID, "TI TLK10X 10/100 Mbps PHY"),
- DP83848_PHY_DRIVER(TI_DP83822_PHY_ID, "TI DP83822 10/100 Mbps PHY"),
};
module_phy_driver(dp83848_driver);
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 9cbe645e3d89..d4670ecdb1ba 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -22,17 +22,29 @@
#define RTL821x_INER 0x12
#define RTL821x_INER_INIT 0x6400
#define RTL821x_INSR 0x13
+#define RTL821x_PAGE_SELECT 0x1f
#define RTL8211E_INER_LINK_STATUS 0x400
#define RTL8211F_INER_LINK_STATUS 0x0010
#define RTL8211F_INSR 0x1d
-#define RTL8211F_PAGE_SELECT 0x1f
#define RTL8211F_TX_DELAY 0x100
+#define RTL8201F_ISR 0x1e
+#define RTL8201F_IER 0x13
+
MODULE_DESCRIPTION("Realtek PHY driver");
MODULE_AUTHOR("Johnson Leung");
MODULE_LICENSE("GPL");
+static int rtl8201_ack_interrupt(struct phy_device *phydev)
+{
+ int err;
+
+ err = phy_read(phydev, RTL8201F_ISR);
+
+ return (err < 0) ? err : 0;
+}
+
static int rtl821x_ack_interrupt(struct phy_device *phydev)
{
int err;
@@ -46,14 +58,33 @@ static int rtl8211f_ack_interrupt(struct phy_device *phydev)
{
int err;
- phy_write(phydev, RTL8211F_PAGE_SELECT, 0xa43);
+ phy_write(phydev, RTL821x_PAGE_SELECT, 0xa43);
err = phy_read(phydev, RTL8211F_INSR);
/* restore to default page 0 */
- phy_write(phydev, RTL8211F_PAGE_SELECT, 0x0);
+ phy_write(phydev, RTL821x_PAGE_SELECT, 0x0);
return (err < 0) ? err : 0;
}
+static int rtl8201_config_intr(struct phy_device *phydev)
+{
+ int err;
+
+ /* switch to page 7 */
+ phy_write(phydev, RTL821x_PAGE_SELECT, 0x7);
+
+ if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+ err = phy_write(phydev, RTL8201F_IER,
+ BIT(13) | BIT(12) | BIT(11));
+ else
+ err = phy_write(phydev, RTL8201F_IER, 0);
+
+ /* restore to default page 0 */
+ phy_write(phydev, RTL821x_PAGE_SELECT, 0x0);
+
+ return err;
+}
+
static int rtl8211b_config_intr(struct phy_device *phydev)
{
int err;
@@ -102,7 +133,7 @@ static int rtl8211f_config_init(struct phy_device *phydev)
if (ret < 0)
return ret;
- phy_write(phydev, RTL8211F_PAGE_SELECT, 0xd08);
+ phy_write(phydev, RTL821x_PAGE_SELECT, 0xd08);
reg = phy_read(phydev, 0x11);
/* enable TX-delay for rgmii-id and rgmii-txid, otherwise disable it */
@@ -114,7 +145,7 @@ static int rtl8211f_config_init(struct phy_device *phydev)
phy_write(phydev, 0x11, reg);
/* restore to default page 0 */
- phy_write(phydev, RTL8211F_PAGE_SELECT, 0x0);
+ phy_write(phydev, RTL821x_PAGE_SELECT, 0x0);
return 0;
}
@@ -129,6 +160,18 @@ static struct phy_driver realtek_drvs[] = {
.config_aneg = &genphy_config_aneg,
.read_status = &genphy_read_status,
}, {
+ .phy_id = 0x001cc816,
+ .name = "RTL8201F 10/100Mbps Ethernet",
+ .phy_id_mask = 0x001fffff,
+ .features = PHY_BASIC_FEATURES,
+ .flags = PHY_HAS_INTERRUPT,
+ .config_aneg = &genphy_config_aneg,
+ .read_status = &genphy_read_status,
+ .ack_interrupt = &rtl8201_ack_interrupt,
+ .config_intr = &rtl8201_config_intr,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ }, {
.phy_id = 0x001cc912,
.name = "RTL8211B Gigabit Ethernet",
.phy_id_mask = 0x001fffff,
@@ -181,6 +224,7 @@ static struct phy_driver realtek_drvs[] = {
module_phy_driver(realtek_drvs);
static struct mdio_device_id __maybe_unused realtek_tbl[] = {
+ { 0x001cc816, 0x001fffff },
{ 0x001cc912, 0x001fffff },
{ 0x001cc914, 0x001fffff },
{ 0x001cc915, 0x001fffff },
diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c
new file mode 100644
index 000000000000..96b33475ea5e
--- /dev/null
+++ b/drivers/net/phy/uPD60620.c
@@ -0,0 +1,109 @@
+/*
+ * Driver for the Renesas PHY uPD60620.
+ *
+ * Copyright (C) 2015 Softing Industrial Automation GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+
+#define UPD60620_PHY_ID 0xb8242824
+
+/* Extended Registers and values */
+/* PHY Special Control/Status */
+#define PHY_PHYSCR 0x1F /* PHY.31 */
+#define PHY_PHYSCR_10MB 0x0004 /* PHY speed = 10mb */
+#define PHY_PHYSCR_100MB 0x0008 /* PHY speed = 100mb */
+#define PHY_PHYSCR_DUPLEX 0x0010 /* PHY Duplex */
+
+/* PHY Special Modes */
+#define PHY_SPM 0x12 /* PHY.18 */
+
+/* Init PHY */
+
+static int upd60620_config_init(struct phy_device *phydev)
+{
+ /* Enable support for passive HUBs (could be a strap option) */
+ /* PHYMODE: All speeds, HD in parallel detect */
+ return phy_write(phydev, PHY_SPM, 0x0180 | phydev->mdio.addr);
+}
+
+/* Get PHY status from common registers */
+
+static int upd60620_read_status(struct phy_device *phydev)
+{
+ int phy_state;
+
+ /* Read negotiated state */
+ phy_state = phy_read(phydev, MII_BMSR);
+ if (phy_state < 0)
+ return phy_state;
+
+ phydev->link = 0;
+ phydev->lp_advertising = 0;
+ phydev->pause = 0;
+ phydev->asym_pause = 0;
+
+ if (phy_state & (BMSR_ANEGCOMPLETE | BMSR_LSTATUS)) {
+ phy_state = phy_read(phydev, PHY_PHYSCR);
+ if (phy_state < 0)
+ return phy_state;
+
+ if (phy_state & (PHY_PHYSCR_10MB | PHY_PHYSCR_100MB)) {
+ phydev->link = 1;
+ phydev->speed = SPEED_10;
+ phydev->duplex = DUPLEX_HALF;
+
+ if (phy_state & PHY_PHYSCR_100MB)
+ phydev->speed = SPEED_100;
+ if (phy_state & PHY_PHYSCR_DUPLEX)
+ phydev->duplex = DUPLEX_FULL;
+
+ phy_state = phy_read(phydev, MII_LPA);
+ if (phy_state < 0)
+ return phy_state;
+
+ phydev->lp_advertising
+ = mii_lpa_to_ethtool_lpa_t(phy_state);
+
+ if (phydev->duplex == DUPLEX_FULL) {
+ if (phy_state & LPA_PAUSE_CAP)
+ phydev->pause = 1;
+ if (phy_state & LPA_PAUSE_ASYM)
+ phydev->asym_pause = 1;
+ }
+ }
+ }
+ return 0;
+}
+
+MODULE_DESCRIPTION("Renesas uPD60620 PHY driver");
+MODULE_AUTHOR("Bernd Edlinger <bernd.edlinger@hotmail.de>");
+MODULE_LICENSE("GPL");
+
+static struct phy_driver upd60620_driver[1] = { {
+ .phy_id = UPD60620_PHY_ID,
+ .phy_id_mask = 0xfffffffe,
+ .name = "Renesas uPD60620",
+ .features = PHY_BASIC_FEATURES,
+ .flags = 0,
+ .config_init = upd60620_config_init,
+ .config_aneg = genphy_config_aneg,
+ .read_status = upd60620_read_status,
+} };
+
+module_phy_driver(upd60620_driver);
+
+static struct mdio_device_id __maybe_unused upd60620_tbl[] = {
+ { UPD60620_PHY_ID, 0xfffffffe },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, upd60620_tbl);
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index 814fd8fae67d..1b28e6e702f5 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -69,7 +69,7 @@ struct asyncppp {
struct tasklet_struct tsk;
- atomic_t refcnt;
+ refcount_t refcnt;
struct semaphore dead_sem;
struct ppp_channel chan; /* interface to generic ppp layer */
unsigned char obuf[OBUFSIZE];
@@ -140,14 +140,14 @@ static struct asyncppp *ap_get(struct tty_struct *tty)
read_lock(&disc_data_lock);
ap = tty->disc_data;
if (ap != NULL)
- atomic_inc(&ap->refcnt);
+ refcount_inc(&ap->refcnt);
read_unlock(&disc_data_lock);
return ap;
}
static void ap_put(struct asyncppp *ap)
{
- if (atomic_dec_and_test(&ap->refcnt))
+ if (refcount_dec_and_test(&ap->refcnt))
up(&ap->dead_sem);
}
@@ -185,7 +185,7 @@ ppp_asynctty_open(struct tty_struct *tty)
skb_queue_head_init(&ap->rqueue);
tasklet_init(&ap->tsk, ppp_async_process, (unsigned long) ap);
- atomic_set(&ap->refcnt, 1);
+ refcount_set(&ap->refcnt, 1);
sema_init(&ap->dead_sem, 0);
ap->chan.private = ap;
@@ -234,7 +234,7 @@ ppp_asynctty_close(struct tty_struct *tty)
* our channel ops (i.e. ppp_async_send/ioctl) are in progress
* by the time it returns.
*/
- if (!atomic_dec_and_test(&ap->refcnt))
+ if (!refcount_dec_and_test(&ap->refcnt))
down(&ap->dead_sem);
tasklet_kill(&ap->tsk);
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index e365866600ba..6566107cef84 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -51,6 +51,7 @@
#include <asm/unaligned.h>
#include <net/slhc_vj.h>
#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
@@ -84,7 +85,7 @@ struct ppp_file {
struct sk_buff_head xq; /* pppd transmit queue */
struct sk_buff_head rq; /* receive queue for pppd */
wait_queue_head_t rwait; /* for poll on reading /dev/ppp */
- atomic_t refcnt; /* # refs (incl /dev/ppp attached) */
+ refcount_t refcnt; /* # refs (incl /dev/ppp attached) */
int hdrlen; /* space to leave for headers */
int index; /* interface unit / channel number */
int dead; /* unit/channel has been shut down */
@@ -408,7 +409,7 @@ static int ppp_release(struct inode *unused, struct file *file)
unregister_netdevice(ppp->dev);
rtnl_unlock();
}
- if (atomic_dec_and_test(&pf->refcnt)) {
+ if (refcount_dec_and_test(&pf->refcnt)) {
switch (pf->kind) {
case INTERFACE:
ppp_destroy_interface(PF_TO_PPP(pf));
@@ -881,7 +882,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
mutex_lock(&pn->all_ppp_mutex);
ppp = ppp_find_unit(pn, unit);
if (ppp) {
- atomic_inc(&ppp->file.refcnt);
+ refcount_inc(&ppp->file.refcnt);
file->private_data = &ppp->file;
err = 0;
}
@@ -896,7 +897,7 @@ static int ppp_unattached_ioctl(struct net *net, struct ppp_file *pf,
spin_lock_bh(&pn->all_channels_lock);
chan = ppp_find_channel(pn, unit);
if (chan) {
- atomic_inc(&chan->file.refcnt);
+ refcount_inc(&chan->file.refcnt);
file->private_data = &chan->file;
err = 0;
}
@@ -1348,7 +1349,7 @@ static int ppp_dev_init(struct net_device *dev)
* that ppp_destroy_interface() won't run before the device gets
* unregistered.
*/
- atomic_inc(&ppp->file.refcnt);
+ refcount_inc(&ppp->file.refcnt);
return 0;
}
@@ -1377,7 +1378,7 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
struct ppp *ppp;
ppp = netdev_priv(dev);
- if (atomic_dec_and_test(&ppp->file.refcnt))
+ if (refcount_dec_and_test(&ppp->file.refcnt))
ppp_destroy_interface(ppp);
}
@@ -2676,7 +2677,7 @@ ppp_unregister_channel(struct ppp_channel *chan)
pch->file.dead = 1;
wake_up_interruptible(&pch->file.rwait);
- if (atomic_dec_and_test(&pch->file.refcnt))
+ if (refcount_dec_and_test(&pch->file.refcnt))
ppp_destroy_channel(pch);
}
@@ -3046,7 +3047,7 @@ init_ppp_file(struct ppp_file *pf, int kind)
pf->kind = kind;
skb_queue_head_init(&pf->xq);
skb_queue_head_init(&pf->rq);
- atomic_set(&pf->refcnt, 1);
+ refcount_set(&pf->refcnt, 1);
init_waitqueue_head(&pf->rwait);
}
@@ -3164,7 +3165,7 @@ ppp_connect_channel(struct channel *pch, int unit)
list_add_tail(&pch->clist, &ppp->channels);
++ppp->n_channels;
pch->ppp = ppp;
- atomic_inc(&ppp->file.refcnt);
+ refcount_inc(&ppp->file.refcnt);
ppp_unlock(ppp);
ret = 0;
@@ -3195,7 +3196,7 @@ ppp_disconnect_channel(struct channel *pch)
if (--ppp->n_channels == 0)
wake_up_interruptible(&ppp->file.rwait);
ppp_unlock(ppp);
- if (atomic_dec_and_test(&ppp->file.refcnt))
+ if (refcount_dec_and_test(&ppp->file.refcnt))
ppp_destroy_interface(ppp);
err = 0;
}
diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index 7868c29071d4..7196f00f0991 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -46,6 +46,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/refcount.h>
#include <asm/unaligned.h>
#include <linux/uaccess.h>
@@ -72,7 +73,7 @@ struct syncppp {
struct tasklet_struct tsk;
- atomic_t refcnt;
+ refcount_t refcnt;
struct completion dead_cmp;
struct ppp_channel chan; /* interface to generic ppp layer */
};
@@ -141,14 +142,14 @@ static struct syncppp *sp_get(struct tty_struct *tty)
read_lock(&disc_data_lock);
ap = tty->disc_data;
if (ap != NULL)
- atomic_inc(&ap->refcnt);
+ refcount_inc(&ap->refcnt);
read_unlock(&disc_data_lock);
return ap;
}
static void sp_put(struct syncppp *ap)
{
- if (atomic_dec_and_test(&ap->refcnt))
+ if (refcount_dec_and_test(&ap->refcnt))
complete(&ap->dead_cmp);
}
@@ -182,7 +183,7 @@ ppp_sync_open(struct tty_struct *tty)
skb_queue_head_init(&ap->rqueue);
tasklet_init(&ap->tsk, ppp_sync_process, (unsigned long) ap);
- atomic_set(&ap->refcnt, 1);
+ refcount_set(&ap->refcnt, 1);
init_completion(&ap->dead_cmp);
ap->chan.private = ap;
@@ -232,7 +233,7 @@ ppp_sync_close(struct tty_struct *tty)
* our channel ops (i.e. ppp_sync_send/ioctl) are in progress
* by the time it returns.
*/
- if (!atomic_dec_and_test(&ap->refcnt))
+ if (!refcount_dec_and_test(&ap->refcnt))
wait_for_completion(&ap->dead_cmp);
tasklet_kill(&ap->tsk);
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 436dd78c396a..eb8a18991d8c 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -763,12 +763,8 @@ static struct slip *sl_alloc(dev_t line)
sl->mode = SL_MODE_DEFAULT;
#ifdef CONFIG_SLIP_SMART
/* initialize timer_list struct */
- init_timer(&sl->keepalive_timer);
- sl->keepalive_timer.data = (unsigned long)sl;
- sl->keepalive_timer.function = sl_keepalive;
- init_timer(&sl->outfill_timer);
- sl->outfill_timer.data = (unsigned long)sl;
- sl->outfill_timer.function = sl_outfill;
+ setup_timer(&sl->keepalive_timer, sl_keepalive, (unsigned long)sl);
+ setup_timer(&sl->outfill_timer, sl_outfill, (unsigned long)sl);
#endif
slip_devs[i] = dev;
return sl;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index ae53e899259f..a468439969df 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1112,7 +1112,7 @@ static int team_upper_dev_link(struct team *team, struct team_port *port)
lag_upper_info.tx_type = team->mode->lag_tx_type;
err = netdev_master_upper_dev_link(port->dev, team->dev, NULL,
- &lag_upper_info);
+ &lag_upper_info, NULL);
if (err)
return err;
port->dev->priv_flags |= IFF_TEAM_PORT;
@@ -1914,7 +1914,8 @@ static int team_netpoll_setup(struct net_device *dev,
}
#endif
-static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
+static int team_add_slave(struct net_device *dev, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
struct team *team = netdev_priv(dev);
int err;
diff --git a/drivers/net/team/team_mode_loadbalance.c b/drivers/net/team/team_mode_loadbalance.c
index 1468ddf424cc..a5ef97010eb3 100644
--- a/drivers/net/team/team_mode_loadbalance.c
+++ b/drivers/net/team/team_mode_loadbalance.c
@@ -137,7 +137,13 @@ static struct team_port *lb_htpm_select_tx_port(struct team *team,
struct sk_buff *skb,
unsigned char hash)
{
- return rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
+ struct team_port *port;
+
+ port = rcu_dereference_bh(LB_HTPM_PORT_BY_HASH(lb_priv, hash));
+ if (likely(port))
+ return port;
+ /* If no valid port in the table, fall back to simple hash */
+ return lb_hash_select_tx_port(team, lb_priv, skb, hash);
}
struct lb_select_tx_port {
diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c
new file mode 100644
index 000000000000..435854688a7a
--- /dev/null
+++ b/drivers/net/thunderbolt.c
@@ -0,0 +1,1362 @@
+/*
+ * Networking over Thunderbolt cable using Apple ThunderboltIP protocol
+ *
+ * Copyright (C) 2017, Intel Corporation
+ * Authors: Amir Levy <amir.jer.levy@intel.com>
+ * Michael Jamet <michael.jamet@intel.com>
+ * Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/atomic.h>
+#include <linux/highmem.h>
+#include <linux/if_vlan.h>
+#include <linux/jhash.h>
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/sizes.h>
+#include <linux/thunderbolt.h>
+#include <linux/uuid.h>
+#include <linux/workqueue.h>
+
+#include <net/ip6_checksum.h>
+
+/* Protocol timeouts in ms */
+#define TBNET_LOGIN_DELAY 4500
+#define TBNET_LOGIN_TIMEOUT 500
+#define TBNET_LOGOUT_TIMEOUT 100
+
+#define TBNET_RING_SIZE 256
+#define TBNET_LOCAL_PATH 0xf
+#define TBNET_LOGIN_RETRIES 60
+#define TBNET_LOGOUT_RETRIES 5
+#define TBNET_MATCH_FRAGS_ID BIT(1)
+#define TBNET_MAX_MTU SZ_64K
+#define TBNET_FRAME_SIZE SZ_4K
+#define TBNET_MAX_PAYLOAD_SIZE \
+ (TBNET_FRAME_SIZE - sizeof(struct thunderbolt_ip_frame_header))
+/* Rx packets need to hold space for skb_shared_info */
+#define TBNET_RX_MAX_SIZE \
+ (TBNET_FRAME_SIZE + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+#define TBNET_RX_PAGE_ORDER get_order(TBNET_RX_MAX_SIZE)
+#define TBNET_RX_PAGE_SIZE (PAGE_SIZE << TBNET_RX_PAGE_ORDER)
+
+#define TBNET_L0_PORT_NUM(route) ((route) & GENMASK(5, 0))
+
+/**
+ * struct thunderbolt_ip_frame_header - Header for each Thunderbolt frame
+ * @frame_size: size of the data with the frame
+ * @frame_index: running index on the frames
+ * @frame_id: ID of the frame to match frames to specific packet
+ * @frame_count: how many frames assembles a full packet
+ *
+ * Each data frame passed to the high-speed DMA ring has this header. If
+ * the XDomain network directory announces that %TBNET_MATCH_FRAGS_ID is
+ * supported then @frame_id is filled, otherwise it stays %0.
+ */
+struct thunderbolt_ip_frame_header {
+ u32 frame_size;
+ u16 frame_index;
+ u16 frame_id;
+ u32 frame_count;
+};
+
+enum thunderbolt_ip_frame_pdf {
+ TBIP_PDF_FRAME_START = 1,
+ TBIP_PDF_FRAME_END,
+};
+
+enum thunderbolt_ip_type {
+ TBIP_LOGIN,
+ TBIP_LOGIN_RESPONSE,
+ TBIP_LOGOUT,
+ TBIP_STATUS,
+};
+
+struct thunderbolt_ip_header {
+ u32 route_hi;
+ u32 route_lo;
+ u32 length_sn;
+ uuid_t uuid;
+ uuid_t initiator_uuid;
+ uuid_t target_uuid;
+ u32 type;
+ u32 command_id;
+};
+
+#define TBIP_HDR_LENGTH_MASK GENMASK(5, 0)
+#define TBIP_HDR_SN_MASK GENMASK(28, 27)
+#define TBIP_HDR_SN_SHIFT 27
+
+struct thunderbolt_ip_login {
+ struct thunderbolt_ip_header hdr;
+ u32 proto_version;
+ u32 transmit_path;
+ u32 reserved[4];
+};
+
+#define TBIP_LOGIN_PROTO_VERSION 1
+
+struct thunderbolt_ip_login_response {
+ struct thunderbolt_ip_header hdr;
+ u32 status;
+ u32 receiver_mac[2];
+ u32 receiver_mac_len;
+ u32 reserved[4];
+};
+
+struct thunderbolt_ip_logout {
+ struct thunderbolt_ip_header hdr;
+};
+
+struct thunderbolt_ip_status {
+ struct thunderbolt_ip_header hdr;
+ u32 status;
+};
+
+struct tbnet_stats {
+ u64 tx_packets;
+ u64 rx_packets;
+ u64 tx_bytes;
+ u64 rx_bytes;
+ u64 rx_errors;
+ u64 tx_errors;
+ u64 rx_length_errors;
+ u64 rx_over_errors;
+ u64 rx_crc_errors;
+ u64 rx_missed_errors;
+};
+
+struct tbnet_frame {
+ struct net_device *dev;
+ struct page *page;
+ struct ring_frame frame;
+};
+
+struct tbnet_ring {
+ struct tbnet_frame frames[TBNET_RING_SIZE];
+ unsigned int cons;
+ unsigned int prod;
+ struct tb_ring *ring;
+};
+
+/**
+ * struct tbnet - ThunderboltIP network driver private data
+ * @svc: XDomain service the driver is bound to
+ * @xd: XDomain the service blongs to
+ * @handler: ThunderboltIP configuration protocol handler
+ * @dev: Networking device
+ * @napi: NAPI structure for Rx polling
+ * @stats: Network statistics
+ * @skb: Network packet that is currently processed on Rx path
+ * @command_id: ID used for next configuration protocol packet
+ * @login_sent: ThunderboltIP login message successfully sent
+ * @login_received: ThunderboltIP login message received from the remote
+ * host
+ * @transmit_path: HopID the other end needs to use building the
+ * opposite side path.
+ * @connection_lock: Lock serializing access to @login_sent,
+ * @login_received and @transmit_path.
+ * @login_retries: Number of login retries currently done
+ * @login_work: Worker to send ThunderboltIP login packets
+ * @connected_work: Worker that finalizes the ThunderboltIP connection
+ * setup and enables DMA paths for high speed data
+ * transfers
+ * @rx_hdr: Copy of the currently processed Rx frame. Used when a
+ * network packet consists of multiple Thunderbolt frames.
+ * In host byte order.
+ * @rx_ring: Software ring holding Rx frames
+ * @frame_id: Frame ID use for next Tx packet
+ * (if %TBNET_MATCH_FRAGS_ID is supported in both ends)
+ * @tx_ring: Software ring holding Tx frames
+ */
+struct tbnet {
+ const struct tb_service *svc;
+ struct tb_xdomain *xd;
+ struct tb_protocol_handler handler;
+ struct net_device *dev;
+ struct napi_struct napi;
+ struct tbnet_stats stats;
+ struct sk_buff *skb;
+ atomic_t command_id;
+ bool login_sent;
+ bool login_received;
+ u32 transmit_path;
+ struct mutex connection_lock;
+ int login_retries;
+ struct delayed_work login_work;
+ struct work_struct connected_work;
+ struct thunderbolt_ip_frame_header rx_hdr;
+ struct tbnet_ring rx_ring;
+ atomic_t frame_id;
+ struct tbnet_ring tx_ring;
+};
+
+/* Network property directory UUID: c66189ca-1cce-4195-bdb8-49592e5f5a4f */
+static const uuid_t tbnet_dir_uuid =
+ UUID_INIT(0xc66189ca, 0x1cce, 0x4195,
+ 0xbd, 0xb8, 0x49, 0x59, 0x2e, 0x5f, 0x5a, 0x4f);
+
+/* ThunderboltIP protocol UUID: 798f589e-3616-8a47-97c6-5664a920c8dd */
+static const uuid_t tbnet_svc_uuid =
+ UUID_INIT(0x798f589e, 0x3616, 0x8a47,
+ 0x97, 0xc6, 0x56, 0x64, 0xa9, 0x20, 0xc8, 0xdd);
+
+static struct tb_property_dir *tbnet_dir;
+
+static void tbnet_fill_header(struct thunderbolt_ip_header *hdr, u64 route,
+ u8 sequence, const uuid_t *initiator_uuid, const uuid_t *target_uuid,
+ enum thunderbolt_ip_type type, size_t size, u32 command_id)
+{
+ u32 length_sn;
+
+ /* Length does not include route_hi/lo and length_sn fields */
+ length_sn = (size - 3 * 4) / 4;
+ length_sn |= (sequence << TBIP_HDR_SN_SHIFT) & TBIP_HDR_SN_MASK;
+
+ hdr->route_hi = upper_32_bits(route);
+ hdr->route_lo = lower_32_bits(route);
+ hdr->length_sn = length_sn;
+ uuid_copy(&hdr->uuid, &tbnet_svc_uuid);
+ uuid_copy(&hdr->initiator_uuid, initiator_uuid);
+ uuid_copy(&hdr->target_uuid, target_uuid);
+ hdr->type = type;
+ hdr->command_id = command_id;
+}
+
+static int tbnet_login_response(struct tbnet *net, u64 route, u8 sequence,
+ u32 command_id)
+{
+ struct thunderbolt_ip_login_response reply;
+ struct tb_xdomain *xd = net->xd;
+
+ memset(&reply, 0, sizeof(reply));
+ tbnet_fill_header(&reply.hdr, route, sequence, xd->local_uuid,
+ xd->remote_uuid, TBIP_LOGIN_RESPONSE, sizeof(reply),
+ command_id);
+ memcpy(reply.receiver_mac, net->dev->dev_addr, ETH_ALEN);
+ reply.receiver_mac_len = ETH_ALEN;
+
+ return tb_xdomain_response(xd, &reply, sizeof(reply),
+ TB_CFG_PKG_XDOMAIN_RESP);
+}
+
+static int tbnet_login_request(struct tbnet *net, u8 sequence)
+{
+ struct thunderbolt_ip_login_response reply;
+ struct thunderbolt_ip_login request;
+ struct tb_xdomain *xd = net->xd;
+
+ memset(&request, 0, sizeof(request));
+ tbnet_fill_header(&request.hdr, xd->route, sequence, xd->local_uuid,
+ xd->remote_uuid, TBIP_LOGIN, sizeof(request),
+ atomic_inc_return(&net->command_id));
+
+ request.proto_version = TBIP_LOGIN_PROTO_VERSION;
+ request.transmit_path = TBNET_LOCAL_PATH;
+
+ return tb_xdomain_request(xd, &request, sizeof(request),
+ TB_CFG_PKG_XDOMAIN_RESP, &reply,
+ sizeof(reply), TB_CFG_PKG_XDOMAIN_RESP,
+ TBNET_LOGIN_TIMEOUT);
+}
+
+static int tbnet_logout_response(struct tbnet *net, u64 route, u8 sequence,
+ u32 command_id)
+{
+ struct thunderbolt_ip_status reply;
+ struct tb_xdomain *xd = net->xd;
+
+ memset(&reply, 0, sizeof(reply));
+ tbnet_fill_header(&reply.hdr, route, sequence, xd->local_uuid,
+ xd->remote_uuid, TBIP_STATUS, sizeof(reply),
+ atomic_inc_return(&net->command_id));
+ return tb_xdomain_response(xd, &reply, sizeof(reply),
+ TB_CFG_PKG_XDOMAIN_RESP);
+}
+
+static int tbnet_logout_request(struct tbnet *net)
+{
+ struct thunderbolt_ip_logout request;
+ struct thunderbolt_ip_status reply;
+ struct tb_xdomain *xd = net->xd;
+
+ memset(&request, 0, sizeof(request));
+ tbnet_fill_header(&request.hdr, xd->route, 0, xd->local_uuid,
+ xd->remote_uuid, TBIP_LOGOUT, sizeof(request),
+ atomic_inc_return(&net->command_id));
+
+ return tb_xdomain_request(xd, &request, sizeof(request),
+ TB_CFG_PKG_XDOMAIN_RESP, &reply,
+ sizeof(reply), TB_CFG_PKG_XDOMAIN_RESP,
+ TBNET_LOGOUT_TIMEOUT);
+}
+
+static void start_login(struct tbnet *net)
+{
+ mutex_lock(&net->connection_lock);
+ net->login_sent = false;
+ net->login_received = false;
+ mutex_unlock(&net->connection_lock);
+
+ queue_delayed_work(system_long_wq, &net->login_work,
+ msecs_to_jiffies(1000));
+}
+
+static void stop_login(struct tbnet *net)
+{
+ cancel_delayed_work_sync(&net->login_work);
+ cancel_work_sync(&net->connected_work);
+}
+
+static inline unsigned int tbnet_frame_size(const struct tbnet_frame *tf)
+{
+ return tf->frame.size ? : TBNET_FRAME_SIZE;
+}
+
+static void tbnet_free_buffers(struct tbnet_ring *ring)
+{
+ unsigned int i;
+
+ for (i = 0; i < TBNET_RING_SIZE; i++) {
+ struct device *dma_dev = tb_ring_dma_device(ring->ring);
+ struct tbnet_frame *tf = &ring->frames[i];
+ enum dma_data_direction dir;
+ unsigned int order;
+ size_t size;
+
+ if (!tf->page)
+ continue;
+
+ if (ring->ring->is_tx) {
+ dir = DMA_TO_DEVICE;
+ order = 0;
+ size = tbnet_frame_size(tf);
+ } else {
+ dir = DMA_FROM_DEVICE;
+ order = TBNET_RX_PAGE_ORDER;
+ size = TBNET_RX_PAGE_SIZE;
+ }
+
+ if (tf->frame.buffer_phy)
+ dma_unmap_page(dma_dev, tf->frame.buffer_phy, size,
+ dir);
+
+ __free_pages(tf->page, order);
+ tf->page = NULL;
+ }
+
+ ring->cons = 0;
+ ring->prod = 0;
+}
+
+static void tbnet_tear_down(struct tbnet *net, bool send_logout)
+{
+ netif_carrier_off(net->dev);
+ netif_stop_queue(net->dev);
+
+ stop_login(net);
+
+ mutex_lock(&net->connection_lock);
+
+ if (net->login_sent && net->login_received) {
+ int retries = TBNET_LOGOUT_RETRIES;
+
+ while (send_logout && retries-- > 0) {
+ int ret = tbnet_logout_request(net);
+ if (ret != -ETIMEDOUT)
+ break;
+ }
+
+ tb_ring_stop(net->rx_ring.ring);
+ tb_ring_stop(net->tx_ring.ring);
+ tbnet_free_buffers(&net->rx_ring);
+ tbnet_free_buffers(&net->tx_ring);
+
+ if (tb_xdomain_disable_paths(net->xd))
+ netdev_warn(net->dev, "failed to disable DMA paths\n");
+ }
+
+ net->login_retries = 0;
+ net->login_sent = false;
+ net->login_received = false;
+
+ mutex_unlock(&net->connection_lock);
+}
+
+static int tbnet_handle_packet(const void *buf, size_t size, void *data)
+{
+ const struct thunderbolt_ip_login *pkg = buf;
+ struct tbnet *net = data;
+ u32 command_id;
+ int ret = 0;
+ u32 sequence;
+ u64 route;
+
+ /* Make sure the packet is for us */
+ if (size < sizeof(struct thunderbolt_ip_header))
+ return 0;
+ if (!uuid_equal(&pkg->hdr.initiator_uuid, net->xd->remote_uuid))
+ return 0;
+ if (!uuid_equal(&pkg->hdr.target_uuid, net->xd->local_uuid))
+ return 0;
+
+ route = ((u64)pkg->hdr.route_hi << 32) | pkg->hdr.route_lo;
+ route &= ~BIT_ULL(63);
+ if (route != net->xd->route)
+ return 0;
+
+ sequence = pkg->hdr.length_sn & TBIP_HDR_SN_MASK;
+ sequence >>= TBIP_HDR_SN_SHIFT;
+ command_id = pkg->hdr.command_id;
+
+ switch (pkg->hdr.type) {
+ case TBIP_LOGIN:
+ if (!netif_running(net->dev))
+ break;
+
+ ret = tbnet_login_response(net, route, sequence,
+ pkg->hdr.command_id);
+ if (!ret) {
+ mutex_lock(&net->connection_lock);
+ net->login_received = true;
+ net->transmit_path = pkg->transmit_path;
+
+ /* If we reached the number of max retries or
+ * previous logout, schedule another round of
+ * login retries
+ */
+ if (net->login_retries >= TBNET_LOGIN_RETRIES ||
+ !net->login_sent) {
+ net->login_retries = 0;
+ queue_delayed_work(system_long_wq,
+ &net->login_work, 0);
+ }
+ mutex_unlock(&net->connection_lock);
+
+ queue_work(system_long_wq, &net->connected_work);
+ }
+ break;
+
+ case TBIP_LOGOUT:
+ ret = tbnet_logout_response(net, route, sequence, command_id);
+ if (!ret)
+ tbnet_tear_down(net, false);
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (ret)
+ netdev_warn(net->dev, "failed to send ThunderboltIP response\n");
+
+ return 1;
+}
+
+static unsigned int tbnet_available_buffers(const struct tbnet_ring *ring)
+{
+ return ring->prod - ring->cons;
+}
+
+static int tbnet_alloc_rx_buffers(struct tbnet *net, unsigned int nbuffers)
+{
+ struct tbnet_ring *ring = &net->rx_ring;
+ int ret;
+
+ while (nbuffers--) {
+ struct device *dma_dev = tb_ring_dma_device(ring->ring);
+ unsigned int index = ring->prod & (TBNET_RING_SIZE - 1);
+ struct tbnet_frame *tf = &ring->frames[index];
+ dma_addr_t dma_addr;
+
+ if (tf->page)
+ break;
+
+ /* Allocate page (order > 0) so that it can hold maximum
+ * ThunderboltIP frame (4kB) and the additional room for
+ * SKB shared info required by build_skb().
+ */
+ tf->page = dev_alloc_pages(TBNET_RX_PAGE_ORDER);
+ if (!tf->page) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ dma_addr = dma_map_page(dma_dev, tf->page, 0,
+ TBNET_RX_PAGE_SIZE, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dma_dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ tf->frame.buffer_phy = dma_addr;
+ tf->dev = net->dev;
+
+ tb_ring_rx(ring->ring, &tf->frame);
+
+ ring->prod++;
+ }
+
+ return 0;
+
+err_free:
+ tbnet_free_buffers(ring);
+ return ret;
+}
+
+static struct tbnet_frame *tbnet_get_tx_buffer(struct tbnet *net)
+{
+ struct tbnet_ring *ring = &net->tx_ring;
+ struct tbnet_frame *tf;
+ unsigned int index;
+
+ if (!tbnet_available_buffers(ring))
+ return NULL;
+
+ index = ring->cons++ & (TBNET_RING_SIZE - 1);
+
+ tf = &ring->frames[index];
+ tf->frame.size = 0;
+ tf->frame.buffer_phy = 0;
+
+ return tf;
+}
+
+static void tbnet_tx_callback(struct tb_ring *ring, struct ring_frame *frame,
+ bool canceled)
+{
+ struct tbnet_frame *tf = container_of(frame, typeof(*tf), frame);
+ struct device *dma_dev = tb_ring_dma_device(ring);
+ struct tbnet *net = netdev_priv(tf->dev);
+
+ dma_unmap_page(dma_dev, tf->frame.buffer_phy, tbnet_frame_size(tf),
+ DMA_TO_DEVICE);
+
+ /* Return buffer to the ring */
+ net->tx_ring.prod++;
+
+ if (tbnet_available_buffers(&net->tx_ring) >= TBNET_RING_SIZE / 2)
+ netif_wake_queue(net->dev);
+}
+
+static int tbnet_alloc_tx_buffers(struct tbnet *net)
+{
+ struct tbnet_ring *ring = &net->tx_ring;
+ unsigned int i;
+
+ for (i = 0; i < TBNET_RING_SIZE; i++) {
+ struct tbnet_frame *tf = &ring->frames[i];
+
+ tf->page = alloc_page(GFP_KERNEL);
+ if (!tf->page) {
+ tbnet_free_buffers(ring);
+ return -ENOMEM;
+ }
+
+ tf->dev = net->dev;
+ tf->frame.callback = tbnet_tx_callback;
+ tf->frame.sof = TBIP_PDF_FRAME_START;
+ tf->frame.eof = TBIP_PDF_FRAME_END;
+ }
+
+ ring->cons = 0;
+ ring->prod = TBNET_RING_SIZE - 1;
+
+ return 0;
+}
+
+static void tbnet_connected_work(struct work_struct *work)
+{
+ struct tbnet *net = container_of(work, typeof(*net), connected_work);
+ bool connected;
+ int ret;
+
+ if (netif_carrier_ok(net->dev))
+ return;
+
+ mutex_lock(&net->connection_lock);
+ connected = net->login_sent && net->login_received;
+ mutex_unlock(&net->connection_lock);
+
+ if (!connected)
+ return;
+
+ /* Both logins successful so enable the high-speed DMA paths and
+ * start the network device queue.
+ */
+ ret = tb_xdomain_enable_paths(net->xd, TBNET_LOCAL_PATH,
+ net->rx_ring.ring->hop,
+ net->transmit_path,
+ net->tx_ring.ring->hop);
+ if (ret) {
+ netdev_err(net->dev, "failed to enable DMA paths\n");
+ return;
+ }
+
+ tb_ring_start(net->tx_ring.ring);
+ tb_ring_start(net->rx_ring.ring);
+
+ ret = tbnet_alloc_rx_buffers(net, TBNET_RING_SIZE);
+ if (ret)
+ goto err_stop_rings;
+
+ ret = tbnet_alloc_tx_buffers(net);
+ if (ret)
+ goto err_free_rx_buffers;
+
+ netif_carrier_on(net->dev);
+ netif_start_queue(net->dev);
+ return;
+
+err_free_rx_buffers:
+ tbnet_free_buffers(&net->rx_ring);
+err_stop_rings:
+ tb_ring_stop(net->rx_ring.ring);
+ tb_ring_stop(net->tx_ring.ring);
+}
+
+static void tbnet_login_work(struct work_struct *work)
+{
+ struct tbnet *net = container_of(work, typeof(*net), login_work.work);
+ unsigned long delay = msecs_to_jiffies(TBNET_LOGIN_DELAY);
+ int ret;
+
+ if (netif_carrier_ok(net->dev))
+ return;
+
+ ret = tbnet_login_request(net, net->login_retries % 4);
+ if (ret) {
+ if (net->login_retries++ < TBNET_LOGIN_RETRIES) {
+ queue_delayed_work(system_long_wq, &net->login_work,
+ delay);
+ } else {
+ netdev_info(net->dev, "ThunderboltIP login timed out\n");
+ }
+ } else {
+ net->login_retries = 0;
+
+ mutex_lock(&net->connection_lock);
+ net->login_sent = true;
+ mutex_unlock(&net->connection_lock);
+
+ queue_work(system_long_wq, &net->connected_work);
+ }
+}
+
+static bool tbnet_check_frame(struct tbnet *net, const struct tbnet_frame *tf,
+ const struct thunderbolt_ip_frame_header *hdr)
+{
+ u32 frame_id, frame_count, frame_size, frame_index;
+ unsigned int size;
+
+ if (tf->frame.flags & RING_DESC_CRC_ERROR) {
+ net->stats.rx_crc_errors++;
+ return false;
+ } else if (tf->frame.flags & RING_DESC_BUFFER_OVERRUN) {
+ net->stats.rx_over_errors++;
+ return false;
+ }
+
+ /* Should be greater than just header i.e. contains data */
+ size = tbnet_frame_size(tf);
+ if (size <= sizeof(*hdr)) {
+ net->stats.rx_length_errors++;
+ return false;
+ }
+
+ frame_count = le32_to_cpu(hdr->frame_count);
+ frame_size = le32_to_cpu(hdr->frame_size);
+ frame_index = le16_to_cpu(hdr->frame_index);
+ frame_id = le16_to_cpu(hdr->frame_id);
+
+ if ((frame_size > size - sizeof(*hdr)) || !frame_size) {
+ net->stats.rx_length_errors++;
+ return false;
+ }
+
+ /* In case we're in the middle of packet, validate the frame
+ * header based on first fragment of the packet.
+ */
+ if (net->skb && net->rx_hdr.frame_count) {
+ /* Check the frame count fits the count field */
+ if (frame_count != net->rx_hdr.frame_count) {
+ net->stats.rx_length_errors++;
+ return false;
+ }
+
+ /* Check the frame identifiers are incremented correctly,
+ * and id is matching.
+ */
+ if (frame_index != net->rx_hdr.frame_index + 1 ||
+ frame_id != net->rx_hdr.frame_id) {
+ net->stats.rx_missed_errors++;
+ return false;
+ }
+
+ if (net->skb->len + frame_size > TBNET_MAX_MTU) {
+ net->stats.rx_length_errors++;
+ return false;
+ }
+
+ return true;
+ }
+
+ /* Start of packet, validate the frame header */
+ if (frame_count == 0 || frame_count > TBNET_RING_SIZE / 4) {
+ net->stats.rx_length_errors++;
+ return false;
+ }
+ if (frame_index != 0) {
+ net->stats.rx_missed_errors++;
+ return false;
+ }
+
+ return true;
+}
+
+static int tbnet_poll(struct napi_struct *napi, int budget)
+{
+ struct tbnet *net = container_of(napi, struct tbnet, napi);
+ unsigned int cleaned_count = tbnet_available_buffers(&net->rx_ring);
+ struct device *dma_dev = tb_ring_dma_device(net->rx_ring.ring);
+ unsigned int rx_packets = 0;
+
+ while (rx_packets < budget) {
+ const struct thunderbolt_ip_frame_header *hdr;
+ unsigned int hdr_size = sizeof(*hdr);
+ struct sk_buff *skb = NULL;
+ struct ring_frame *frame;
+ struct tbnet_frame *tf;
+ struct page *page;
+ bool last = true;
+ u32 frame_size;
+
+ /* Return some buffers to hardware, one at a time is too
+ * slow so allocate MAX_SKB_FRAGS buffers at the same
+ * time.
+ */
+ if (cleaned_count >= MAX_SKB_FRAGS) {
+ tbnet_alloc_rx_buffers(net, cleaned_count);
+ cleaned_count = 0;
+ }
+
+ frame = tb_ring_poll(net->rx_ring.ring);
+ if (!frame)
+ break;
+
+ dma_unmap_page(dma_dev, frame->buffer_phy,
+ TBNET_RX_PAGE_SIZE, DMA_FROM_DEVICE);
+
+ tf = container_of(frame, typeof(*tf), frame);
+
+ page = tf->page;
+ tf->page = NULL;
+ net->rx_ring.cons++;
+ cleaned_count++;
+
+ hdr = page_address(page);
+ if (!tbnet_check_frame(net, tf, hdr)) {
+ __free_pages(page, TBNET_RX_PAGE_ORDER);
+ dev_kfree_skb_any(net->skb);
+ net->skb = NULL;
+ continue;
+ }
+
+ frame_size = le32_to_cpu(hdr->frame_size);
+
+ skb = net->skb;
+ if (!skb) {
+ skb = build_skb(page_address(page),
+ TBNET_RX_PAGE_SIZE);
+ if (!skb) {
+ __free_pages(page, TBNET_RX_PAGE_ORDER);
+ net->stats.rx_errors++;
+ break;
+ }
+
+ skb_reserve(skb, hdr_size);
+ skb_put(skb, frame_size);
+
+ net->skb = skb;
+ } else {
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+ page, hdr_size, frame_size,
+ TBNET_RX_PAGE_SIZE - hdr_size);
+ }
+
+ net->rx_hdr.frame_size = frame_size;
+ net->rx_hdr.frame_count = le32_to_cpu(hdr->frame_count);
+ net->rx_hdr.frame_index = le16_to_cpu(hdr->frame_index);
+ net->rx_hdr.frame_id = le16_to_cpu(hdr->frame_id);
+ last = net->rx_hdr.frame_index == net->rx_hdr.frame_count - 1;
+
+ rx_packets++;
+ net->stats.rx_bytes += frame_size;
+
+ if (last) {
+ skb->protocol = eth_type_trans(skb, net->dev);
+ napi_gro_receive(&net->napi, skb);
+ net->skb = NULL;
+ }
+ }
+
+ net->stats.rx_packets += rx_packets;
+
+ if (cleaned_count)
+ tbnet_alloc_rx_buffers(net, cleaned_count);
+
+ if (rx_packets >= budget)
+ return budget;
+
+ napi_complete_done(napi, rx_packets);
+ /* Re-enable the ring interrupt */
+ tb_ring_poll_complete(net->rx_ring.ring);
+
+ return rx_packets;
+}
+
+static void tbnet_start_poll(void *data)
+{
+ struct tbnet *net = data;
+
+ napi_schedule(&net->napi);
+}
+
+static int tbnet_open(struct net_device *dev)
+{
+ struct tbnet *net = netdev_priv(dev);
+ struct tb_xdomain *xd = net->xd;
+ u16 sof_mask, eof_mask;
+ struct tb_ring *ring;
+
+ netif_carrier_off(dev);
+
+ ring = tb_ring_alloc_tx(xd->tb->nhi, -1, TBNET_RING_SIZE,
+ RING_FLAG_FRAME);
+ if (!ring) {
+ netdev_err(dev, "failed to allocate Tx ring\n");
+ return -ENOMEM;
+ }
+ net->tx_ring.ring = ring;
+
+ sof_mask = BIT(TBIP_PDF_FRAME_START);
+ eof_mask = BIT(TBIP_PDF_FRAME_END);
+
+ ring = tb_ring_alloc_rx(xd->tb->nhi, -1, TBNET_RING_SIZE,
+ RING_FLAG_FRAME | RING_FLAG_E2E, sof_mask,
+ eof_mask, tbnet_start_poll, net);
+ if (!ring) {
+ netdev_err(dev, "failed to allocate Rx ring\n");
+ tb_ring_free(net->tx_ring.ring);
+ net->tx_ring.ring = NULL;
+ return -ENOMEM;
+ }
+ net->rx_ring.ring = ring;
+
+ napi_enable(&net->napi);
+ start_login(net);
+
+ return 0;
+}
+
+static int tbnet_stop(struct net_device *dev)
+{
+ struct tbnet *net = netdev_priv(dev);
+
+ napi_disable(&net->napi);
+
+ tbnet_tear_down(net, true);
+
+ tb_ring_free(net->rx_ring.ring);
+ net->rx_ring.ring = NULL;
+ tb_ring_free(net->tx_ring.ring);
+ net->tx_ring.ring = NULL;
+
+ return 0;
+}
+
+static bool tbnet_xmit_map(struct device *dma_dev, struct tbnet_frame *tf)
+{
+ dma_addr_t dma_addr;
+
+ dma_addr = dma_map_page(dma_dev, tf->page, 0, tbnet_frame_size(tf),
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dma_dev, dma_addr))
+ return false;
+
+ tf->frame.buffer_phy = dma_addr;
+ return true;
+}
+
+static bool tbnet_xmit_csum_and_map(struct tbnet *net, struct sk_buff *skb,
+ struct tbnet_frame **frames, u32 frame_count)
+{
+ struct thunderbolt_ip_frame_header *hdr = page_address(frames[0]->page);
+ struct device *dma_dev = tb_ring_dma_device(net->tx_ring.ring);
+ __wsum wsum = htonl(skb->len - skb_transport_offset(skb));
+ unsigned int i, len, offset = skb_transport_offset(skb);
+ __be16 protocol = skb->protocol;
+ void *data = skb->data;
+ void *dest = hdr + 1;
+ __sum16 *tucso;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* No need to calculate checksum so we just update the
+ * total frame count and map the frames for DMA.
+ */
+ for (i = 0; i < frame_count; i++) {
+ hdr = page_address(frames[i]->page);
+ hdr->frame_count = cpu_to_le32(frame_count);
+ if (!tbnet_xmit_map(dma_dev, frames[i]))
+ goto err_unmap;
+ }
+
+ return true;
+ }
+
+ if (protocol == htons(ETH_P_8021Q)) {
+ struct vlan_hdr *vhdr, vh;
+
+ vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(vh), &vh);
+ if (!vhdr)
+ return false;
+
+ protocol = vhdr->h_vlan_encapsulated_proto;
+ }
+
+ /* Data points on the beginning of packet.
+ * Check is the checksum absolute place in the packet.
+ * ipcso will update IP checksum.
+ * tucso will update TCP/UPD checksum.
+ */
+ if (protocol == htons(ETH_P_IP)) {
+ __sum16 *ipcso = dest + ((void *)&(ip_hdr(skb)->check) - data);
+
+ *ipcso = 0;
+ *ipcso = ip_fast_csum(dest + skb_network_offset(skb),
+ ip_hdr(skb)->ihl);
+
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+ tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
+ else if (ip_hdr(skb)->protocol == IPPROTO_UDP)
+ tucso = dest + ((void *)&(udp_hdr(skb)->check) - data);
+ else
+ return false;
+
+ *tucso = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr, 0,
+ ip_hdr(skb)->protocol, 0);
+ } else if (skb_is_gso_v6(skb)) {
+ tucso = dest + ((void *)&(tcp_hdr(skb)->check) - data);
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, 0,
+ IPPROTO_TCP, 0);
+ return false;
+ } else if (protocol == htons(ETH_P_IPV6)) {
+ tucso = dest + skb_checksum_start_offset(skb) + skb->csum_offset;
+ *tucso = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr, 0,
+ ipv6_hdr(skb)->nexthdr, 0);
+ } else {
+ return false;
+ }
+
+ /* First frame was headers, rest of the frames contain data.
+ * Calculate checksum over each frame.
+ */
+ for (i = 0; i < frame_count; i++) {
+ hdr = page_address(frames[i]->page);
+ dest = (void *)(hdr + 1) + offset;
+ len = le32_to_cpu(hdr->frame_size) - offset;
+ wsum = csum_partial(dest, len, wsum);
+ hdr->frame_count = cpu_to_le32(frame_count);
+
+ offset = 0;
+ }
+
+ *tucso = csum_fold(wsum);
+
+ /* Checksum is finally calculated and we don't touch the memory
+ * anymore, so DMA map the frames now.
+ */
+ for (i = 0; i < frame_count; i++) {
+ if (!tbnet_xmit_map(dma_dev, frames[i]))
+ goto err_unmap;
+ }
+
+ return true;
+
+err_unmap:
+ while (i--)
+ dma_unmap_page(dma_dev, frames[i]->frame.buffer_phy,
+ tbnet_frame_size(frames[i]), DMA_TO_DEVICE);
+
+ return false;
+}
+
+static void *tbnet_kmap_frag(struct sk_buff *skb, unsigned int frag_num,
+ unsigned int *len)
+{
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_num];
+
+ *len = skb_frag_size(frag);
+ return kmap_atomic(skb_frag_page(frag)) + frag->page_offset;
+}
+
+static netdev_tx_t tbnet_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct tbnet *net = netdev_priv(dev);
+ struct tbnet_frame *frames[MAX_SKB_FRAGS];
+ u16 frame_id = atomic_read(&net->frame_id);
+ struct thunderbolt_ip_frame_header *hdr;
+ unsigned int len = skb_headlen(skb);
+ unsigned int data_len = skb->len;
+ unsigned int nframes, i;
+ unsigned int frag = 0;
+ void *src = skb->data;
+ u32 frame_index = 0;
+ bool unmap = false;
+ void *dest;
+
+ nframes = DIV_ROUND_UP(data_len, TBNET_MAX_PAYLOAD_SIZE);
+ if (tbnet_available_buffers(&net->tx_ring) < nframes) {
+ netif_stop_queue(net->dev);
+ return NETDEV_TX_BUSY;
+ }
+
+ frames[frame_index] = tbnet_get_tx_buffer(net);
+ if (!frames[frame_index])
+ goto err_drop;
+
+ hdr = page_address(frames[frame_index]->page);
+ dest = hdr + 1;
+
+ /* If overall packet is bigger than the frame data size */
+ while (data_len > TBNET_MAX_PAYLOAD_SIZE) {
+ unsigned int size_left = TBNET_MAX_PAYLOAD_SIZE;
+
+ hdr->frame_size = cpu_to_le32(TBNET_MAX_PAYLOAD_SIZE);
+ hdr->frame_index = cpu_to_le16(frame_index);
+ hdr->frame_id = cpu_to_le16(frame_id);
+
+ do {
+ if (len > size_left) {
+ /* Copy data onto Tx buffer data with
+ * full frame size then break and go to
+ * next frame
+ */
+ memcpy(dest, src, size_left);
+ len -= size_left;
+ dest += size_left;
+ src += size_left;
+ break;
+ }
+
+ memcpy(dest, src, len);
+ size_left -= len;
+ dest += len;
+
+ if (unmap) {
+ kunmap_atomic(src);
+ unmap = false;
+ }
+
+ /* Ensure all fragments have been processed */
+ if (frag < skb_shinfo(skb)->nr_frags) {
+ /* Map and then unmap quickly */
+ src = tbnet_kmap_frag(skb, frag++, &len);
+ unmap = true;
+ } else if (unlikely(size_left > 0)) {
+ goto err_drop;
+ }
+ } while (size_left > 0);
+
+ data_len -= TBNET_MAX_PAYLOAD_SIZE;
+ frame_index++;
+
+ frames[frame_index] = tbnet_get_tx_buffer(net);
+ if (!frames[frame_index])
+ goto err_drop;
+
+ hdr = page_address(frames[frame_index]->page);
+ dest = hdr + 1;
+ }
+
+ hdr->frame_size = cpu_to_le32(data_len);
+ hdr->frame_index = cpu_to_le16(frame_index);
+ hdr->frame_id = cpu_to_le16(frame_id);
+
+ frames[frame_index]->frame.size = data_len + sizeof(*hdr);
+
+ /* In case the remaining data_len is smaller than a frame */
+ while (len < data_len) {
+ memcpy(dest, src, len);
+ data_len -= len;
+ dest += len;
+
+ if (unmap) {
+ kunmap_atomic(src);
+ unmap = false;
+ }
+
+ if (frag < skb_shinfo(skb)->nr_frags) {
+ src = tbnet_kmap_frag(skb, frag++, &len);
+ unmap = true;
+ } else if (unlikely(data_len > 0)) {
+ goto err_drop;
+ }
+ }
+
+ memcpy(dest, src, data_len);
+
+ if (unmap)
+ kunmap_atomic(src);
+
+ if (!tbnet_xmit_csum_and_map(net, skb, frames, frame_index + 1))
+ goto err_drop;
+
+ for (i = 0; i < frame_index + 1; i++)
+ tb_ring_tx(net->tx_ring.ring, &frames[i]->frame);
+
+ if (net->svc->prtcstns & TBNET_MATCH_FRAGS_ID)
+ atomic_inc(&net->frame_id);
+
+ net->stats.tx_packets++;
+ net->stats.tx_bytes += skb->len;
+
+ dev_consume_skb_any(skb);
+
+ return NETDEV_TX_OK;
+
+err_drop:
+ /* We can re-use the buffers */
+ net->tx_ring.cons -= frame_index;
+
+ dev_kfree_skb_any(skb);
+ net->stats.tx_errors++;
+
+ return NETDEV_TX_OK;
+}
+
+static void tbnet_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *stats)
+{
+ struct tbnet *net = netdev_priv(dev);
+
+ stats->tx_packets = net->stats.tx_packets;
+ stats->rx_packets = net->stats.rx_packets;
+ stats->tx_bytes = net->stats.tx_bytes;
+ stats->rx_bytes = net->stats.rx_bytes;
+ stats->rx_errors = net->stats.rx_errors + net->stats.rx_length_errors +
+ net->stats.rx_over_errors + net->stats.rx_crc_errors +
+ net->stats.rx_missed_errors;
+ stats->tx_errors = net->stats.tx_errors;
+ stats->rx_length_errors = net->stats.rx_length_errors;
+ stats->rx_over_errors = net->stats.rx_over_errors;
+ stats->rx_crc_errors = net->stats.rx_crc_errors;
+ stats->rx_missed_errors = net->stats.rx_missed_errors;
+}
+
+static const struct net_device_ops tbnet_netdev_ops = {
+ .ndo_open = tbnet_open,
+ .ndo_stop = tbnet_stop,
+ .ndo_start_xmit = tbnet_start_xmit,
+ .ndo_get_stats64 = tbnet_get_stats64,
+};
+
+static void tbnet_generate_mac(struct net_device *dev)
+{
+ const struct tbnet *net = netdev_priv(dev);
+ const struct tb_xdomain *xd = net->xd;
+ u8 phy_port;
+ u32 hash;
+
+ phy_port = tb_phy_port_from_link(TBNET_L0_PORT_NUM(xd->route));
+
+ /* Unicast and locally administered MAC */
+ dev->dev_addr[0] = phy_port << 4 | 0x02;
+ hash = jhash2((u32 *)xd->local_uuid, 4, 0);
+ memcpy(dev->dev_addr + 1, &hash, sizeof(hash));
+ hash = jhash2((u32 *)xd->local_uuid, 4, hash);
+ dev->dev_addr[5] = hash & 0xff;
+}
+
+static int tbnet_probe(struct tb_service *svc, const struct tb_service_id *id)
+{
+ struct tb_xdomain *xd = tb_service_parent(svc);
+ struct net_device *dev;
+ struct tbnet *net;
+ int ret;
+
+ dev = alloc_etherdev(sizeof(*net));
+ if (!dev)
+ return -ENOMEM;
+
+ SET_NETDEV_DEV(dev, &svc->dev);
+
+ net = netdev_priv(dev);
+ INIT_DELAYED_WORK(&net->login_work, tbnet_login_work);
+ INIT_WORK(&net->connected_work, tbnet_connected_work);
+ mutex_init(&net->connection_lock);
+ atomic_set(&net->command_id, 0);
+ atomic_set(&net->frame_id, 0);
+ net->svc = svc;
+ net->dev = dev;
+ net->xd = xd;
+
+ tbnet_generate_mac(dev);
+
+ strcpy(dev->name, "thunderbolt%d");
+ dev->netdev_ops = &tbnet_netdev_ops;
+
+ /* ThunderboltIP takes advantage of TSO packets but instead of
+ * segmenting them we just split the packet into Thunderbolt
+ * frames (maximum payload size of each frame is 4084 bytes) and
+ * calculate checksum over the whole packet here.
+ *
+ * The receiving side does the opposite if the host OS supports
+ * LRO, otherwise it needs to split the large packet into MTU
+ * sized smaller packets.
+ *
+ * In order to receive large packets from the networking stack,
+ * we need to announce support for most of the offloading
+ * features here.
+ */
+ dev->hw_features = NETIF_F_SG | NETIF_F_ALL_TSO | NETIF_F_GRO |
+ NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ dev->features = dev->hw_features | NETIF_F_HIGHDMA;
+ dev->hard_header_len += sizeof(struct thunderbolt_ip_frame_header);
+
+ netif_napi_add(dev, &net->napi, tbnet_poll, NAPI_POLL_WEIGHT);
+
+ /* MTU range: 68 - 65522 */
+ dev->min_mtu = ETH_MIN_MTU;
+ dev->max_mtu = TBNET_MAX_MTU - ETH_HLEN;
+
+ net->handler.uuid = &tbnet_svc_uuid;
+ net->handler.callback = tbnet_handle_packet,
+ net->handler.data = net;
+ tb_register_protocol_handler(&net->handler);
+
+ tb_service_set_drvdata(svc, net);
+
+ ret = register_netdev(dev);
+ if (ret) {
+ tb_unregister_protocol_handler(&net->handler);
+ free_netdev(dev);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void tbnet_remove(struct tb_service *svc)
+{
+ struct tbnet *net = tb_service_get_drvdata(svc);
+
+ unregister_netdev(net->dev);
+ tb_unregister_protocol_handler(&net->handler);
+ free_netdev(net->dev);
+}
+
+static void tbnet_shutdown(struct tb_service *svc)
+{
+ tbnet_tear_down(tb_service_get_drvdata(svc), true);
+}
+
+static int __maybe_unused tbnet_suspend(struct device *dev)
+{
+ struct tb_service *svc = tb_to_service(dev);
+ struct tbnet *net = tb_service_get_drvdata(svc);
+
+ stop_login(net);
+ if (netif_running(net->dev)) {
+ netif_device_detach(net->dev);
+ tb_ring_stop(net->rx_ring.ring);
+ tb_ring_stop(net->tx_ring.ring);
+ tbnet_free_buffers(&net->rx_ring);
+ tbnet_free_buffers(&net->tx_ring);
+ }
+
+ return 0;
+}
+
+static int __maybe_unused tbnet_resume(struct device *dev)
+{
+ struct tb_service *svc = tb_to_service(dev);
+ struct tbnet *net = tb_service_get_drvdata(svc);
+
+ netif_carrier_off(net->dev);
+ if (netif_running(net->dev)) {
+ netif_device_attach(net->dev);
+ start_login(net);
+ }
+
+ return 0;
+}
+
+static const struct dev_pm_ops tbnet_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(tbnet_suspend, tbnet_resume)
+};
+
+static const struct tb_service_id tbnet_ids[] = {
+ { TB_SERVICE("network", 1) },
+ { },
+};
+MODULE_DEVICE_TABLE(tbsvc, tbnet_ids);
+
+static struct tb_service_driver tbnet_driver = {
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "thunderbolt-net",
+ .pm = &tbnet_pm_ops,
+ },
+ .probe = tbnet_probe,
+ .remove = tbnet_remove,
+ .shutdown = tbnet_shutdown,
+ .id_table = tbnet_ids,
+};
+
+static int __init tbnet_init(void)
+{
+ int ret;
+
+ tbnet_dir = tb_property_create_dir(&tbnet_dir_uuid);
+ if (!tbnet_dir)
+ return -ENOMEM;
+
+ tb_property_add_immediate(tbnet_dir, "prtcid", 1);
+ tb_property_add_immediate(tbnet_dir, "prtcvers", 1);
+ tb_property_add_immediate(tbnet_dir, "prtcrevs", 1);
+ tb_property_add_immediate(tbnet_dir, "prtcstns",
+ TBNET_MATCH_FRAGS_ID);
+
+ ret = tb_register_property_dir("network", tbnet_dir);
+ if (ret) {
+ tb_property_free_dir(tbnet_dir);
+ return ret;
+ }
+
+ return tb_register_service_driver(&tbnet_driver);
+}
+module_init(tbnet_init);
+
+static void __exit tbnet_exit(void)
+{
+ tb_unregister_service_driver(&tbnet_driver);
+ tb_unregister_property_dir("network", tbnet_dir);
+ tb_property_free_dir(tbnet_dir);
+}
+module_exit(tbnet_exit);
+
+MODULE_AUTHOR("Amir Levy <amir.jer.levy@intel.com>");
+MODULE_AUTHOR("Michael Jamet <michael.jamet@intel.com>");
+MODULE_AUTHOR("Mika Westerberg <mika.westerberg@linux.intel.com>");
+MODULE_DESCRIPTION("Thunderbolt network driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e21bf90b819f..ea29da91ea5a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -75,6 +75,7 @@
#include <linux/skb_array.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+#include <linux/mutex.h>
#include <linux/uaccess.h>
@@ -121,7 +122,8 @@ do { \
#define TUN_VNET_BE 0x40000000
#define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
- IFF_MULTI_QUEUE)
+ IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
+
#define GOODCOPY_LEN 128
#define FLT_EXACT_COUNT 8
@@ -172,6 +174,9 @@ struct tun_file {
u16 queue_index;
unsigned int ifindex;
};
+ struct napi_struct napi;
+ bool napi_enabled;
+ struct mutex napi_mutex; /* Protects access to the above napi */
struct list_head next;
struct tun_struct *detached;
struct skb_array tx_array;
@@ -229,6 +234,75 @@ struct tun_struct {
struct bpf_prog __rcu *xdp_prog;
};
+static int tun_napi_receive(struct napi_struct *napi, int budget)
+{
+ struct tun_file *tfile = container_of(napi, struct tun_file, napi);
+ struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+ struct sk_buff_head process_queue;
+ struct sk_buff *skb;
+ int received = 0;
+
+ __skb_queue_head_init(&process_queue);
+
+ spin_lock(&queue->lock);
+ skb_queue_splice_tail_init(queue, &process_queue);
+ spin_unlock(&queue->lock);
+
+ while (received < budget && (skb = __skb_dequeue(&process_queue))) {
+ napi_gro_receive(napi, skb);
+ ++received;
+ }
+
+ if (!skb_queue_empty(&process_queue)) {
+ spin_lock(&queue->lock);
+ skb_queue_splice(&process_queue, queue);
+ spin_unlock(&queue->lock);
+ }
+
+ return received;
+}
+
+static int tun_napi_poll(struct napi_struct *napi, int budget)
+{
+ unsigned int received;
+
+ received = tun_napi_receive(napi, budget);
+
+ if (received < budget)
+ napi_complete_done(napi, received);
+
+ return received;
+}
+
+static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
+ bool napi_en)
+{
+ tfile->napi_enabled = napi_en;
+ if (napi_en) {
+ netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&tfile->napi);
+ mutex_init(&tfile->napi_mutex);
+ }
+}
+
+static void tun_napi_disable(struct tun_struct *tun, struct tun_file *tfile)
+{
+ if (tfile->napi_enabled)
+ napi_disable(&tfile->napi);
+}
+
+static void tun_napi_del(struct tun_struct *tun, struct tun_file *tfile)
+{
+ if (tfile->napi_enabled)
+ netif_napi_del(&tfile->napi);
+}
+
+static bool tun_napi_frags_enabled(const struct tun_struct *tun)
+{
+ return READ_ONCE(tun->flags) & IFF_NAPI_FRAGS;
+}
+
#ifdef CONFIG_TUN_VNET_CROSS_LE
static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
{
@@ -380,25 +454,28 @@ static void tun_flow_cleanup(unsigned long data)
tun_debug(KERN_INFO, tun, "tun_flow_cleanup\n");
- spin_lock_bh(&tun->lock);
+ spin_lock(&tun->lock);
for (i = 0; i < TUN_NUM_FLOW_ENTRIES; i++) {
struct tun_flow_entry *e;
struct hlist_node *n;
hlist_for_each_entry_safe(e, n, &tun->flows[i], hash_link) {
unsigned long this_timer;
- count++;
+
this_timer = e->updated + delay;
- if (time_before_eq(this_timer, jiffies))
+ if (time_before_eq(this_timer, jiffies)) {
tun_flow_delete(tun, e);
- else if (time_before(this_timer, next_timer))
+ continue;
+ }
+ count++;
+ if (time_before(this_timer, next_timer))
next_timer = this_timer;
}
}
if (count)
mod_timer(&tun->flow_gc_timer, round_jiffies_up(next_timer));
- spin_unlock_bh(&tun->lock);
+ spin_unlock(&tun->lock);
}
static void tun_flow_update(struct tun_struct *tun, u32 rxhash,
@@ -541,6 +618,11 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
tun = rtnl_dereference(tfile->tun);
+ if (tun && clean) {
+ tun_napi_disable(tun, tfile);
+ tun_napi_del(tun, tfile);
+ }
+
if (tun && !tfile->detached) {
u16 index = tfile->queue_index;
BUG_ON(index >= tun->numqueues);
@@ -598,6 +680,7 @@ static void tun_detach_all(struct net_device *dev)
for (i = 0; i < n; i++) {
tfile = rtnl_dereference(tun->tfiles[i]);
BUG_ON(!tfile);
+ tun_napi_disable(tun, tfile);
tfile->socket.sk->sk_shutdown = RCV_SHUTDOWN;
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
RCU_INIT_POINTER(tfile->tun, NULL);
@@ -613,6 +696,7 @@ static void tun_detach_all(struct net_device *dev)
synchronize_net();
for (i = 0; i < n; i++) {
tfile = rtnl_dereference(tun->tfiles[i]);
+ tun_napi_del(tun, tfile);
/* Drop read queue */
tun_queue_purge(tfile);
sock_put(&tfile->sk);
@@ -631,7 +715,8 @@ static void tun_detach_all(struct net_device *dev)
module_put(THIS_MODULE);
}
-static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filter)
+static int tun_attach(struct tun_struct *tun, struct file *file,
+ bool skip_filter, bool napi)
{
struct tun_file *tfile = file->private_data;
struct net_device *dev = tun->dev;
@@ -677,10 +762,12 @@ static int tun_attach(struct tun_struct *tun, struct file *file, bool skip_filte
rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
tun->numqueues++;
- if (tfile->detached)
+ if (tfile->detached) {
tun_enable_queue(tfile);
- else
+ } else {
sock_hold(&tfile->sk);
+ tun_napi_init(tun, tfile, napi);
+ }
tun_set_real_num_queues(tun);
@@ -692,7 +779,7 @@ out:
return err;
}
-static struct tun_struct *__tun_get(struct tun_file *tfile)
+static struct tun_struct *tun_get(struct tun_file *tfile)
{
struct tun_struct *tun;
@@ -705,11 +792,6 @@ static struct tun_struct *__tun_get(struct tun_file *tfile)
return tun;
}
-static struct tun_struct *tun_get(struct file *file)
-{
- return __tun_get(file->private_data);
-}
-
static void tun_put(struct tun_struct *tun)
{
dev_put(tun->dev);
@@ -956,13 +1038,33 @@ static void tun_poll_controller(struct net_device *dev)
* Tun only receives frames when:
* 1) the char device endpoint gets data from user space
* 2) the tun socket gets a sendmsg call from user space
- * Since both of those are synchronous operations, we are guaranteed
- * never to have pending data when we poll for it
- * so there is nothing to do here but return.
+ * If NAPI is not enabled, since both of those are synchronous
+ * operations, we are guaranteed never to have pending data when we poll
+ * for it so there is nothing to do here but return.
* We need this though so netpoll recognizes us as an interface that
* supports polling, which enables bridge devices in virt setups to
* still use netconsole
+ * If NAPI is enabled, however, we need to schedule polling for all
+ * queues unless we are using napi_gro_frags(), which we call in
+ * process context and not in NAPI context.
*/
+ struct tun_struct *tun = netdev_priv(dev);
+
+ if (tun->flags & IFF_NAPI) {
+ struct tun_file *tfile;
+ int i;
+
+ if (tun_napi_frags_enabled(tun))
+ return;
+
+ rcu_read_lock();
+ for (i = 0; i < tun->numqueues; i++) {
+ tfile = rcu_dereference(tun->tfiles[i]);
+ if (tfile->napi_enabled)
+ napi_schedule(&tfile->napi);
+ }
+ rcu_read_unlock();
+ }
return;
}
#endif
@@ -1095,8 +1197,6 @@ static void tun_flow_init(struct tun_struct *tun)
tun->ageing_time = TUN_FLOW_EXPIRE;
setup_timer(&tun->flow_gc_timer, tun_flow_cleanup, (unsigned long)tun);
- mod_timer(&tun->flow_gc_timer,
- round_jiffies_up(jiffies + tun->ageing_time));
}
static void tun_flow_uninit(struct tun_struct *tun)
@@ -1149,7 +1249,7 @@ static void tun_net_init(struct net_device *dev)
static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
{
struct tun_file *tfile = file->private_data;
- struct tun_struct *tun = __tun_get(tfile);
+ struct tun_struct *tun = tun_get(tfile);
struct sock *sk;
unsigned int mask = 0;
@@ -1178,6 +1278,64 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
return mask;
}
+static struct sk_buff *tun_napi_alloc_frags(struct tun_file *tfile,
+ size_t len,
+ const struct iov_iter *it)
+{
+ struct sk_buff *skb;
+ size_t linear;
+ int err;
+ int i;
+
+ if (it->nr_segs > MAX_SKB_FRAGS + 1)
+ return ERR_PTR(-ENOMEM);
+
+ local_bh_disable();
+ skb = napi_get_frags(&tfile->napi);
+ local_bh_enable();
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ linear = iov_iter_single_seg_count(it);
+ err = __skb_grow(skb, linear);
+ if (err)
+ goto free;
+
+ skb->len = len;
+ skb->data_len = len - linear;
+ skb->truesize += skb->data_len;
+
+ for (i = 1; i < it->nr_segs; i++) {
+ size_t fragsz = it->iov[i].iov_len;
+ unsigned long offset;
+ struct page *page;
+ void *data;
+
+ if (fragsz == 0 || fragsz > PAGE_SIZE) {
+ err = -EINVAL;
+ goto free;
+ }
+
+ local_bh_disable();
+ data = napi_alloc_frag(fragsz);
+ local_bh_enable();
+ if (!data) {
+ err = -ENOMEM;
+ goto free;
+ }
+
+ page = virt_to_head_page(data);
+ offset = data - page_address(page);
+ skb_fill_page_desc(skb, i - 1, page, offset, fragsz);
+ }
+
+ return skb;
+free:
+ /* frees skb and all frags allocated with napi_alloc_frag() */
+ napi_free_frags(&tfile->napi);
+ return ERR_PTR(err);
+}
+
/* prepad is the amount to reserve at front. len is length after that.
* linear is a hint as to how much to copy (usually headers). */
static struct sk_buff *tun_alloc_skb(struct tun_file *tfile,
@@ -1314,6 +1472,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
xdp.data_hard_start = buf;
xdp.data = buf + pad;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -1390,6 +1549,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
int err;
u32 rxhash;
int skb_xdp = 1;
+ bool frags = tun_napi_frags_enabled(tun);
if (!(tun->dev->flags & IFF_UP))
return -EIO;
@@ -1447,7 +1607,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
zerocopy = true;
}
- if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+ if (!frags && tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
/* For the packet that is not easy to be processed
* (e.g gso or jumbo packet), we will do it at after
* skb was created with generic XDP routine.
@@ -1468,10 +1628,24 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
linear = tun16_to_cpu(tun, gso.hdr_len);
}
- skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+ if (frags) {
+ mutex_lock(&tfile->napi_mutex);
+ skb = tun_napi_alloc_frags(tfile, copylen, from);
+ /* tun_napi_alloc_frags() enforces a layout for the skb.
+ * If zerocopy is enabled, then this layout will be
+ * overwritten by zerocopy_sg_from_iter().
+ */
+ zerocopy = false;
+ } else {
+ skb = tun_alloc_skb(tfile, align, copylen, linear,
+ noblock);
+ }
+
if (IS_ERR(skb)) {
if (PTR_ERR(skb) != -EAGAIN)
this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ if (frags)
+ mutex_unlock(&tfile->napi_mutex);
return PTR_ERR(skb);
}
@@ -1483,6 +1657,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (err) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
kfree_skb(skb);
+ if (frags) {
+ tfile->napi.skb = NULL;
+ mutex_unlock(&tfile->napi_mutex);
+ }
+
return -EFAULT;
}
}
@@ -1490,6 +1669,11 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
this_cpu_inc(tun->pcpu_stats->rx_frame_errors);
kfree_skb(skb);
+ if (frags) {
+ tfile->napi.skb = NULL;
+ mutex_unlock(&tfile->napi_mutex);
+ }
+
return -EINVAL;
}
@@ -1517,7 +1701,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
skb->dev = tun->dev;
break;
case IFF_TAP:
- skb->protocol = eth_type_trans(skb, tun->dev);
+ if (!frags)
+ skb->protocol = eth_type_trans(skb, tun->dev);
break;
}
@@ -1551,11 +1736,41 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
}
rxhash = __skb_get_hash_symmetric(skb);
-#ifndef CONFIG_4KSTACKS
- tun_rx_batched(tun, tfile, skb, more);
-#else
- netif_rx_ni(skb);
-#endif
+
+ if (frags) {
+ /* Exercise flow dissector code path. */
+ u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb));
+
+ if (unlikely(headlen > skb_headlen(skb))) {
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ napi_free_frags(&tfile->napi);
+ mutex_unlock(&tfile->napi_mutex);
+ WARN_ON(1);
+ return -ENOMEM;
+ }
+
+ local_bh_disable();
+ napi_gro_frags(&tfile->napi);
+ local_bh_enable();
+ mutex_unlock(&tfile->napi_mutex);
+ } else if (tfile->napi_enabled) {
+ struct sk_buff_head *queue = &tfile->sk.sk_write_queue;
+ int queue_len;
+
+ spin_lock_bh(&queue->lock);
+ __skb_queue_tail(queue, skb);
+ queue_len = skb_queue_len(queue);
+ spin_unlock(&queue->lock);
+
+ if (!more || queue_len > NAPI_POLL_WEIGHT)
+ napi_schedule(&tfile->napi);
+
+ local_bh_enable();
+ } else if (!IS_ENABLED(CONFIG_4KSTACKS)) {
+ tun_rx_batched(tun, tfile, skb, more);
+ } else {
+ netif_rx_ni(skb);
+ }
stats = get_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
@@ -1571,8 +1786,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct tun_struct *tun = tun_get(file);
struct tun_file *tfile = file->private_data;
+ struct tun_struct *tun = tun_get(tfile);
ssize_t result;
if (!tun)
@@ -1756,7 +1971,7 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct tun_file *tfile = file->private_data;
- struct tun_struct *tun = __tun_get(tfile);
+ struct tun_struct *tun = tun_get(tfile);
ssize_t len = iov_iter_count(to), ret;
if (!tun)
@@ -1833,7 +2048,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
int ret;
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
- struct tun_struct *tun = __tun_get(tfile);
+ struct tun_struct *tun = tun_get(tfile);
if (!tun)
return -EBADFD;
@@ -1849,7 +2064,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
int flags)
{
struct tun_file *tfile = container_of(sock, struct tun_file, socket);
- struct tun_struct *tun = __tun_get(tfile);
+ struct tun_struct *tun = tun_get(tfile);
int ret;
if (!tun)
@@ -1881,7 +2096,7 @@ static int tun_peek_len(struct socket *sock)
struct tun_struct *tun;
int ret = 0;
- tun = __tun_get(tfile);
+ tun = tun_get(tfile);
if (!tun)
return 0;
@@ -1961,6 +2176,15 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
if (tfile->detached)
return -EINVAL;
+ if ((ifr->ifr_flags & IFF_NAPI_FRAGS)) {
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!(ifr->ifr_flags & IFF_NAPI) ||
+ (ifr->ifr_flags & TUN_TYPE_MASK) != IFF_TAP)
+ return -EINVAL;
+ }
+
dev = __dev_get_by_name(net, ifr->ifr_name);
if (dev) {
if (ifr->ifr_flags & IFF_TUN_EXCL)
@@ -1982,7 +2206,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
if (err < 0)
return err;
- err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER);
+ err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
+ ifr->ifr_flags & IFF_NAPI);
if (err < 0)
return err;
@@ -2071,7 +2296,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
NETIF_F_HW_VLAN_STAG_TX);
INIT_LIST_HEAD(&tun->disabled);
- err = tun_attach(tun, file, false);
+ err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI);
if (err < 0)
goto err_free_flow;
@@ -2221,7 +2446,7 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
ret = security_tun_dev_attach_queue(tun->security);
if (ret < 0)
goto unlock;
- ret = tun_attach(tun, file, false);
+ ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI);
} else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
tun = rtnl_dereference(tfile->tun);
if (!tun || !(tun->flags & IFF_MULTI_QUEUE) || tfile->detached)
@@ -2270,7 +2495,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
ret = 0;
rtnl_lock();
- tun = __tun_get(tfile);
+ tun = tun_get(tfile);
if (cmd == TUNSETIFF) {
ret = -EEXIST;
if (tun)
@@ -2617,15 +2842,16 @@ static int tun_chr_close(struct inode *inode, struct file *file)
}
#ifdef CONFIG_PROC_FS
-static void tun_chr_show_fdinfo(struct seq_file *m, struct file *f)
+static void tun_chr_show_fdinfo(struct seq_file *m, struct file *file)
{
+ struct tun_file *tfile = file->private_data;
struct tun_struct *tun;
struct ifreq ifr;
memset(&ifr, 0, sizeof(ifr));
rtnl_lock();
- tun = tun_get(f);
+ tun = tun_get(tfile);
if (tun)
tun_get_iff(current->nsproxy->net_ns, tun, &ifr);
rtnl_unlock();
diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c
index 522d2900cd1d..f4d7362eb325 100644
--- a/drivers/net/usb/asix_common.c
+++ b/drivers/net/usb/asix_common.c
@@ -245,7 +245,7 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
* - We are allowed to put 4 bytes at tail if skb_cloned()
* is false (and if we have 4 bytes of tailroom)
*
- * TCP packets for example are cloned, but skb_header_release()
+ * TCP packets for example are cloned, but __skb_header_release()
* was called in tcp stack, allowing us to use headroom for our needs.
*/
if (!skb_header_cloned(skb) &&
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index dbc90313f472..18d36dff97ea 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -611,9 +611,9 @@ static void catc_stats_done(struct catc *catc, struct ctrl_queue *q)
catc->stats_vals[index >> 1] = data;
}
-static void catc_stats_timer(unsigned long data)
+static void catc_stats_timer(struct timer_list *t)
{
- struct catc *catc = (void *) data;
+ struct catc *catc = from_timer(catc, t, timer);
int i;
for (i = 0; i < 8; i++)
@@ -805,9 +805,7 @@ static int catc_probe(struct usb_interface *intf, const struct usb_device_id *id
spin_lock_init(&catc->tx_lock);
spin_lock_init(&catc->ctrl_lock);
- init_timer(&catc->timer);
- catc->timer.data = (long) catc;
- catc->timer.function = catc_stats_timer;
+ timer_setup(&catc->timer, catc_stats_timer, 0);
catc->ctrl_urb = usb_alloc_urb(0, GFP_KERNEL);
catc->tx_urb = usb_alloc_urb(0, GFP_KERNEL);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index 0161f77641fa..94c7804903c4 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -3516,11 +3516,9 @@ static const struct net_device_ops lan78xx_netdev_ops = {
.ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid,
};
-static void lan78xx_stat_monitor(unsigned long param)
+static void lan78xx_stat_monitor(struct timer_list *t)
{
- struct lan78xx_net *dev;
-
- dev = (struct lan78xx_net *)param;
+ struct lan78xx_net *dev = from_timer(dev, t, stat_monitor);
lan78xx_defer_kevent(dev, EVENT_STAT_UPDATE);
}
@@ -3571,10 +3569,8 @@ static int lan78xx_probe(struct usb_interface *intf,
netdev->watchdog_timeo = TX_TIMEOUT_JIFFIES;
netdev->ethtool_ops = &lan78xx_ethtool_ops;
- dev->stat_monitor.function = lan78xx_stat_monitor;
- dev->stat_monitor.data = (unsigned long)dev;
dev->delta = 1;
- init_timer(&dev->stat_monitor);
+ timer_setup(&dev->stat_monitor, lan78xx_stat_monitor, 0);
mutex_init(&dev->stats.access_lock);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 8c3733608271..db7279d5b250 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -221,7 +221,7 @@ static int qmimux_register_device(struct net_device *real_dev, u8 mux_id)
/* Account for reference in struct qmimux_priv_priv */
dev_hold(real_dev);
- err = netdev_upper_dev_link(real_dev, new_dev);
+ err = netdev_upper_dev_link(real_dev, new_dev, NULL);
if (err)
goto out_unregister_netdev;
diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index 2110ab3513f0..c43087e06696 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -189,9 +189,6 @@ struct lsi_umts_dual {
#define SIERRA_NET_LSI_UMTS_DS_STATUS_LEN \
(SIERRA_NET_LSI_UMTS_DS_LEN - SIERRA_NET_LSI_COMMON_LEN)
-/* Forward definitions */
-static void sierra_sync_timer(unsigned long syncdata);
-
/* Our own net device operations structure */
static const struct net_device_ops sierra_net_device_ops = {
.ndo_open = usbnet_open,
@@ -475,8 +472,6 @@ static void sierra_net_dosync(struct usbnet *dev)
"Send SYNC failed, status %d\n", status);
/* Now, start a timer and make sure we get the Restart Indication */
- priv->sync_timer.function = sierra_sync_timer;
- priv->sync_timer.data = (unsigned long) dev;
priv->sync_timer.expires = jiffies + SIERRA_NET_SYNCDELAY;
add_timer(&priv->sync_timer);
}
@@ -593,9 +588,10 @@ static void sierra_net_defer_kevent(struct usbnet *dev, int work)
/*
* Sync Retransmit Timer Handler. On expiry, kick the work queue
*/
-static void sierra_sync_timer(unsigned long syncdata)
+static void sierra_sync_timer(struct timer_list *t)
{
- struct usbnet *dev = (struct usbnet *)syncdata;
+ struct sierra_net_data *priv = from_timer(priv, t, sync_timer);
+ struct usbnet *dev = priv->usbnet;
dev_dbg(&dev->udev->dev, "%s", __func__);
/* Kick the tasklet */
@@ -752,7 +748,7 @@ static int sierra_net_bind(struct usbnet *dev, struct usb_interface *intf)
INIT_WORK(&priv->sierra_net_kevent, sierra_net_kevent);
/* Only need to do this once */
- init_timer(&priv->sync_timer);
+ timer_setup(&priv->sync_timer, sierra_sync_timer, 0);
/* verify fw attributes */
status = sierra_net_get_fw_attr(dev, &fwattr);
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 6510e5cc1817..80348b6a8646 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1509,9 +1509,9 @@ err:
// tasklet (work deferred from completions, in_irq) or timer
-static void usbnet_bh (unsigned long param)
+static void usbnet_bh (struct timer_list *t)
{
- struct usbnet *dev = (struct usbnet *) param;
+ struct usbnet *dev = from_timer(dev, t, delay);
struct sk_buff *skb;
struct skb_data *entry;
@@ -1694,13 +1694,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
skb_queue_head_init (&dev->txq);
skb_queue_head_init (&dev->done);
skb_queue_head_init(&dev->rxq_pause);
- dev->bh.func = usbnet_bh;
- dev->bh.data = (unsigned long) dev;
+ dev->bh.func = (void (*)(unsigned long))usbnet_bh;
+ dev->bh.data = (unsigned long)&dev->delay;
INIT_WORK (&dev->kevent, usbnet_deferred_kevent);
init_usb_anchor(&dev->deferred);
- dev->delay.function = usbnet_bh;
- dev->delay.data = (unsigned long) dev;
- init_timer (&dev->delay);
+ timer_setup(&dev->delay, usbnet_bh, 0);
mutex_init (&dev->phy_mutex);
mutex_init(&dev->interrupt_mutex);
dev->interrupt_count = 0;
diff --git a/drivers/net/virtio_net. b/drivers/net/virtio_net.
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/drivers/net/virtio_net.
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 511f8339fa96..fc059f193e7d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -29,6 +29,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
+#include <linux/filter.h>
#include <net/route.h>
static int napi_weight = NAPI_POLL_WEIGHT;
@@ -372,9 +373,20 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
return skb;
}
-static bool virtnet_xdp_xmit(struct virtnet_info *vi,
- struct receive_queue *rq,
- struct xdp_buff *xdp)
+static void virtnet_xdp_flush(struct net_device *dev)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ struct send_queue *sq;
+ unsigned int qp;
+
+ qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
+ sq = &vi->sq[qp];
+
+ virtqueue_kick(sq->vq);
+}
+
+static bool __virtnet_xdp_xmit(struct virtnet_info *vi,
+ struct xdp_buff *xdp)
{
struct virtio_net_hdr_mrg_rxbuf *hdr;
unsigned int len;
@@ -408,10 +420,19 @@ static bool virtnet_xdp_xmit(struct virtnet_info *vi,
return false;
}
- virtqueue_kick(sq->vq);
return true;
}
+static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+ struct virtnet_info *vi = netdev_priv(dev);
+ bool sent = __virtnet_xdp_xmit(vi, xdp);
+
+ if (!sent)
+ return -ENOSPC;
+ return 0;
+}
+
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
@@ -484,7 +505,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
struct virtnet_info *vi,
struct receive_queue *rq,
void *buf, void *ctx,
- unsigned int len)
+ unsigned int len,
+ bool *xdp_xmit)
{
struct sk_buff *skb;
struct bpf_prog *xdp_prog;
@@ -494,7 +516,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
struct page *page = virt_to_head_page(buf);
- unsigned int delta = 0;
+ unsigned int delta = 0, err;
struct page *xdp_page;
len -= vi->hdr_len;
@@ -532,6 +554,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
xdp.data = xdp.data_hard_start + xdp_headroom;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + len;
orig_data = xdp.data;
act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -542,8 +565,16 @@ static struct sk_buff *receive_small(struct net_device *dev,
delta = orig_data - xdp.data;
break;
case XDP_TX:
- if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp)))
+ if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
trace_xdp_exception(vi->dev, xdp_prog, act);
+ else
+ *xdp_xmit = true;
+ rcu_read_unlock();
+ goto xdp_xmit;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(dev, &xdp, xdp_prog);
+ if (!err)
+ *xdp_xmit = true;
rcu_read_unlock();
goto xdp_xmit;
default:
@@ -604,7 +635,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct receive_queue *rq,
void *buf,
void *ctx,
- unsigned int len)
+ unsigned int len,
+ bool *xdp_xmit)
{
struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
@@ -614,6 +646,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
struct bpf_prog *xdp_prog;
unsigned int truesize;
unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+ int err;
head_skb = NULL;
@@ -654,9 +687,13 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
data = page_address(xdp_page) + offset;
xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
xdp.data = data + vi->hdr_len;
+ xdp_set_data_meta_invalid(&xdp);
xdp.data_end = xdp.data + (len - vi->hdr_len);
act = bpf_prog_run_xdp(xdp_prog, &xdp);
+ if (act != XDP_PASS)
+ ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+
switch (act) {
case XDP_PASS:
/* recalculate offset to account for any header
@@ -672,18 +709,24 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
put_page(page);
head_skb = page_to_skb(vi, rq, xdp_page,
offset, len, PAGE_SIZE);
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
return head_skb;
}
break;
case XDP_TX:
- if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp)))
+ if (unlikely(!__virtnet_xdp_xmit(vi, &xdp)))
trace_xdp_exception(vi->dev, xdp_prog, act);
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
+ else
+ *xdp_xmit = true;
if (unlikely(xdp_page != page))
goto err_xdp;
rcu_read_unlock();
goto xdp_xmit;
+ case XDP_REDIRECT:
+ err = xdp_do_redirect(dev, &xdp, xdp_prog);
+ if (!err)
+ *xdp_xmit = true;
+ rcu_read_unlock();
+ goto xdp_xmit;
default:
bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED:
@@ -691,7 +734,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
case XDP_DROP:
if (unlikely(xdp_page != page))
__free_pages(xdp_page, 0);
- ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
goto err_xdp;
}
}
@@ -789,7 +831,7 @@ xdp_xmit:
}
static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
- void *buf, unsigned int len, void **ctx)
+ void *buf, unsigned int len, void **ctx, bool *xdp_xmit)
{
struct net_device *dev = vi->dev;
struct sk_buff *skb;
@@ -810,11 +852,11 @@ static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
}
if (vi->mergeable_rx_bufs)
- skb = receive_mergeable(dev, vi, rq, buf, ctx, len);
+ skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit);
else if (vi->big_packets)
skb = receive_big(dev, vi, rq, buf, len);
else
- skb = receive_small(dev, vi, rq, buf, ctx, len);
+ skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit);
if (unlikely(!skb))
return 0;
@@ -1072,7 +1114,7 @@ static void refill_work(struct work_struct *work)
}
}
-static int virtnet_receive(struct receive_queue *rq, int budget)
+static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int len, received = 0, bytes = 0;
@@ -1084,13 +1126,13 @@ static int virtnet_receive(struct receive_queue *rq, int budget)
while (received < budget &&
(buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
- bytes += receive_buf(vi, rq, buf, len, ctx);
+ bytes += receive_buf(vi, rq, buf, len, ctx, xdp_xmit);
received++;
}
} else {
while (received < budget &&
(buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
- bytes += receive_buf(vi, rq, buf, len, NULL);
+ bytes += receive_buf(vi, rq, buf, len, NULL, xdp_xmit);
received++;
}
}
@@ -1162,15 +1204,19 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct receive_queue *rq =
container_of(napi, struct receive_queue, napi);
unsigned int received;
+ bool xdp_xmit = false;
virtnet_poll_cleantx(rq);
- received = virtnet_receive(rq, budget);
+ received = virtnet_receive(rq, budget, &xdp_xmit);
/* Out of packets? */
if (received < budget)
virtqueue_napi_complete(napi, rq->vq, received);
+ if (xdp_xmit)
+ xdp_do_flush_map();
+
return received;
}
@@ -2070,6 +2116,8 @@ static const struct net_device_ops virtnet_netdev = {
.ndo_poll_controller = virtnet_netpoll,
#endif
.ndo_xdp = virtnet_xdp,
+ .ndo_xdp_xmit = virtnet_xdp_xmit,
+ .ndo_xdp_flush = virtnet_xdp_flush,
.ndo_features_check = passthru_features_check,
};
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 9b243e6f3008..0b54f553228e 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -132,7 +132,6 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev,
skb_orphan(skb);
skb_dst_set(skb, dst);
- skb_dst_force(skb);
/* set pkt_type to avoid skb hitting packet taps twice -
* once on Tx and again in Rx processing
@@ -765,18 +764,22 @@ static void cycle_netdev(struct net_device *dev)
}
}
-static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
+static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
int ret;
/* do not allow loopback device to be enslaved to a VRF.
* The vrf device acts as the loopback for the vrf.
*/
- if (port_dev == dev_net(dev)->loopback_dev)
+ if (port_dev == dev_net(dev)->loopback_dev) {
+ NL_SET_ERR_MSG(extack,
+ "Can not enslave loopback device to a VRF");
return -EOPNOTSUPP;
+ }
port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
- ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL);
+ ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack);
if (ret < 0)
goto err;
@@ -789,12 +792,19 @@ err:
return ret;
}
-static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
+static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev,
+ struct netlink_ext_ack *extack)
{
- if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev))
+ if (netif_is_l3_master(port_dev)) {
+ NL_SET_ERR_MSG(extack,
+ "Can not enslave an L3 master device to a VRF");
+ return -EINVAL;
+ }
+
+ if (netif_is_l3_slave(port_dev))
return -EINVAL;
- return do_vrf_add_slave(dev, port_dev);
+ return do_vrf_add_slave(dev, port_dev, extack);
}
/* inverse of do_vrf_add_slave */
diff --git a/drivers/net/wan/dscc4.c b/drivers/net/wan/dscc4.c
index a043fb1367bd..64f176496da4 100644
--- a/drivers/net/wan/dscc4.c
+++ b/drivers/net/wan/dscc4.c
@@ -1127,10 +1127,8 @@ static int dscc4_open(struct net_device *dev)
done:
netif_start_queue(dev);
- init_timer(&dpriv->timer);
+ setup_timer(&dpriv->timer, dscc4_timer, (unsigned long)dev);
dpriv->timer.expires = jiffies + 10*HZ;
- dpriv->timer.data = (unsigned long)dev;
- dpriv->timer.function = dscc4_timer;
add_timer(&dpriv->timer);
netif_carrier_on(dev);
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index a408abc25512..320039d329c7 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -54,6 +54,7 @@ struct cisco_state {
cisco_proto settings;
struct timer_list timer;
+ struct net_device *dev;
spinlock_t lock;
unsigned long last_poll;
int up;
@@ -257,11 +258,10 @@ rx_error:
-static void cisco_timer(unsigned long arg)
+static void cisco_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
- hdlc_device *hdlc = dev_to_hdlc(dev);
- struct cisco_state *st = state(hdlc);
+ struct cisco_state *st = from_timer(st, t, timer);
+ struct net_device *dev = st->dev;
spin_lock(&st->lock);
if (st->up &&
@@ -276,8 +276,6 @@ static void cisco_timer(unsigned long arg)
spin_unlock(&st->lock);
st->timer.expires = jiffies + st->settings.interval * HZ;
- st->timer.function = cisco_timer;
- st->timer.data = arg;
add_timer(&st->timer);
}
@@ -293,10 +291,9 @@ static void cisco_start(struct net_device *dev)
st->up = st->txseq = st->rxseq = 0;
spin_unlock_irqrestore(&st->lock, flags);
- init_timer(&st->timer);
+ st->dev = dev;
+ timer_setup(&st->timer, cisco_timer, 0);
st->timer.expires = jiffies + HZ; /* First poll after 1 s */
- st->timer.function = cisco_timer;
- st->timer.data = (unsigned long)dev;
add_timer(&st->timer);
}
diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c
index 78596e42a3f3..038236a9c60e 100644
--- a/drivers/net/wan/hdlc_fr.c
+++ b/drivers/net/wan/hdlc_fr.c
@@ -140,6 +140,7 @@ struct frad_state {
int dce_pvc_count;
struct timer_list timer;
+ struct net_device *dev;
unsigned long last_poll;
int reliable;
int dce_changed;
@@ -597,9 +598,10 @@ static void fr_set_link_state(int reliable, struct net_device *dev)
}
-static void fr_timer(unsigned long arg)
+static void fr_timer(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
+ struct frad_state *st = from_timer(st, t, timer);
+ struct net_device *dev = st->dev;
hdlc_device *hdlc = dev_to_hdlc(dev);
int i, cnt = 0, reliable;
u32 list;
@@ -644,8 +646,6 @@ static void fr_timer(unsigned long arg)
state(hdlc)->settings.t391 * HZ;
}
- state(hdlc)->timer.function = fr_timer;
- state(hdlc)->timer.data = arg;
add_timer(&state(hdlc)->timer);
}
@@ -1003,11 +1003,10 @@ static void fr_start(struct net_device *dev)
state(hdlc)->n391cnt = 0;
state(hdlc)->txseq = state(hdlc)->rxseq = 0;
- init_timer(&state(hdlc)->timer);
+ state(hdlc)->dev = dev;
+ timer_setup(&state(hdlc)->timer, fr_timer, 0);
/* First poll after 1 s */
state(hdlc)->timer.expires = jiffies + HZ;
- state(hdlc)->timer.function = fr_timer;
- state(hdlc)->timer.data = (unsigned long)dev;
add_timer(&state(hdlc)->timer);
} else
fr_set_link_state(1, dev);
diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c
index 0d2e00ece804..c7721c729541 100644
--- a/drivers/net/wan/hdlc_ppp.c
+++ b/drivers/net/wan/hdlc_ppp.c
@@ -610,9 +610,7 @@ static void ppp_start(struct net_device *dev)
for (i = 0; i < IDX_COUNT; i++) {
struct proto *proto = &ppp->protos[i];
proto->dev = dev;
- init_timer(&proto->timer);
- proto->timer.function = ppp_timer;
- proto->timer.data = (unsigned long)proto;
+ setup_timer(&proto->timer, ppp_timer, (unsigned long)proto);
proto->state = CLOSED;
}
ppp->protos[IDX_LCP].pid = PID_LCP;
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 4698450c77d1..ae69d65158e6 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -1084,10 +1084,8 @@ static int lmc_open(struct net_device *dev)
* Setup a timer for the watchdog on probe, and start it running.
* Since lmc_ok == 0, it will be a NOP for now.
*/
- init_timer (&sc->timer);
+ setup_timer(&sc->timer, lmc_watchdog, (unsigned long)dev);
sc->timer.expires = jiffies + HZ;
- sc->timer.data = (unsigned long) dev;
- sc->timer.function = lmc_watchdog;
add_timer (&sc->timer);
lmc_trace(dev, "lmc_open out");
diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
index 236c62538036..0cc48902dbb9 100644
--- a/drivers/net/wan/sdla.c
+++ b/drivers/net/wan/sdla.c
@@ -1617,10 +1617,8 @@ static void setup_sdla(struct net_device *dev)
flp->deassoc = sdla_deassoc;
flp->dlci_conf = sdla_dlci_conf;
- init_timer(&flp->timer);
+ setup_timer(&flp->timer, sdla_poll, (unsigned long)dev);
flp->timer.expires = 1;
- flp->timer.data = (unsigned long) dev;
- flp->timer.function = sdla_poll;
}
static struct net_device *sdla;
diff --git a/drivers/net/wireless/ath/ath6kl/txrx.c b/drivers/net/wireless/ath/ath6kl/txrx.c
index e6b2517e6334..4e5cc2b7045a 100644
--- a/drivers/net/wireless/ath/ath6kl/txrx.c
+++ b/drivers/net/wireless/ath/ath6kl/txrx.c
@@ -1753,9 +1753,7 @@ void aggr_conn_init(struct ath6kl_vif *vif, struct aggr_info *aggr_info,
aggr_conn->aggr_sz = AGGR_SZ_DEFAULT;
aggr_conn->dev = vif->ndev;
- init_timer(&aggr_conn->timer);
- aggr_conn->timer.function = aggr_timeout;
- aggr_conn->timer.data = (unsigned long) aggr_conn;
+ setup_timer(&aggr_conn->timer, aggr_timeout, (unsigned long)aggr_conn);
aggr_conn->aggr_info = aggr_info;
aggr_conn->timer_scheduled = false;
diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c
index b68436b23a63..e816d53c2c05 100644
--- a/drivers/net/wireless/atmel/atmel.c
+++ b/drivers/net/wireless/atmel/atmel.c
@@ -1579,11 +1579,10 @@ struct net_device *init_atmel_card(unsigned short irq, unsigned long port,
priv->default_beacon_period = priv->beacon_period = 100;
priv->listen_interval = 1;
- init_timer(&priv->management_timer);
+ setup_timer(&priv->management_timer, atmel_management_timer,
+ (unsigned long)dev);
spin_lock_init(&priv->irqlock);
spin_lock_init(&priv->timerlock);
- priv->management_timer.function = atmel_management_timer;
- priv->management_timer.data = (unsigned long) dev;
dev->netdev_ops = &atmel_netdev_ops;
dev->wireless_handlers = &atmel_handler_def;
diff --git a/drivers/net/wireless/broadcom/b43/phy_g.c b/drivers/net/wireless/broadcom/b43/phy_g.c
index 822dcaa8ace6..f59c02166462 100644
--- a/drivers/net/wireless/broadcom/b43/phy_g.c
+++ b/drivers/net/wireless/broadcom/b43/phy_g.c
@@ -2297,7 +2297,7 @@ static u8 b43_gphy_aci_detect(struct b43_wldev *dev, u8 channel)
static u8 b43_gphy_aci_scan(struct b43_wldev *dev)
{
struct b43_phy *phy = &dev->phy;
- u8 ret[13];
+ u8 ret[13] = { 0 };
unsigned int channel = phy->channel;
unsigned int i, j, start, end;
diff --git a/drivers/net/wireless/broadcom/b43/phy_ht.c b/drivers/net/wireless/broadcom/b43/phy_ht.c
index 718c90e81696..c3158d085c2b 100644
--- a/drivers/net/wireless/broadcom/b43/phy_ht.c
+++ b/drivers/net/wireless/broadcom/b43/phy_ht.c
@@ -119,7 +119,7 @@ static void b43_radio_2059_rcal(struct b43_wldev *dev)
/* Calibrate the internal RC oscillator? */
static void b43_radio_2057_rccal(struct b43_wldev *dev)
{
- const u16 radio_values[3][2] = {
+ static const u16 radio_values[3][2] = {
{ 0x61, 0xE9 }, { 0x69, 0xD5 }, { 0x73, 0x99 },
};
int i;
@@ -154,7 +154,7 @@ static void b43_radio_2059_init_pre(struct b43_wldev *dev)
static void b43_radio_2059_init(struct b43_wldev *dev)
{
- const u16 routing[] = { R2059_C1, R2059_C2, R2059_C3 };
+ static const u16 routing[] = { R2059_C1, R2059_C2, R2059_C3 };
int i;
/* Prepare (reset?) radio */
@@ -263,7 +263,7 @@ static void b43_phy_ht_reset_cca(struct b43_wldev *dev)
static void b43_phy_ht_zero_extg(struct b43_wldev *dev)
{
u8 i, j;
- u16 base[] = { 0x40, 0x60, 0x80 };
+ static const u16 base[] = { 0x40, 0x60, 0x80 };
for (i = 0; i < ARRAY_SIZE(base); i++) {
for (j = 0; j < 4; j++)
diff --git a/drivers/net/wireless/broadcom/b43legacy/radio.c b/drivers/net/wireless/broadcom/b43legacy/radio.c
index 9501420340a9..eab1c9387846 100644
--- a/drivers/net/wireless/broadcom/b43legacy/radio.c
+++ b/drivers/net/wireless/broadcom/b43legacy/radio.c
@@ -280,7 +280,7 @@ u8 b43legacy_radio_aci_detect(struct b43legacy_wldev *dev, u8 channel)
u8 b43legacy_radio_aci_scan(struct b43legacy_wldev *dev)
{
struct b43legacy_phy *phy = &dev->phy;
- u8 ret[13];
+ u8 ret[13] = { 0 };
unsigned int channel = phy->channel;
unsigned int i;
unsigned int j;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 4157c90ad973..4bfd43a80273 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -3261,9 +3261,8 @@ static void brcmf_init_escan(struct brcmf_cfg80211_info *cfg)
brcmf_cfg80211_escan_handler);
cfg->escan_info.escan_state = WL_ESCAN_STATE_IDLE;
/* Init scan_timeout timer */
- init_timer(&cfg->escan_timeout);
- cfg->escan_timeout.data = (unsigned long) cfg;
- cfg->escan_timeout.function = brcmf_escan_timeout;
+ setup_timer(&cfg->escan_timeout, brcmf_escan_timeout,
+ (unsigned long)cfg);
INIT_WORK(&cfg->escan_timeout_work,
brcmf_cfg80211_escan_timeout_worker);
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
index ef72baf6dd96..e7eaa57d11d9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fweh.c
@@ -257,11 +257,6 @@ static void brcmf_fweh_event_worker(struct work_struct *work)
brcmf_dbg_hex_dump(BRCMF_EVENT_ON(), event->data,
min_t(u32, emsg.datalen, 64),
"event payload, len=%d\n", emsg.datalen);
- if (emsg.datalen > event->datalen) {
- brcmf_err("event invalid length header=%d, msg=%d\n",
- event->datalen, emsg.datalen);
- goto event_free;
- }
/* special handling of interface event */
if (event->code == BRCMF_E_IF) {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 2ce675ab40ef..1c450c0727cb 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -1853,7 +1853,6 @@ s32 brcmf_p2p_notify_rx_mgmt_p2p_probereq(struct brcmf_if *ifp,
struct afx_hdl *afx_hdl = &p2p->afx_hdl;
struct brcmf_cfg80211_vif *vif = ifp->vif;
struct brcmf_rx_mgmt_data *rxframe = (struct brcmf_rx_mgmt_data *)data;
- u16 chanspec = be16_to_cpu(rxframe->chanspec);
struct brcmu_chan ch;
u8 *mgmt_frame;
u32 mgmt_frame_len;
@@ -1906,7 +1905,7 @@ s32 brcmf_p2p_notify_rx_mgmt_p2p_probereq(struct brcmf_if *ifp,
cfg80211_rx_mgmt(&vif->wdev, freq, 0, mgmt_frame, mgmt_frame_len, 0);
brcmf_dbg(INFO, "mgmt_frame_len (%d) , e->datalen (%d), chanspec (%04x), freq (%d)\n",
- mgmt_frame_len, e->datalen, chanspec, freq);
+ mgmt_frame_len, e->datalen, ch.chspec, freq);
return 0;
}
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 613caca7dc02..5adce0e36fe5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -4144,10 +4144,8 @@ struct brcmf_sdio *brcmf_sdio_probe(struct brcmf_sdio_dev *sdiodev)
init_waitqueue_head(&bus->dcmd_resp_wait);
/* Set up the watchdog timer */
- init_timer(&bus->timer);
- bus->timer.data = (unsigned long)bus;
- bus->timer.function = brcmf_sdio_watchdog;
-
+ setup_timer(&bus->timer, brcmf_sdio_watchdog,
+ (unsigned long)bus);
/* Initialize watchdog thread */
init_completion(&bus->watchdog_wait);
bus->watchdog_tsk = kthread_run(brcmf_sdio_watchdog_thread,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c
index 1c4e9dd57960..3a13d176b221 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_cmn.c
@@ -1916,7 +1916,7 @@ void wlc_phy_txpower_update_shm(struct brcms_phy *pi)
pi->hwpwr_txcur);
for (j = TXP_FIRST_OFDM; j <= TXP_LAST_OFDM; j++) {
- const u8 ucode_ofdm_rates[] = {
+ static const u8 ucode_ofdm_rates[] = {
0x0c, 0x12, 0x18, 0x24, 0x30, 0x48, 0x60, 0x6c
};
offset = wlapi_bmac_rate_shm_offset(
diff --git a/drivers/net/wireless/intel/iwlegacy/4965-mac.c b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
index de9b6522c43f..65eba2c24292 100644
--- a/drivers/net/wireless/intel/iwlegacy/4965-mac.c
+++ b/drivers/net/wireless/intel/iwlegacy/4965-mac.c
@@ -1480,7 +1480,7 @@ il4965_get_ac_from_tid(u16 tid)
static inline int
il4965_get_fifo_from_tid(u16 tid)
{
- const u8 ac_to_fifo[] = {
+ static const u8 ac_to_fifo[] = {
IL_TX_FIFO_VO,
IL_TX_FIFO_VI,
IL_TX_FIFO_BE,
diff --git a/drivers/net/wireless/intel/iwlwifi/Makefile b/drivers/net/wireless/intel/iwlwifi/Makefile
index 35a32a3ec882..91b90e77d6d9 100644
--- a/drivers/net/wireless/intel/iwlwifi/Makefile
+++ b/drivers/net/wireless/intel/iwlwifi/Makefile
@@ -13,6 +13,7 @@ iwlwifi-objs += iwl-trans.o
iwlwifi-objs += fw/notif-wait.o
iwlwifi-$(CONFIG_IWLMVM) += fw/paging.o fw/smem.o fw/init.o fw/dbg.o
iwlwifi-$(CONFIG_IWLMVM) += fw/common_rx.o fw/nvm.o
+iwlwifi-$(CONFIG_ACPI) += fw/acpi.o
iwlwifi-objs += $(iwlwifi-m)
diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c
index a440140ed8dd..c911d55fde49 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/a000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/a000.c
@@ -134,77 +134,78 @@ static const struct iwl_ht_params iwl_a000_ht_params = {
.rf_id = true, \
.gen2 = true, \
.nvm_type = IWL_NVM_EXT, \
- .dbgc_supported = true
+ .dbgc_supported = true, \
+ .tx_cmd_queue_size = 32
const struct iwl_cfg iwla000_2ac_cfg_hr = {
- .name = "Intel(R) Dual Band Wireless AC a000",
- .fw_name_pre = IWL_A000_HR_FW_PRE,
- IWL_DEVICE_A000,
- .ht_params = &iwl_a000_ht_params,
- .nvm_ver = IWL_A000_NVM_VERSION,
- .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
- .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+ .name = "Intel(R) Dual Band Wireless AC a000",
+ .fw_name_pre = IWL_A000_HR_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
};
const struct iwl_cfg iwla000_2ac_cfg_hr_cdb = {
- .name = "Intel(R) Dual Band Wireless AC a000",
- .fw_name_pre = IWL_A000_HR_CDB_FW_PRE,
- IWL_DEVICE_A000,
- .ht_params = &iwl_a000_ht_params,
- .nvm_ver = IWL_A000_NVM_VERSION,
- .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
- .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
- .cdb = true,
+ .name = "Intel(R) Dual Band Wireless AC a000",
+ .fw_name_pre = IWL_A000_HR_CDB_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+ .cdb = true,
};
const struct iwl_cfg iwla000_2ac_cfg_jf = {
- .name = "Intel(R) Dual Band Wireless AC a000",
- .fw_name_pre = IWL_A000_JF_FW_PRE,
- IWL_DEVICE_A000,
- .ht_params = &iwl_a000_ht_params,
- .nvm_ver = IWL_A000_NVM_VERSION,
- .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
- .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+ .name = "Intel(R) Dual Band Wireless AC a000",
+ .fw_name_pre = IWL_A000_JF_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
};
const struct iwl_cfg iwla000_2ax_cfg_hr = {
- .name = "Intel(R) Dual Band Wireless AX a000",
- .fw_name_pre = IWL_A000_HR_FW_PRE,
- IWL_DEVICE_A000,
- .ht_params = &iwl_a000_ht_params,
- .nvm_ver = IWL_A000_NVM_VERSION,
- .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
- .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+ .name = "Intel(R) Dual Band Wireless AX a000",
+ .fw_name_pre = IWL_A000_HR_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
};
const struct iwl_cfg iwla000_2ax_cfg_qnj_hr_f0 = {
- .name = "Intel(R) Dual Band Wireless AX a000",
- .fw_name_pre = IWL_A000_HR_F0_FW_PRE,
- IWL_DEVICE_A000,
- .ht_params = &iwl_a000_ht_params,
- .nvm_ver = IWL_A000_NVM_VERSION,
- .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
- .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+ .name = "Intel(R) Dual Band Wireless AX a000",
+ .fw_name_pre = IWL_A000_HR_F0_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
};
const struct iwl_cfg iwla000_2ax_cfg_qnj_jf_b0 = {
- .name = "Intel(R) Dual Band Wireless AX a000",
- .fw_name_pre = IWL_A000_JF_B0_FW_PRE,
- IWL_DEVICE_A000,
- .ht_params = &iwl_a000_ht_params,
- .nvm_ver = IWL_A000_NVM_VERSION,
- .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
- .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+ .name = "Intel(R) Dual Band Wireless AX a000",
+ .fw_name_pre = IWL_A000_JF_B0_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
};
const struct iwl_cfg iwla000_2ax_cfg_qnj_hr_a0 = {
- .name = "Intel(R) Dual Band Wireless AX a000",
- .fw_name_pre = IWL_A000_HR_A0_FW_PRE,
- IWL_DEVICE_A000,
- .ht_params = &iwl_a000_ht_params,
- .nvm_ver = IWL_A000_NVM_VERSION,
- .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
- .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
+ .name = "Intel(R) Dual Band Wireless AX a000",
+ .fw_name_pre = IWL_A000_HR_A0_FW_PRE,
+ IWL_DEVICE_A000,
+ .ht_params = &iwl_a000_ht_params,
+ .nvm_ver = IWL_A000_NVM_VERSION,
+ .nvm_calib_ver = IWL_A000_TX_POWER_VERSION,
+ .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K,
};
MODULE_FIRMWARE(IWL_A000_HR_MODULE_FIRMWARE(IWL_A000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
new file mode 100644
index 000000000000..75cae54ea7de
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -0,0 +1,210 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#include "iwl-drv.h"
+#include "iwl-debug.h"
+#include "acpi.h"
+
+void *iwl_acpi_get_object(struct device *dev, acpi_string method)
+{
+ acpi_handle root_handle;
+ acpi_handle handle;
+ struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
+ acpi_status status;
+
+ root_handle = ACPI_HANDLE(dev);
+ if (!root_handle) {
+ IWL_DEBUG_DEV_RADIO(dev,
+ "Could not retrieve root port ACPI handle\n");
+ return ERR_PTR(-ENOENT);
+ }
+
+ /* Get the method's handle */
+ status = acpi_get_handle(root_handle, method, &handle);
+ if (ACPI_FAILURE(status)) {
+ IWL_DEBUG_DEV_RADIO(dev, "%s method not found\n", method);
+ return ERR_PTR(-ENOENT);
+ }
+
+ /* Call the method with no arguments */
+ status = acpi_evaluate_object(handle, NULL, NULL, &buf);
+ if (ACPI_FAILURE(status)) {
+ IWL_DEBUG_DEV_RADIO(dev, "%s invocation failed (0x%x)\n",
+ method, status);
+ return ERR_PTR(-ENOENT);
+ }
+
+ return buf.pointer;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_object);
+
+union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
+ union acpi_object *data,
+ int data_size)
+{
+ int i;
+ union acpi_object *wifi_pkg;
+
+ /*
+ * We need at least one entry in the wifi package that
+ * describes the domain, and one more entry, otherwise there's
+ * no point in reading it.
+ */
+ if (WARN_ON_ONCE(data_size < 2))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * We need at least two packages, one for the revision and one
+ * for the data itself. Also check that the revision is valid
+ * (i.e. it is an integer set to 0).
+ */
+ if (data->type != ACPI_TYPE_PACKAGE ||
+ data->package.count < 2 ||
+ data->package.elements[0].type != ACPI_TYPE_INTEGER ||
+ data->package.elements[0].integer.value != 0) {
+ IWL_DEBUG_DEV_RADIO(dev, "Unsupported packages structure\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* loop through all the packages to find the one for WiFi */
+ for (i = 1; i < data->package.count; i++) {
+ union acpi_object *domain;
+
+ wifi_pkg = &data->package.elements[i];
+
+ /* skip entries that are not a package with the right size */
+ if (wifi_pkg->type != ACPI_TYPE_PACKAGE ||
+ wifi_pkg->package.count != data_size)
+ continue;
+
+ domain = &wifi_pkg->package.elements[0];
+ if (domain->type == ACPI_TYPE_INTEGER &&
+ domain->integer.value == ACPI_WIFI_DOMAIN)
+ goto found;
+ }
+
+ return ERR_PTR(-ENOENT);
+
+found:
+ return wifi_pkg;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_wifi_pkg);
+
+int iwl_acpi_get_mcc(struct device *dev, char *mcc)
+{
+ union acpi_object *wifi_pkg, *data;
+ u32 mcc_val;
+ int ret;
+
+ data = iwl_acpi_get_object(dev, ACPI_WRDD_METHOD);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+
+ wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data, ACPI_WRDD_WIFI_DATA_SIZE);
+ if (IS_ERR(wifi_pkg)) {
+ ret = PTR_ERR(wifi_pkg);
+ goto out_free;
+ }
+
+ if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ mcc_val = wifi_pkg->package.elements[1].integer.value;
+
+ mcc[0] = (mcc_val >> 8) & 0xff;
+ mcc[1] = mcc_val & 0xff;
+ mcc[2] = '\0';
+
+ ret = 0;
+out_free:
+ kfree(data);
+ return ret;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_mcc);
+
+u64 iwl_acpi_get_pwr_limit(struct device *dev)
+{
+ union acpi_object *data, *wifi_pkg;
+ u64 dflt_pwr_limit;
+
+ data = iwl_acpi_get_object(dev, ACPI_SPLC_METHOD);
+ if (IS_ERR(data)) {
+ dflt_pwr_limit = 0;
+ goto out;
+ }
+
+ wifi_pkg = iwl_acpi_get_wifi_pkg(dev, data,
+ ACPI_SPLC_WIFI_DATA_SIZE);
+ if (IS_ERR(wifi_pkg) ||
+ wifi_pkg->package.elements[1].integer.value != ACPI_TYPE_INTEGER) {
+ dflt_pwr_limit = 0;
+ goto out_free;
+ }
+
+ dflt_pwr_limit = wifi_pkg->package.elements[1].integer.value;
+out_free:
+ kfree(data);
+out:
+ return dflt_pwr_limit;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_pwr_limit);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
new file mode 100644
index 000000000000..cb5f32c1d705
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -0,0 +1,138 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ * Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Deutschland GmbH
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+#ifndef __iwl_fw_acpi__
+#define __iwl_fw_acpi__
+
+#include <linux/acpi.h>
+
+#define ACPI_WRDS_METHOD "WRDS"
+#define ACPI_EWRD_METHOD "EWRD"
+#define ACPI_WGDS_METHOD "WGDS"
+#define ACPI_WRDD_METHOD "WRDD"
+#define ACPI_SPLC_METHOD "SPLC"
+
+#define ACPI_WIFI_DOMAIN (0x07)
+
+#define ACPI_SAR_TABLE_SIZE 10
+#define ACPI_SAR_PROFILE_NUM 4
+
+#define ACPI_GEO_TABLE_SIZE 6
+#define ACPI_NUM_GEO_PROFILES 3
+#define ACPI_GEO_PER_CHAIN_SIZE 3
+
+#define ACPI_SAR_NUM_CHAIN_LIMITS 2
+#define ACPI_SAR_NUM_SUB_BANDS 5
+
+#define ACPI_WRDS_WIFI_DATA_SIZE (ACPI_SAR_TABLE_SIZE + 2)
+#define ACPI_EWRD_WIFI_DATA_SIZE ((ACPI_SAR_PROFILE_NUM - 1) * \
+ ACPI_SAR_TABLE_SIZE + 3)
+#define ACPI_WGDS_WIFI_DATA_SIZE 18
+#define ACPI_WRDD_WIFI_DATA_SIZE 2
+#define ACPI_SPLC_WIFI_DATA_SIZE 2
+
+#define ACPI_WGDS_NUM_BANDS 2
+#define ACPI_WGDS_TABLE_SIZE 3
+
+#ifdef CONFIG_ACPI
+
+void *iwl_acpi_get_object(struct device *dev, acpi_string method);
+union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
+ union acpi_object *data,
+ int data_size);
+
+/**
+ * iwl_acpi_get_mcc - read MCC from ACPI, if available
+ *
+ * @dev: the struct device
+ * @mcc: output buffer (3 bytes) that will get the MCC
+ *
+ * This function tries to read the current MCC from ACPI if available.
+ */
+int iwl_acpi_get_mcc(struct device *dev, char *mcc);
+
+u64 iwl_acpi_get_pwr_limit(struct device *dev);
+
+#else /* CONFIG_ACPI */
+
+static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method)
+{
+ return ERR_PTR(-ENOENT);
+}
+
+static inline union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
+ union acpi_object *data,
+ int data_size)
+{
+ return ERR_PTR(-ENOENT);
+}
+
+static inline int iwl_acpi_get_mcc(struct device *dev, char *mcc)
+{
+ return -ENOENT;
+}
+
+static inline u64 iwl_acpi_get_pwr_limit(struct device *dev)
+{
+ return 0;
+}
+
+#endif /* CONFIG_ACPI */
+#endif /* __iwl_fw_acpi__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h b/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h
index d2717fafdf5b..570f19026c91 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/binding.h
@@ -116,14 +116,14 @@ struct iwl_binding_cmd {
#define IWL_MVM_MAX_QUOTA 128
/**
- * struct iwl_time_quota_data - configuration of time quota per binding
+ * struct iwl_time_quota_data_v1 - configuration of time quota per binding
* @id_and_color: ID and color of the relevant Binding,
* &enum iwl_ctxt_id_and_color
* @quota: absolute time quota in TU. The scheduler will try to divide the
* remainig quota (after Time Events) according to this quota.
* @max_duration: max uninterrupted context duration in TU
*/
-struct iwl_time_quota_data {
+struct iwl_time_quota_data_v1 {
__le32 id_and_color;
__le32 quota;
__le32 max_duration;
@@ -137,8 +137,43 @@ struct iwl_time_quota_data {
* essentially zero.
* On CDB the fourth one is a regular binding.
*/
+struct iwl_time_quota_cmd_v1 {
+ struct iwl_time_quota_data_v1 quotas[MAX_BINDINGS];
+} __packed; /* TIME_QUOTA_ALLOCATION_CMD_API_S_VER_1 */
+
+enum iwl_quota_low_latency {
+ IWL_QUOTA_LOW_LATENCY_NONE = 0,
+ IWL_QUOTA_LOW_LATENCY_TX = BIT(0),
+ IWL_QUOTA_LOW_LATENCY_RX = BIT(1),
+ IWL_QUOTA_LOW_LATENCY_TX_RX =
+ IWL_QUOTA_LOW_LATENCY_TX | IWL_QUOTA_LOW_LATENCY_RX,
+};
+
+/**
+ * struct iwl_time_quota_data - configuration of time quota per binding
+ * @id_and_color: ID and color of the relevant Binding.
+ * @quota: absolute time quota in TU. The scheduler will try to divide the
+ * remainig quota (after Time Events) according to this quota.
+ * @max_duration: max uninterrupted context duration in TU
+ * @low_latency: low latency status, &enum iwl_quota_low_latency
+ */
+struct iwl_time_quota_data {
+ __le32 id_and_color;
+ __le32 quota;
+ __le32 max_duration;
+ __le32 low_latency;
+} __packed; /* TIME_QUOTA_DATA_API_S_VER_2 */
+
+/**
+ * struct iwl_time_quota_cmd - configuration of time quota between bindings
+ * ( TIME_QUOTA_CMD = 0x2c )
+ * Note: on non-CDB the fourth one is the auxilary mac and is essentially zero.
+ * On CDB the fourth one is a regular binding.
+ *
+ * @quotas: allocations per binding
+ */
struct iwl_time_quota_cmd {
struct iwl_time_quota_data quotas[MAX_BINDINGS];
-} __packed; /* TIME_QUOTA_ALLOCATION_CMD_API_S_VER_1 */
+} __packed; /* TIME_QUOTA_ALLOCATION_CMD_API_S_VER_2 */
#endif /* __iwl_fw_api_binding_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
index 074868394427..7ebbf097488b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/commands.h
@@ -504,6 +504,7 @@ enum iwl_legacy_cmds {
/**
* @MARKER_CMD: trace marker command, uses &struct iwl_mvm_marker
+ * with &struct iwl_mvm_marker_rsp
*/
MARKER_CMD = 0xcb,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
index 9f88b61536bc..0a81fb1b6ed4 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
@@ -278,6 +278,15 @@ struct iwl_mvm_marker {
__le32 metadata[0];
} __packed; /* MARKER_API_S_VER_1 */
+/**
+ * struct iwl_mvm_marker_rsp - Response to marker cmd
+ *
+ * @gp2: The gp2 clock value in the FW
+ */
+struct iwl_mvm_marker_rsp {
+ __le32 gp2;
+} __packed;
+
/* Operation types for the debug mem access */
enum {
DEBUG_MEM_OP_READ = 0,
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h
index 39c89e85fd2f..ec42c84e5df2 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/mac-cfg.h
@@ -68,78 +68,11 @@
*/
enum iwl_mac_conf_subcmd_ids {
/**
- * @LINK_QUALITY_MEASUREMENT_CMD: &struct iwl_link_qual_msrmnt_cmd
- */
- LINK_QUALITY_MEASUREMENT_CMD = 0x1,
-
- /**
- * @LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF:
- * &struct iwl_link_qual_msrmnt_notif
- */
- LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF = 0xFE,
-
- /**
* @CHANNEL_SWITCH_NOA_NOTIF: &struct iwl_channel_switch_noa_notif
*/
CHANNEL_SWITCH_NOA_NOTIF = 0xFF,
};
-#define LQM_NUMBER_OF_STATIONS_IN_REPORT 16
-
-enum iwl_lqm_cmd_operatrions {
- LQM_CMD_OPERATION_START_MEASUREMENT = 0x01,
- LQM_CMD_OPERATION_STOP_MEASUREMENT = 0x02,
-};
-
-enum iwl_lqm_status {
- LQM_STATUS_SUCCESS = 0,
- LQM_STATUS_TIMEOUT = 1,
- LQM_STATUS_ABORT = 2,
-};
-
-/**
- * struct iwl_link_qual_msrmnt_cmd - Link Quality Measurement command
- * @cmd_operation: command operation to be performed (start or stop)
- * as defined above.
- * @mac_id: MAC ID the measurement applies to.
- * @measurement_time: time of the total measurement to be performed, in uSec.
- * @timeout: maximum time allowed until a response is sent, in uSec.
- */
-struct iwl_link_qual_msrmnt_cmd {
- __le32 cmd_operation;
- __le32 mac_id;
- __le32 measurement_time;
- __le32 timeout;
-} __packed /* LQM_CMD_API_S_VER_1 */;
-
-/**
- * struct iwl_link_qual_msrmnt_notif - Link Quality Measurement notification
- *
- * @frequent_stations_air_time: an array containing the total air time
- * (in uSec) used by the most frequently transmitting stations.
- * @number_of_stations: the number of uniqe stations included in the array
- * (a number between 0 to 16)
- * @total_air_time_other_stations: the total air time (uSec) used by all the
- * stations which are not included in the above report.
- * @time_in_measurement_window: the total time in uSec in which a measurement
- * took place.
- * @tx_frame_dropped: the number of TX frames dropped due to retry limit during
- * measurement
- * @mac_id: MAC ID the measurement applies to.
- * @status: return status. may be one of the LQM_STATUS_* defined above.
- * @reserved: reserved.
- */
-struct iwl_link_qual_msrmnt_notif {
- __le32 frequent_stations_air_time[LQM_NUMBER_OF_STATIONS_IN_REPORT];
- __le32 number_of_stations;
- __le32 total_air_time_other_stations;
- __le32 time_in_measurement_window;
- __le32 tx_frame_dropped;
- __le32 mac_id;
- __le32 status;
- u8 reserved[12];
-} __packed; /* LQM_MEASUREMENT_COMPLETE_NTF_API_S_VER1 */
-
/**
* struct iwl_channel_switch_noa_notif - Channel switch NOA notification
*
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
index a06afb5605d2..a3c77e01863b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/power.h
@@ -357,8 +357,7 @@ struct iwl_dev_tx_power_cmd {
u8 reserved[3];
} __packed; /* TX_REDUCED_POWER_API_S_VER_4 */
-#define IWL_NUM_GEO_PROFILES 3
-#define IWL_GEO_PER_CHAIN_SIZE 3
+#define IWL_NUM_GEO_PROFILES 3
/**
* enum iwl_geo_per_chain_offset_operation - type of operation
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
index af369eba3795..dc40cbd52f92 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
@@ -68,9 +68,6 @@
* @STA_FLG_REDUCED_TX_PWR_DATA: reduced TX power (data frames)
* @STA_FLG_DISABLE_TX: set if TX should be disabled
* @STA_FLG_PS: set if STA is in Power Save
- * @STA_FLG_INVALID: set if STA is invalid
- * @STA_FLG_DLP_EN: Direct Link Protocol is enabled
- * @STA_FLG_SET_ALL_KEYS: the current key applies to all key IDs
* @STA_FLG_DRAIN_FLOW: drain flow
* @STA_FLG_PAN: STA is for PAN interface
* @STA_FLG_CLASS_AUTH: station is authenticated
@@ -100,7 +97,6 @@
* @STA_FLG_MIMO_EN_SISO: no support for MIMO
* @STA_FLG_MIMO_EN_MIMO2: 2 streams supported
* @STA_FLG_MIMO_EN_MIMO3: 3 streams supported
- * @STA_FLG_MFP_EN: Management Frame Protection
* @STA_FLG_AGG_MPDU_DENS_MSK: A-MPDU density (mask)
* @STA_FLG_AGG_MPDU_DENS_SHIFT: A-MPDU density (bit shift)
* @STA_FLG_AGG_MPDU_DENS_2US: A-MPDU density (2 usec gap)
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index f5dd7d83cd0a..8106fd4be996 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -93,6 +93,8 @@ static void iwl_read_radio_regs(struct iwl_fw_runtime *fwrt,
unsigned long flags;
int i;
+ IWL_DEBUG_INFO(fwrt, "WRT radio registers dump\n");
+
if (!iwl_trans_grab_nic_access(fwrt->trans, &flags))
return;
@@ -233,6 +235,8 @@ static void iwl_fw_dump_fifos(struct iwl_fw_runtime *fwrt,
unsigned long flags;
int i, j;
+ IWL_DEBUG_INFO(fwrt, "WRT FIFO dump\n");
+
if (!iwl_trans_grab_nic_access(fwrt->trans, &flags))
return;
@@ -476,6 +480,8 @@ static void iwl_dump_prph(struct iwl_trans *trans,
unsigned long flags;
u32 i;
+ IWL_DEBUG_INFO(trans, "WRT PRPH dump\n");
+
if (!iwl_trans_grab_nic_access(trans, &flags))
return;
@@ -559,6 +565,8 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
bool monitor_dump_only = false;
int i;
+ IWL_DEBUG_INFO(fwrt, "WRT dump start\n");
+
/* there's no point in fw dump if the bus is dead */
if (test_bit(STATUS_TRANS_DEAD, &fwrt->trans->status)) {
IWL_ERR(fwrt, "Skip fw error dump since bus is dead\n");
@@ -816,6 +824,9 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
dump_mem->type = fw_dbg_mem[i].data_type;
dump_mem->offset = cpu_to_le32(ofs);
+ IWL_DEBUG_INFO(fwrt, "WRT memory dump. Type=%u\n",
+ dump_mem->type);
+
switch (dump_mem->type & cpu_to_le32(FW_DBG_MEM_TYPE_MASK)) {
case cpu_to_le32(FW_DBG_MEM_TYPE_REGULAR):
iwl_trans_read_mem_bytes(fwrt->trans, ofs,
@@ -841,6 +852,7 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
}
if (smem_len) {
+ IWL_DEBUG_INFO(fwrt, "WRT SMEM dump\n");
dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM);
dump_data->len = cpu_to_le32(smem_len + sizeof(*dump_mem));
dump_mem = (void *)dump_data->data;
@@ -853,6 +865,7 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
}
if (sram2_len) {
+ IWL_DEBUG_INFO(fwrt, "WRT SRAM dump\n");
dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_MEM);
dump_data->len = cpu_to_le32(sram2_len + sizeof(*dump_mem));
dump_mem = (void *)dump_data->data;
@@ -868,6 +881,7 @@ void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
if (!fwrt->trans->cfg->gen2 &&
fwrt->fw->img[fwrt->cur_fw_img].paging_mem_size &&
fwrt->fw_paging_db[0].fw_paging_block) {
+ IWL_DEBUG_INFO(fwrt, "WRT paging dump\n");
for (i = 1; i < fwrt->num_of_paging_blk + 1; i++) {
struct iwl_fw_error_dump_paging *paging;
struct page *pages =
@@ -930,6 +944,7 @@ out:
iwl_fw_free_dump_desc(fwrt);
fwrt->dump.trig = NULL;
clear_bit(IWL_FWRT_STATUS_DUMPING, &fwrt->status);
+ IWL_DEBUG_INFO(fwrt, "WRT dump done\n");
}
IWL_EXPORT_SYMBOL(iwl_fw_error_dump);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 279248cd9cfb..efd7fb65de8b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -248,6 +248,8 @@ typedef unsigned int __bitwise iwl_ucode_tlv_api_t;
* @IWL_UCODE_TLV_API_NEW_RX_STATS: should new RX STATISTICS API be used
* @IWL_UCODE_TLV_API_ATS_COEX_EXTERNAL: the coex notification is enlared to
* include information about ACL time sharing.
+ * @IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY: Quota command includes a field
+ * indicating low latency direction.
*
* @NUM_IWL_UCODE_TLV_API: number of bits used
*/
@@ -265,6 +267,7 @@ enum iwl_ucode_tlv_api {
IWL_UCODE_TLV_API_NEW_BEACON_TEMPLATE = (__force iwl_ucode_tlv_api_t)34,
IWL_UCODE_TLV_API_NEW_RX_STATS = (__force iwl_ucode_tlv_api_t)35,
IWL_UCODE_TLV_API_COEX_ATS_EXTERNAL = (__force iwl_ucode_tlv_api_t)37,
+ IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY = (__force iwl_ucode_tlv_api_t)38,
NUM_IWL_UCODE_TLV_API
#ifdef __CHECKER__
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 71cb1ecde0f7..0347a03b6f1b 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -333,6 +333,8 @@ struct iwl_pwr_tx_backoff {
* @gen2: a000 and on transport operation
* @cdb: CDB support
* @nvm_type: see &enum iwl_nvm_type
+ * @tx_cmd_queue_size: size of the cmd queue. If zero, use the same value as
+ * the regular queues
*
* We enable the driver to be backward compatible wrt. hardware features.
* API differences in uCode shouldn't be handled here but through TLVs
@@ -383,6 +385,7 @@ struct iwl_cfg {
gen2:1,
cdb:1,
dbgc_supported:1;
+ u16 tx_cmd_queue_size;
u8 valid_tx_ant;
u8 valid_rx_ant;
u8 non_shared_ant;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-debug.h b/drivers/net/wireless/intel/iwlwifi/iwl-debug.h
index cd77c6971753..c023fcf5d452 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-debug.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-debug.h
@@ -216,6 +216,7 @@ do { \
#define IWL_DEBUG_TX_REPLY(p, f, a...) IWL_DEBUG(p, IWL_DL_TX_REPLY, f, ## a)
#define IWL_DEBUG_TX_QUEUES(p, f, a...) IWL_DEBUG(p, IWL_DL_TX_QUEUES, f, ## a)
#define IWL_DEBUG_RADIO(p, f, a...) IWL_DEBUG(p, IWL_DL_RADIO, f, ## a)
+#define IWL_DEBUG_DEV_RADIO(p, f, a...) IWL_DEBUG_DEV(p, IWL_DL_RADIO, f, ## a)
#define IWL_DEBUG_POWER(p, f, a...) IWL_DEBUG(p, IWL_DL_POWER, f, ## a)
#define IWL_DEBUG_11H(p, f, a...) IWL_DEBUG(p, IWL_DL_11H, f, ## a)
#define IWL_DEBUG_RPM(p, f, a...) IWL_DEBUG(p, IWL_DL_RPM, f, ## a)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index 99676d6c4713..ccdb247d68c5 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -832,7 +832,7 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv,
capa->standard_phy_calibration_size =
le32_to_cpup((__le32 *)tlv_data);
break;
- case IWL_UCODE_TLV_SEC_RT:
+ case IWL_UCODE_TLV_SEC_RT:
iwl_store_ucode_sec(pieces, tlv_data, IWL_UCODE_REGULAR,
tlv_len);
drv->fw.type = IWL_FW_MVM;
@@ -864,7 +864,7 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv,
FW_PHY_CFG_RX_CHAIN) >>
FW_PHY_CFG_RX_CHAIN_POS;
break;
- case IWL_UCODE_TLV_SECURE_SEC_RT:
+ case IWL_UCODE_TLV_SECURE_SEC_RT:
iwl_store_ucode_sec(pieces, tlv_data, IWL_UCODE_REGULAR,
tlv_len);
drv->fw.type = IWL_FW_MVM;
@@ -1335,7 +1335,8 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context)
/* Runtime instructions and 2 copies of data:
* 1) unmodified from disk
- * 2) backup cache for save/restore during power-downs */
+ * 2) backup cache for save/restore during power-downs
+ */
for (i = 0; i < IWL_UCODE_TYPE_MAX; i++)
if (iwl_alloc_ucode(drv, pieces, i))
goto out_free_fw;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index c3a5d8ccc95e..8928613e033e 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -68,13 +68,14 @@
#include <linux/export.h>
#include <linux/etherdevice.h>
#include <linux/pci.h>
-#include <linux/acpi.h>
+
#include "iwl-drv.h"
#include "iwl-modparams.h"
#include "iwl-nvm-parse.h"
#include "iwl-prph.h"
#include "iwl-io.h"
#include "iwl-csr.h"
+#include "fw/acpi.h"
/* NVM offsets (in words) definitions */
enum nvm_offsets {
@@ -937,91 +938,3 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
return regd;
}
IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info);
-
-#ifdef CONFIG_ACPI
-#define WRDD_METHOD "WRDD"
-#define WRDD_WIFI (0x07)
-#define WRDD_WIGIG (0x10)
-
-static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd)
-{
- union acpi_object *mcc_pkg, *domain_type, *mcc_value;
- u32 i;
-
- if (wrdd->type != ACPI_TYPE_PACKAGE ||
- wrdd->package.count < 2 ||
- wrdd->package.elements[0].type != ACPI_TYPE_INTEGER ||
- wrdd->package.elements[0].integer.value != 0) {
- IWL_DEBUG_EEPROM(dev, "Unsupported wrdd structure\n");
- return 0;
- }
-
- for (i = 1 ; i < wrdd->package.count ; ++i) {
- mcc_pkg = &wrdd->package.elements[i];
-
- if (mcc_pkg->type != ACPI_TYPE_PACKAGE ||
- mcc_pkg->package.count < 2 ||
- mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER ||
- mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) {
- mcc_pkg = NULL;
- continue;
- }
-
- domain_type = &mcc_pkg->package.elements[0];
- if (domain_type->integer.value == WRDD_WIFI)
- break;
-
- mcc_pkg = NULL;
- }
-
- if (mcc_pkg) {
- mcc_value = &mcc_pkg->package.elements[1];
- return mcc_value->integer.value;
- }
-
- return 0;
-}
-
-int iwl_get_bios_mcc(struct device *dev, char *mcc)
-{
- acpi_handle root_handle;
- acpi_handle handle;
- struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL};
- acpi_status status;
- u32 mcc_val;
-
- root_handle = ACPI_HANDLE(dev);
- if (!root_handle) {
- IWL_DEBUG_EEPROM(dev,
- "Could not retrieve root port ACPI handle\n");
- return -ENOENT;
- }
-
- /* Get the method's handle */
- status = acpi_get_handle(root_handle, (acpi_string)WRDD_METHOD,
- &handle);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_EEPROM(dev, "WRD method not found\n");
- return -ENOENT;
- }
-
- /* Call WRDD with no arguments */
- status = acpi_evaluate_object(handle, NULL, NULL, &wrdd);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_EEPROM(dev, "WRDC invocation failed (0x%x)\n",
- status);
- return -ENOENT;
- }
-
- mcc_val = iwl_wrdd_get_mcc(dev, wrdd.pointer);
- kfree(wrdd.pointer);
- if (!mcc_val)
- return -ENOENT;
-
- mcc[0] = (mcc_val >> 8) & 0xff;
- mcc[1] = mcc_val & 0xff;
- mcc[2] = '\0';
- return 0;
-}
-IWL_EXPORT_SYMBOL(iwl_get_bios_mcc);
-#endif
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
index 2d1a24dd8410..306736c7a042 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h
@@ -109,21 +109,4 @@ struct ieee80211_regdomain *
iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
int num_of_ch, __le32 *channels, u16 fw_mcc);
-#ifdef CONFIG_ACPI
-/**
- * iwl_get_bios_mcc - read MCC from BIOS, if available
- *
- * @dev: the struct device
- * @mcc: output buffer (3 bytes) that will get the MCC
- *
- * This function tries to read the current MCC from ACPI if available.
- */
-int iwl_get_bios_mcc(struct device *dev, char *mcc);
-#else
-static inline int iwl_get_bios_mcc(struct device *dev, char *mcc)
-{
- return -ENOENT;
-}
-#endif
-
#endif /* __iwl_nvm_parse_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
index 784bdd0ed233..7e9c924e1220 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c
@@ -6,6 +6,7 @@
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -31,6 +32,7 @@
* BSD LICENSE
*
* Copyright(c) 2015 Intel Mobile Communications GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -205,3 +207,17 @@ int iwl_cmd_groups_verify_sorted(const struct iwl_trans_config *trans)
return 0;
}
IWL_EXPORT_SYMBOL(iwl_cmd_groups_verify_sorted);
+
+void iwl_trans_ref(struct iwl_trans *trans)
+{
+ if (trans->ops->ref)
+ trans->ops->ref(trans);
+}
+IWL_EXPORT_SYMBOL(iwl_trans_ref);
+
+void iwl_trans_unref(struct iwl_trans *trans)
+{
+ if (trans->ops->unref)
+ trans->ops->unref(trans);
+}
+IWL_EXPORT_SYMBOL(iwl_trans_unref);
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index e90abbfba718..bba4f54cbbbb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -749,8 +749,6 @@ struct iwl_trans {
struct lockdep_map sync_cmd_lockdep_map;
#endif
- u64 dflt_pwr_limit;
-
const struct iwl_fw_dbg_dest_tlv *dbg_dest_tlv;
const struct iwl_fw_dbg_conf_tlv *dbg_conf_tlv[FW_DBG_CONF_MAX];
struct iwl_fw_dbg_trigger_tlv * const *dbg_trigger_tlv;
@@ -875,18 +873,6 @@ static inline int iwl_trans_d3_resume(struct iwl_trans *trans,
return trans->ops->d3_resume(trans, status, test, reset);
}
-static inline void iwl_trans_ref(struct iwl_trans *trans)
-{
- if (trans->ops->ref)
- trans->ops->ref(trans);
-}
-
-static inline void iwl_trans_unref(struct iwl_trans *trans)
-{
- if (trans->ops->unref)
- trans->ops->unref(trans);
-}
-
static inline int iwl_trans_suspend(struct iwl_trans *trans)
{
if (!trans->ops->suspend)
@@ -1191,6 +1177,8 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size,
const struct iwl_cfg *cfg,
const struct iwl_trans_ops *ops);
void iwl_trans_free(struct iwl_trans *trans);
+void iwl_trans_ref(struct iwl_trans *trans);
+void iwl_trans_unref(struct iwl_trans *trans);
/*****************************************************
* driver (transport) register/unregister functions
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index b205a7bfb828..b1f73dcabd31 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -664,6 +664,7 @@ static int iwl_mvm_d3_reprogram(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
int ret, i;
struct iwl_binding_cmd binding_cmd = {};
struct iwl_time_quota_cmd quota_cmd = {};
+ struct iwl_time_quota_data *quota;
u32 status;
int size;
@@ -745,17 +746,20 @@ static int iwl_mvm_d3_reprogram(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
return ret;
/* and some quota */
- quota_cmd.quotas[0].id_and_color =
+ quota = iwl_mvm_quota_cmd_get_quota(mvm, &quota_cmd, 0);
+ quota->id_and_color =
cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->phy_ctxt->id,
mvmvif->phy_ctxt->color));
- quota_cmd.quotas[0].quota = cpu_to_le32(IWL_MVM_MAX_QUOTA);
- quota_cmd.quotas[0].max_duration = cpu_to_le32(IWL_MVM_MAX_QUOTA);
+ quota->quota = cpu_to_le32(IWL_MVM_MAX_QUOTA);
+ quota->max_duration = cpu_to_le32(IWL_MVM_MAX_QUOTA);
- for (i = 1; i < MAX_BINDINGS; i++)
- quota_cmd.quotas[i].id_and_color = cpu_to_le32(FW_CTXT_INVALID);
+ for (i = 1; i < MAX_BINDINGS; i++) {
+ quota = iwl_mvm_quota_cmd_get_quota(mvm, &quota_cmd, i);
+ quota->id_and_color = cpu_to_le32(FW_CTXT_INVALID);
+ }
ret = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, 0,
- sizeof(quota_cmd), &quota_cmd);
+ iwl_mvm_quota_cmd_size(mvm), &quota_cmd);
if (ret)
IWL_ERR(mvm, "Failed to send quota: %d\n", ret);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index 71a01df96f8b..4228fac77f41 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@ -1455,80 +1455,6 @@ static const char * const chanwidths[] = {
[NL80211_CHAN_WIDTH_160] = "vht160",
};
-static bool iwl_mvm_lqm_notif_wait(struct iwl_notif_wait_data *notif_wait,
- struct iwl_rx_packet *pkt, void *data)
-{
- struct ieee80211_vif *vif = data;
- struct iwl_mvm *mvm =
- container_of(notif_wait, struct iwl_mvm, notif_wait);
- struct iwl_link_qual_msrmnt_notif *report = (void *)pkt->data;
- u32 num_of_stations = le32_to_cpu(report->number_of_stations);
- int i;
-
- IWL_INFO(mvm, "LQM report:\n");
- IWL_INFO(mvm, "\tstatus: %d\n", report->status);
- IWL_INFO(mvm, "\tmacID: %d\n", le32_to_cpu(report->mac_id));
- IWL_INFO(mvm, "\ttx_frame_dropped: %d\n",
- le32_to_cpu(report->tx_frame_dropped));
- IWL_INFO(mvm, "\ttime_in_measurement_window: %d us\n",
- le32_to_cpu(report->time_in_measurement_window));
- IWL_INFO(mvm, "\ttotal_air_time_other_stations: %d\n",
- le32_to_cpu(report->total_air_time_other_stations));
- IWL_INFO(mvm, "\tchannel_freq: %d\n",
- vif->bss_conf.chandef.center_freq1);
- IWL_INFO(mvm, "\tchannel_width: %s\n",
- chanwidths[vif->bss_conf.chandef.width]);
- IWL_INFO(mvm, "\tnumber_of_stations: %d\n", num_of_stations);
- for (i = 0; i < num_of_stations; i++)
- IWL_INFO(mvm, "\t\tsta[%d]: %d\n", i,
- report->frequent_stations_air_time[i]);
-
- return true;
-}
-
-static ssize_t iwl_dbgfs_lqm_send_cmd_write(struct ieee80211_vif *vif,
- char *buf, size_t count,
- loff_t *ppos)
-{
- struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
- struct iwl_mvm *mvm = mvmvif->mvm;
- struct iwl_notification_wait wait_lqm_notif;
- static u16 lqm_notif[] = {
- WIDE_ID(MAC_CONF_GROUP,
- LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF)
- };
- int err;
- u32 duration;
- u32 timeout;
-
- if (sscanf(buf, "%d,%d", &duration, &timeout) != 2)
- return -EINVAL;
-
- iwl_init_notification_wait(&mvm->notif_wait, &wait_lqm_notif,
- lqm_notif, ARRAY_SIZE(lqm_notif),
- iwl_mvm_lqm_notif_wait, vif);
- mutex_lock(&mvm->mutex);
- err = iwl_mvm_send_lqm_cmd(vif, LQM_CMD_OPERATION_START_MEASUREMENT,
- duration, timeout);
- mutex_unlock(&mvm->mutex);
-
- if (err) {
- IWL_ERR(mvm, "Failed to send lqm cmdf(err=%d)\n", err);
- iwl_remove_notification(&mvm->notif_wait, &wait_lqm_notif);
- return err;
- }
-
- /* wait for 2 * timeout (safety guard) and convert to jiffies*/
- timeout = msecs_to_jiffies((timeout * 2) / 1000);
-
- err = iwl_wait_notification(&mvm->notif_wait, &wait_lqm_notif,
- timeout);
- if (err)
- IWL_ERR(mvm, "Getting lqm notif timed out\n");
-
- return count;
-}
-
#define MVM_DEBUGFS_WRITE_FILE_OPS(name, bufsz) \
_MVM_DEBUGFS_WRITE_FILE_OPS(name, bufsz, struct ieee80211_vif)
#define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \
@@ -1553,7 +1479,6 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_range_abort, 32);
MVM_DEBUGFS_READ_FILE_OPS(tof_range_response);
MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32);
MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32);
-MVM_DEBUGFS_WRITE_FILE_OPS(lqm_send_cmd, 64);
MVM_DEBUGFS_READ_FILE_OPS(os_device_timediff);
@@ -1594,7 +1519,6 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
S_IRUSR | S_IWUSR);
MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir,
S_IRUSR | S_IWUSR);
- MVM_DEBUGFS_ADD_FILE_VIF(lqm_send_cmd, mvmvif->dbgfs_dir, S_IWUSR);
MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff,
mvmvif->dbgfs_dir, S_IRUSR);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index e97904c2c4d4..2ff594f11259 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -660,6 +660,36 @@ out:
return ret ?: count;
}
+static ssize_t iwl_dbgfs_fw_ver_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct iwl_mvm *mvm = file->private_data;
+ char *buff, *pos, *endpos;
+ static const size_t bufsz = 1024;
+ int ret;
+
+ buff = kmalloc(bufsz, GFP_KERNEL);
+ if (!buff)
+ return -ENOMEM;
+
+ pos = buff;
+ endpos = pos + bufsz;
+
+ pos += scnprintf(pos, endpos - pos, "FW prefix: %s\n",
+ mvm->trans->cfg->fw_name_pre);
+ pos += scnprintf(pos, endpos - pos, "FW: %s\n",
+ mvm->fwrt.fw->human_readable);
+ pos += scnprintf(pos, endpos - pos, "Device: %s\n",
+ mvm->fwrt.trans->cfg->name);
+ pos += scnprintf(pos, endpos - pos, "Bus: %s\n",
+ mvm->fwrt.dev->bus->name);
+
+ ret = simple_read_from_buffer(user_buf, count, ppos, buff, pos - buff);
+ kfree(buff);
+
+ return ret;
+}
+
#define PRINT_STATS_LE32(_struct, _memb) \
pos += scnprintf(buf + pos, bufsz - pos, \
fmt_table, #_memb, \
@@ -1662,6 +1692,7 @@ MVM_DEBUGFS_READ_FILE_OPS(bt_cmd);
MVM_DEBUGFS_READ_WRITE_FILE_OPS(disable_power_off, 64);
MVM_DEBUGFS_READ_FILE_OPS(fw_rx_stats);
MVM_DEBUGFS_READ_FILE_OPS(drv_rx_stats);
+MVM_DEBUGFS_READ_FILE_OPS(fw_ver);
MVM_DEBUGFS_WRITE_FILE_OPS(fw_restart, 10);
MVM_DEBUGFS_WRITE_FILE_OPS(fw_nmi, 10);
MVM_DEBUGFS_WRITE_FILE_OPS(bt_tx_prio, 10);
@@ -1843,6 +1874,7 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir)
MVM_DEBUGFS_ADD_FILE(bt_cmd, dbgfs_dir, S_IRUSR);
MVM_DEBUGFS_ADD_FILE(disable_power_off, mvm->debugfs_dir,
S_IRUSR | S_IWUSR);
+ MVM_DEBUGFS_ADD_FILE(fw_ver, mvm->debugfs_dir, S_IRUSR);
MVM_DEBUGFS_ADD_FILE(fw_rx_stats, mvm->debugfs_dir, S_IRUSR);
MVM_DEBUGFS_ADD_FILE(drv_rx_stats, mvm->debugfs_dir, S_IRUSR);
MVM_DEBUGFS_ADD_FILE(fw_restart, mvm->debugfs_dir, S_IWUSR);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 83485493a79a..f476882291ae 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -66,7 +66,6 @@
*****************************************************************************/
#include <net/mac80211.h>
#include <linux/netdevice.h>
-#include <linux/acpi.h>
#include "iwl-trans.h"
#include "iwl-op-mode.h"
@@ -75,7 +74,7 @@
#include "iwl-csr.h" /* for iwl_mvm_rx_card_state_notif */
#include "iwl-io.h" /* for iwl_mvm_rx_card_state_notif */
#include "iwl-prph.h"
-#include "iwl-eeprom-parse.h"
+#include "fw/acpi.h"
#include "mvm.h"
#include "fw/dbg.h"
@@ -579,17 +578,6 @@ static int iwl_mvm_config_ltr(struct iwl_mvm *mvm)
}
#ifdef CONFIG_ACPI
-#define ACPI_WRDS_METHOD "WRDS"
-#define ACPI_EWRD_METHOD "EWRD"
-#define ACPI_WGDS_METHOD "WGDS"
-#define ACPI_WIFI_DOMAIN (0x07)
-#define ACPI_WRDS_WIFI_DATA_SIZE (IWL_MVM_SAR_TABLE_SIZE + 2)
-#define ACPI_EWRD_WIFI_DATA_SIZE ((IWL_MVM_SAR_PROFILE_NUM - 1) * \
- IWL_MVM_SAR_TABLE_SIZE + 3)
-#define ACPI_WGDS_WIFI_DATA_SIZE 18
-#define ACPI_WGDS_NUM_BANDS 2
-#define ACPI_WGDS_TABLE_SIZE 3
-
static int iwl_mvm_sar_set_profile(struct iwl_mvm *mvm,
union acpi_object *table,
struct iwl_mvm_sar_profile *profile,
@@ -599,7 +587,7 @@ static int iwl_mvm_sar_set_profile(struct iwl_mvm *mvm,
profile->enabled = enabled;
- for (i = 0; i < IWL_MVM_SAR_TABLE_SIZE; i++) {
+ for (i = 0; i < ACPI_SAR_TABLE_SIZE; i++) {
if ((table[i].type != ACPI_TYPE_INTEGER) ||
(table[i].integer.value > U8_MAX))
return -EINVAL;
@@ -610,88 +598,18 @@ static int iwl_mvm_sar_set_profile(struct iwl_mvm *mvm,
return 0;
}
-static union acpi_object *iwl_mvm_sar_find_wifi_pkg(struct iwl_mvm *mvm,
- union acpi_object *data,
- int data_size)
-{
- union acpi_object *wifi_pkg = NULL;
- int i;
-
- /*
- * We need at least two packages, one for the revision and one
- * for the data itself. Also check that the revision is valid
- * (i.e. it is an integer set to 0).
- */
- if (data->type != ACPI_TYPE_PACKAGE ||
- data->package.count < 2 ||
- data->package.elements[0].type != ACPI_TYPE_INTEGER ||
- data->package.elements[0].integer.value != 0) {
- IWL_DEBUG_RADIO(mvm, "Unsupported packages structure\n");
- return ERR_PTR(-EINVAL);
- }
-
- /* loop through all the packages to find the one for WiFi */
- for (i = 1; i < data->package.count; i++) {
- union acpi_object *domain;
-
- wifi_pkg = &data->package.elements[i];
-
- /* Skip anything that is not a package with the right
- * amount of elements (i.e. domain_type,
- * enabled/disabled plus the actual data size.
- */
- if (wifi_pkg->type != ACPI_TYPE_PACKAGE ||
- wifi_pkg->package.count != data_size)
- continue;
-
- domain = &wifi_pkg->package.elements[0];
- if (domain->type == ACPI_TYPE_INTEGER &&
- domain->integer.value == ACPI_WIFI_DOMAIN)
- break;
-
- wifi_pkg = NULL;
- }
-
- if (!wifi_pkg)
- return ERR_PTR(-ENOENT);
-
- return wifi_pkg;
-}
-
static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm)
{
- union acpi_object *wifi_pkg, *table;
- acpi_handle root_handle;
- acpi_handle handle;
- struct acpi_buffer wrds = {ACPI_ALLOCATE_BUFFER, NULL};
- acpi_status status;
+ union acpi_object *wifi_pkg, *table, *data;
bool enabled;
int ret;
- root_handle = ACPI_HANDLE(mvm->dev);
- if (!root_handle) {
- IWL_DEBUG_RADIO(mvm,
- "Could not retrieve root port ACPI handle\n");
- return -ENOENT;
- }
-
- /* Get the method's handle */
- status = acpi_get_handle(root_handle, (acpi_string)ACPI_WRDS_METHOD,
- &handle);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_RADIO(mvm, "WRDS method not found\n");
- return -ENOENT;
- }
-
- /* Call WRDS with no arguments */
- status = acpi_evaluate_object(handle, NULL, NULL, &wrds);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_RADIO(mvm, "WRDS invocation failed (0x%x)\n", status);
- return -ENOENT;
- }
+ data = iwl_acpi_get_object(mvm->dev, ACPI_WRDS_METHOD);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
- wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, wrds.pointer,
- ACPI_WRDS_WIFI_DATA_SIZE);
+ wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
+ ACPI_WRDS_WIFI_DATA_SIZE);
if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
@@ -712,46 +630,23 @@ static int iwl_mvm_sar_get_wrds_table(struct iwl_mvm *mvm)
*/
ret = iwl_mvm_sar_set_profile(mvm, table, &mvm->sar_profiles[0],
enabled);
-
out_free:
- kfree(wrds.pointer);
+ kfree(data);
return ret;
}
static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm)
{
- union acpi_object *wifi_pkg;
- acpi_handle root_handle;
- acpi_handle handle;
- struct acpi_buffer ewrd = {ACPI_ALLOCATE_BUFFER, NULL};
- acpi_status status;
+ union acpi_object *wifi_pkg, *data;
bool enabled;
int i, n_profiles, ret;
- root_handle = ACPI_HANDLE(mvm->dev);
- if (!root_handle) {
- IWL_DEBUG_RADIO(mvm,
- "Could not retrieve root port ACPI handle\n");
- return -ENOENT;
- }
-
- /* Get the method's handle */
- status = acpi_get_handle(root_handle, (acpi_string)ACPI_EWRD_METHOD,
- &handle);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_RADIO(mvm, "EWRD method not found\n");
- return -ENOENT;
- }
+ data = iwl_acpi_get_object(mvm->dev, ACPI_EWRD_METHOD);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
- /* Call EWRD with no arguments */
- status = acpi_evaluate_object(handle, NULL, NULL, &ewrd);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_RADIO(mvm, "EWRD invocation failed (0x%x)\n", status);
- return -ENOENT;
- }
-
- wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, ewrd.pointer,
- ACPI_EWRD_WIFI_DATA_SIZE);
+ wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
+ ACPI_EWRD_WIFI_DATA_SIZE);
if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
@@ -788,55 +683,33 @@ static int iwl_mvm_sar_get_ewrd_table(struct iwl_mvm *mvm)
break;
/* go to the next table */
- pos += IWL_MVM_SAR_TABLE_SIZE;
+ pos += ACPI_SAR_TABLE_SIZE;
}
out_free:
- kfree(ewrd.pointer);
+ kfree(data);
return ret;
}
static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm)
{
- union acpi_object *wifi_pkg;
- acpi_handle root_handle;
- acpi_handle handle;
- struct acpi_buffer wgds = {ACPI_ALLOCATE_BUFFER, NULL};
- acpi_status status;
+ union acpi_object *wifi_pkg, *data;
int i, j, ret;
int idx = 1;
- root_handle = ACPI_HANDLE(mvm->dev);
- if (!root_handle) {
- IWL_DEBUG_RADIO(mvm,
- "Could not retrieve root port ACPI handle\n");
- return -ENOENT;
- }
+ data = iwl_acpi_get_object(mvm->dev, ACPI_WGDS_METHOD);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
- /* Get the method's handle */
- status = acpi_get_handle(root_handle, (acpi_string)ACPI_WGDS_METHOD,
- &handle);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_RADIO(mvm, "WGDS method not found\n");
- return -ENOENT;
- }
-
- /* Call WGDS with no arguments */
- status = acpi_evaluate_object(handle, NULL, NULL, &wgds);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_RADIO(mvm, "WGDS invocation failed (0x%x)\n", status);
- return -ENOENT;
- }
-
- wifi_pkg = iwl_mvm_sar_find_wifi_pkg(mvm, wgds.pointer,
- ACPI_WGDS_WIFI_DATA_SIZE);
+ wifi_pkg = iwl_acpi_get_wifi_pkg(mvm->dev, data,
+ ACPI_WGDS_WIFI_DATA_SIZE);
if (IS_ERR(wifi_pkg)) {
ret = PTR_ERR(wifi_pkg);
goto out_free;
}
- for (i = 0; i < IWL_NUM_GEO_PROFILES; i++) {
- for (j = 0; j < IWL_MVM_GEO_TABLE_SIZE; j++) {
+ for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
+ for (j = 0; j < ACPI_GEO_TABLE_SIZE; j++) {
union acpi_object *entry;
entry = &wifi_pkg->package.elements[idx++];
@@ -851,7 +724,7 @@ static int iwl_mvm_sar_get_wgds_table(struct iwl_mvm *mvm)
}
ret = 0;
out_free:
- kfree(wgds.pointer);
+ kfree(data);
return ret;
}
@@ -861,25 +734,25 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
.v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS),
};
int i, j, idx;
- int profs[IWL_NUM_CHAIN_LIMITS] = { prof_a, prof_b };
+ int profs[ACPI_SAR_NUM_CHAIN_LIMITS] = { prof_a, prof_b };
int len = sizeof(cmd);
- BUILD_BUG_ON(IWL_NUM_CHAIN_LIMITS < 2);
- BUILD_BUG_ON(IWL_NUM_CHAIN_LIMITS * IWL_NUM_SUB_BANDS !=
- IWL_MVM_SAR_TABLE_SIZE);
+ BUILD_BUG_ON(ACPI_SAR_NUM_CHAIN_LIMITS < 2);
+ BUILD_BUG_ON(ACPI_SAR_NUM_CHAIN_LIMITS * ACPI_SAR_NUM_SUB_BANDS !=
+ ACPI_SAR_TABLE_SIZE);
if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK))
len = sizeof(cmd.v3);
- for (i = 0; i < IWL_NUM_CHAIN_LIMITS; i++) {
+ for (i = 0; i < ACPI_SAR_NUM_CHAIN_LIMITS; i++) {
struct iwl_mvm_sar_profile *prof;
/* don't allow SAR to be disabled (profile 0 means disable) */
if (profs[i] == 0)
return -EPERM;
- /* we are off by one, so allow up to IWL_MVM_SAR_PROFILE_NUM */
- if (profs[i] > IWL_MVM_SAR_PROFILE_NUM)
+ /* we are off by one, so allow up to ACPI_SAR_PROFILE_NUM */
+ if (profs[i] > ACPI_SAR_PROFILE_NUM)
return -EINVAL;
/* profiles go from 1 to 4, so decrement to access the array */
@@ -894,8 +767,8 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
}
IWL_DEBUG_RADIO(mvm, " Chain[%d]:\n", i);
- for (j = 0; j < IWL_NUM_SUB_BANDS; j++) {
- idx = (i * IWL_NUM_SUB_BANDS) + j;
+ for (j = 0; j < ACPI_SAR_NUM_SUB_BANDS; j++) {
+ idx = (i * ACPI_SAR_NUM_SUB_BANDS) + j;
cmd.v3.per_chain_restriction[i][j] =
cpu_to_le16(prof->table[idx]);
IWL_DEBUG_RADIO(mvm, " Band[%d] = %d * .125dBm\n",
@@ -931,7 +804,7 @@ int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm)
resp = (void *)cmd.resp_pkt->data;
ret = le32_to_cpu(resp->profile_idx);
- if (WARN_ON(ret > IWL_NUM_GEO_PROFILES)) {
+ if (WARN_ON(ret > ACPI_NUM_GEO_PROFILES)) {
ret = -EIO;
IWL_WARN(mvm, "Invalid geographic profile idx (%d)\n", ret);
}
@@ -959,10 +832,12 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
IWL_DEBUG_RADIO(mvm, "Sending GEO_TX_POWER_LIMIT\n");
- BUILD_BUG_ON(IWL_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS *
+ BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES * ACPI_WGDS_NUM_BANDS *
ACPI_WGDS_TABLE_SIZE != ACPI_WGDS_WIFI_DATA_SIZE);
- for (i = 0; i < IWL_NUM_GEO_PROFILES; i++) {
+ BUILD_BUG_ON(ACPI_NUM_GEO_PROFILES > IWL_NUM_GEO_PROFILES);
+
+ for (i = 0; i < ACPI_NUM_GEO_PROFILES; i++) {
struct iwl_per_chain_offset *chain =
(struct iwl_per_chain_offset *)&cmd.table[i];
@@ -970,7 +845,7 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm)
u8 *value;
value = &mvm->geo_profiles[i].values[j *
- IWL_GEO_PER_CHAIN_SIZE];
+ ACPI_GEO_PER_CHAIN_SIZE];
chain[j].max_tx_power = cpu_to_le16(value[0]);
chain[j].chain_a = value[1];
chain[j].chain_b = value[2];
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index a9ac872226fd..0b3d275fe177 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1899,11 +1899,6 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm,
if (changes & BSS_CHANGED_ASSOC && bss_conf->assoc)
iwl_mvm_mac_ctxt_recalc_tsf_id(mvm, vif);
- if (changes & BSS_CHANGED_ASSOC && !bss_conf->assoc &&
- mvmvif->lqm_active)
- iwl_mvm_send_lqm_cmd(vif, LQM_CMD_OPERATION_STOP_MEASUREMENT,
- 0, 0);
-
/*
* If we're not associated yet, take the (new) BSSID before associating
* so the firmware knows. If we're already associated, then use the old
@@ -3432,10 +3427,24 @@ static void iwl_mvm_change_chanctx(struct ieee80211_hw *hw,
return;
mutex_lock(&mvm->mutex);
+
+ /* we are only changing the min_width, may be a noop */
+ if (changed == IEEE80211_CHANCTX_CHANGE_MIN_WIDTH) {
+ if (phy_ctxt->width == ctx->min_def.width)
+ goto out_unlock;
+
+ /* we are just toggling between 20_NOHT and 20 */
+ if (phy_ctxt->width <= NL80211_CHAN_WIDTH_20 &&
+ ctx->min_def.width <= NL80211_CHAN_WIDTH_20)
+ goto out_unlock;
+ }
+
iwl_mvm_bt_coex_vif_change(mvm);
iwl_mvm_phy_ctxt_changed(mvm, phy_ctxt, &ctx->min_def,
ctx->rx_chains_static,
ctx->rx_chains_dynamic);
+
+out_unlock:
mutex_unlock(&mvm->mutex);
}
@@ -3900,11 +3909,6 @@ static int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw,
break;
case NL80211_IFTYPE_STATION:
- if (mvmvif->lqm_active)
- iwl_mvm_send_lqm_cmd(vif,
- LQM_CMD_OPERATION_STOP_MEASUREMENT,
- 0, 0);
-
/* Schedule the time event to a bit before beacon 1,
* to make sure we're in the new channel when the
* GO/AP arrives. In case count <= 1 immediately schedule the
@@ -4259,31 +4263,6 @@ static void iwl_mvm_event_bar_rx_callback(struct iwl_mvm *mvm,
event->u.ba.ssn);
}
-static void
-iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm,
- struct ieee80211_vif *vif,
- const struct ieee80211_event *event)
-{
- struct iwl_fw_dbg_trigger_tlv *trig;
- struct iwl_fw_dbg_trigger_ba *ba_trig;
-
- if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_BA))
- return;
-
- trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA);
- ba_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
- ieee80211_vif_to_wdev(vif), trig))
- return;
-
- if (!(le16_to_cpu(ba_trig->frame_timeout) & BIT(event->u.ba.tid)))
- return;
-
- iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
- "Frame from %pM timed out, tid %d",
- event->u.ba.sta->addr, event->u.ba.tid);
-}
-
static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw,
struct ieee80211_vif *vif,
const struct ieee80211_event *event)
@@ -4298,7 +4277,8 @@ static void iwl_mvm_mac_event_callback(struct ieee80211_hw *hw,
iwl_mvm_event_bar_rx_callback(mvm, vif, event);
break;
case BA_FRAME_TIMEOUT:
- iwl_mvm_event_frame_timeout_callback(mvm, vif, event);
+ iwl_mvm_event_frame_timeout_callback(mvm, vif, event->u.ba.sta,
+ event->u.ba.tid);
break;
default:
break;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 949e63418299..bf25c3ce7c95 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -89,6 +89,7 @@
#include "tof.h"
#include "fw/runtime.h"
#include "fw/dbg.h"
+#include "fw/acpi.h"
#define IWL_MVM_MAX_ADDRESSES 5
/* RSSI offset for WkP */
@@ -147,6 +148,8 @@ struct iwl_mvm_phy_ctxt {
u16 color;
u32 ref;
+ enum nl80211_chan_width width;
+
/*
* TODO: This should probably be removed. Currently here only for rate
* scaling algorithm
@@ -436,12 +439,6 @@ struct iwl_mvm_vif {
/* TCP Checksum Offload */
netdev_features_t features;
-
- /*
- * link quality measurement - used to check whether this interface
- * is in the middle of a link quality measurement
- */
- bool lqm_active;
};
static inline struct iwl_mvm_vif *
@@ -592,6 +589,7 @@ enum iwl_mvm_tdls_cs_state {
* @queue: queue of this reorder buffer
* @last_amsdu: track last ASMDU SN for duplication detection
* @last_sub_index: track ASMDU sub frame index for duplication detection
+ * @tid: the tid
* @entries: list of skbs stored
* @reorder_time: time the packet was stored in the reorder buffer
* @reorder_timer: timer for frames are in the reorder buffer. For AMSDU
@@ -609,6 +607,7 @@ struct iwl_mvm_reorder_buffer {
int queue;
u16 last_amsdu;
u8 last_sub_index;
+ u8 tid;
struct sk_buff_head entries[IEEE80211_MAX_AMPDU_BUF];
unsigned long reorder_time[IEEE80211_MAX_AMPDU_BUF];
struct timer_list reorder_timer;
@@ -685,20 +684,14 @@ enum iwl_mvm_queue_status {
#define IWL_MVM_NUM_CIPHERS 10
-#ifdef CONFIG_ACPI
-#define IWL_MVM_SAR_TABLE_SIZE 10
-#define IWL_MVM_SAR_PROFILE_NUM 4
-#define IWL_MVM_GEO_TABLE_SIZE 6
-
struct iwl_mvm_sar_profile {
bool enabled;
- u8 table[IWL_MVM_SAR_TABLE_SIZE];
+ u8 table[ACPI_SAR_TABLE_SIZE];
};
struct iwl_mvm_geo_profile {
- u8 values[IWL_MVM_GEO_TABLE_SIZE];
+ u8 values[ACPI_GEO_TABLE_SIZE];
};
-#endif
struct iwl_mvm {
/* for logger access */
@@ -1019,8 +1012,8 @@ struct iwl_mvm {
/* does a monitor vif exist (only one can exist hence bool) */
bool monitor_on;
#ifdef CONFIG_ACPI
- struct iwl_mvm_sar_profile sar_profiles[IWL_MVM_SAR_PROFILE_NUM];
- struct iwl_mvm_geo_profile geo_profiles[IWL_NUM_GEO_PROFILES];
+ struct iwl_mvm_sar_profile sar_profiles[ACPI_SAR_PROFILE_NUM];
+ struct iwl_mvm_geo_profile geo_profiles[ACPI_NUM_GEO_PROFILES];
#endif
};
@@ -1251,6 +1244,12 @@ static inline bool iwl_mvm_has_new_ats_coex_api(struct iwl_mvm *mvm)
IWL_UCODE_TLV_API_COEX_ATS_EXTERNAL);
}
+static inline bool iwl_mvm_has_quota_low_latency(struct iwl_mvm *mvm)
+{
+ return fw_has_api(&mvm->fw->ucode_capa,
+ IWL_UCODE_TLV_API_QUOTA_LOW_LATENCY);
+}
+
static inline struct agg_tx_status *
iwl_mvm_get_agg_status(struct iwl_mvm *mvm, void *tx_resp)
{
@@ -1489,6 +1488,27 @@ int iwl_mvm_binding_add_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
int iwl_mvm_binding_remove_vif(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
/* Quota management */
+static inline size_t iwl_mvm_quota_cmd_size(struct iwl_mvm *mvm)
+{
+ return iwl_mvm_has_quota_low_latency(mvm) ?
+ sizeof(struct iwl_time_quota_cmd) :
+ sizeof(struct iwl_time_quota_cmd_v1);
+}
+
+static inline struct iwl_time_quota_data
+*iwl_mvm_quota_cmd_get_quota(struct iwl_mvm *mvm,
+ struct iwl_time_quota_cmd *cmd,
+ int i)
+{
+ struct iwl_time_quota_data_v1 *quotas;
+
+ if (iwl_mvm_has_quota_low_latency(mvm))
+ return &cmd->quotas[i];
+
+ quotas = (struct iwl_time_quota_data_v1 *)cmd->quotas;
+ return (struct iwl_time_quota_data *)&quotas[i];
+}
+
int iwl_mvm_update_quotas(struct iwl_mvm *mvm, bool force_upload,
struct ieee80211_vif *disabled_vif);
@@ -1821,12 +1841,10 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm,
bool tdls, bool cmd_q);
void iwl_mvm_connection_loss(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
const char *errmsg);
-
-/* Link Quality Measurement */
-int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif,
- enum iwl_lqm_cmd_operatrions operation,
- u32 duration, u32 timeout);
-bool iwl_mvm_lqm_active(struct iwl_mvm *mvm);
+void iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ const struct ieee80211_sta *sta,
+ u16 tid);
int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b);
int iwl_mvm_get_sar_geo_profile(struct iwl_mvm *mvm);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
index fb25b6f29323..5bfe5306524c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c
@@ -73,6 +73,7 @@
#include "iwl-eeprom-read.h"
#include "iwl-nvm-parse.h"
#include "iwl-prph.h"
+#include "fw/acpi.h"
/* Default NVM size to read */
#define IWL_NVM_DEFAULT_CHUNK_SIZE (2*1024)
@@ -784,7 +785,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm)
return -EIO;
if (iwl_mvm_is_wifi_mcc_supported(mvm) &&
- !iwl_get_bios_mcc(mvm->dev, mcc)) {
+ !iwl_acpi_get_mcc(mvm->dev, mcc)) {
kfree(regd);
regd = iwl_mvm_get_regdomain(mvm->hw->wiphy, mcc,
MCC_SOURCE_BIOS, NULL);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 231878969332..e82b4462722b 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -86,6 +86,7 @@
#include "time-event.h"
#include "fw-api.h"
#include "fw/api/scan.h"
+#include "fw/acpi.h"
#define DRV_DESCRIPTION "The new Intel(R) wireless AGN driver for Linux"
MODULE_DESCRIPTION(DRV_DESCRIPTION);
@@ -423,8 +424,6 @@ static const struct iwl_hcmd_names iwl_mvm_system_names[] = {
* Access is done through binary search
*/
static const struct iwl_hcmd_names iwl_mvm_mac_conf_names[] = {
- HCMD_NAME(LINK_QUALITY_MEASUREMENT_CMD),
- HCMD_NAME(LINK_QUALITY_MEASUREMENT_COMPLETE_NOTIF),
HCMD_NAME(CHANNEL_SWITCH_NOA_NOTIF),
};
@@ -490,18 +489,21 @@ static const struct iwl_hcmd_arr iwl_mvm_groups[] = {
static void iwl_mvm_async_handlers_wk(struct work_struct *wk);
static void iwl_mvm_d0i3_exit_work(struct work_struct *wk);
-static u32 calc_min_backoff(struct iwl_trans *trans, const struct iwl_cfg *cfg)
+static u32 iwl_mvm_min_backoff(struct iwl_mvm *mvm)
{
- const struct iwl_pwr_tx_backoff *pwr_tx_backoff = cfg->pwr_tx_backoffs;
+ const struct iwl_pwr_tx_backoff *backoff = mvm->cfg->pwr_tx_backoffs;
+ u64 dflt_pwr_limit;
- if (!pwr_tx_backoff)
+ if (!backoff)
return 0;
- while (pwr_tx_backoff->pwr) {
- if (trans->dflt_pwr_limit >= pwr_tx_backoff->pwr)
- return pwr_tx_backoff->backoff;
+ dflt_pwr_limit = iwl_acpi_get_pwr_limit(mvm->dev);
- pwr_tx_backoff++;
+ while (backoff->pwr) {
+ if (dflt_pwr_limit >= backoff->pwr)
+ return backoff->backoff;
+
+ backoff++;
}
return 0;
@@ -771,7 +773,7 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
goto out_free;
mvm->hw_registered = true;
- min_backoff = calc_min_backoff(trans, cfg);
+ min_backoff = iwl_mvm_min_backoff(mvm);
iwl_mvm_thermal_initialize(mvm, min_backoff);
err = iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
index 7ee8e9077baf..305cd56bf746 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/phy-ctxt.c
@@ -272,6 +272,7 @@ int iwl_mvm_phy_ctxt_changed(struct iwl_mvm *mvm, struct iwl_mvm_phy_ctxt *ctxt,
}
ctxt->channel = chandef->chan;
+ ctxt->width = chandef->width;
return iwl_mvm_phy_ctxt_apply(mvm, ctxt, chandef,
chains_static, chains_dynamic,
action, 0);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/quota.c b/drivers/net/wireless/intel/iwlwifi/mvm/quota.c
index 2141db5bff82..b4a0264329b7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/quota.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/quota.c
@@ -7,7 +7,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2016 Intel Deutschland GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
@@ -34,7 +34,7 @@
*
* Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
* Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
- * Copyright(c) 2016 Intel Deutschland GmbH
+ * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -164,9 +164,12 @@ static void iwl_mvm_adjust_quota_for_noa(struct iwl_mvm *mvm,
beacon_int = mvm->noa_vif->bss_conf.beacon_int;
for (i = 0; i < MAX_BINDINGS; i++) {
- u32 id_n_c = le32_to_cpu(cmd->quotas[i].id_and_color);
+ struct iwl_time_quota_data *data =
+ iwl_mvm_quota_cmd_get_quota(mvm, cmd,
+ i);
+ u32 id_n_c = le32_to_cpu(data->id_and_color);
u32 id = (id_n_c & FW_CTXT_ID_MSK) >> FW_CTXT_ID_POS;
- u32 quota = le32_to_cpu(cmd->quotas[i].quota);
+ u32 quota = le32_to_cpu(data->quota);
if (id != phy_id)
continue;
@@ -175,9 +178,9 @@ static void iwl_mvm_adjust_quota_for_noa(struct iwl_mvm *mvm,
quota /= beacon_int;
IWL_DEBUG_QUOTA(mvm, "quota: adjust for NoA from %d to %d\n",
- le32_to_cpu(cmd->quotas[i].quota), quota);
+ le32_to_cpu(data->quota), quota);
- cmd->quotas[i].quota = cpu_to_le32(quota);
+ data->quota = cpu_to_le32(quota);
}
#endif
}
@@ -194,6 +197,7 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm,
.disabled_vif = disabled_vif,
};
struct iwl_time_quota_cmd *last = &mvm->last_quota_cmd;
+ struct iwl_time_quota_data *qdata, *last_data;
bool send = false;
lockdep_assert_held(&mvm->mutex);
@@ -216,7 +220,8 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm,
*/
num_active_macs = 0;
for (i = 0; i < MAX_BINDINGS; i++) {
- cmd.quotas[i].id_and_color = cpu_to_le32(FW_CTXT_INVALID);
+ qdata = iwl_mvm_quota_cmd_get_quota(mvm, &cmd, i);
+ qdata->id_and_color = cpu_to_le32(FW_CTXT_INVALID);
num_active_macs += data.n_interfaces[i];
}
@@ -265,14 +270,16 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm,
if (data.colors[i] < 0)
continue;
- cmd.quotas[idx].id_and_color =
+ qdata = iwl_mvm_quota_cmd_get_quota(mvm, &cmd, idx);
+
+ qdata->id_and_color =
cpu_to_le32(FW_CMD_ID_AND_COLOR(i, data.colors[i]));
if (data.n_interfaces[i] <= 0)
- cmd.quotas[idx].quota = cpu_to_le32(0);
+ qdata->quota = cpu_to_le32(0);
#ifdef CONFIG_IWLWIFI_DEBUGFS
else if (data.dbgfs_min[i])
- cmd.quotas[idx].quota =
+ qdata->quota =
cpu_to_le32(data.dbgfs_min[i] * QUOTA_100 / 100);
#endif
else if (data.n_low_latency_bindings == 1 && n_non_lowlat &&
@@ -283,24 +290,25 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm,
* the minimal required quota for the low latency
* binding.
*/
- cmd.quotas[idx].quota = cpu_to_le32(QUOTA_LOWLAT_MIN);
+ qdata->quota = cpu_to_le32(QUOTA_LOWLAT_MIN);
else
- cmd.quotas[idx].quota =
+ qdata->quota =
cpu_to_le32(quota * data.n_interfaces[i]);
- WARN_ONCE(le32_to_cpu(cmd.quotas[idx].quota) > QUOTA_100,
+ WARN_ONCE(le32_to_cpu(qdata->quota) > QUOTA_100,
"Binding=%d, quota=%u > max=%u\n",
- idx, le32_to_cpu(cmd.quotas[idx].quota), QUOTA_100);
+ idx, le32_to_cpu(qdata->quota), QUOTA_100);
- cmd.quotas[idx].max_duration = cpu_to_le32(0);
+ qdata->max_duration = cpu_to_le32(0);
idx++;
}
/* Give the remainder of the session to the first data binding */
for (i = 0; i < MAX_BINDINGS; i++) {
- if (le32_to_cpu(cmd.quotas[i].quota) != 0) {
- le32_add_cpu(&cmd.quotas[i].quota, quota_rem);
+ qdata = iwl_mvm_quota_cmd_get_quota(mvm, &cmd, i);
+ if (le32_to_cpu(qdata->quota) != 0) {
+ le32_add_cpu(&qdata->quota, quota_rem);
IWL_DEBUG_QUOTA(mvm,
"quota: giving remainder of %d to binding %d\n",
quota_rem, i);
@@ -312,17 +320,19 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm,
/* check that we have non-zero quota for all valid bindings */
for (i = 0; i < MAX_BINDINGS; i++) {
- if (cmd.quotas[i].id_and_color != last->quotas[i].id_and_color)
+ qdata = iwl_mvm_quota_cmd_get_quota(mvm, &cmd, i);
+ last_data = iwl_mvm_quota_cmd_get_quota(mvm, last, i);
+ if (qdata->id_and_color != last_data->id_and_color)
send = true;
- if (cmd.quotas[i].max_duration != last->quotas[i].max_duration)
+ if (qdata->max_duration != last_data->max_duration)
send = true;
- if (abs((int)le32_to_cpu(cmd.quotas[i].quota) -
- (int)le32_to_cpu(last->quotas[i].quota))
+ if (abs((int)le32_to_cpu(qdata->quota) -
+ (int)le32_to_cpu(last_data->quota))
> IWL_MVM_QUOTA_THRESHOLD)
send = true;
- if (cmd.quotas[i].id_and_color == cpu_to_le32(FW_CTXT_INVALID))
+ if (qdata->id_and_color == cpu_to_le32(FW_CTXT_INVALID))
continue;
- WARN_ONCE(cmd.quotas[i].quota == 0,
+ WARN_ONCE(qdata->quota == 0,
"zero quota on binding %d\n", i);
}
@@ -334,7 +344,8 @@ int iwl_mvm_update_quotas(struct iwl_mvm *mvm,
return 0;
}
- err = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, 0, sizeof(cmd), &cmd);
+ err = iwl_mvm_send_cmd_pdu(mvm, TIME_QUOTA_CMD, 0,
+ iwl_mvm_quota_cmd_size(mvm), &cmd);
if (err)
IWL_ERR(mvm, "Failed to send quota: %d\n", err);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
index 2d14a58cbdd7..d1a40688d5e1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rx.c
@@ -663,11 +663,10 @@ void iwl_mvm_handle_rx_statistics(struct iwl_mvm *mvm,
expected_size = sizeof(struct iwl_notif_statistics_cdb);
}
- if (iwl_rx_packet_payload_len(pkt) != expected_size) {
- IWL_ERR(mvm, "received invalid statistics size (%d)!\n",
- iwl_rx_packet_payload_len(pkt));
+ if (WARN_ONCE(iwl_rx_packet_payload_len(pkt) != expected_size,
+ "received invalid statistics size (%d)!\n",
+ iwl_rx_packet_payload_len(pkt)))
return;
- }
if (!iwl_mvm_has_new_rx_stats_api(mvm)) {
struct iwl_notif_statistics_v11 *stats = (void *)&pkt->data;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 248699c2c4bf..5e679859f948 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@ -494,13 +494,18 @@ void iwl_mvm_reorder_timer_expired(unsigned long data)
if (expired) {
struct ieee80211_sta *sta;
+ struct iwl_mvm_sta *mvmsta;
rcu_read_lock();
sta = rcu_dereference(buf->mvm->fw_id_to_mac_id[buf->sta_id]);
+ mvmsta = iwl_mvm_sta_from_mac80211(sta);
+
/* SN is set to the last expired frame + 1 */
IWL_DEBUG_HT(buf->mvm,
"Releasing expired frames for sta %u, sn %d\n",
buf->sta_id, sn);
+ iwl_mvm_event_frame_timeout_callback(buf->mvm, mvmsta->vif,
+ sta, buf->tid);
iwl_mvm_release_frames(buf->mvm, sta, NULL, buf, sn);
rcu_read_unlock();
} else {
@@ -621,7 +626,8 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm,
return false;
/* no sta yet */
- if (WARN_ON(IS_ERR_OR_NULL(sta)))
+ if (WARN_ONCE(IS_ERR_OR_NULL(sta),
+ "Got valid BAID without a valid station assigned\n"))
return false;
mvm_sta = iwl_mvm_sta_from_mac80211(sta);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index c4a343534c5e..282424f40c43 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2158,6 +2158,7 @@ static void iwl_mvm_init_reorder_buffer(struct iwl_mvm *mvm,
reorder_buf->mvm = mvm;
reorder_buf->queue = i;
reorder_buf->sta_id = sta_id;
+ reorder_buf->tid = data->tid;
reorder_buf->valid = false;
for (j = 0; j < reorder_buf->buf_size; j++)
__skb_queue_head_init(&reorder_buf->entries[j]);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index 2ea74abad73d..2da1b088ac01 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -1368,6 +1368,31 @@ void iwl_mvm_inactivity_check(struct iwl_mvm *mvm)
rcu_read_unlock();
}
+void iwl_mvm_event_frame_timeout_callback(struct iwl_mvm *mvm,
+ struct ieee80211_vif *vif,
+ const struct ieee80211_sta *sta,
+ u16 tid)
+{
+ struct iwl_fw_dbg_trigger_tlv *trig;
+ struct iwl_fw_dbg_trigger_ba *ba_trig;
+
+ if (!iwl_fw_dbg_trigger_enabled(mvm->fw, FW_DBG_TRIGGER_BA))
+ return;
+
+ trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA);
+ ba_trig = (void *)trig->data;
+ if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt,
+ ieee80211_vif_to_wdev(vif), trig))
+ return;
+
+ if (!(le16_to_cpu(ba_trig->frame_timeout) & BIT(tid)))
+ return;
+
+ iwl_fw_dbg_collect_trig(&mvm->fwrt, trig,
+ "Frame from %pM timed out, tid %d",
+ sta->addr, tid);
+}
+
void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime)
{
bool ps_disabled;
@@ -1389,74 +1414,3 @@ void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime)
iwl_mvm_power_update_device(mvm);
}
}
-
-int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif,
- enum iwl_lqm_cmd_operatrions operation,
- u32 duration, u32 timeout)
-{
- struct iwl_mvm_vif *mvm_vif = iwl_mvm_vif_from_mac80211(vif);
- struct iwl_link_qual_msrmnt_cmd cmd = {
- .cmd_operation = cpu_to_le32(operation),
- .mac_id = cpu_to_le32(mvm_vif->id),
- .measurement_time = cpu_to_le32(duration),
- .timeout = cpu_to_le32(timeout),
- };
- u32 cmdid =
- iwl_cmd_id(LINK_QUALITY_MEASUREMENT_CMD, MAC_CONF_GROUP, 0);
- int ret;
-
- if (!fw_has_capa(&mvm_vif->mvm->fw->ucode_capa,
- IWL_UCODE_TLV_CAPA_LQM_SUPPORT))
- return -EOPNOTSUPP;
-
- if (vif->type != NL80211_IFTYPE_STATION || vif->p2p)
- return -EINVAL;
-
- switch (operation) {
- case LQM_CMD_OPERATION_START_MEASUREMENT:
- if (iwl_mvm_lqm_active(mvm_vif->mvm))
- return -EBUSY;
- if (!vif->bss_conf.assoc)
- return -EINVAL;
- mvm_vif->lqm_active = true;
- break;
- case LQM_CMD_OPERATION_STOP_MEASUREMENT:
- if (!iwl_mvm_lqm_active(mvm_vif->mvm))
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- ret = iwl_mvm_send_cmd_pdu(mvm_vif->mvm, cmdid, 0, sizeof(cmd),
- &cmd);
-
- /* command failed - roll back lqm_active state */
- if (ret) {
- mvm_vif->lqm_active =
- operation == LQM_CMD_OPERATION_STOP_MEASUREMENT;
- }
-
- return ret;
-}
-
-static void iwl_mvm_lqm_active_iterator(void *_data, u8 *mac,
- struct ieee80211_vif *vif)
-{
- struct iwl_mvm_vif *mvm_vif = iwl_mvm_vif_from_mac80211(vif);
- bool *lqm_active = _data;
-
- *lqm_active = *lqm_active || mvm_vif->lqm_active;
-}
-
-bool iwl_mvm_lqm_active(struct iwl_mvm *mvm)
-{
- bool ret = false;
-
- lockdep_assert_held(&mvm->mutex);
- ieee80211_iterate_active_interfaces_atomic(
- mvm->hw, IEEE80211_IFACE_ITER_NORMAL,
- iwl_mvm_lqm_active_iterator, &ret);
-
- return ret;
-}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
index 3fc4343581ee..5ef216f3a60b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
@@ -244,7 +244,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans,
ctxt_info->hcmd_cfg.cmd_queue_addr =
cpu_to_le64(trans_pcie->txq[trans_pcie->cmd_queue]->dma_addr);
ctxt_info->hcmd_cfg.cmd_queue_size =
- TFD_QUEUE_CB_SIZE(TFD_CMD_SLOTS);
+ TFD_QUEUE_CB_SIZE(trans_pcie->tx_cmd_queue_size);
/* allocate ucode sections in dram and set addresses */
ret = iwl_pcie_ctxt_info_init_fw_sec(trans, fw, ctxt_info);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 858765fed8f8..ce7254ec0514 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -73,6 +73,8 @@
#include <linux/pci-aspm.h>
#include <linux/acpi.h>
+#include "fw/acpi.h"
+
#include "iwl-trans.h"
#include "iwl-drv.h"
#include "internal.h"
@@ -576,7 +578,8 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
{IWL_PCI_DEVICE(0x2720, 0x0000, iwla000_2ax_cfg_hr)},
{IWL_PCI_DEVICE(0x34F0, 0x0070, iwla000_2ax_cfg_hr)},
{IWL_PCI_DEVICE(0x2720, 0x0078, iwla000_2ax_cfg_hr)},
- {IWL_PCI_DEVICE(0x2720, 0x0070, iwla000_2ax_cfg_hr)},
+ {IWL_PCI_DEVICE(0x2720, 0x0070, iwla000_2ac_cfg_hr_cdb)},
+ {IWL_PCI_DEVICE(0x2720, 0x0030, iwla000_2ac_cfg_hr_cdb)},
{IWL_PCI_DEVICE(0x2720, 0x1080, iwla000_2ax_cfg_hr)},
#endif /* CONFIG_IWLMVM */
@@ -584,99 +587,6 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
};
MODULE_DEVICE_TABLE(pci, iwl_hw_card_ids);
-#ifdef CONFIG_ACPI
-#define ACPI_SPLC_METHOD "SPLC"
-#define ACPI_SPLC_DOMAIN_WIFI (0x07)
-
-static u64 splc_get_pwr_limit(struct iwl_trans *trans, union acpi_object *splc)
-{
- union acpi_object *data_pkg, *dflt_pwr_limit;
- int i;
-
- /* We need at least two elements, one for the revision and one
- * for the data itself. Also check that the revision is
- * supported (currently only revision 0).
- */
- if (splc->type != ACPI_TYPE_PACKAGE ||
- splc->package.count < 2 ||
- splc->package.elements[0].type != ACPI_TYPE_INTEGER ||
- splc->package.elements[0].integer.value != 0) {
- IWL_DEBUG_INFO(trans,
- "Unsupported structure returned by the SPLC method. Ignoring.\n");
- return 0;
- }
-
- /* loop through all the packages to find the one for WiFi */
- for (i = 1; i < splc->package.count; i++) {
- union acpi_object *domain;
-
- data_pkg = &splc->package.elements[i];
-
- /* Skip anything that is not a package with the right
- * amount of elements (i.e. at least 2 integers).
- */
- if (data_pkg->type != ACPI_TYPE_PACKAGE ||
- data_pkg->package.count < 2 ||
- data_pkg->package.elements[0].type != ACPI_TYPE_INTEGER ||
- data_pkg->package.elements[1].type != ACPI_TYPE_INTEGER)
- continue;
-
- domain = &data_pkg->package.elements[0];
- if (domain->integer.value == ACPI_SPLC_DOMAIN_WIFI)
- break;
-
- data_pkg = NULL;
- }
-
- if (!data_pkg) {
- IWL_DEBUG_INFO(trans,
- "No element for the WiFi domain returned by the SPLC method.\n");
- return 0;
- }
-
- dflt_pwr_limit = &data_pkg->package.elements[1];
- return dflt_pwr_limit->integer.value;
-}
-
-static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev)
-{
- acpi_handle pxsx_handle;
- acpi_handle handle;
- struct acpi_buffer splc = {ACPI_ALLOCATE_BUFFER, NULL};
- acpi_status status;
-
- pxsx_handle = ACPI_HANDLE(&pdev->dev);
- if (!pxsx_handle) {
- IWL_DEBUG_INFO(trans,
- "Could not retrieve root port ACPI handle\n");
- return;
- }
-
- /* Get the method's handle */
- status = acpi_get_handle(pxsx_handle, (acpi_string)ACPI_SPLC_METHOD,
- &handle);
- if (ACPI_FAILURE(status)) {
- IWL_DEBUG_INFO(trans, "SPLC method not found\n");
- return;
- }
-
- /* Call SPLC with no arguments */
- status = acpi_evaluate_object(handle, NULL, NULL, &splc);
- if (ACPI_FAILURE(status)) {
- IWL_ERR(trans, "SPLC invocation failed (0x%x)\n", status);
- return;
- }
-
- trans->dflt_pwr_limit = splc_get_pwr_limit(trans, splc.pointer);
- IWL_DEBUG_INFO(trans, "Default power limit set to %lld\n",
- trans->dflt_pwr_limit);
- kfree(splc.pointer);
-}
-
-#else /* CONFIG_ACPI */
-static void set_dflt_pwr_limit(struct iwl_trans *trans, struct pci_dev *pdev) {}
-#endif
-
/* PCI registers */
#define PCI_CFG_RETRY_TIMEOUT 0x041
@@ -740,8 +650,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto out_free_trans;
}
- set_dflt_pwr_limit(iwl_trans, pdev);
-
/* register transport layer debugfs here */
ret = iwl_trans_pcie_dbgfs_register(iwl_trans);
if (ret)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 4fb7647995c3..d749abeca3ae 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -383,6 +383,7 @@ struct iwl_self_init_dram {
* @hw_init_mask: initial unmasked hw causes
* @fh_mask: current unmasked fh causes
* @hw_mask: current unmasked hw causes
+ * @tx_cmd_queue_size: the size of the tx command queue
*/
struct iwl_trans_pcie {
struct iwl_rxq *rxq;
@@ -442,6 +443,7 @@ struct iwl_trans_pcie {
bool bc_table_dword;
bool scd_set_active;
bool sw_csum_tx;
+ bool pcie_dbg_dumped_once;
u32 rx_page_order;
/*protect hw register */
@@ -463,6 +465,7 @@ struct iwl_trans_pcie {
u32 fh_mask;
u32 hw_mask;
cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES];
+ u16 tx_cmd_queue_size;
};
static inline struct iwl_trans_pcie *
@@ -534,6 +537,7 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
struct sk_buff_head *skbs);
void iwl_trans_pcie_tx_reset(struct iwl_trans *trans);
+void iwl_pcie_set_tx_cmd_queue_size(struct iwl_trans *trans);
static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *_tfd,
u8 idx)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 2e3e013ec95a..0008ea323be3 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -88,6 +88,93 @@
#define IWL_FW_MEM_EXTENDED_START 0x40000
#define IWL_FW_MEM_EXTENDED_END 0x57FFF
+static void iwl_trans_pcie_err_dump(struct iwl_trans *trans)
+{
+#define PCI_DUMP_SIZE 64
+#define PREFIX_LEN 32
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ struct pci_dev *pdev = trans_pcie->pci_dev;
+ u32 i, pos, alloc_size, *ptr, *buf;
+ char *prefix;
+
+ if (trans_pcie->pcie_dbg_dumped_once)
+ return;
+
+ /* Should be a multiple of 4 */
+ BUILD_BUG_ON(PCI_DUMP_SIZE > 4096 || PCI_DUMP_SIZE & 0x3);
+ /* Alloc a max size buffer */
+ if (PCI_ERR_ROOT_ERR_SRC + 4 > PCI_DUMP_SIZE)
+ alloc_size = PCI_ERR_ROOT_ERR_SRC + 4 + PREFIX_LEN;
+ else
+ alloc_size = PCI_DUMP_SIZE + PREFIX_LEN;
+ buf = kmalloc(alloc_size, GFP_ATOMIC);
+ if (!buf)
+ return;
+ prefix = (char *)buf + alloc_size - PREFIX_LEN;
+
+ IWL_ERR(trans, "iwlwifi transaction failed, dumping registers\n");
+
+ /* Print wifi device registers */
+ sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+ IWL_ERR(trans, "iwlwifi device config registers:\n");
+ for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+ if (pci_read_config_dword(pdev, i, ptr))
+ goto err_read;
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+
+ IWL_ERR(trans, "iwlwifi device memory mapped registers:\n");
+ for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+ *ptr = iwl_read32(trans, i);
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+ if (pos) {
+ IWL_ERR(trans, "iwlwifi device AER capability structure:\n");
+ for (i = 0, ptr = buf; i < PCI_ERR_ROOT_COMMAND; i += 4, ptr++)
+ if (pci_read_config_dword(pdev, pos + i, ptr))
+ goto err_read;
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET,
+ 32, 4, buf, i, 0);
+ }
+
+ /* Print parent device registers next */
+ if (!pdev->bus->self)
+ goto out;
+
+ pdev = pdev->bus->self;
+ sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+
+ IWL_ERR(trans, "iwlwifi parent port (%s) config registers:\n",
+ pci_name(pdev));
+ for (i = 0, ptr = buf; i < PCI_DUMP_SIZE; i += 4, ptr++)
+ if (pci_read_config_dword(pdev, i, ptr))
+ goto err_read;
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+
+ /* Print root port AER registers */
+ pos = 0;
+ pdev = pcie_find_root_port(pdev);
+ if (pdev)
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR);
+ if (pos) {
+ IWL_ERR(trans, "iwlwifi root port (%s) AER cap structure:\n",
+ pci_name(pdev));
+ sprintf(prefix, "iwlwifi %s: ", pci_name(pdev));
+ for (i = 0, ptr = buf; i <= PCI_ERR_ROOT_ERR_SRC; i += 4, ptr++)
+ if (pci_read_config_dword(pdev, pos + i, ptr))
+ goto err_read;
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32,
+ 4, buf, i, 0);
+ }
+
+err_read:
+ print_hex_dump(KERN_ERR, prefix, DUMP_PREFIX_OFFSET, 32, 4, buf, i, 0);
+ IWL_ERR(trans, "Read failed at 0x%X\n", i);
+out:
+ trans_pcie->pcie_dbg_dumped_once = 1;
+ kfree(buf);
+}
+
static void iwl_pcie_free_fw_monitor(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -649,6 +736,7 @@ static int iwl_pcie_load_firmware_chunk(struct iwl_trans *trans,
trans_pcie->ucode_write_complete, 5 * HZ);
if (!ret) {
IWL_ERR(trans, "Failed to load firmware chunk!\n");
+ iwl_trans_pcie_err_dump(trans);
return -ETIMEDOUT;
}
@@ -1868,6 +1956,7 @@ static bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans,
(CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000);
if (unlikely(ret < 0)) {
+ iwl_trans_pcie_err_dump(trans);
iwl_write32(trans, CSR_RESET, CSR_RESET_REG_FLAG_FORCE_NMI);
WARN_ONCE(1,
"Timeout waiting for hardware access (CSR_GP_CNTRL 0x%08x)\n",
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index d74613fcb756..79e4c73a9709 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -1160,6 +1160,8 @@ int iwl_pcie_gen2_tx_init(struct iwl_trans *trans)
struct iwl_txq *cmd_queue;
int txq_id = trans_pcie->cmd_queue, ret;
+ iwl_pcie_set_tx_cmd_queue_size(trans);
+
/* alloc and init the command queue */
if (!trans_pcie->txq[txq_id]) {
cmd_queue = kzalloc(sizeof(*cmd_queue), GFP_KERNEL);
@@ -1168,7 +1170,8 @@ int iwl_pcie_gen2_tx_init(struct iwl_trans *trans)
return -ENOMEM;
}
trans_pcie->txq[txq_id] = cmd_queue;
- ret = iwl_pcie_txq_alloc(trans, cmd_queue, TFD_CMD_SLOTS, true);
+ ret = iwl_pcie_txq_alloc(trans, cmd_queue,
+ trans_pcie->tx_cmd_queue_size, true);
if (ret) {
IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
goto error;
@@ -1177,7 +1180,8 @@ int iwl_pcie_gen2_tx_init(struct iwl_trans *trans)
cmd_queue = trans_pcie->txq[txq_id];
}
- ret = iwl_pcie_txq_init(trans, cmd_queue, TFD_CMD_SLOTS, true);
+ ret = iwl_pcie_txq_init(trans, cmd_queue,
+ trans_pcie->tx_cmd_queue_size, true);
if (ret) {
IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
goto error;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index c645d10d3707..e93c471ef9bf 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -951,7 +951,8 @@ static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
txq_id++) {
bool cmd_queue = (txq_id == trans_pcie->cmd_queue);
- slots_num = cmd_queue ? TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
+ slots_num = cmd_queue ? trans_pcie->tx_cmd_queue_size :
+ TFD_TX_CMD_SLOTS;
trans_pcie->txq[txq_id] = &trans_pcie->txq_memory[txq_id];
ret = iwl_pcie_txq_alloc(trans, trans_pcie->txq[txq_id],
slots_num, cmd_queue);
@@ -970,6 +971,21 @@ error:
return ret;
}
+void iwl_pcie_set_tx_cmd_queue_size(struct iwl_trans *trans)
+{
+ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
+ int queue_size = TFD_CMD_SLOTS;
+
+ if (trans->cfg->tx_cmd_queue_size)
+ queue_size = trans->cfg->tx_cmd_queue_size;
+
+ if (WARN_ON(!(is_power_of_2(queue_size) &&
+ TFD_QUEUE_CB_SIZE(queue_size) > 0)))
+ trans_pcie->tx_cmd_queue_size = TFD_CMD_SLOTS;
+ else
+ trans_pcie->tx_cmd_queue_size = queue_size;
+}
+
int iwl_pcie_tx_init(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -977,6 +993,8 @@ int iwl_pcie_tx_init(struct iwl_trans *trans)
int txq_id, slots_num;
bool alloc = false;
+ iwl_pcie_set_tx_cmd_queue_size(trans);
+
if (!trans_pcie->txq_memory) {
ret = iwl_pcie_tx_alloc(trans);
if (ret)
@@ -1000,7 +1018,8 @@ int iwl_pcie_tx_init(struct iwl_trans *trans)
txq_id++) {
bool cmd_queue = (txq_id == trans_pcie->cmd_queue);
- slots_num = cmd_queue ? TFD_CMD_SLOTS : TFD_TX_CMD_SLOTS;
+ slots_num = cmd_queue ? trans_pcie->tx_cmd_queue_size :
+ TFD_TX_CMD_SLOTS;
ret = iwl_pcie_txq_init(trans, trans_pcie->txq[txq_id],
slots_num, cmd_queue);
if (ret) {
diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c
index d5a3bf91a03e..ab6d39e12069 100644
--- a/drivers/net/wireless/intersil/p54/main.c
+++ b/drivers/net/wireless/intersil/p54/main.c
@@ -852,12 +852,11 @@ void p54_unregister_common(struct ieee80211_hw *dev)
{
struct p54_common *priv = dev->priv;
-#ifdef CONFIG_P54_LEDS
- p54_unregister_leds(priv);
-#endif /* CONFIG_P54_LEDS */
-
if (priv->registered) {
priv->registered = false;
+#ifdef CONFIG_P54_LEDS
+ p54_unregister_leds(priv);
+#endif /* CONFIG_P54_LEDS */
ieee80211_unregister_hw(dev);
}
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 6467ffac9811..ec2f4c31425a 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -396,7 +396,7 @@ static int mac80211_hwsim_vendor_cmd_test(struct wiphy *wiphy,
if (!tb[QCA_WLAN_VENDOR_ATTR_TEST])
return -EINVAL;
val = nla_get_u32(tb[QCA_WLAN_VENDOR_ATTR_TEST]);
- wiphy_debug(wiphy, "%s: test=%u\n", __func__, val);
+ wiphy_dbg(wiphy, "%s: test=%u\n", __func__, val);
/* Send a vendor event as a test. Note that this would not normally be
* done within a command handler, but rather, based on some other
@@ -643,9 +643,9 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif)
if (!vp->assoc)
return;
- wiphy_debug(data->hw->wiphy,
- "%s: send PS-Poll to %pM for aid %d\n",
- __func__, vp->bssid, vp->aid);
+ wiphy_dbg(data->hw->wiphy,
+ "%s: send PS-Poll to %pM for aid %d\n",
+ __func__, vp->bssid, vp->aid);
skb = dev_alloc_skb(sizeof(*pspoll));
if (!skb)
@@ -674,9 +674,9 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac,
if (!vp->assoc)
return;
- wiphy_debug(data->hw->wiphy,
- "%s: send data::nullfunc to %pM ps=%d\n",
- __func__, vp->bssid, ps);
+ wiphy_dbg(data->hw->wiphy,
+ "%s: send data::nullfunc to %pM ps=%d\n",
+ __func__, vp->bssid, ps);
skb = dev_alloc_skb(sizeof(*hdr));
if (!skb)
@@ -1034,7 +1034,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
msg_head = genlmsg_put(skb, 0, 0, &hwsim_genl_family, 0,
HWSIM_CMD_FRAME);
if (msg_head == NULL) {
- printk(KERN_DEBUG "mac80211_hwsim: problem with msg_head\n");
+ pr_debug("mac80211_hwsim: problem with msg_head\n");
goto nla_put_failure;
}
@@ -1093,7 +1093,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw,
nla_put_failure:
nlmsg_free(skb);
err_free_txskb:
- printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+ pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
ieee80211_free_txskb(hw, my_skb);
data->tx_failed++;
}
@@ -1347,7 +1347,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw,
}
if (data->idle && !data->tmp_chan) {
- wiphy_debug(hw->wiphy, "Trying to TX when idle - reject\n");
+ wiphy_dbg(hw->wiphy, "Trying to TX when idle - reject\n");
ieee80211_free_txskb(hw, skb);
return;
}
@@ -1408,7 +1408,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw,
static int mac80211_hwsim_start(struct ieee80211_hw *hw)
{
struct mac80211_hwsim_data *data = hw->priv;
- wiphy_debug(hw->wiphy, "%s\n", __func__);
+ wiphy_dbg(hw->wiphy, "%s\n", __func__);
data->started = true;
return 0;
}
@@ -1419,16 +1419,16 @@ static void mac80211_hwsim_stop(struct ieee80211_hw *hw)
struct mac80211_hwsim_data *data = hw->priv;
data->started = false;
tasklet_hrtimer_cancel(&data->beacon_timer);
- wiphy_debug(hw->wiphy, "%s\n", __func__);
+ wiphy_dbg(hw->wiphy, "%s\n", __func__);
}
static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw,
struct ieee80211_vif *vif)
{
- wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
- __func__, ieee80211_vif_type_p2p(vif),
- vif->addr);
+ wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
+ __func__, ieee80211_vif_type_p2p(vif),
+ vif->addr);
hwsim_set_magic(vif);
vif->cab_queue = 0;
@@ -1447,9 +1447,9 @@ static int mac80211_hwsim_change_interface(struct ieee80211_hw *hw,
bool newp2p)
{
newtype = ieee80211_iftype_p2p(newtype, newp2p);
- wiphy_debug(hw->wiphy,
- "%s (old type=%d, new type=%d, mac_addr=%pM)\n",
- __func__, ieee80211_vif_type_p2p(vif),
+ wiphy_dbg(hw->wiphy,
+ "%s (old type=%d, new type=%d, mac_addr=%pM)\n",
+ __func__, ieee80211_vif_type_p2p(vif),
newtype, vif->addr);
hwsim_check_magic(vif);
@@ -1465,9 +1465,9 @@ static int mac80211_hwsim_change_interface(struct ieee80211_hw *hw,
static void mac80211_hwsim_remove_interface(
struct ieee80211_hw *hw, struct ieee80211_vif *vif)
{
- wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
- __func__, ieee80211_vif_type_p2p(vif),
- vif->addr);
+ wiphy_dbg(hw->wiphy, "%s (type=%d mac_addr=%pM)\n",
+ __func__, ieee80211_vif_type_p2p(vif),
+ vif->addr);
hwsim_check_magic(vif);
hwsim_clear_magic(vif);
}
@@ -1589,23 +1589,23 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed)
int idx;
if (conf->chandef.chan)
- wiphy_debug(hw->wiphy,
- "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n",
- __func__,
- conf->chandef.chan->center_freq,
- conf->chandef.center_freq1,
- conf->chandef.center_freq2,
- hwsim_chanwidths[conf->chandef.width],
- !!(conf->flags & IEEE80211_CONF_IDLE),
- !!(conf->flags & IEEE80211_CONF_PS),
- smps_modes[conf->smps_mode]);
+ wiphy_dbg(hw->wiphy,
+ "%s (freq=%d(%d - %d)/%s idle=%d ps=%d smps=%s)\n",
+ __func__,
+ conf->chandef.chan->center_freq,
+ conf->chandef.center_freq1,
+ conf->chandef.center_freq2,
+ hwsim_chanwidths[conf->chandef.width],
+ !!(conf->flags & IEEE80211_CONF_IDLE),
+ !!(conf->flags & IEEE80211_CONF_PS),
+ smps_modes[conf->smps_mode]);
else
- wiphy_debug(hw->wiphy,
- "%s (freq=0 idle=%d ps=%d smps=%s)\n",
- __func__,
- !!(conf->flags & IEEE80211_CONF_IDLE),
- !!(conf->flags & IEEE80211_CONF_PS),
- smps_modes[conf->smps_mode]);
+ wiphy_dbg(hw->wiphy,
+ "%s (freq=0 idle=%d ps=%d smps=%s)\n",
+ __func__,
+ !!(conf->flags & IEEE80211_CONF_IDLE),
+ !!(conf->flags & IEEE80211_CONF_PS),
+ smps_modes[conf->smps_mode]);
data->idle = !!(conf->flags & IEEE80211_CONF_IDLE);
@@ -1659,7 +1659,7 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw,
{
struct mac80211_hwsim_data *data = hw->priv;
- wiphy_debug(hw->wiphy, "%s\n", __func__);
+ wiphy_dbg(hw->wiphy, "%s\n", __func__);
data->rx_filter = 0;
if (*total_flags & FIF_ALLMULTI)
@@ -1688,25 +1688,25 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
hwsim_check_magic(vif);
- wiphy_debug(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n",
- __func__, changed, vif->addr);
+ wiphy_dbg(hw->wiphy, "%s(changed=0x%x vif->addr=%pM)\n",
+ __func__, changed, vif->addr);
if (changed & BSS_CHANGED_BSSID) {
- wiphy_debug(hw->wiphy, "%s: BSSID changed: %pM\n",
- __func__, info->bssid);
+ wiphy_dbg(hw->wiphy, "%s: BSSID changed: %pM\n",
+ __func__, info->bssid);
memcpy(vp->bssid, info->bssid, ETH_ALEN);
}
if (changed & BSS_CHANGED_ASSOC) {
- wiphy_debug(hw->wiphy, " ASSOC: assoc=%d aid=%d\n",
- info->assoc, info->aid);
+ wiphy_dbg(hw->wiphy, " ASSOC: assoc=%d aid=%d\n",
+ info->assoc, info->aid);
vp->assoc = info->assoc;
vp->aid = info->aid;
}
if (changed & BSS_CHANGED_BEACON_ENABLED) {
- wiphy_debug(hw->wiphy, " BCN EN: %d (BI=%u)\n",
- info->enable_beacon, info->beacon_int);
+ wiphy_dbg(hw->wiphy, " BCN EN: %d (BI=%u)\n",
+ info->enable_beacon, info->beacon_int);
vp->bcn_en = info->enable_beacon;
if (data->started &&
!hrtimer_is_queued(&data->beacon_timer.timer) &&
@@ -1725,8 +1725,8 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
ieee80211_iterate_active_interfaces_atomic(
data->hw, IEEE80211_IFACE_ITER_NORMAL,
mac80211_hwsim_bcn_en_iter, &count);
- wiphy_debug(hw->wiphy, " beaconing vifs remaining: %u",
- count);
+ wiphy_dbg(hw->wiphy, " beaconing vifs remaining: %u",
+ count);
if (count == 0) {
tasklet_hrtimer_cancel(&data->beacon_timer);
data->beacon_int = 0;
@@ -1735,31 +1735,31 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
}
if (changed & BSS_CHANGED_ERP_CTS_PROT) {
- wiphy_debug(hw->wiphy, " ERP_CTS_PROT: %d\n",
- info->use_cts_prot);
+ wiphy_dbg(hw->wiphy, " ERP_CTS_PROT: %d\n",
+ info->use_cts_prot);
}
if (changed & BSS_CHANGED_ERP_PREAMBLE) {
- wiphy_debug(hw->wiphy, " ERP_PREAMBLE: %d\n",
- info->use_short_preamble);
+ wiphy_dbg(hw->wiphy, " ERP_PREAMBLE: %d\n",
+ info->use_short_preamble);
}
if (changed & BSS_CHANGED_ERP_SLOT) {
- wiphy_debug(hw->wiphy, " ERP_SLOT: %d\n", info->use_short_slot);
+ wiphy_dbg(hw->wiphy, " ERP_SLOT: %d\n", info->use_short_slot);
}
if (changed & BSS_CHANGED_HT) {
- wiphy_debug(hw->wiphy, " HT: op_mode=0x%x\n",
- info->ht_operation_mode);
+ wiphy_dbg(hw->wiphy, " HT: op_mode=0x%x\n",
+ info->ht_operation_mode);
}
if (changed & BSS_CHANGED_BASIC_RATES) {
- wiphy_debug(hw->wiphy, " BASIC_RATES: 0x%llx\n",
- (unsigned long long) info->basic_rates);
+ wiphy_dbg(hw->wiphy, " BASIC_RATES: 0x%llx\n",
+ (unsigned long long) info->basic_rates);
}
if (changed & BSS_CHANGED_TXPOWER)
- wiphy_debug(hw->wiphy, " TX Power: %d dBm\n", info->txpower);
+ wiphy_dbg(hw->wiphy, " TX Power: %d dBm\n", info->txpower);
}
static int mac80211_hwsim_sta_add(struct ieee80211_hw *hw,
@@ -1813,11 +1813,11 @@ static int mac80211_hwsim_conf_tx(
struct ieee80211_vif *vif, u16 queue,
const struct ieee80211_tx_queue_params *params)
{
- wiphy_debug(hw->wiphy,
- "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n",
- __func__, queue,
- params->txop, params->cw_min,
- params->cw_max, params->aifs);
+ wiphy_dbg(hw->wiphy,
+ "%s (queue=%d txop=%d cw_min=%d cw_max=%d aifs=%d)\n",
+ __func__, queue,
+ params->txop, params->cw_min,
+ params->cw_max, params->aifs);
return 0;
}
@@ -1981,7 +1981,7 @@ static void hw_scan_work(struct work_struct *work)
.aborted = false,
};
- wiphy_debug(hwsim->hw->wiphy, "hw scan complete\n");
+ wiphy_dbg(hwsim->hw->wiphy, "hw scan complete\n");
ieee80211_scan_completed(hwsim->hw, &info);
hwsim->hw_scan_request = NULL;
hwsim->hw_scan_vif = NULL;
@@ -1990,8 +1990,8 @@ static void hw_scan_work(struct work_struct *work)
return;
}
- wiphy_debug(hwsim->hw->wiphy, "hw scan %d MHz\n",
- req->channels[hwsim->scan_chan_idx]->center_freq);
+ wiphy_dbg(hwsim->hw->wiphy, "hw scan %d MHz\n",
+ req->channels[hwsim->scan_chan_idx]->center_freq);
hwsim->tmp_chan = req->channels[hwsim->scan_chan_idx];
if (hwsim->tmp_chan->flags & (IEEE80211_CHAN_NO_IR |
@@ -2060,7 +2060,7 @@ static int mac80211_hwsim_hw_scan(struct ieee80211_hw *hw,
memset(hwsim->survey_data, 0, sizeof(hwsim->survey_data));
mutex_unlock(&hwsim->mutex);
- wiphy_debug(hw->wiphy, "hwsim hw_scan request\n");
+ wiphy_dbg(hw->wiphy, "hwsim hw_scan request\n");
ieee80211_queue_delayed_work(hwsim->hw, &hwsim->hw_scan, 0);
@@ -2075,7 +2075,7 @@ static void mac80211_hwsim_cancel_hw_scan(struct ieee80211_hw *hw,
.aborted = true,
};
- wiphy_debug(hw->wiphy, "hwsim cancel_hw_scan\n");
+ wiphy_dbg(hw->wiphy, "hwsim cancel_hw_scan\n");
cancel_delayed_work_sync(&hwsim->hw_scan);
@@ -2096,11 +2096,11 @@ static void mac80211_hwsim_sw_scan(struct ieee80211_hw *hw,
mutex_lock(&hwsim->mutex);
if (hwsim->scanning) {
- printk(KERN_DEBUG "two hwsim sw_scans detected!\n");
+ pr_debug("two hwsim sw_scans detected!\n");
goto out;
}
- printk(KERN_DEBUG "hwsim sw_scan request, prepping stuff\n");
+ pr_debug("hwsim sw_scan request, prepping stuff\n");
memcpy(hwsim->scan_addr, mac_addr, ETH_ALEN);
hwsim->scanning = true;
@@ -2117,7 +2117,7 @@ static void mac80211_hwsim_sw_scan_complete(struct ieee80211_hw *hw,
mutex_lock(&hwsim->mutex);
- printk(KERN_DEBUG "hwsim sw_scan_complete\n");
+ pr_debug("hwsim sw_scan_complete\n");
hwsim->scanning = false;
eth_zero_addr(hwsim->scan_addr);
@@ -2131,7 +2131,7 @@ static void hw_roc_start(struct work_struct *work)
mutex_lock(&hwsim->mutex);
- wiphy_debug(hwsim->hw->wiphy, "hwsim ROC begins\n");
+ wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC begins\n");
hwsim->tmp_chan = hwsim->roc_chan;
ieee80211_ready_on_channel(hwsim->hw);
@@ -2151,7 +2151,7 @@ static void hw_roc_done(struct work_struct *work)
hwsim->tmp_chan = NULL;
mutex_unlock(&hwsim->mutex);
- wiphy_debug(hwsim->hw->wiphy, "hwsim ROC expired\n");
+ wiphy_dbg(hwsim->hw->wiphy, "hwsim ROC expired\n");
}
static int mac80211_hwsim_roc(struct ieee80211_hw *hw,
@@ -2172,8 +2172,8 @@ static int mac80211_hwsim_roc(struct ieee80211_hw *hw,
hwsim->roc_duration = duration;
mutex_unlock(&hwsim->mutex);
- wiphy_debug(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n",
- chan->center_freq, duration);
+ wiphy_dbg(hw->wiphy, "hwsim ROC (%d MHz, %d ms)\n",
+ chan->center_freq, duration);
ieee80211_queue_delayed_work(hw, &hwsim->roc_start, HZ/50);
return 0;
@@ -2190,7 +2190,7 @@ static int mac80211_hwsim_croc(struct ieee80211_hw *hw)
hwsim->tmp_chan = NULL;
mutex_unlock(&hwsim->mutex);
- wiphy_debug(hw->wiphy, "hwsim ROC canceled\n");
+ wiphy_dbg(hw->wiphy, "hwsim ROC canceled\n");
return 0;
}
@@ -2199,20 +2199,20 @@ static int mac80211_hwsim_add_chanctx(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx)
{
hwsim_set_chanctx_magic(ctx);
- wiphy_debug(hw->wiphy,
- "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
- ctx->def.chan->center_freq, ctx->def.width,
- ctx->def.center_freq1, ctx->def.center_freq2);
+ wiphy_dbg(hw->wiphy,
+ "add channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+ ctx->def.chan->center_freq, ctx->def.width,
+ ctx->def.center_freq1, ctx->def.center_freq2);
return 0;
}
static void mac80211_hwsim_remove_chanctx(struct ieee80211_hw *hw,
struct ieee80211_chanctx_conf *ctx)
{
- wiphy_debug(hw->wiphy,
- "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
- ctx->def.chan->center_freq, ctx->def.width,
- ctx->def.center_freq1, ctx->def.center_freq2);
+ wiphy_dbg(hw->wiphy,
+ "remove channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+ ctx->def.chan->center_freq, ctx->def.width,
+ ctx->def.center_freq1, ctx->def.center_freq2);
hwsim_check_chanctx_magic(ctx);
hwsim_clear_chanctx_magic(ctx);
}
@@ -2222,10 +2222,10 @@ static void mac80211_hwsim_change_chanctx(struct ieee80211_hw *hw,
u32 changed)
{
hwsim_check_chanctx_magic(ctx);
- wiphy_debug(hw->wiphy,
- "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
- ctx->def.chan->center_freq, ctx->def.width,
- ctx->def.center_freq1, ctx->def.center_freq2);
+ wiphy_dbg(hw->wiphy,
+ "change channel context control: %d MHz/width: %d/cfreqs:%d/%d MHz\n",
+ ctx->def.chan->center_freq, ctx->def.width,
+ ctx->def.center_freq1, ctx->def.center_freq2);
}
static int mac80211_hwsim_assign_vif_chanctx(struct ieee80211_hw *hw,
@@ -2479,7 +2479,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
ops = &mac80211_hwsim_mchan_ops;
hw = ieee80211_alloc_hw_nm(sizeof(*data), ops, param->hwname);
if (!hw) {
- printk(KERN_DEBUG "mac80211_hwsim: ieee80211_alloc_hw failed\n");
+ pr_debug("mac80211_hwsim: ieee80211_alloc_hw failed\n");
err = -ENOMEM;
goto failed;
}
@@ -2507,7 +2507,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
data->dev->driver = &mac80211_hwsim_driver.driver;
err = device_bind_driver(data->dev);
if (err != 0) {
- printk(KERN_DEBUG "mac80211_hwsim: device_bind_driver failed (%d)\n",
+ pr_debug("mac80211_hwsim: device_bind_driver failed (%d)\n",
err);
goto failed_bind;
}
@@ -2698,12 +2698,12 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
err = ieee80211_register_hw(hw);
if (err < 0) {
- printk(KERN_DEBUG "mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
+ pr_debug("mac80211_hwsim: ieee80211_register_hw failed (%d)\n",
err);
goto failed_hw;
}
- wiphy_debug(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr);
+ wiphy_dbg(hw->wiphy, "hwaddr %pM registered\n", hw->wiphy->perm_addr);
if (param->reg_alpha2) {
data->alpha2[0] = param->reg_alpha2[0];
@@ -3067,7 +3067,7 @@ static int hwsim_cloned_frame_received_nl(struct sk_buff *skb_2,
return 0;
err:
- printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+ pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
out:
dev_kfree_skb(skb);
return -EINVAL;
@@ -3098,7 +3098,7 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2,
hwsim_register_wmediumd(net, info->snd_portid);
- printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, "
+ pr_debug("mac80211_hwsim: received a REGISTER, "
"switching to wmediumd mode with pid %d\n", info->snd_portid);
return 0;
@@ -3387,7 +3387,7 @@ static int __init hwsim_init_netlink(void)
return 0;
failure:
- printk(KERN_DEBUG "mac80211_hwsim: error occurred in %s\n", __func__);
+ pr_debug("mac80211_hwsim: error occurred in %s\n", __func__);
return -EINVAL;
}
@@ -3578,7 +3578,7 @@ module_init(init_mac80211_hwsim);
static void __exit exit_mac80211_hwsim(void)
{
- printk(KERN_DEBUG "mac80211_hwsim: unregister radios\n");
+ pr_debug("mac80211_hwsim: unregister radios\n");
hwsim_exit_netlink();
diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c
index 725206914911..8772e3949327 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n.c
@@ -658,12 +658,6 @@ void mwifiex_11n_delba(struct mwifiex_private *priv, int tid)
unsigned long flags;
spin_lock_irqsave(&priv->rx_reorder_tbl_lock, flags);
- if (list_empty(&priv->rx_reorder_tbl_ptr)) {
- dev_dbg(priv->adapter->dev,
- "mwifiex_11n_delba: rx_reorder_tbl_ptr empty\n");
- goto exit;
- }
-
list_for_each_entry(rx_reor_tbl_ptr, &priv->rx_reorder_tbl_ptr, list) {
if (rx_reor_tbl_ptr->tid == tid) {
dev_dbg(priv->adapter->dev,
@@ -854,9 +848,6 @@ mwifiex_send_delba_txbastream_tbl(struct mwifiex_private *priv, u8 tid)
struct mwifiex_adapter *adapter = priv->adapter;
struct mwifiex_tx_ba_stream_tbl *tx_ba_stream_tbl_ptr;
- if (list_empty(&priv->tx_ba_stream_tbl_ptr))
- return;
-
list_for_each_entry(tx_ba_stream_tbl_ptr,
&priv->tx_ba_stream_tbl_ptr, list) {
if (tx_ba_stream_tbl_ptr->ba_status == BA_SETUP_COMPLETE) {
diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
index 274dd5a1574a..d87df2dfcfa4 100644
--- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
+++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c
@@ -835,12 +835,6 @@ void mwifiex_update_rxreor_flags(struct mwifiex_adapter *adapter, u8 flags)
continue;
spin_lock_irqsave(&priv->rx_reorder_tbl_lock, lock_flags);
- if (list_empty(&priv->rx_reorder_tbl_ptr)) {
- spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock,
- lock_flags);
- continue;
- }
-
list_for_each_entry(tbl, &priv->rx_reorder_tbl_ptr, list)
tbl->flags = flags;
spin_unlock_irqrestore(&priv->rx_reorder_tbl_lock, lock_flags);
diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 32c5074da84c..6e0d9a9c5cfb 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -142,7 +142,7 @@ mwifiex_cfg80211_del_key(struct wiphy *wiphy, struct net_device *netdev,
u8 key_index, bool pairwise, const u8 *mac_addr)
{
struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev);
- const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
const u8 *peer_mac = pairwise ? mac_addr : bc_mac;
if (mwifiex_set_encode(priv, NULL, NULL, 0, key_index, peer_mac, 1)) {
@@ -454,7 +454,7 @@ mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev,
{
struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev);
struct mwifiex_wep_key *wep_key;
- const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+ static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
const u8 *peer_mac = pairwise ? mac_addr : bc_mac;
if (GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_UAP &&
@@ -2503,6 +2503,7 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy,
struct ieee80211_channel *chan;
struct ieee_types_header *ie;
struct mwifiex_user_scan_cfg *user_scan_cfg;
+ u8 mac_addr[ETH_ALEN];
mwifiex_dbg(priv->adapter, CMD,
"info: received scan request on %s\n", dev->name);
@@ -2529,15 +2530,10 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy,
priv->scan_request = request;
if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- ether_addr_copy(priv->random_mac, request->mac_addr);
- for (i = 0; i < ETH_ALEN; i++) {
- priv->random_mac[i] &= request->mac_addr_mask[i];
- priv->random_mac[i] |= get_random_int() &
- ~(request->mac_addr_mask[i]);
- }
- ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac);
- } else {
- eth_zero_addr(priv->random_mac);
+ get_random_mask_addr(mac_addr, request->mac_addr,
+ request->mac_addr_mask);
+ ether_addr_copy(request->mac_addr, mac_addr);
+ ether_addr_copy(user_scan_cfg->random_mac, mac_addr);
}
user_scan_cfg->num_ssids = request->n_ssids;
@@ -2959,18 +2955,21 @@ struct wireless_dev *mwifiex_add_virtual_intf(struct wiphy *wiphy,
}
mwifiex_init_priv_params(priv, dev);
- mwifiex_set_mac_address(priv, dev);
priv->netdev = dev;
- ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE,
- HostCmd_ACT_GEN_SET, 0, NULL, true);
- if (ret)
- goto err_set_bss_mode;
+ if (!adapter->mfg_mode) {
+ mwifiex_set_mac_address(priv, dev);
- ret = mwifiex_sta_init_cmd(priv, false, false);
- if (ret)
- goto err_sta_init;
+ ret = mwifiex_send_cmd(priv, HostCmd_CMD_SET_BSS_MODE,
+ HostCmd_ACT_GEN_SET, 0, NULL, true);
+ if (ret)
+ goto err_set_bss_mode;
+
+ ret = mwifiex_sta_init_cmd(priv, false, false);
+ if (ret)
+ goto err_sta_init;
+ }
mwifiex_setup_ht_caps(&wiphy->bands[NL80211_BAND_2GHZ]->ht_cap, priv);
if (adapter->is_hw_11ac_capable)
@@ -3250,8 +3249,8 @@ static int mwifiex_set_wowlan_mef_entry(struct mwifiex_private *priv,
int i, filt_num = 0, ret = 0;
bool first_pat = true;
u8 byte_seq[MWIFIEX_MEF_MAX_BYTESEQ + 1];
- const u8 ipv4_mc_mac[] = {0x33, 0x33};
- const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e};
+ static const u8 ipv4_mc_mac[] = {0x33, 0x33};
+ static const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e};
mef_entry->mode = MEF_MODE_HOST_SLEEP;
mef_entry->action = MEF_ACTION_ALLOW_AND_WAKEUP_HOST;
@@ -3544,9 +3543,9 @@ static int mwifiex_set_rekey_data(struct wiphy *wiphy, struct net_device *dev,
static int mwifiex_get_coalesce_pkt_type(u8 *byte_seq)
{
- const u8 ipv4_mc_mac[] = {0x33, 0x33};
- const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e};
- const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff};
+ static const u8 ipv4_mc_mac[] = {0x33, 0x33};
+ static const u8 ipv6_mc_mac[] = {0x01, 0x00, 0x5e};
+ static const u8 bc_mac[] = {0xff, 0xff, 0xff, 0xff};
if ((byte_seq[0] & 0x01) &&
(byte_seq[MWIFIEX_COALESCE_MAX_BYTESEQ] == 1))
@@ -3795,9 +3794,8 @@ mwifiex_cfg80211_tdls_chan_switch(struct wiphy *wiphy, struct net_device *dev,
spin_lock_irqsave(&priv->sta_list_spinlock, flags);
sta_ptr = mwifiex_get_sta_entry(priv, addr);
- spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
-
if (!sta_ptr) {
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
wiphy_err(wiphy, "%s: Invalid TDLS peer %pM\n",
__func__, addr);
return -ENOENT;
@@ -3805,15 +3803,18 @@ mwifiex_cfg80211_tdls_chan_switch(struct wiphy *wiphy, struct net_device *dev,
if (!(sta_ptr->tdls_cap.extcap.ext_capab[3] &
WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH)) {
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
wiphy_err(wiphy, "%pM do not support tdls cs\n", addr);
return -ENOENT;
}
if (sta_ptr->tdls_status == TDLS_CHAN_SWITCHING ||
sta_ptr->tdls_status == TDLS_IN_OFF_CHAN) {
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
wiphy_err(wiphy, "channel switch is running, abort request\n");
return -EALREADY;
}
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
chan = chandef->chan->hw_value;
second_chan_offset = mwifiex_get_sec_chan_offset(chan);
@@ -3834,18 +3835,20 @@ mwifiex_cfg80211_tdls_cancel_chan_switch(struct wiphy *wiphy,
spin_lock_irqsave(&priv->sta_list_spinlock, flags);
sta_ptr = mwifiex_get_sta_entry(priv, addr);
- spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
-
if (!sta_ptr) {
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
wiphy_err(wiphy, "%s: Invalid TDLS peer %pM\n",
__func__, addr);
} else if (!(sta_ptr->tdls_status == TDLS_CHAN_SWITCHING ||
sta_ptr->tdls_status == TDLS_IN_BASE_CHAN ||
sta_ptr->tdls_status == TDLS_IN_OFF_CHAN)) {
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
wiphy_err(wiphy, "tdls chan switch not initialize by %pM\n",
addr);
- } else
+ } else {
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
mwifiex_stop_tdls_cs(priv, addr);
+ }
}
static int
@@ -4202,7 +4205,10 @@ int mwifiex_init_channel_scan_gap(struct mwifiex_adapter *adapter)
if (adapter->config_bands & BAND_A)
n_channels_a = mwifiex_band_5ghz.n_channels;
- adapter->num_in_chan_stats = n_channels_bg + n_channels_a;
+ /* allocate twice the number total channels, since the driver issues an
+ * additional active scan request for hidden SSIDs on passive channels.
+ */
+ adapter->num_in_chan_stats = 2 * (n_channels_bg + n_channels_a);
adapter->chan_stats = vmalloc(sizeof(*adapter->chan_stats) *
adapter->num_in_chan_stats);
@@ -4306,10 +4312,12 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter)
wiphy->features |= NL80211_FEATURE_HT_IBSS |
NL80211_FEATURE_INACTIVITY_TIMER |
NL80211_FEATURE_LOW_PRIORITY_SCAN |
- NL80211_FEATURE_NEED_OBSS_SCAN |
- NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR |
- NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR |
- NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+ NL80211_FEATURE_NEED_OBSS_SCAN;
+
+ if (ISSUPP_RANDOM_MAC(adapter->fw_cap_info))
+ wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR |
+ NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR |
+ NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
if (ISSUPP_TDLS_ENABLED(adapter->fw_cap_info))
wiphy->features |= NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index 0edc5d621304..a9a1a736e6e8 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -17,6 +17,7 @@
* this warranty disclaimer.
*/
+#include <asm/unaligned.h>
#include "decl.h"
#include "ioctl.h"
#include "util.h"
@@ -183,7 +184,6 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv,
uint16_t cmd_code;
uint16_t cmd_size;
unsigned long flags;
- __le32 tmp;
if (!adapter || !cmd_node)
return -1;
@@ -249,9 +249,9 @@ static int mwifiex_dnld_cmd_to_fw(struct mwifiex_private *priv,
mwifiex_dbg_dump(adapter, CMD_D, "cmd buffer:", host_cmd, cmd_size);
if (adapter->iface_type == MWIFIEX_USB) {
- tmp = cpu_to_le32(MWIFIEX_USB_TYPE_CMD);
skb_push(cmd_node->cmd_skb, MWIFIEX_TYPE_LEN);
- memcpy(cmd_node->cmd_skb->data, &tmp, MWIFIEX_TYPE_LEN);
+ put_unaligned_le32(MWIFIEX_USB_TYPE_CMD,
+ cmd_node->cmd_skb->data);
adapter->cmd_sent = true;
ret = adapter->if_ops.host_to_card(adapter,
MWIFIEX_USB_EP_CMD_EVENT,
@@ -317,7 +317,6 @@ static int mwifiex_dnld_sleep_confirm_cmd(struct mwifiex_adapter *adapter)
(struct mwifiex_opt_sleep_confirm *)
adapter->sleep_cfm->data;
struct sk_buff *sleep_cfm_tmp;
- __le32 tmp;
priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY);
@@ -342,8 +341,7 @@ static int mwifiex_dnld_sleep_confirm_cmd(struct mwifiex_adapter *adapter)
+ MWIFIEX_TYPE_LEN);
skb_put(sleep_cfm_tmp, sizeof(struct mwifiex_opt_sleep_confirm)
+ MWIFIEX_TYPE_LEN);
- tmp = cpu_to_le32(MWIFIEX_USB_TYPE_CMD);
- memcpy(sleep_cfm_tmp->data, &tmp, MWIFIEX_TYPE_LEN);
+ put_unaligned_le32(MWIFIEX_USB_TYPE_CMD, sleep_cfm_tmp->data);
memcpy(sleep_cfm_tmp->data + MWIFIEX_TYPE_LEN,
adapter->sleep_cfm->data,
sizeof(struct mwifiex_opt_sleep_confirm));
diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index 9e75522d248a..6b765f3f37fd 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -238,6 +238,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER {
#define ISSUPP_DRCS_ENABLED(FwCapInfo) (FwCapInfo & BIT(15))
#define ISSUPP_SDIO_SPA_ENABLED(FwCapInfo) (FwCapInfo & BIT(16))
#define ISSUPP_ADHOC_ENABLED(FwCapInfo) (FwCapInfo & BIT(25))
+#define ISSUPP_RANDOM_MAC(FwCapInfo) (FwCapInfo & BIT(27))
#define MWIFIEX_DEF_HT_CAP (IEEE80211_HT_CAP_DSSSCCK40 | \
(1 << IEEE80211_HT_CAP_RX_STBC_SHIFT) | \
diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c
index e11919db7818..1176706ab094 100644
--- a/drivers/net/wireless/marvell/mwifiex/init.c
+++ b/drivers/net/wireless/marvell/mwifiex/init.c
@@ -579,10 +579,6 @@ static void mwifiex_delete_bss_prio_tbl(struct mwifiex_private *priv)
{
spin_lock_irqsave(lock, flags);
- if (list_empty(head)) {
- spin_unlock_irqrestore(lock, flags);
- continue;
- }
list_for_each_entry_safe(bssprio_node, tmp_node, head,
list) {
if (bssprio_node->priv == priv) {
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index a76bd797e454..a34de8582e91 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -680,7 +680,6 @@ struct mwifiex_private {
struct mwifiex_user_scan_chan hidden_chan[MWIFIEX_USER_SCAN_CHAN_MAX];
u8 assoc_resp_ht_param;
bool ht_param_present;
- u8 random_mac[ETH_ALEN];
};
diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c
index c9d41ed77fc7..d7ce7f75ae38 100644
--- a/drivers/net/wireless/marvell/mwifiex/scan.c
+++ b/drivers/net/wireless/marvell/mwifiex/scan.c
@@ -1936,8 +1936,6 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv)
if (!user_scan_cfg)
return -ENOMEM;
- memset(user_scan_cfg, 0, sizeof(*user_scan_cfg));
-
for (id = 0; id < MWIFIEX_USER_SCAN_CHAN_MAX; id++) {
if (!priv->hidden_chan[id].chan_number)
break;
@@ -1948,7 +1946,8 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv)
adapter->active_scan_triggered = true;
if (priv->scan_request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
- ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac);
+ ether_addr_copy(user_scan_cfg->random_mac,
+ priv->scan_request->mac_addr);
user_scan_cfg->num_ssids = priv->scan_request->n_ssids;
user_scan_cfg->ssid_list = priv->scan_request->ssids;
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
index 0fba5b10ef2d..1bd4e13b8449 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c
@@ -70,11 +70,7 @@ mwifiex_process_cmdresp_error(struct mwifiex_private *priv,
break;
case HostCmd_CMD_802_11_SCAN:
case HostCmd_CMD_802_11_SCAN_EXT:
- mwifiex_cancel_pending_scan_cmd(adapter);
-
- spin_lock_irqsave(&adapter->mwifiex_cmd_lock, flags);
- adapter->scan_processing = false;
- spin_unlock_irqrestore(&adapter->mwifiex_cmd_lock, flags);
+ mwifiex_cancel_scan(adapter);
break;
case HostCmd_CMD_MAC_CONTROL:
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c
index 839df8a9634e..d8db412b76c6 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_event.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c
@@ -359,13 +359,12 @@ static void mwifiex_process_uap_tx_pause(struct mwifiex_private *priv,
} else {
spin_lock_irqsave(&priv->sta_list_spinlock, flags);
sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac);
- spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
-
if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) {
sta_ptr->tx_pause = tp->tx_pause;
mwifiex_update_ralist_tx_pause(priv, tp->peermac,
tp->tx_pause);
}
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
}
}
@@ -396,14 +395,13 @@ static void mwifiex_process_sta_tx_pause(struct mwifiex_private *priv,
if (mwifiex_is_tdls_link_setup(status)) {
spin_lock_irqsave(&priv->sta_list_spinlock, flags);
sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac);
- spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
-
if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) {
sta_ptr->tx_pause = tp->tx_pause;
mwifiex_update_ralist_tx_pause(priv,
tp->peermac,
tp->tx_pause);
}
+ spin_unlock_irqrestore(&priv->sta_list_spinlock, flags);
}
}
}
diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c
index e76af2866a19..9fe0bae957b7 100644
--- a/drivers/net/wireless/marvell/mwifiex/tdls.c
+++ b/drivers/net/wireless/marvell/mwifiex/tdls.c
@@ -1413,13 +1413,6 @@ void mwifiex_check_auto_tdls(unsigned long context)
priv->check_tdls_tx = false;
- if (list_empty(&priv->auto_tdls_list)) {
- mod_timer(&priv->auto_tdls_timer,
- jiffies +
- msecs_to_jiffies(MWIFIEX_TIMER_10S));
- return;
- }
-
spin_lock_irqsave(&priv->auto_tdls_lock, flags);
list_for_each_entry(tdls_peer, &priv->auto_tdls_list, list) {
if ((jiffies - tdls_peer->rssi_jiffies) >
diff --git a/drivers/net/wireless/marvell/mwifiex/wmm.c b/drivers/net/wireless/marvell/mwifiex/wmm.c
index 0edd26881321..936a0a841af8 100644
--- a/drivers/net/wireless/marvell/mwifiex/wmm.c
+++ b/drivers/net/wireless/marvell/mwifiex/wmm.c
@@ -359,7 +359,8 @@ static enum mwifiex_wmm_ac_e
mwifiex_wmm_convert_tos_to_ac(struct mwifiex_adapter *adapter, u32 tos)
{
/* Map of TOS UP values to WMM AC */
- const enum mwifiex_wmm_ac_e tos_to_ac[] = { WMM_AC_BE,
+ static const enum mwifiex_wmm_ac_e tos_to_ac[] = {
+ WMM_AC_BE,
WMM_AC_BK,
WMM_AC_BK,
WMM_AC_BE,
diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
index a450bc6bc774..ebc3792a95bd 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
@@ -133,6 +133,7 @@ int qtnf_del_virtual_intf(struct wiphy *wiphy, struct wireless_dev *wdev)
vif->netdev = NULL;
vif->wdev.iftype = NL80211_IFTYPE_UNSPECIFIED;
eth_zero_addr(vif->mac_addr);
+ eth_zero_addr(vif->bssid);
return 0;
}
@@ -201,6 +202,8 @@ err_mac:
qtnf_cmd_send_del_intf(vif);
err_cmd:
vif->wdev.iftype = NL80211_IFTYPE_UNSPECIFIED;
+ eth_zero_addr(vif->mac_addr);
+ eth_zero_addr(vif->bssid);
return ERR_PTR(-EFAULT);
}
@@ -256,11 +259,6 @@ static int qtnf_change_beacon(struct wiphy *wiphy, struct net_device *dev,
{
struct qtnf_vif *vif = qtnf_netdev_get_priv(dev);
- if (!(vif->bss_status & QTNF_STATE_AP_START)) {
- pr_err("VIF%u.%u: not started\n", vif->mac->macid, vif->vifid);
- return -EFAULT;
- }
-
return qtnf_mgmt_set_appie(vif, info);
}
@@ -268,46 +266,15 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_ap_settings *settings)
{
struct qtnf_vif *vif = qtnf_netdev_get_priv(dev);
- struct qtnf_wmac *mac = wiphy_priv(wiphy);
- struct qtnf_bss_config *bss_cfg;
int ret;
- if (!cfg80211_chandef_identical(&mac->chandef, &settings->chandef)) {
- memcpy(&mac->chandef, &settings->chandef, sizeof(mac->chandef));
- if (vif->vifid != 0)
- pr_warn("%s: unexpected chan %u (%u MHz)\n", dev->name,
- settings->chandef.chan->hw_value,
- settings->chandef.chan->center_freq);
- }
-
- bss_cfg = &vif->bss_cfg;
- memset(bss_cfg, 0, sizeof(*bss_cfg));
-
- bss_cfg->bcn_period = settings->beacon_interval;
- bss_cfg->dtim = settings->dtim_period;
- bss_cfg->auth_type = settings->auth_type;
- bss_cfg->privacy = settings->privacy;
-
- bss_cfg->ssid_len = settings->ssid_len;
- memcpy(&bss_cfg->ssid, settings->ssid, bss_cfg->ssid_len);
-
- memcpy(&bss_cfg->crypto, &settings->crypto,
- sizeof(struct cfg80211_crypto_settings));
-
- ret = qtnf_cmd_send_config_ap(vif);
+ ret = qtnf_cmd_send_config_ap(vif, settings);
if (ret) {
pr_err("VIF%u.%u: failed to push config to FW\n",
vif->mac->macid, vif->vifid);
goto out;
}
- if (!(vif->bss_status & QTNF_STATE_AP_CONFIG)) {
- pr_err("VIF%u.%u: AP config failed in FW\n", vif->mac->macid,
- vif->vifid);
- ret = -EFAULT;
- goto out;
- }
-
ret = qtnf_mgmt_set_appie(vif, &settings->beacon);
if (ret) {
pr_err("VIF%u.%u: failed to add IEs to beacon\n",
@@ -316,17 +283,9 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev,
}
ret = qtnf_cmd_send_start_ap(vif);
- if (ret) {
+ if (ret)
pr_err("VIF%u.%u: failed to start AP\n", vif->mac->macid,
vif->vifid);
- goto out;
- }
-
- if (!(vif->bss_status & QTNF_STATE_AP_START)) {
- pr_err("VIF%u.%u: FW failed to start AP operation\n",
- vif->mac->macid, vif->vifid);
- ret = -EFAULT;
- }
out:
return ret;
@@ -343,8 +302,6 @@ static int qtnf_stop_ap(struct wiphy *wiphy, struct net_device *dev)
if (ret) {
pr_err("VIF%u.%u: failed to stop AP operation in FW\n",
vif->mac->macid, vif->vifid);
- vif->bss_status &= ~QTNF_STATE_AP_START;
- vif->bss_status &= ~QTNF_STATE_AP_CONFIG;
netif_carrier_off(vif->netdev);
}
@@ -615,9 +572,6 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_connect_params *sme)
{
struct qtnf_vif *vif = qtnf_netdev_get_priv(dev);
- struct qtnf_wmac *mac = wiphy_priv(wiphy);
- struct cfg80211_chan_def chandef;
- struct qtnf_bss_config *bss_cfg;
int ret;
if (vif->wdev.iftype != NL80211_IFTYPE_STATION)
@@ -626,49 +580,10 @@ qtnf_connect(struct wiphy *wiphy, struct net_device *dev,
if (vif->sta_state != QTNF_STA_DISCONNECTED)
return -EBUSY;
- bss_cfg = &vif->bss_cfg;
- memset(bss_cfg, 0, sizeof(*bss_cfg));
-
- if (sme->channel) {
- /* FIXME: need to set proper nl80211_channel_type value */
- cfg80211_chandef_create(&chandef, sme->channel,
- NL80211_CHAN_HT20);
- /* fall-back to minimal safe chandef description */
- if (!cfg80211_chandef_valid(&chandef))
- cfg80211_chandef_create(&chandef, sme->channel,
- NL80211_CHAN_HT20);
-
- memcpy(&mac->chandef, &chandef, sizeof(mac->chandef));
- }
-
- bss_cfg->ssid_len = sme->ssid_len;
- memcpy(&bss_cfg->ssid, sme->ssid, bss_cfg->ssid_len);
- bss_cfg->auth_type = sme->auth_type;
- bss_cfg->privacy = sme->privacy;
- bss_cfg->mfp = sme->mfp;
-
- if ((sme->bg_scan_period > 0) &&
- (sme->bg_scan_period <= QTNF_MAX_BG_SCAN_PERIOD))
- bss_cfg->bg_scan_period = sme->bg_scan_period;
- else if (sme->bg_scan_period == -1)
- bss_cfg->bg_scan_period = QTNF_DEFAULT_BG_SCAN_PERIOD;
- else
- bss_cfg->bg_scan_period = 0; /* disabled */
-
- bss_cfg->connect_flags = 0;
-
- if (sme->flags & ASSOC_REQ_DISABLE_HT)
- bss_cfg->connect_flags |= QLINK_STA_CONNECT_DISABLE_HT;
- if (sme->flags & ASSOC_REQ_DISABLE_VHT)
- bss_cfg->connect_flags |= QLINK_STA_CONNECT_DISABLE_VHT;
- if (sme->flags & ASSOC_REQ_USE_RRM)
- bss_cfg->connect_flags |= QLINK_STA_CONNECT_USE_RRM;
-
- memcpy(&bss_cfg->crypto, &sme->crypto, sizeof(bss_cfg->crypto));
if (sme->bssid)
- ether_addr_copy(bss_cfg->bssid, sme->bssid);
+ ether_addr_copy(vif->bssid, sme->bssid);
else
- eth_zero_addr(bss_cfg->bssid);
+ eth_zero_addr(vif->bssid);
ret = qtnf_cmd_send_connect(vif, sme);
if (ret) {
@@ -717,15 +632,15 @@ qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev,
int idx, struct survey_info *survey)
{
struct qtnf_wmac *mac = wiphy_priv(wiphy);
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
struct ieee80211_supported_band *sband;
- struct cfg80211_chan_def *chandef;
+ const struct cfg80211_chan_def *chandef = &wdev->chandef;
struct ieee80211_channel *chan;
struct qtnf_chan_stats stats;
struct qtnf_vif *vif;
int ret;
vif = qtnf_netdev_get_priv(dev);
- chandef = &mac->chandef;
sband = wiphy->bands[NL80211_BAND_2GHZ];
if (sband && idx >= sband->n_channels) {
@@ -792,46 +707,35 @@ static int
qtnf_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev,
struct cfg80211_chan_def *chandef)
{
- struct qtnf_wmac *mac = wiphy_priv(wiphy);
struct net_device *ndev = wdev->netdev;
struct qtnf_vif *vif;
+ int ret;
if (!ndev)
return -ENODEV;
vif = qtnf_netdev_get_priv(wdev->netdev);
- switch (vif->wdev.iftype) {
- case NL80211_IFTYPE_STATION:
- if (vif->sta_state == QTNF_STA_DISCONNECTED) {
- pr_warn("%s: STA disconnected\n", ndev->name);
- return -ENODATA;
- }
- break;
- case NL80211_IFTYPE_AP:
- if (!(vif->bss_status & QTNF_STATE_AP_START)) {
- pr_warn("%s: AP not started\n", ndev->name);
- return -ENODATA;
- }
- break;
- default:
- pr_err("unsupported vif type (%d)\n", vif->wdev.iftype);
- return -ENODATA;
+ ret = qtnf_cmd_get_channel(vif, chandef);
+ if (ret) {
+ pr_err("%s: failed to get channel: %d\n", ndev->name, ret);
+ goto out;
}
- if (!cfg80211_chandef_valid(&mac->chandef)) {
- pr_err("invalid channel settings on %s\n", ndev->name);
- return -ENODATA;
+ if (!cfg80211_chandef_valid(chandef)) {
+ pr_err("%s: bad chan freq1=%u freq2=%u bw=%u\n", ndev->name,
+ chandef->center_freq1, chandef->center_freq2,
+ chandef->width);
+ ret = -ENODATA;
}
- memcpy(chandef, &mac->chandef, sizeof(*chandef));
- return 0;
+out:
+ return ret;
}
static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_csa_settings *params)
{
- struct qtnf_wmac *mac = wiphy_priv(wiphy);
struct qtnf_vif *vif = qtnf_netdev_get_priv(dev);
int ret;
@@ -839,41 +743,12 @@ static int qtnf_channel_switch(struct wiphy *wiphy, struct net_device *dev,
params->chandef.chan->hw_value, params->count,
params->radar_required, params->block_tx);
- switch (vif->wdev.iftype) {
- case NL80211_IFTYPE_AP:
- if (!(vif->bss_status & QTNF_STATE_AP_START)) {
- pr_warn("AP not started on %s\n", dev->name);
- return -ENOTCONN;
- }
- break;
- default:
- pr_err("unsupported vif type (%d) on %s\n",
- vif->wdev.iftype, dev->name);
- return -EOPNOTSUPP;
- }
-
- if (vif->vifid != 0) {
- if (!(mac->status & QTNF_MAC_CSA_ACTIVE))
- return -EOPNOTSUPP;
-
- if (!cfg80211_chandef_identical(&params->chandef,
- &mac->csa_chandef))
- return -EINVAL;
-
- return 0;
- }
-
if (!cfg80211_chandef_valid(&params->chandef)) {
pr_err("%s: invalid channel\n", dev->name);
return -EINVAL;
}
- if (cfg80211_chandef_identical(&params->chandef, &mac->chandef)) {
- pr_err("%s: switch request to the same channel\n", dev->name);
- return -EALREADY;
- }
-
- ret = qtnf_cmd_send_chan_switch(mac, params);
+ ret = qtnf_cmd_send_chan_switch(vif, params);
if (ret)
pr_warn("%s: failed to switch to channel (%u)\n",
dev->name, params->chandef.chan->hw_value);
@@ -1119,7 +994,7 @@ void qtnf_virtual_intf_cleanup(struct net_device *ndev)
break;
case QTNF_STA_CONNECTING:
cfg80211_connect_result(vif->netdev,
- vif->bss_cfg.bssid, NULL, 0,
+ vif->bssid, NULL, 0,
NULL, 0,
WLAN_STATUS_UNSPECIFIED_FAILURE,
GFP_KERNEL);
@@ -1147,7 +1022,7 @@ void qtnf_cfg80211_vif_reset(struct qtnf_vif *vif)
switch (vif->sta_state) {
case QTNF_STA_CONNECTING:
cfg80211_connect_result(vif->netdev,
- vif->bss_cfg.bssid, NULL, 0,
+ vif->bssid, NULL, 0,
NULL, 0,
WLAN_STATUS_UNSPECIFIED_FAILURE,
GFP_KERNEL);
diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c
index 4206886b110c..babdc600c193 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/commands.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c
@@ -173,7 +173,6 @@ int qtnf_cmd_send_start_ap(struct qtnf_vif *vif)
goto out;
}
- vif->bss_status |= QTNF_STATE_AP_START;
netif_carrier_on(vif->netdev);
out:
@@ -181,62 +180,68 @@ out:
return ret;
}
-int qtnf_cmd_send_config_ap(struct qtnf_vif *vif)
+int qtnf_cmd_send_config_ap(struct qtnf_vif *vif,
+ const struct cfg80211_ap_settings *s)
{
struct sk_buff *cmd_skb;
- struct qtnf_bss_config *bss_cfg = &vif->bss_cfg;
- struct cfg80211_chan_def *chandef = &vif->mac->chandef;
- struct qlink_tlv_channel *qchan;
- struct qlink_auth_encr aen;
+ struct qlink_cmd_config_ap *cmd;
+ struct qlink_auth_encr *aen;
u16 res_code = QLINK_CMD_RESULT_OK;
int ret;
int i;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid,
QLINK_CMD_CONFIG_AP,
- sizeof(struct qlink_cmd));
+ sizeof(*cmd));
if (unlikely(!cmd_skb))
return -ENOMEM;
- qtnf_bus_lock(vif->mac->bus);
-
- qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, bss_cfg->ssid,
- bss_cfg->ssid_len);
- qtnf_cmd_skb_put_tlv_u16(cmd_skb, QTN_TLV_ID_BCN_PERIOD,
- bss_cfg->bcn_period);
- qtnf_cmd_skb_put_tlv_u8(cmd_skb, QTN_TLV_ID_DTIM, bss_cfg->dtim);
-
- qchan = skb_put_zero(cmd_skb, sizeof(*qchan));
- qchan->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANNEL);
- qchan->hdr.len = cpu_to_le16(sizeof(*qchan) -
- sizeof(struct qlink_tlv_hdr));
- qchan->hw_value = cpu_to_le16(
- ieee80211_frequency_to_channel(chandef->chan->center_freq));
-
- memset(&aen, 0, sizeof(aen));
- aen.auth_type = bss_cfg->auth_type;
- aen.privacy = !!bss_cfg->privacy;
- aen.mfp = bss_cfg->mfp;
- aen.wpa_versions = cpu_to_le32(bss_cfg->crypto.wpa_versions);
- aen.cipher_group = cpu_to_le32(bss_cfg->crypto.cipher_group);
- aen.n_ciphers_pairwise = cpu_to_le32(
- bss_cfg->crypto.n_ciphers_pairwise);
+ cmd = (struct qlink_cmd_config_ap *)cmd_skb->data;
+ cmd->dtim_period = s->dtim_period;
+ cmd->beacon_interval = cpu_to_le16(s->beacon_interval);
+ cmd->hidden_ssid = qlink_hidden_ssid_nl2q(s->hidden_ssid);
+ cmd->inactivity_timeout = cpu_to_le16(s->inactivity_timeout);
+ cmd->smps_mode = s->smps_mode;
+ cmd->p2p_ctwindow = s->p2p_ctwindow;
+ cmd->p2p_opp_ps = s->p2p_opp_ps;
+ cmd->pbss = s->pbss;
+ cmd->ht_required = s->ht_required;
+ cmd->vht_required = s->vht_required;
+
+ aen = &cmd->aen;
+ aen->auth_type = s->auth_type;
+ aen->privacy = !!s->privacy;
+ aen->mfp = 0;
+ aen->wpa_versions = cpu_to_le32(s->crypto.wpa_versions);
+ aen->cipher_group = cpu_to_le32(s->crypto.cipher_group);
+ aen->n_ciphers_pairwise = cpu_to_le32(s->crypto.n_ciphers_pairwise);
for (i = 0; i < QLINK_MAX_NR_CIPHER_SUITES; i++)
- aen.ciphers_pairwise[i] = cpu_to_le32(
- bss_cfg->crypto.ciphers_pairwise[i]);
- aen.n_akm_suites = cpu_to_le32(
- bss_cfg->crypto.n_akm_suites);
+ aen->ciphers_pairwise[i] =
+ cpu_to_le32(s->crypto.ciphers_pairwise[i]);
+ aen->n_akm_suites = cpu_to_le32(s->crypto.n_akm_suites);
for (i = 0; i < QLINK_MAX_NR_AKM_SUITES; i++)
- aen.akm_suites[i] = cpu_to_le32(
- bss_cfg->crypto.akm_suites[i]);
- aen.control_port = bss_cfg->crypto.control_port;
- aen.control_port_no_encrypt =
- bss_cfg->crypto.control_port_no_encrypt;
- aen.control_port_ethertype = cpu_to_le16(be16_to_cpu(
- bss_cfg->crypto.control_port_ethertype));
+ aen->akm_suites[i] = cpu_to_le32(s->crypto.akm_suites[i]);
+ aen->control_port = s->crypto.control_port;
+ aen->control_port_no_encrypt = s->crypto.control_port_no_encrypt;
+ aen->control_port_ethertype =
+ cpu_to_le16(be16_to_cpu(s->crypto.control_port_ethertype));
+
+ if (s->ssid && s->ssid_len > 0 && s->ssid_len <= IEEE80211_MAX_SSID_LEN)
+ qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, s->ssid,
+ s->ssid_len);
- qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_CRYPTO, (u8 *)&aen,
- sizeof(aen));
+ if (cfg80211_chandef_valid(&s->chandef)) {
+ struct qlink_tlv_chandef *chtlv =
+ (struct qlink_tlv_chandef *)skb_put(cmd_skb,
+ sizeof(*chtlv));
+
+ chtlv->hdr.type = cpu_to_le16(QTN_TLV_ID_CHANDEF);
+ chtlv->hdr.len = cpu_to_le16(sizeof(*chtlv) -
+ sizeof(chtlv->hdr));
+ qlink_chandef_cfg2q(&s->chandef, &chtlv->chan);
+ }
+
+ qtnf_bus_lock(vif->mac->bus);
ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code);
@@ -250,8 +255,6 @@ int qtnf_cmd_send_config_ap(struct qtnf_vif *vif)
goto out;
}
- vif->bss_status |= QTNF_STATE_AP_CONFIG;
-
out:
qtnf_bus_unlock(vif->mac->bus);
return ret;
@@ -283,9 +286,6 @@ int qtnf_cmd_send_stop_ap(struct qtnf_vif *vif)
goto out;
}
- vif->bss_status &= ~QTNF_STATE_AP_START;
- vif->bss_status &= ~QTNF_STATE_AP_CONFIG;
-
netif_carrier_off(vif->netdev);
out:
@@ -2037,11 +2037,11 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif,
{
struct sk_buff *cmd_skb;
struct qlink_cmd_connect *cmd;
- struct qtnf_bss_config *bss_cfg = &vif->bss_cfg;
- struct qlink_auth_encr aen;
+ struct qlink_auth_encr *aen;
u16 res_code = QLINK_CMD_RESULT_OK;
int ret;
int i;
+ u32 connect_flags = 0;
cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid,
QLINK_CMD_CONNECT,
@@ -2049,52 +2049,65 @@ int qtnf_cmd_send_connect(struct qtnf_vif *vif,
if (unlikely(!cmd_skb))
return -ENOMEM;
- qtnf_bus_lock(vif->mac->bus);
-
cmd = (struct qlink_cmd_connect *)cmd_skb->data;
- ether_addr_copy(cmd->bssid, bss_cfg->bssid);
+ ether_addr_copy(cmd->bssid, vif->bssid);
+
+ if (sme->channel)
+ cmd->channel = cpu_to_le16(sme->channel->hw_value);
+ else
+ cmd->channel = 0;
+
+ if ((sme->bg_scan_period > 0) &&
+ (sme->bg_scan_period <= QTNF_MAX_BG_SCAN_PERIOD))
+ cmd->bg_scan_period = cpu_to_le16(sme->bg_scan_period);
+ else if (sme->bg_scan_period == -1)
+ cmd->bg_scan_period = cpu_to_le16(QTNF_DEFAULT_BG_SCAN_PERIOD);
+ else
+ cmd->bg_scan_period = 0; /* disabled */
- if (vif->mac->chandef.chan)
- cmd->channel = cpu_to_le16(vif->mac->chandef.chan->hw_value);
+ if (sme->flags & ASSOC_REQ_DISABLE_HT)
+ connect_flags |= QLINK_STA_CONNECT_DISABLE_HT;
+ if (sme->flags & ASSOC_REQ_DISABLE_VHT)
+ connect_flags |= QLINK_STA_CONNECT_DISABLE_VHT;
+ if (sme->flags & ASSOC_REQ_USE_RRM)
+ connect_flags |= QLINK_STA_CONNECT_USE_RRM;
- cmd->bg_scan_period = cpu_to_le16(bss_cfg->bg_scan_period);
+ cmd->flags = cpu_to_le32(connect_flags);
- memset(&aen, 0, sizeof(aen));
- aen.auth_type = bss_cfg->auth_type;
- aen.privacy = !!bss_cfg->privacy;
- aen.mfp = bss_cfg->mfp;
- aen.wpa_versions = cpu_to_le32(bss_cfg->crypto.wpa_versions);
- aen.cipher_group = cpu_to_le32(bss_cfg->crypto.cipher_group);
- aen.n_ciphers_pairwise = cpu_to_le32(
- bss_cfg->crypto.n_ciphers_pairwise);
+ aen = &cmd->aen;
+ aen->auth_type = sme->auth_type;
+ aen->privacy = !!sme->privacy;
+ aen->mfp = sme->mfp;
+ aen->wpa_versions = cpu_to_le32(sme->crypto.wpa_versions);
+ aen->cipher_group = cpu_to_le32(sme->crypto.cipher_group);
+ aen->n_ciphers_pairwise = cpu_to_le32(sme->crypto.n_ciphers_pairwise);
for (i = 0; i < QLINK_MAX_NR_CIPHER_SUITES; i++)
- aen.ciphers_pairwise[i] = cpu_to_le32(
- bss_cfg->crypto.ciphers_pairwise[i]);
+ aen->ciphers_pairwise[i] =
+ cpu_to_le32(sme->crypto.ciphers_pairwise[i]);
- aen.n_akm_suites = cpu_to_le32(bss_cfg->crypto.n_akm_suites);
+ aen->n_akm_suites = cpu_to_le32(sme->crypto.n_akm_suites);
for (i = 0; i < QLINK_MAX_NR_AKM_SUITES; i++)
- aen.akm_suites[i] = cpu_to_le32(
- bss_cfg->crypto.akm_suites[i]);
+ aen->akm_suites[i] = cpu_to_le32(sme->crypto.akm_suites[i]);
- aen.control_port = bss_cfg->crypto.control_port;
- aen.control_port_no_encrypt =
- bss_cfg->crypto.control_port_no_encrypt;
- aen.control_port_ethertype = cpu_to_le16(be16_to_cpu(
- bss_cfg->crypto.control_port_ethertype));
+ aen->control_port = sme->crypto.control_port;
+ aen->control_port_no_encrypt =
+ sme->crypto.control_port_no_encrypt;
+ aen->control_port_ethertype =
+ cpu_to_le16(be16_to_cpu(sme->crypto.control_port_ethertype));
- qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, bss_cfg->ssid,
- bss_cfg->ssid_len);
- qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_CRYPTO, (u8 *)&aen,
- sizeof(aen));
+ qtnf_cmd_skb_put_tlv_arr(cmd_skb, WLAN_EID_SSID, sme->ssid,
+ sme->ssid_len);
if (sme->ie_len != 0)
qtnf_cmd_skb_put_tlv_arr(cmd_skb, QTN_TLV_ID_IE_SET,
sme->ie,
sme->ie_len);
+ qtnf_bus_lock(vif->mac->bus);
+
ret = qtnf_cmd_send(vif->mac->bus, cmd_skb, &res_code);
if (unlikely(ret))
@@ -2304,15 +2317,16 @@ out:
return ret;
}
-int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac,
+int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif,
struct cfg80211_csa_settings *params)
{
+ struct qtnf_wmac *mac = vif->mac;
struct qlink_cmd_chan_switch *cmd;
struct sk_buff *cmd_skb;
u16 res_code = QLINK_CMD_RESULT_OK;
int ret;
- cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, 0x0,
+ cmd_skb = qtnf_cmd_alloc_new_cmdskb(mac->macid, vif->vifid,
QLINK_CMD_CHAN_SWITCH,
sizeof(*cmd));
@@ -2334,9 +2348,6 @@ int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac,
switch (res_code) {
case QLINK_CMD_RESULT_OK:
- memcpy(&mac->csa_chandef, &params->chandef,
- sizeof(mac->csa_chandef));
- mac->status |= QTNF_MAC_CSA_ACTIVE;
ret = 0;
break;
case QLINK_CMD_RESULT_ENOTFOUND:
@@ -2358,3 +2369,41 @@ out:
qtnf_bus_unlock(mac->bus);
return ret;
}
+
+int qtnf_cmd_get_channel(struct qtnf_vif *vif, struct cfg80211_chan_def *chdef)
+{
+ struct qtnf_bus *bus = vif->mac->bus;
+ const struct qlink_resp_channel_get *resp;
+ struct sk_buff *cmd_skb;
+ struct sk_buff *resp_skb = NULL;
+ u16 res_code = QLINK_CMD_RESULT_OK;
+ int ret;
+
+ cmd_skb = qtnf_cmd_alloc_new_cmdskb(vif->mac->macid, vif->vifid,
+ QLINK_CMD_CHAN_GET,
+ sizeof(struct qlink_cmd));
+ if (unlikely(!cmd_skb))
+ return -ENOMEM;
+
+ qtnf_bus_lock(bus);
+
+ ret = qtnf_cmd_send_with_reply(bus, cmd_skb, &resp_skb, &res_code,
+ sizeof(*resp), NULL);
+
+ qtnf_bus_unlock(bus);
+
+ if (unlikely(ret))
+ goto out;
+
+ if (unlikely(res_code != QLINK_CMD_RESULT_OK)) {
+ ret = -ENODATA;
+ goto out;
+ }
+
+ resp = (const struct qlink_resp_channel_get *)resp_skb->data;
+ qlink_chandef_q2cfg(priv_to_wiphy(vif->mac), &resp->chan, chdef);
+
+out:
+ consume_skb(resp_skb);
+ return ret;
+}
diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.h b/drivers/net/wireless/quantenna/qtnfmac/commands.h
index 783b20364296..e87c4a484dd4 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/commands.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/commands.h
@@ -33,7 +33,8 @@ int qtnf_cmd_send_del_intf(struct qtnf_vif *vif);
int qtnf_cmd_get_mac_chan_info(struct qtnf_wmac *mac,
struct ieee80211_supported_band *band);
int qtnf_cmd_send_regulatory_config(struct qtnf_wmac *mac, const char *alpha2);
-int qtnf_cmd_send_config_ap(struct qtnf_vif *vif);
+int qtnf_cmd_send_config_ap(struct qtnf_vif *vif,
+ const struct cfg80211_ap_settings *s);
int qtnf_cmd_send_start_ap(struct qtnf_vif *vif);
int qtnf_cmd_send_stop_ap(struct qtnf_vif *vif);
int qtnf_cmd_send_register_mgmt(struct qtnf_vif *vif, u16 frame_type, bool reg);
@@ -73,7 +74,8 @@ int qtnf_cmd_send_updown_intf(struct qtnf_vif *vif,
int qtnf_cmd_reg_notify(struct qtnf_bus *bus, struct regulatory_request *req);
int qtnf_cmd_get_chan_stats(struct qtnf_wmac *mac, u16 channel,
struct qtnf_chan_stats *stats);
-int qtnf_cmd_send_chan_switch(struct qtnf_wmac *mac,
+int qtnf_cmd_send_chan_switch(struct qtnf_vif *vif,
struct cfg80211_csa_settings *params);
+int qtnf_cmd_get_channel(struct qtnf_vif *vif, struct cfg80211_chan_def *chdef);
#endif /* QLINK_COMMANDS_H_ */
diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h
index 066fcd1095a0..44a2cbb19310 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/core.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/core.h
@@ -52,27 +52,11 @@
#define QTNF_DEF_WDOG_TIMEOUT 5
#define QTNF_TX_TIMEOUT_TRSHLD 100
-#define QTNF_STATE_AP_CONFIG BIT(2)
-#define QTNF_STATE_AP_START BIT(1)
-
extern const struct net_device_ops qtnf_netdev_ops;
+
struct qtnf_bus;
struct qtnf_vif;
-struct qtnf_bss_config {
- u8 ssid[IEEE80211_MAX_SSID_LEN];
- u8 bssid[ETH_ALEN];
- size_t ssid_len;
- u8 dtim;
- u16 bcn_period;
- u16 auth_type;
- bool privacy;
- enum nl80211_mfp mfp;
- struct cfg80211_crypto_settings crypto;
- u16 bg_scan_period;
- u32 connect_flags;
-};
-
struct qtnf_sta_node {
struct list_head list;
u8 mac_addr[ETH_ALEN];
@@ -89,12 +73,10 @@ enum qtnf_sta_state {
QTNF_STA_CONNECTED
};
-enum qtnf_mac_status {
- QTNF_MAC_CSA_ACTIVE = BIT(0)
-};
-
struct qtnf_vif {
struct wireless_dev wdev;
+ u8 bssid[ETH_ALEN];
+ u8 mac_addr[ETH_ALEN];
u8 vifid;
u8 bss_priority;
u8 bss_status;
@@ -102,9 +84,8 @@ struct qtnf_vif {
u16 mgmt_frames_bitmask;
struct net_device *netdev;
struct qtnf_wmac *mac;
- u8 mac_addr[ETH_ALEN];
+
struct work_struct reset_work;
- struct qtnf_bss_config bss_cfg;
struct qtnf_sta_list sta_list;
unsigned long cons_tx_timeout_cnt;
};
@@ -141,13 +122,10 @@ struct qtnf_wmac {
u8 macid;
u8 wiphy_registered;
u8 macaddr[ETH_ALEN];
- u32 status;
struct qtnf_bus *bus;
struct qtnf_mac_info macinfo;
struct qtnf_vif iflist[QTNF_MAX_INTF];
struct cfg80211_scan_request *scan_req;
- struct cfg80211_chan_def chandef;
- struct cfg80211_chan_def csa_chandef;
struct mutex mac_lock; /* lock during wmac speicific ops */
struct timer_list scan_timeout;
};
diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c
index 43d2e7fd6e02..4b29f9fb9c3c 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/event.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/event.c
@@ -25,6 +25,7 @@
#include "trans.h"
#include "util.h"
#include "event.h"
+#include "qlink_util.h"
static int
qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif,
@@ -52,12 +53,6 @@ qtnf_event_handle_sta_assoc(struct qtnf_wmac *mac, struct qtnf_vif *vif,
return -EPROTO;
}
- if (!(vif->bss_status & QTNF_STATE_AP_START)) {
- pr_err("VIF%u.%u: STA_ASSOC event when AP is not started\n",
- mac->macid, vif->vifid);
- return -EPROTO;
- }
-
sta_addr = sta_assoc->sta_addr;
frame_control = le16_to_cpu(sta_assoc->frame_control);
@@ -126,12 +121,6 @@ qtnf_event_handle_sta_deauth(struct qtnf_wmac *mac, struct qtnf_vif *vif,
return -EPROTO;
}
- if (!(vif->bss_status & QTNF_STATE_AP_START)) {
- pr_err("VIF%u.%u: STA_DEAUTH event when AP is not started\n",
- mac->macid, vif->vifid);
- return -EPROTO;
- }
-
sta_addr = sta_deauth->sta_addr;
reason = le16_to_cpu(sta_deauth->reason);
@@ -357,40 +346,29 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac,
{
struct wiphy *wiphy = priv_to_wiphy(mac);
struct cfg80211_chan_def chandef;
- struct ieee80211_channel *chan;
struct qtnf_vif *vif;
- int freq;
int i;
if (len < sizeof(*data)) {
- pr_err("payload is too short\n");
+ pr_err("MAC%u: payload is too short\n", mac->macid);
return -EINVAL;
}
- freq = le32_to_cpu(data->freq);
- chan = ieee80211_get_channel(wiphy, freq);
- if (!chan) {
- pr_err("channel at %d MHz not found\n", freq);
- return -EINVAL;
- }
+ if (!wiphy->registered)
+ return 0;
- pr_debug("MAC%d switch to new channel %u MHz\n", mac->macid, freq);
+ qlink_chandef_q2cfg(wiphy, &data->chan, &chandef);
- if (mac->status & QTNF_MAC_CSA_ACTIVE) {
- mac->status &= ~QTNF_MAC_CSA_ACTIVE;
- if (chan->hw_value != mac->csa_chandef.chan->hw_value)
- pr_warn("unexpected switch to %u during CSA to %u\n",
- chan->hw_value,
- mac->csa_chandef.chan->hw_value);
+ if (!cfg80211_chandef_valid(&chandef)) {
+ pr_err("MAC%u: bad channel f1=%u f2=%u bw=%u\n", mac->macid,
+ chandef.center_freq1, chandef.center_freq2,
+ chandef.width);
+ return -EINVAL;
}
- /* FIXME: need to figure out proper nl80211_channel_type value */
- cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20);
- /* fall-back to minimal safe chandef description */
- if (!cfg80211_chandef_valid(&chandef))
- cfg80211_chandef_create(&chandef, chan, NL80211_CHAN_HT20);
-
- memcpy(&mac->chandef, &chandef, sizeof(mac->chandef));
+ pr_debug("MAC%d: new channel ieee=%u freq1=%u freq2=%u bw=%u\n",
+ mac->macid, chandef.chan->hw_value, chandef.center_freq1,
+ chandef.center_freq2, chandef.width);
for (i = 0; i < QTNF_MAX_INTF; i++) {
vif = &mac->iflist[i];
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h
index a8242f678496..7b313d38c30b 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h
@@ -108,16 +108,48 @@ enum qlink_sta_flags {
};
enum qlink_channel_width {
- QLINK_CHAN_WIDTH_5 = BIT(0),
- QLINK_CHAN_WIDTH_10 = BIT(1),
- QLINK_CHAN_WIDTH_20_NOHT = BIT(2),
- QLINK_CHAN_WIDTH_20 = BIT(3),
- QLINK_CHAN_WIDTH_40 = BIT(4),
- QLINK_CHAN_WIDTH_80 = BIT(5),
- QLINK_CHAN_WIDTH_80P80 = BIT(6),
- QLINK_CHAN_WIDTH_160 = BIT(7),
+ QLINK_CHAN_WIDTH_5 = 0,
+ QLINK_CHAN_WIDTH_10,
+ QLINK_CHAN_WIDTH_20_NOHT,
+ QLINK_CHAN_WIDTH_20,
+ QLINK_CHAN_WIDTH_40,
+ QLINK_CHAN_WIDTH_80,
+ QLINK_CHAN_WIDTH_80P80,
+ QLINK_CHAN_WIDTH_160,
};
+/**
+ * struct qlink_chandef - qlink channel definition
+ *
+ * @center_freq1: center frequency of first segment
+ * @center_freq2: center frequency of second segment (80+80 only)
+ * @width: channel width, one of @enum qlink_channel_width
+ */
+struct qlink_chandef {
+ __le16 center_freq1;
+ __le16 center_freq2;
+ u8 width;
+ u8 rsvd[3];
+} __packed;
+
+#define QLINK_MAX_NR_CIPHER_SUITES 5
+#define QLINK_MAX_NR_AKM_SUITES 2
+
+struct qlink_auth_encr {
+ __le32 wpa_versions;
+ __le32 cipher_group;
+ __le32 n_ciphers_pairwise;
+ __le32 ciphers_pairwise[QLINK_MAX_NR_CIPHER_SUITES];
+ __le32 n_akm_suites;
+ __le32 akm_suites[QLINK_MAX_NR_AKM_SUITES];
+ __le16 control_port_ethertype;
+ u8 auth_type;
+ u8 privacy;
+ u8 mfp;
+ u8 control_port;
+ u8 control_port_no_encrypt;
+} __packed;
+
/* QLINK Command messages related definitions
*/
@@ -155,6 +187,7 @@ enum qlink_cmd_type {
QLINK_CMD_REG_NOTIFY = 0x0019,
QLINK_CMD_CHANS_INFO_GET = 0x001A,
QLINK_CMD_CHAN_SWITCH = 0x001B,
+ QLINK_CMD_CHAN_GET = 0x001C,
QLINK_CMD_CONFIG_AP = 0x0020,
QLINK_CMD_START_AP = 0x0021,
QLINK_CMD_STOP_AP = 0x0022,
@@ -384,8 +417,9 @@ enum qlink_sta_connect_flags {
* struct qlink_cmd_connect - data for QLINK_CMD_CONNECT command
*
* @flags: for future use.
- * @freq: center frequence of a channel which should be used to connect.
+ * @channel: channel which should be used to connect.
* @bg_scan_period: period of background scan.
+ * @aen: authentication information.
* @bssid: BSSID of the BSS to connect to.
* @payload: variable portion of connection request.
*/
@@ -394,6 +428,7 @@ struct qlink_cmd_connect {
__le32 flags;
__le16 channel;
__le16 bg_scan_period;
+ struct qlink_auth_encr aen;
u8 bssid[ETH_ALEN];
u8 payload[0];
} __packed;
@@ -506,6 +541,46 @@ struct qlink_cmd_chan_switch {
u8 beacon_count;
} __packed;
+/**
+ * enum qlink_hidden_ssid - values for %NL80211_ATTR_HIDDEN_SSID
+ *
+ * Refer to &enum nl80211_hidden_ssid
+ */
+enum qlink_hidden_ssid {
+ QLINK_HIDDEN_SSID_NOT_IN_USE,
+ QLINK_HIDDEN_SSID_ZERO_LEN,
+ QLINK_HIDDEN_SSID_ZERO_CONTENTS
+};
+
+/**
+ * struct qlink_cmd_config_ap - data for QLINK_CMD_CONFIG_AP command
+ *
+ * @beacon_interval: beacon interval
+ * @inactivity_timeout: station's inactivity period in seconds
+ * @dtim_period: DTIM period
+ * @hidden_ssid: whether to hide the SSID, one of &enum qlink_hidden_ssid
+ * @smps_mode: SMPS mode
+ * @ht_required: stations must support HT
+ * @vht_required: stations must support VHT
+ * @aen: encryption info
+ * @info: variable configurations
+ */
+struct qlink_cmd_config_ap {
+ struct qlink_cmd chdr;
+ __le16 beacon_interval;
+ __le16 inactivity_timeout;
+ u8 dtim_period;
+ u8 hidden_ssid;
+ u8 smps_mode;
+ u8 p2p_ctwindow;
+ u8 p2p_opp_ps;
+ u8 pbss;
+ u8 ht_required;
+ u8 vht_required;
+ struct qlink_auth_encr aen;
+ u8 info[0];
+} __packed;
+
/* QLINK Command Responses messages related definitions
*/
@@ -680,6 +755,16 @@ struct qlink_resp_get_chan_stats {
u8 info[0];
} __packed;
+/**
+ * struct qlink_resp_channel_get - response for QLINK_CMD_CHAN_GET command
+ *
+ * @chan: definition of current operating channel.
+ */
+struct qlink_resp_channel_get {
+ struct qlink_resp rhdr;
+ struct qlink_chandef chan;
+} __packed;
+
/* QLINK Events messages related definitions
*/
@@ -764,11 +849,11 @@ struct qlink_event_bss_leave {
/**
* struct qlink_event_freq_change - data for QLINK_EVENT_FREQ_CHANGE event
*
- * @freq: new operating frequency in MHz
+ * @chan: new operating channel definition
*/
struct qlink_event_freq_change {
struct qlink_event ehdr;
- __le32 freq;
+ struct qlink_chandef chan;
} __packed;
enum qlink_rxmgmt_flags {
@@ -856,10 +941,9 @@ enum qlink_tlv_id {
QTN_TLV_ID_RTS_THRESH = 0x0202,
QTN_TLV_ID_SRETRY_LIMIT = 0x0203,
QTN_TLV_ID_LRETRY_LIMIT = 0x0204,
- QTN_TLV_ID_BCN_PERIOD = 0x0205,
- QTN_TLV_ID_DTIM = 0x0206,
QTN_TLV_ID_REG_RULE = 0x0207,
QTN_TLV_ID_CHANNEL = 0x020F,
+ QTN_TLV_ID_CHANDEF = 0x0210,
QTN_TLV_ID_COVERAGE_CLASS = 0x0213,
QTN_TLV_ID_IFACE_LIMIT = 0x0214,
QTN_TLV_ID_NUM_IFACE_COMB = 0x0215,
@@ -868,7 +952,6 @@ enum qlink_tlv_id {
QTN_TLV_ID_STA_GENERIC_INFO = 0x0301,
QTN_TLV_ID_KEY = 0x0302,
QTN_TLV_ID_SEQ = 0x0303,
- QTN_TLV_ID_CRYPTO = 0x0304,
QTN_TLV_ID_IE_SET = 0x0305,
};
@@ -1047,22 +1130,16 @@ struct qlink_tlv_channel {
u8 rsvd[2];
} __packed;
-#define QLINK_MAX_NR_CIPHER_SUITES 5
-#define QLINK_MAX_NR_AKM_SUITES 2
-
-struct qlink_auth_encr {
- __le32 wpa_versions;
- __le32 cipher_group;
- __le32 n_ciphers_pairwise;
- __le32 ciphers_pairwise[QLINK_MAX_NR_CIPHER_SUITES];
- __le32 n_akm_suites;
- __le32 akm_suites[QLINK_MAX_NR_AKM_SUITES];
- __le16 control_port_ethertype;
- u8 auth_type;
- u8 privacy;
- u8 mfp;
- u8 control_port;
- u8 control_port_no_encrypt;
+/**
+ * struct qlink_tlv_chandef - data for QTN_TLV_ID_CHANDEF TLV
+ *
+ * Channel definition.
+ *
+ * @chan: channel definition data.
+ */
+struct qlink_tlv_chandef {
+ struct qlink_tlv_hdr hdr;
+ struct qlink_chandef chan;
} __packed;
struct qlink_chan_stats {
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c
index cf024c995fd6..61d999affb09 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.c
@@ -49,29 +49,126 @@ u8 qlink_chan_width_mask_to_nl(u16 qlink_mask)
{
u8 result = 0;
- if (qlink_mask & QLINK_CHAN_WIDTH_5)
+ if (qlink_mask & BIT(QLINK_CHAN_WIDTH_5))
result |= BIT(NL80211_CHAN_WIDTH_5);
- if (qlink_mask & QLINK_CHAN_WIDTH_10)
+ if (qlink_mask & BIT(QLINK_CHAN_WIDTH_10))
result |= BIT(NL80211_CHAN_WIDTH_10);
- if (qlink_mask & QLINK_CHAN_WIDTH_20_NOHT)
+ if (qlink_mask & BIT(QLINK_CHAN_WIDTH_20_NOHT))
result |= BIT(NL80211_CHAN_WIDTH_20_NOHT);
- if (qlink_mask & QLINK_CHAN_WIDTH_20)
+ if (qlink_mask & BIT(QLINK_CHAN_WIDTH_20))
result |= BIT(NL80211_CHAN_WIDTH_20);
- if (qlink_mask & QLINK_CHAN_WIDTH_40)
+ if (qlink_mask & BIT(QLINK_CHAN_WIDTH_40))
result |= BIT(NL80211_CHAN_WIDTH_40);
- if (qlink_mask & QLINK_CHAN_WIDTH_80)
+ if (qlink_mask & BIT(QLINK_CHAN_WIDTH_80))
result |= BIT(NL80211_CHAN_WIDTH_80);
- if (qlink_mask & QLINK_CHAN_WIDTH_80P80)
+ if (qlink_mask & BIT(QLINK_CHAN_WIDTH_80P80))
result |= BIT(NL80211_CHAN_WIDTH_80P80);
- if (qlink_mask & QLINK_CHAN_WIDTH_160)
+ if (qlink_mask & BIT(QLINK_CHAN_WIDTH_160))
result |= BIT(NL80211_CHAN_WIDTH_160);
return result;
}
+
+static enum nl80211_chan_width qlink_chanwidth_to_nl(u8 qlw)
+{
+ switch (qlw) {
+ case QLINK_CHAN_WIDTH_20_NOHT:
+ return NL80211_CHAN_WIDTH_20_NOHT;
+ case QLINK_CHAN_WIDTH_20:
+ return NL80211_CHAN_WIDTH_20;
+ case QLINK_CHAN_WIDTH_40:
+ return NL80211_CHAN_WIDTH_40;
+ case QLINK_CHAN_WIDTH_80:
+ return NL80211_CHAN_WIDTH_80;
+ case QLINK_CHAN_WIDTH_80P80:
+ return NL80211_CHAN_WIDTH_80P80;
+ case QLINK_CHAN_WIDTH_160:
+ return NL80211_CHAN_WIDTH_160;
+ case QLINK_CHAN_WIDTH_5:
+ return NL80211_CHAN_WIDTH_5;
+ case QLINK_CHAN_WIDTH_10:
+ return NL80211_CHAN_WIDTH_10;
+ default:
+ return -1;
+ }
+}
+
+void qlink_chandef_q2cfg(struct wiphy *wiphy,
+ const struct qlink_chandef *qch,
+ struct cfg80211_chan_def *chdef)
+{
+ chdef->center_freq1 = le16_to_cpu(qch->center_freq1);
+ chdef->center_freq2 = le16_to_cpu(qch->center_freq2);
+ chdef->width = qlink_chanwidth_to_nl(qch->width);
+
+ switch (chdef->width) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ case NL80211_CHAN_WIDTH_20:
+ case NL80211_CHAN_WIDTH_5:
+ case NL80211_CHAN_WIDTH_10:
+ chdef->chan = ieee80211_get_channel(wiphy, chdef->center_freq1);
+ break;
+ case NL80211_CHAN_WIDTH_40:
+ case NL80211_CHAN_WIDTH_80:
+ case NL80211_CHAN_WIDTH_80P80:
+ case NL80211_CHAN_WIDTH_160:
+ chdef->chan = ieee80211_get_channel(wiphy,
+ chdef->center_freq1 - 10);
+ break;
+ default:
+ chdef->chan = NULL;
+ break;
+ }
+}
+
+static u8 qlink_chanwidth_nl_to_qlink(enum nl80211_chan_width nlwidth)
+{
+ switch (nlwidth) {
+ case NL80211_CHAN_WIDTH_20_NOHT:
+ return QLINK_CHAN_WIDTH_20_NOHT;
+ case NL80211_CHAN_WIDTH_20:
+ return QLINK_CHAN_WIDTH_20;
+ case NL80211_CHAN_WIDTH_40:
+ return QLINK_CHAN_WIDTH_40;
+ case NL80211_CHAN_WIDTH_80:
+ return QLINK_CHAN_WIDTH_80;
+ case NL80211_CHAN_WIDTH_80P80:
+ return QLINK_CHAN_WIDTH_80P80;
+ case NL80211_CHAN_WIDTH_160:
+ return QLINK_CHAN_WIDTH_160;
+ case NL80211_CHAN_WIDTH_5:
+ return QLINK_CHAN_WIDTH_5;
+ case NL80211_CHAN_WIDTH_10:
+ return QLINK_CHAN_WIDTH_10;
+ default:
+ return -1;
+ }
+}
+
+void qlink_chandef_cfg2q(const struct cfg80211_chan_def *chdef,
+ struct qlink_chandef *qch)
+{
+ qch->center_freq1 = cpu_to_le16(chdef->center_freq1);
+ qch->center_freq2 = cpu_to_le16(chdef->center_freq2);
+ qch->width = qlink_chanwidth_nl_to_qlink(chdef->width);
+}
+
+enum qlink_hidden_ssid qlink_hidden_ssid_nl2q(enum nl80211_hidden_ssid nl_val)
+{
+ switch (nl_val) {
+ case NL80211_HIDDEN_SSID_ZERO_LEN:
+ return QLINK_HIDDEN_SSID_ZERO_LEN;
+ case NL80211_HIDDEN_SSID_ZERO_CONTENTS:
+ return QLINK_HIDDEN_SSID_ZERO_CONTENTS;
+ case NL80211_HIDDEN_SSID_NOT_IN_USE:
+ default:
+ return QLINK_HIDDEN_SSID_NOT_IN_USE;
+ }
+}
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
index de06c1e20b5b..260383d6d109 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink_util.h
@@ -19,6 +19,7 @@
#include <linux/types.h>
#include <linux/skbuff.h>
+#include <net/cfg80211.h>
#include "qlink.h"
@@ -62,5 +63,11 @@ static inline void qtnf_cmd_skb_put_tlv_u16(struct sk_buff *skb,
u16 qlink_iface_type_to_nl_mask(u16 qlink_type);
u8 qlink_chan_width_mask_to_nl(u16 qlink_mask);
+void qlink_chandef_q2cfg(struct wiphy *wiphy,
+ const struct qlink_chandef *qch,
+ struct cfg80211_chan_def *chdef);
+void qlink_chandef_cfg2q(const struct cfg80211_chan_def *chdef,
+ struct qlink_chandef *qch);
+enum qlink_hidden_ssid qlink_hidden_ssid_nl2q(enum nl80211_hidden_ssid nl_val);
#endif /* _QTN_FMAC_QLINK_UTIL_H_ */
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 170cd504e8ff..d8afcdfca1ed 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -92,7 +92,7 @@ static const struct iw_handler_def ray_handler_def;
/***** Prototypes for raylink functions **************************************/
static void authenticate(ray_dev_t *local);
static int build_auth_frame(ray_dev_t *local, UCHAR *dest, int auth_type);
-static void authenticate_timeout(u_long);
+static void authenticate_timeout(struct timer_list *t);
static int get_free_ccs(ray_dev_t *local);
static int get_free_tx_ccs(ray_dev_t *local);
static void init_startup_params(ray_dev_t *local);
@@ -102,7 +102,7 @@ static int ray_init(struct net_device *dev);
static int interrupt_ecf(ray_dev_t *local, int ccs);
static void ray_reset(struct net_device *dev);
static void ray_update_parm(struct net_device *dev, UCHAR objid, UCHAR *value, int len);
-static void verify_dl_startup(u_long);
+static void verify_dl_startup(struct timer_list *t);
/* Prototypes for interrpt time functions **********************************/
static irqreturn_t ray_interrupt(int reg, void *dev_id);
@@ -120,9 +120,8 @@ static void associate(ray_dev_t *local);
/* Card command functions */
static int dl_startup_params(struct net_device *dev);
-static void join_net(u_long local);
-static void start_net(u_long local);
-/* void start_net(ray_dev_t *local); */
+static void join_net(struct timer_list *t);
+static void start_net(struct timer_list *t);
/*===========================================================================*/
/* Parameters that can be set with 'insmod' */
@@ -323,7 +322,7 @@ static int ray_probe(struct pcmcia_device *p_dev)
dev_dbg(&p_dev->dev, "ray_cs ray_attach calling ether_setup.)\n");
netif_stop_queue(dev);
- init_timer(&local->timer);
+ timer_setup(&local->timer, NULL, 0);
this_device = p_dev;
return ray_config(p_dev);
@@ -570,8 +569,7 @@ static int dl_startup_params(struct net_device *dev)
local->card_status = CARD_DL_PARAM;
/* Start kernel timer to wait for dl startup to complete. */
local->timer.expires = jiffies + HZ / 2;
- local->timer.data = (long)local;
- local->timer.function = verify_dl_startup;
+ local->timer.function = (TIMER_FUNC_TYPE)verify_dl_startup;
add_timer(&local->timer);
dev_dbg(&link->dev,
"ray_cs dl_startup_params started timer for verify_dl_startup\n");
@@ -641,9 +639,9 @@ static void init_startup_params(ray_dev_t *local)
} /* init_startup_params */
/*===========================================================================*/
-static void verify_dl_startup(u_long data)
+static void verify_dl_startup(struct timer_list *t)
{
- ray_dev_t *local = (ray_dev_t *) data;
+ ray_dev_t *local = from_timer(local, t, timer);
struct ccs __iomem *pccs = ccs_base(local) + local->dl_param_ccs;
UCHAR status;
struct pcmcia_device *link = local->finder;
@@ -676,16 +674,16 @@ static void verify_dl_startup(u_long data)
return;
}
if (local->sparm.b4.a_network_type == ADHOC)
- start_net((u_long) local);
+ start_net(&local->timer);
else
- join_net((u_long) local);
+ join_net(&local->timer);
} /* end verify_dl_startup */
/*===========================================================================*/
/* Command card to start a network */
-static void start_net(u_long data)
+static void start_net(struct timer_list *t)
{
- ray_dev_t *local = (ray_dev_t *) data;
+ ray_dev_t *local = from_timer(local, t, timer);
struct ccs __iomem *pccs;
int ccsindex;
struct pcmcia_device *link = local->finder;
@@ -710,9 +708,9 @@ static void start_net(u_long data)
/*===========================================================================*/
/* Command card to join a network */
-static void join_net(u_long data)
+static void join_net(struct timer_list *t)
{
- ray_dev_t *local = (ray_dev_t *) data;
+ ray_dev_t *local = from_timer(local, t, timer);
struct ccs __iomem *pccs;
int ccsindex;
@@ -1639,13 +1637,13 @@ static int get_free_ccs(ray_dev_t *local)
} /* get_free_ccs */
/*===========================================================================*/
-static void authenticate_timeout(u_long data)
+static void authenticate_timeout(struct timer_list *t)
{
- ray_dev_t *local = (ray_dev_t *) data;
+ ray_dev_t *local = from_timer(local, t, timer);
del_timer(&local->timer);
printk(KERN_INFO "ray_cs Authentication with access point failed"
" - timeout\n");
- join_net((u_long) local);
+ join_net(&local->timer);
}
/*===========================================================================*/
@@ -1945,17 +1943,16 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id)
del_timer(&local->timer);
local->timer.expires = jiffies + HZ * 5;
- local->timer.data = (long)local;
if (status == CCS_START_NETWORK) {
dev_dbg(&link->dev,
"ray_cs interrupt network \"%s\" start failed\n",
memtmp);
- local->timer.function = start_net;
+ local->timer.function = (TIMER_FUNC_TYPE)start_net;
} else {
dev_dbg(&link->dev,
"ray_cs interrupt network \"%s\" join failed\n",
memtmp);
- local->timer.function = join_net;
+ local->timer.function = (TIMER_FUNC_TYPE)join_net;
}
add_timer(&local->timer);
}
@@ -1967,7 +1964,7 @@ static irqreturn_t ray_interrupt(int irq, void *dev_id)
} else {
dev_dbg(&link->dev, "ray_cs association failed,\n");
local->card_status = CARD_ASSOC_FAILED;
- join_net((u_long) local);
+ join_net(&local->timer);
}
break;
case CCS_TX_REQUEST:
@@ -2420,12 +2417,11 @@ static void authenticate(ray_dev_t *local)
del_timer(&local->timer);
if (build_auth_frame(local, local->bss_id, OPEN_AUTH_REQUEST)) {
- local->timer.function = join_net;
+ local->timer.function = (TIMER_FUNC_TYPE)join_net;
} else {
- local->timer.function = authenticate_timeout;
+ local->timer.function = (TIMER_FUNC_TYPE)authenticate_timeout;
}
local->timer.expires = jiffies + HZ * 2;
- local->timer.data = (long)local;
add_timer(&local->timer);
local->authentication_state = AWAITING_RESPONSE;
} /* end authenticate */
@@ -2468,7 +2464,7 @@ static void rx_authenticate(ray_dev_t *local, struct rcs __iomem *prcs,
} else {
pr_debug("Authentication refused\n");
local->card_status = CARD_AUTH_REFUSED;
- join_net((u_long) local);
+ join_net(&local->timer);
local->authentication_state =
UNAUTHENTICATED;
}
@@ -2506,8 +2502,7 @@ static void associate(ray_dev_t *local)
del_timer(&local->timer);
local->timer.expires = jiffies + HZ * 2;
- local->timer.data = (long)local;
- local->timer.function = join_net;
+ local->timer.function = (TIMER_FUNC_TYPE)join_net;
add_timer(&local->timer);
local->card_status = CARD_ASSOC_FAILED;
return;
diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
index 80fee699f58a..38b2ba1ac6f8 100644
--- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
+++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c
@@ -614,7 +614,10 @@ static int rtl8192eu_parse_efuse(struct rtl8xxxu_priv *priv)
dev_info(&priv->udev->dev, "Vendor: %.7s\n", efuse->vendor_name);
dev_info(&priv->udev->dev, "Product: %.11s\n", efuse->device_name);
- dev_info(&priv->udev->dev, "Serial: %.11s\n", efuse->serial);
+ if (memchr_inv(efuse->serial, 0xff, 11))
+ dev_info(&priv->udev->dev, "Serial: %.11s\n", efuse->serial);
+ else
+ dev_info(&priv->udev->dev, "Serial not available.\n");
if (rtl8xxxu_debug & RTL8XXXU_DEBUG_EFUSE) {
unsigned char *raw = priv->efuse_wifi.raw;
diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index ea18aa7afecb..0b34886321f1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -249,8 +249,6 @@ static void _rtl_init_hw_vht_capab(struct ieee80211_hw *hw,
vht_cap->vht_supported = true;
vht_cap->cap =
- IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 |
- IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
IEEE80211_VHT_CAP_SHORT_GI_80 |
IEEE80211_VHT_CAP_TXSTBC |
@@ -283,8 +281,6 @@ static void _rtl_init_hw_vht_capab(struct ieee80211_hw *hw,
vht_cap->vht_supported = true;
vht_cap->cap =
- IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 |
- IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
IEEE80211_VHT_CAP_SHORT_GI_80 |
IEEE80211_VHT_CAP_TXSTBC |
@@ -835,7 +831,7 @@ static u8 _rtl_get_vht_highest_n_rate(struct ieee80211_hw *hw,
else if ((tx_mcs_map & 0x000c) >> 2 ==
IEEE80211_VHT_MCS_SUPPORT_0_8)
hw_rate =
- rtlpriv->cfg->maps[RTL_RC_VHT_RATE_2SS_MCS9];
+ rtlpriv->cfg->maps[RTL_RC_VHT_RATE_2SS_MCS8];
else
hw_rate =
rtlpriv->cfg->maps[RTL_RC_VHT_RATE_2SS_MCS9];
@@ -847,7 +843,7 @@ static u8 _rtl_get_vht_highest_n_rate(struct ieee80211_hw *hw,
else if ((tx_mcs_map & 0x0003) ==
IEEE80211_VHT_MCS_SUPPORT_0_8)
hw_rate =
- rtlpriv->cfg->maps[RTL_RC_VHT_RATE_1SS_MCS9];
+ rtlpriv->cfg->maps[RTL_RC_VHT_RATE_1SS_MCS8];
else
hw_rate =
rtlpriv->cfg->maps[RTL_RC_VHT_RATE_1SS_MCS9];
@@ -1103,6 +1099,42 @@ int rtlwifi_rate_mapping(struct ieee80211_hw *hw, bool isht, bool isvht,
}
EXPORT_SYMBOL(rtlwifi_rate_mapping);
+static u8 _rtl_get_tx_hw_rate(struct ieee80211_hw *hw,
+ struct ieee80211_tx_info *info)
+{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ struct ieee80211_tx_rate *r = &info->status.rates[0];
+ struct ieee80211_rate *txrate;
+ u8 hw_value = 0x0;
+
+ if (r->flags & IEEE80211_TX_RC_MCS) {
+ /* HT MCS0-15 */
+ hw_value = rtlpriv->cfg->maps[RTL_RC_HT_RATEMCS15] - 15 +
+ r->idx;
+ } else if (r->flags & IEEE80211_TX_RC_VHT_MCS) {
+ /* VHT MCS0-9, NSS */
+ if (ieee80211_rate_get_vht_nss(r) == 2)
+ hw_value = rtlpriv->cfg->maps[RTL_RC_VHT_RATE_2SS_MCS9];
+ else
+ hw_value = rtlpriv->cfg->maps[RTL_RC_VHT_RATE_1SS_MCS9];
+
+ hw_value = hw_value - 9 + ieee80211_rate_get_vht_mcs(r);
+ } else {
+ /* legacy */
+ txrate = ieee80211_get_tx_rate(hw, info);
+
+ if (txrate)
+ hw_value = txrate->hw_value;
+ }
+
+ /* check 5G band */
+ if (rtlpriv->rtlhal.current_bandtype == BAND_ON_5G &&
+ hw_value < rtlpriv->cfg->maps[RTL_RC_OFDM_RATE6M])
+ hw_value = rtlpriv->cfg->maps[RTL_RC_OFDM_RATE6M];
+
+ return hw_value;
+}
+
void rtl_get_tcb_desc(struct ieee80211_hw *hw,
struct ieee80211_tx_info *info,
struct ieee80211_sta *sta,
@@ -1111,12 +1143,10 @@ void rtl_get_tcb_desc(struct ieee80211_hw *hw,
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_mac *rtlmac = rtl_mac(rtl_priv(hw));
struct ieee80211_hdr *hdr = rtl_get_hdr(skb);
- struct ieee80211_rate *txrate;
+
__le16 fc = rtl_get_fc(skb);
- txrate = ieee80211_get_tx_rate(hw, info);
- if (txrate)
- tcb_desc->hw_rate = txrate->hw_value;
+ tcb_desc->hw_rate = _rtl_get_tx_hw_rate(hw, info);
if (rtl_is_tx_report_skb(hw, skb))
tcb_desc->use_spe_rpt = 1;
@@ -1527,6 +1557,42 @@ void rtl_wait_tx_report_acked(struct ieee80211_hw *hw, u32 wait_ms)
"Wait 1ms (%d/%d) to disable key.\n", i, wait_ms);
}
}
+
+u32 rtl_get_hal_edca_param(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ enum wireless_mode wirelessmode,
+ struct ieee80211_tx_queue_params *param)
+{
+ u32 reg = 0;
+ u8 sifstime = 10;
+ u8 slottime = 20;
+
+ /* AIFS = AIFSN * slot time + SIFS */
+ switch (wirelessmode) {
+ case WIRELESS_MODE_A:
+ case WIRELESS_MODE_N_24G:
+ case WIRELESS_MODE_N_5G:
+ case WIRELESS_MODE_AC_5G:
+ case WIRELESS_MODE_AC_24G:
+ sifstime = 16;
+ slottime = 9;
+ break;
+ case WIRELESS_MODE_G:
+ slottime = (vif->bss_conf.use_short_slot ? 9 : 20);
+ break;
+ default:
+ break;
+ }
+
+ reg |= (param->txop & 0x7FF) << 16;
+ reg |= (fls(param->cw_max) & 0xF) << 12;
+ reg |= (fls(param->cw_min) & 0xF) << 8;
+ reg |= (param->aifs & 0x0F) * slottime + sifstime;
+
+ return reg;
+}
+EXPORT_SYMBOL_GPL(rtl_get_hal_edca_param);
+
/*********************************************************
*
* functions called by core.c
@@ -1932,6 +1998,22 @@ label_lps_done:
rtlpriv->link_info.tx_busy_traffic = tx_busy_traffic;
rtlpriv->link_info.higher_busyrxtraffic = higher_busyrxtraffic;
+ rtlpriv->stats.txbytesunicast_inperiod =
+ rtlpriv->stats.txbytesunicast -
+ rtlpriv->stats.txbytesunicast_last;
+ rtlpriv->stats.rxbytesunicast_inperiod =
+ rtlpriv->stats.rxbytesunicast -
+ rtlpriv->stats.rxbytesunicast_last;
+ rtlpriv->stats.txbytesunicast_last = rtlpriv->stats.txbytesunicast;
+ rtlpriv->stats.rxbytesunicast_last = rtlpriv->stats.rxbytesunicast;
+
+ rtlpriv->stats.txbytesunicast_inperiod_tp =
+ (u32)(rtlpriv->stats.txbytesunicast_inperiod * 8 / 2 /
+ 1024 / 1024);
+ rtlpriv->stats.rxbytesunicast_inperiod_tp =
+ (u32)(rtlpriv->stats.rxbytesunicast_inperiod * 8 / 2 /
+ 1024 / 1024);
+
/* <3> DM */
if (!rtlpriv->cfg->mod_params->disable_watchdog)
rtlpriv->cfg->ops->dm_watchdog(hw);
@@ -2204,7 +2286,7 @@ int rtl_send_smps_action(struct ieee80211_hw *hw,
struct rtl_sta_info *sta_entry =
(struct rtl_sta_info *) sta->drv_priv;
sta_entry->mimo_ps = smps;
- /* rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); */
+ /* rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0, true); */
info->control.rates[0].idx = 0;
info->band = hw->conf.chandef.chan->band;
diff --git a/drivers/net/wireless/realtek/rtlwifi/base.h b/drivers/net/wireless/realtek/rtlwifi/base.h
index b56d1b7f5567..cfea9fc39a9f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.h
+++ b/drivers/net/wireless/realtek/rtlwifi/base.h
@@ -137,6 +137,10 @@ void rtl_tx_report_handler(struct ieee80211_hw *hw, u8 *tmp_buf,
u8 c2h_cmd_len);
bool rtl_check_tx_report_acked(struct ieee80211_hw *hw);
void rtl_wait_tx_report_acked(struct ieee80211_hw *hw, u32 wait_ms);
+u32 rtl_get_hal_edca_param(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ enum wireless_mode wirelessmode,
+ struct ieee80211_tx_queue_params *param);
void rtl_beacon_statistic(struct ieee80211_hw *hw, struct sk_buff *skb);
void rtl_collect_scan_list(struct ieee80211_hw *hw, struct sk_buff *skb);
diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
index c04425236ce4..5f726f6d3567 100644
--- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
+++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtc8723b1ant.c
@@ -2260,14 +2260,11 @@ static void halbtc8723b1ant_run_coexist_mechanism(struct btc_coexist *btcoexist)
if (iot_peer != BTC_IOT_PEER_CISCO &&
iot_peer != BTC_IOT_PEER_BROADCOM) {
- if (bt_link_info->sco_exist)
- halbtc8723b1ant_limited_rx(btcoexist,
- NORMAL_EXEC, false,
- false, 0x5);
- else
- halbtc8723b1ant_limited_rx(btcoexist,
- NORMAL_EXEC, false,
- false, 0x5);
+ bool sco_exist = bt_link_info->sco_exist;
+
+ halbtc8723b1ant_limited_rx(btcoexist,
+ NORMAL_EXEC, sco_exist,
+ false, 0x5);
} else {
if (bt_link_info->sco_exist) {
halbtc8723b1ant_limited_rx(btcoexist,
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index c53cbf3d52bd..015a8ec36703 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -453,7 +453,8 @@ static void _rtl_add_wowlan_patterns(struct ieee80211_hw *hw,
for (i = 0; i < wow->n_patterns; i++) {
memset(&rtl_pattern, 0, sizeof(struct rtl_wow_pattern));
memset(mask, 0, MAX_WOL_BIT_MASK_SIZE);
- if (patterns[i].pattern_len > MAX_WOL_PATTERN_SIZE) {
+ if (patterns[i].pattern_len < 0 ||
+ patterns[i].pattern_len > MAX_WOL_PATTERN_SIZE) {
RT_TRACE(rtlpriv, COMP_POWER, DBG_WARNING,
"Pattern[%d] is too long\n", i);
continue;
@@ -945,7 +946,7 @@ static int rtl_op_sta_add(struct ieee80211_hw *hw,
memcpy(sta_entry->mac_addr, sta->addr, ETH_ALEN);
RT_TRACE(rtlpriv, COMP_MAC80211, DBG_DMESG,
"Add sta addr is %pM\n", sta->addr);
- rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0);
+ rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0, true);
}
return 0;
@@ -1151,7 +1152,8 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw,
}
if (vif->type == NL80211_IFTYPE_STATION)
- rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0);
+ rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0,
+ true);
rcu_read_unlock();
/* to avoid AP Disassociation caused by inactivity */
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c
index 08dc8919ef60..b9a6d23364be 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.c
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.c
@@ -586,7 +586,7 @@ static void _rtl_pci_tx_isr(struct ieee80211_hw *hw, int prio)
skb = __skb_dequeue(&ring->queue);
pci_unmap_single(rtlpci->pdev,
rtlpriv->cfg->ops->
- get_desc((u8 *)entry, true,
+ get_desc(hw, (u8 *)entry, true,
HW_DESC_TXBUFF_ADDR),
skb->len, PCI_DMA_TODEVICE);
@@ -691,9 +691,10 @@ remap:
return 0;
rtlpci->rx_ring[rxring_idx].rx_buf[desc_idx] = skb;
if (rtlpriv->use_new_trx_flow) {
+ /* skb->cb may be 64 bit address */
rtlpriv->cfg->ops->set_desc(hw, (u8 *)entry, false,
HW_DESC_RX_PREPARE,
- (u8 *)&bufferaddress);
+ (u8 *)(dma_addr_t *)skb->cb);
} else {
rtlpriv->cfg->ops->set_desc(hw, (u8 *)entry, false,
HW_DESC_RXBUFF_ADDR,
@@ -798,7 +799,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw)
pdesc = &rtlpci->rx_ring[rxring_idx].desc[
rtlpci->rx_ring[rxring_idx].idx];
- own = (u8)rtlpriv->cfg->ops->get_desc((u8 *)pdesc,
+ own = (u8)rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc,
false,
HW_DESC_OWN);
if (own) /* wait data to be filled by hardware */
@@ -825,7 +826,7 @@ static void _rtl_pci_rx_interrupt(struct ieee80211_hw *hw)
(u8 *)buffer_desc,
hw_queue);
- len = rtlpriv->cfg->ops->get_desc((u8 *)pdesc, false,
+ len = rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, false,
HW_DESC_RXPKT_LEN);
if (skb->end - skb->tail > len) {
@@ -1122,7 +1123,7 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw)
if (pskb) {
pci_unmap_single(rtlpci->pdev,
rtlpriv->cfg->ops->get_desc(
- (u8 *)entry, true, HW_DESC_TXBUFF_ADDR),
+ hw, (u8 *)entry, true, HW_DESC_TXBUFF_ADDR),
pskb->len, PCI_DMA_TODEVICE);
kfree_skb(pskb);
}
@@ -1378,7 +1379,8 @@ static void _rtl_pci_free_tx_ring(struct ieee80211_hw *hw,
pci_unmap_single(rtlpci->pdev,
rtlpriv->cfg->
- ops->get_desc((u8 *)entry, true,
+ ops->get_desc(hw, (u8 *)entry,
+ true,
HW_DESC_TXBUFF_ADDR),
skb->len, PCI_DMA_TODEVICE);
kfree_skb(skb);
@@ -1507,7 +1509,7 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw)
for (i = 0; i < rtlpci->rxringcount; i++) {
entry = &rtlpci->rx_ring[rxring_idx].desc[i];
bufferaddress =
- rtlpriv->cfg->ops->get_desc((u8 *)entry,
+ rtlpriv->cfg->ops->get_desc(hw, (u8 *)entry,
false , HW_DESC_RXBUFF_ADDR);
memset((u8 *)entry , 0 ,
sizeof(*rtlpci->rx_ring
@@ -1560,7 +1562,7 @@ int rtl_pci_reset_trx_ring(struct ieee80211_hw *hw)
pci_unmap_single(rtlpci->pdev,
rtlpriv->cfg->ops->
- get_desc((u8 *)
+ get_desc(hw, (u8 *)
entry,
true,
HW_DESC_TXBUFF_ADDR),
@@ -1673,7 +1675,7 @@ static int rtl_pci_tx(struct ieee80211_hw *hw,
if (rtlpriv->use_new_trx_flow) {
ptx_bd_desc = &ring->buffer_desc[idx];
} else {
- own = (u8) rtlpriv->cfg->ops->get_desc((u8 *)pdesc,
+ own = (u8)rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc,
true, HW_DESC_OWN);
if ((own == 1) && (hw_queue != BEACON_QUEUE)) {
@@ -2163,6 +2165,21 @@ static int rtl_pci_intr_mode_decide(struct ieee80211_hw *hw)
return ret;
}
+static void platform_enable_dma64(struct pci_dev *pdev, bool dma64)
+{
+ u8 value;
+
+ pci_read_config_byte(pdev, 0x719, &value);
+
+ /* 0x719 Bit5 is DMA64 bit fetch. */
+ if (dma64)
+ value |= BIT(5);
+ else
+ value &= ~BIT(5);
+
+ pci_write_config_byte(pdev, 0x719, value);
+}
+
int rtl_pci_probe(struct pci_dev *pdev,
const struct pci_device_id *id)
{
@@ -2181,13 +2198,25 @@ int rtl_pci_probe(struct pci_dev *pdev,
return err;
}
- if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+ if (((struct rtl_hal_cfg *)id->driver_data)->mod_params->dma64 &&
+ !pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+ if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64))) {
+ WARN_ONCE(true,
+ "Unable to obtain 64bit DMA for consistent allocations\n");
+ err = -ENOMEM;
+ goto fail1;
+ }
+
+ platform_enable_dma64(pdev, true);
+ } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32))) {
WARN_ONCE(true,
"rtlwifi: Unable to obtain 32bit DMA for consistent allocations\n");
err = -ENOMEM;
goto fail1;
}
+
+ platform_enable_dma64(pdev, false);
}
pci_set_master(pdev);
diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.h b/drivers/net/wireless/realtek/rtlwifi/pci.h
index d9039ea10ba4..1af92b34979d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/pci.h
+++ b/drivers/net/wireless/realtek/rtlwifi/pci.h
@@ -143,13 +143,7 @@ struct rtl_pci_capabilities_header {
* RX wifi info == RX descriptor in old flow
*/
struct rtl_tx_buffer_desc {
-#if (RTL8192EE_SEG_NUM == 2)
- u32 dword[2*(DMA_IS_64BIT + 1)*8]; /*seg = 8*/
-#elif (RTL8192EE_SEG_NUM == 1)
- u32 dword[2*(DMA_IS_64BIT + 1)*4]; /*seg = 4*/
-#elif (RTL8192EE_SEG_NUM == 0)
- u32 dword[2*(DMA_IS_64BIT + 1)*2]; /*seg = 2*/
-#endif
+ u32 dword[4 * (1 << (BUFDESC_SEG_NUM + 1))];
} __packed;
struct rtl_tx_desc {
@@ -157,7 +151,7 @@ struct rtl_tx_desc {
} __packed;
struct rtl_rx_buffer_desc { /*rx buffer desc*/
- u32 dword[2];
+ u32 dword[4];
} __packed;
struct rtl_rx_desc { /*old: rx desc new: rx wifi info*/
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c
index f936a491371b..207411d1b015 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/dm.c
@@ -1221,7 +1221,8 @@ static void rtl88e_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw)
sta = rtl_find_sta(hw, mac->bssid);
if (sta)
rtlpriv->cfg->ops->update_rate_tbl(hw, sta,
- p_ra->ratr_state);
+ p_ra->ratr_state,
+ true);
rcu_read_unlock();
p_ra->pre_ratr_state = p_ra->ratr_state;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
index 0ba26d27d11c..d31117d52381 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c
@@ -99,6 +99,7 @@ static void _rtl88ee_return_beacon_queue_skb(struct ieee80211_hw *hw)
pci_unmap_single(rtlpci->pdev,
rtlpriv->cfg->ops->get_desc(
+ hw,
(u8 *)entry, true, HW_DESC_TXBUFF_ADDR),
skb->len, PCI_DMA_TODEVICE);
kfree_skb(skb);
@@ -2076,7 +2077,7 @@ static void rtl88ee_update_hal_rate_table(struct ieee80211_hw *hw,
}
static void rtl88ee_update_hal_rate_mask(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &(rtlpriv->phy);
@@ -2207,12 +2208,12 @@ static void rtl88ee_update_hal_rate_mask(struct ieee80211_hw *hw,
}
void rtl88ee_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl88ee_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl88ee_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
else
rtl88ee_update_hal_rate_table(hw, sta);
}
@@ -2235,7 +2236,7 @@ bool rtl88ee_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
- enum rf_pwrstate e_rfpowerstate_toset, cur_rfstate;
+ enum rf_pwrstate e_rfpowerstate_toset;
u32 u4tmp;
bool b_actuallyset = false;
@@ -2254,8 +2255,6 @@ bool rtl88ee_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid)
spin_unlock(&rtlpriv->locks.rf_ps_lock);
}
- cur_rfstate = ppsc->rfpwr_state;
-
u4tmp = rtl_read_dword(rtlpriv, REG_GPIO_OUTPUT);
e_rfpowerstate_toset = (u4tmp & BIT(31)) ? ERFON : ERFOFF;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h
index d38dbca3c19e..719b78a3b7db 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.h
@@ -43,7 +43,8 @@ void rtl88ee_update_interrupt_mask(struct ieee80211_hw *hw,
u32 add_msr, u32 rm_msr);
void rtl88ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val);
void rtl88ee_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level);
+ struct ieee80211_sta *sta, u8 rssi_level,
+ bool update_bw);
void rtl88ee_update_channel_access_setting(struct ieee80211_hw *hw);
bool rtl88ee_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid);
void rtl88ee_enable_hw_security_config(struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
index 57e5d5c1d24b..35de3aeafcc9 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
@@ -41,6 +41,7 @@
static void rtl88e_init_aspm_vars(struct ieee80211_hw *hw)
{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
/*close ASPM for AMD defaultly */
@@ -77,7 +78,7 @@ static void rtl88e_init_aspm_vars(struct ieee80211_hw *hw)
* 1 - Support ASPM,
* 2 - According to chipset.
*/
- rtlpci->const_support_pciaspm = 1;
+ rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support;
}
int rtl88e_init_sw_vars(struct ieee80211_hw *hw)
@@ -276,6 +277,7 @@ static struct rtl_mod_params rtl88ee_mod_params = {
.swctrl_lps = false,
.fwctrl_lps = false,
.msi_support = true,
+ .aspm_support = 1,
.debug_level = 0,
.debug_mask = 0,
};
@@ -399,6 +401,7 @@ module_param_named(ips, rtl88ee_mod_params.inactiveps, bool, 0444);
module_param_named(swlps, rtl88ee_mod_params.swctrl_lps, bool, 0444);
module_param_named(fwlps, rtl88ee_mod_params.fwctrl_lps, bool, 0444);
module_param_named(msi, rtl88ee_mod_params.msi_support, bool, 0444);
+module_param_named(aspm, rtl88ee_mod_params.aspm_support, int, 0444);
module_param_named(disable_watchdog, rtl88ee_mod_params.disable_watchdog,
bool, 0444);
MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n");
@@ -406,6 +409,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n");
MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n");
MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n");
+MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n");
MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n");
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c
index dd3e12b74447..9670732b2bc6 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.c
@@ -786,7 +786,8 @@ void rtl88ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc,
}
}
-u32 rtl88ee_get_desc(u8 *pdesc, bool istx, u8 desc_name)
+u64 rtl88ee_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name)
{
u32 ret = 0;
@@ -828,7 +829,7 @@ bool rtl88ee_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, u16 index)
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue];
u8 *entry = (u8 *)(&ring->desc[ring->idx]);
- u8 own = (u8)rtl88ee_get_desc(entry, true, HW_DESC_OWN);
+ u8 own = (u8)rtl88ee_get_desc(hw, entry, true, HW_DESC_OWN);
/*beacon packet will only use the first
*descriptor defautly,and the own may not
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
index 9a1c2087adee..f902d6769aa8 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/trx.h
@@ -782,7 +782,8 @@ bool rtl88ee_rx_query_desc(struct ieee80211_hw *hw,
u8 *pdesc, struct sk_buff *skb);
void rtl88ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc,
bool istx, u8 desc_name, u8 *val);
-u32 rtl88ee_get_desc(u8 *pdesc, bool istx, u8 desc_name);
+u64 rtl88ee_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name);
bool rtl88ee_is_tx_desc_closed(struct ieee80211_hw *hw,
u8 hw_queue, u16 index);
void rtl88ee_tx_polling(struct ieee80211_hw *hw, u8 hw_queue);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
index 9956026bae0a..9310fad69cd9 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c
@@ -1865,7 +1865,7 @@ static void rtl92ce_update_hal_rate_table(struct ieee80211_hw *hw,
}
static void rtl92ce_update_hal_rate_mask(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &(rtlpriv->phy);
@@ -1995,12 +1995,12 @@ static void rtl92ce_update_hal_rate_mask(struct ieee80211_hw *hw,
}
void rtl92ce_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl92ce_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl92ce_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
else
rtl92ce_update_hal_rate_table(hw, sta);
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h
index 877f138a0cb9..7683c5dfe851 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.h
@@ -56,9 +56,8 @@ void rtl92ce_update_interrupt_mask(struct ieee80211_hw *hw,
u32 add_msr, u32 rm_msr);
void rtl92ce_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val);
void rtl92ce_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level);
-void rtl92ce_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level);
+ struct ieee80211_sta *sta, u8 rssi_level,
+ bool update_bw);
void rtl92ce_update_channel_access_setting(struct ieee80211_hw *hw);
bool rtl92ce_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid);
void rtl92ce_enable_hw_security_config(struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
index 38f85bfdf0c7..71a6761d3648 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c
@@ -44,6 +44,7 @@
static void rtl92c_init_aspm_vars(struct ieee80211_hw *hw)
{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
/*close ASPM for AMD defaultly */
@@ -83,7 +84,7 @@ static void rtl92c_init_aspm_vars(struct ieee80211_hw *hw)
* 1 - Support ASPM,
* 2 - According to chipset.
*/
- rtlpci->const_support_pciaspm = 1;
+ rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support;
}
int rtl92c_init_sw_vars(struct ieee80211_hw *hw)
@@ -252,6 +253,7 @@ static struct rtl_mod_params rtl92ce_mod_params = {
.inactiveps = true,
.swctrl_lps = false,
.fwctrl_lps = true,
+ .aspm_support = 1,
.debug_level = 0,
.debug_mask = 0,
};
@@ -375,10 +377,12 @@ module_param_named(debug_mask, rtl92ce_mod_params.debug_mask, ullong, 0644);
module_param_named(ips, rtl92ce_mod_params.inactiveps, bool, 0444);
module_param_named(swlps, rtl92ce_mod_params.swctrl_lps, bool, 0444);
module_param_named(fwlps, rtl92ce_mod_params.fwctrl_lps, bool, 0444);
+module_param_named(aspm, rtl92ce_mod_params.aspm_support, int, 0444);
MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n");
MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n");
MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n");
+MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n");
MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c
index 94a4b39437cd..d36e0060cc7a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.c
@@ -697,7 +697,8 @@ void rtl92ce_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
}
}
-u32 rtl92ce_get_desc(u8 *p_desc, bool istx, u8 desc_name)
+u64 rtl92ce_get_desc(struct ieee80211_hw *hw, u8 *p_desc,
+ bool istx, u8 desc_name)
{
u32 ret = 0;
@@ -740,7 +741,7 @@ bool rtl92ce_is_tx_desc_closed(struct ieee80211_hw *hw,
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue];
u8 *entry = (u8 *)(&ring->desc[ring->idx]);
- u8 own = (u8)rtl92ce_get_desc(entry, true, HW_DESC_OWN);
+ u8 own = (u8)rtl92ce_get_desc(hw, entry, true, HW_DESC_OWN);
/*beacon packet will only use the first
*descriptor defautly,and the own may not
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h
index 66291fc341e7..91f0bd6b752f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/trx.h
@@ -718,7 +718,8 @@ bool rtl92ce_rx_query_desc(struct ieee80211_hw *hw,
u8 *pdesc, struct sk_buff *skb);
void rtl92ce_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
u8 desc_name, u8 *val);
-u32 rtl92ce_get_desc(u8 *pdesc, bool istx, u8 desc_name);
+u64 rtl92ce_get_desc(struct ieee80211_hw *hw, u8 *p_desc,
+ bool istx, u8 desc_name);
bool rtl92ce_is_tx_desc_closed(struct ieee80211_hw *hw,
u8 hw_queue, u16 index);
void rtl92ce_tx_polling(struct ieee80211_hw *hw, u8 hw_queue);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
index 530e80f0ef0b..1e60f70481f5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c
@@ -2006,7 +2006,7 @@ static void rtl92cu_update_hal_rate_table(struct ieee80211_hw *hw,
static void rtl92cu_update_hal_rate_mask(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level)
+ u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &(rtlpriv->phy);
@@ -2153,12 +2153,12 @@ static void rtl92cu_update_hal_rate_mask(struct ieee80211_hw *hw,
void rtl92cu_update_hal_rate_tbl(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level)
+ u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl92cu_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl92cu_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
else
rtl92cu_update_hal_rate_table(hw, sta);
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h
index 932f056f7ef8..ebd168400d45 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.h
@@ -104,6 +104,6 @@ void rtl92c_fill_h2c_cmd(struct ieee80211_hw *hw,
bool rtl92cu_phy_mac_config(struct ieee80211_hw *hw);
void rtl92cu_update_hal_rate_tbl(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level);
+ u8 rssi_level, bool update_bw);
#endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c
index f4129cf96e7c..85cedd083d2b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c
@@ -490,7 +490,7 @@ static bool _rtl92d_cmd_send_packet(struct ieee80211_hw *hw,
spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags);
pdesc = &ring->desc[idx];
/* discard output from call below */
- rtlpriv->cfg->ops->get_desc((u8 *) pdesc, true, HW_DESC_OWN);
+ rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, true, HW_DESC_OWN);
rtlpriv->cfg->ops->fill_tx_cmddesc(hw, (u8 *) pdesc, 1, 1, skb);
__skb_queue_tail(&ring->queue, skb);
spin_unlock_irqrestore(&rtlpriv->locks.irq_th_lock, flags);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
index cf28d25c551f..5a67f85fa165 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c
@@ -1897,7 +1897,7 @@ static void rtl92de_update_hal_rate_table(struct ieee80211_hw *hw,
}
static void rtl92de_update_hal_rate_mask(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &(rtlpriv->phy);
@@ -2033,12 +2033,12 @@ static void rtl92de_update_hal_rate_mask(struct ieee80211_hw *hw,
}
void rtl92de_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl92de_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl92de_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
else
rtl92de_update_hal_rate_table(hw, sta);
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h
index 24b03b9999be..85c565b86ae3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.h
@@ -43,7 +43,8 @@ void rtl92de_update_interrupt_mask(struct ieee80211_hw *hw,
u32 add_msr, u32 rm_msr);
void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val);
void rtl92de_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level);
+ struct ieee80211_sta *sta, u8 rssi_level,
+ bool update_bw);
void rtl92de_update_channel_access_setting(struct ieee80211_hw *hw);
bool rtl92de_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid);
void rtl92de_enable_hw_security_config(struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
index a6549f5f6c59..d5ba2bace79b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c
@@ -40,6 +40,7 @@
static void rtl92d_init_aspm_vars(struct ieee80211_hw *hw)
{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
/*close ASPM for AMD defaultly */
@@ -79,7 +80,7 @@ static void rtl92d_init_aspm_vars(struct ieee80211_hw *hw)
* 1 - Support ASPM,
* 2 - According to chipset.
*/
- rtlpci->const_support_pciaspm = 1;
+ rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support;
}
static int rtl92d_init_sw_vars(struct ieee80211_hw *hw)
@@ -254,6 +255,7 @@ static struct rtl_mod_params rtl92de_mod_params = {
.inactiveps = true,
.swctrl_lps = true,
.fwctrl_lps = false,
+ .aspm_support = 1,
.debug_level = 0,
.debug_mask = 0,
};
@@ -369,11 +371,13 @@ module_param_named(debug_level, rtl92de_mod_params.debug_level, int, 0644);
module_param_named(ips, rtl92de_mod_params.inactiveps, bool, 0444);
module_param_named(swlps, rtl92de_mod_params.swctrl_lps, bool, 0444);
module_param_named(fwlps, rtl92de_mod_params.fwctrl_lps, bool, 0444);
+module_param_named(aspm, rtl92de_mod_params.aspm_support, int, 0444);
module_param_named(debug_mask, rtl92de_mod_params.debug_mask, ullong, 0644);
MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n");
MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n");
MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n");
+MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n");
MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c
index 86019f654428..d7b023cf7400 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.c
@@ -821,7 +821,8 @@ void rtl92de_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
}
}
-u32 rtl92de_get_desc(u8 *p_desc, bool istx, u8 desc_name)
+u64 rtl92de_get_desc(struct ieee80211_hw *hw,
+ u8 *p_desc, bool istx, u8 desc_name)
{
u32 ret = 0;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h
index 9bb6cc648590..f7f776539438 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/trx.h
@@ -735,7 +735,8 @@ bool rtl92de_rx_query_desc(struct ieee80211_hw *hw,
u8 *pdesc, struct sk_buff *skb);
void rtl92de_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
u8 desc_name, u8 *val);
-u32 rtl92de_get_desc(u8 *pdesc, bool istx, u8 desc_name);
+u64 rtl92de_get_desc(struct ieee80211_hw *hw,
+ u8 *p_desc, bool istx, u8 desc_name);
void rtl92de_tx_polling(struct ieee80211_hw *hw, u8 hw_queue);
void rtl92de_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc,
bool b_firstseg, bool b_lastseg,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c
index e6b5786c7d4a..faed6e2dedf6 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/dm.c
@@ -1039,7 +1039,8 @@ static void rtl92ee_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw)
sta = rtl_find_sta(hw, mac->bssid);
if (sta)
rtlpriv->cfg->ops->update_rate_tbl(hw, sta,
- p_ra->ratr_state);
+ p_ra->ratr_state,
+ true);
rcu_read_unlock();
p_ra->pre_ratr_state = p_ra->ratr_state;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
index 7eae27f8e173..f9563ae301ad 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c
@@ -682,7 +682,7 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_mac *mac = rtl_mac(rtl_priv(hw));
struct sk_buff *skb = NULL;
-
+ bool rtstatus;
u32 totalpacketlen;
u8 u1rsvdpageloc[5] = { 0 };
bool b_dlok = false;
@@ -768,7 +768,9 @@ void rtl92ee_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished)
skb = dev_alloc_skb(totalpacketlen);
skb_put_data(skb, &reserved_page_packet, totalpacketlen);
- b_dlok = true;
+ rtstatus = rtl_cmd_send_packet(hw, skb);
+ if (rtstatus)
+ b_dlok = true;
if (b_dlok) {
RT_TRACE(rtlpriv, COMP_POWER, DBG_LOUD ,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
index ef9394be7016..6fc3090c4b72 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c
@@ -840,6 +840,31 @@ static bool _rtl92ee_init_mac(struct ieee80211_hw *hw)
/* Set TCR register */
rtl_write_dword(rtlpriv, REG_TCR, rtlpci->transmit_config);
+ /* Set TX/RX descriptor physical address -- HI part */
+ if (!rtlpriv->cfg->mod_params->dma64)
+ goto dma64_end;
+
+ rtl_write_dword(rtlpriv, REG_BCNQ_DESA + 4,
+ ((u64)rtlpci->tx_ring[BEACON_QUEUE].buffer_desc_dma) >>
+ 32);
+ rtl_write_dword(rtlpriv, REG_MGQ_DESA + 4,
+ (u64)rtlpci->tx_ring[MGNT_QUEUE].buffer_desc_dma >> 32);
+ rtl_write_dword(rtlpriv, REG_VOQ_DESA + 4,
+ (u64)rtlpci->tx_ring[VO_QUEUE].buffer_desc_dma >> 32);
+ rtl_write_dword(rtlpriv, REG_VIQ_DESA + 4,
+ (u64)rtlpci->tx_ring[VI_QUEUE].buffer_desc_dma >> 32);
+ rtl_write_dword(rtlpriv, REG_BEQ_DESA + 4,
+ (u64)rtlpci->tx_ring[BE_QUEUE].buffer_desc_dma >> 32);
+ rtl_write_dword(rtlpriv, REG_BKQ_DESA + 4,
+ (u64)rtlpci->tx_ring[BK_QUEUE].buffer_desc_dma >> 32);
+ rtl_write_dword(rtlpriv, REG_HQ0_DESA + 4,
+ (u64)rtlpci->tx_ring[HIGH_QUEUE].buffer_desc_dma >> 32);
+
+ rtl_write_dword(rtlpriv, REG_RX_DESA + 4,
+ (u64)rtlpci->rx_ring[RX_MPDU_QUEUE].dma >> 32);
+
+dma64_end:
+
/* Set TX/RX descriptor physical address(from OS API). */
rtl_write_dword(rtlpriv, REG_BCNQ_DESA,
((u64)rtlpci->tx_ring[BEACON_QUEUE].buffer_desc_dma) &
@@ -913,15 +938,9 @@ static bool _rtl92ee_init_mac(struct ieee80211_hw *hw)
rtl_write_word(rtlpriv, REG_HI7Q_TXBD_NUM,
TX_DESC_NUM_92E | ((RTL8192EE_SEG_NUM << 12) & 0x3000));
/*Rx*/
-#if (DMA_IS_64BIT == 1)
rtl_write_word(rtlpriv, REG_RX_RXBD_NUM,
RX_DESC_NUM_92E |
((RTL8192EE_SEG_NUM << 13) & 0x6000) | 0x8000);
-#else
- rtl_write_word(rtlpriv, REG_RX_RXBD_NUM,
- RX_DESC_NUM_92E |
- ((RTL8192EE_SEG_NUM << 13) & 0x6000) | 0x0000);
-#endif
rtl_write_dword(rtlpriv, REG_TSFTIMER_HCI, 0XFFFFFFFF);
@@ -2251,7 +2270,7 @@ static u8 _rtl92ee_mrate_idx_to_arfr_id(struct ieee80211_hw *hw, u8 rate_index)
static void rtl92ee_update_hal_rate_mask(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level)
+ u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &rtlpriv->phy;
@@ -2370,7 +2389,7 @@ static void rtl92ee_update_hal_rate_mask(struct ieee80211_hw *hw,
(ratr_index << 28);
rate_mask[0] = macid;
rate_mask[1] = ratr_index | (b_shortgi ? 0x80 : 0x00);
- rate_mask[2] = curtxbw_40mhz;
+ rate_mask[2] = curtxbw_40mhz | ((!update_bw) << 3);
rate_mask[3] = (u8)(ratr_bitmap & 0x000000ff);
rate_mask[4] = (u8)((ratr_bitmap & 0x0000ff00) >> 8);
rate_mask[5] = (u8)((ratr_bitmap & 0x00ff0000) >> 16);
@@ -2385,12 +2404,13 @@ static void rtl92ee_update_hal_rate_mask(struct ieee80211_hw *hw,
}
void rtl92ee_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level,
+ bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl92ee_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl92ee_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
}
void rtl92ee_update_channel_access_setting(struct ieee80211_hw *hw)
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h
index 05413f189685..cd6aeb44b996 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.h
@@ -43,7 +43,8 @@ void rtl92ee_update_interrupt_mask(struct ieee80211_hw *hw,
u32 add_msr, u32 rm_msr);
void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val);
void rtl92ee_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level);
+ struct ieee80211_sta *sta, u8 rssi_level,
+ bool update_bw);
void rtl92ee_update_channel_access_setting(struct ieee80211_hw *hw);
bool rtl92ee_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid);
void rtl92ee_enable_hw_security_config(struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
index a3490080d066..ef92a789871d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
@@ -44,6 +44,7 @@
static void rtl92ee_init_aspm_vars(struct ieee80211_hw *hw)
{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
/*close ASPM for AMD defaultly */
@@ -83,7 +84,7 @@ static void rtl92ee_init_aspm_vars(struct ieee80211_hw *hw)
* 1 - Support ASPM,
* 2 - According to chipset.
*/
- rtlpci->const_support_pciaspm = 1;
+ rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support;
}
int rtl92ee_init_sw_vars(struct ieee80211_hw *hw)
@@ -259,6 +260,8 @@ static struct rtl_mod_params rtl92ee_mod_params = {
.swctrl_lps = false,
.fwctrl_lps = true,
.msi_support = true,
+ .dma64 = false,
+ .aspm_support = 1,
.debug_level = 0,
.debug_mask = 0,
};
@@ -376,6 +379,8 @@ module_param_named(ips, rtl92ee_mod_params.inactiveps, bool, 0444);
module_param_named(swlps, rtl92ee_mod_params.swctrl_lps, bool, 0444);
module_param_named(fwlps, rtl92ee_mod_params.fwctrl_lps, bool, 0444);
module_param_named(msi, rtl92ee_mod_params.msi_support, bool, 0444);
+module_param_named(dma64, rtl92ee_mod_params.dma64, bool, 0444);
+module_param_named(aspm, rtl92ee_mod_params.aspm_support, int, 0444);
module_param_named(disable_watchdog, rtl92ee_mod_params.disable_watchdog,
bool, 0444);
MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n");
@@ -383,6 +388,8 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n");
MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n");
MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n");
+MODULE_PARM_DESC(dma64, "Set to 1 to use DMA 64 (default 0)\n");
+MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n");
MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n");
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
index c58393eab6a1..12255682e890 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.c
@@ -581,13 +581,9 @@ void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw,
u8 i = 0;
u16 real_desc_size = 0x28;
u16 append_early_mode_size = 0;
-#if (RTL8192EE_SEG_NUM == 0)
- u8 segmentnum = 2;
-#elif (RTL8192EE_SEG_NUM == 1)
- u8 segmentnum = 4;
-#elif (RTL8192EE_SEG_NUM == 2)
- u8 segmentnum = 8;
-#endif
+ u8 segmentnum = 1 << (RTL8192EE_SEG_NUM + 1);
+ dma_addr_t desc_dma_addr;
+ bool dma64 = rtlpriv->cfg->mod_params->dma64;
tx_page_size = 2;
current_bd_desc = rtlpci->tx_ring[queue_index].cur_tx_wp;
@@ -609,6 +605,10 @@ void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw,
psblen += 1;
}
+ /* tx desc addr */
+ desc_dma_addr = rtlpci->tx_ring[queue_index].dma +
+ (current_bd_desc * TX_DESC_SIZE);
+
/* Reset */
SET_TX_BUFF_DESC_LEN_0(tx_bd_desc, 0);
SET_TX_BUFF_DESC_PSB(tx_bd_desc, 0);
@@ -618,17 +618,9 @@ void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw,
SET_TXBUFFER_DESC_LEN_WITH_OFFSET(tx_bd_desc, i, 0);
SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(tx_bd_desc, i, 0);
SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(tx_bd_desc, i, 0);
-#if (DMA_IS_64BIT == 1)
- SET_TXBUFFER_DESC_ADD_HIGT_WITH_OFFSET(tx_bd_desc, i, 0);
-#endif
+ SET_TXBUFFER_DESC_ADD_HIGH_WITH_OFFSET(tx_bd_desc, i, 0, dma64);
}
- SET_TX_BUFF_DESC_LEN_1(tx_bd_desc, 0);
- SET_TX_BUFF_DESC_AMSDU_1(tx_bd_desc, 0);
- SET_TX_BUFF_DESC_LEN_2(tx_bd_desc, 0);
- SET_TX_BUFF_DESC_AMSDU_2(tx_bd_desc, 0);
- SET_TX_BUFF_DESC_LEN_3(tx_bd_desc, 0);
- SET_TX_BUFF_DESC_AMSDU_3(tx_bd_desc, 0);
/* Clear all status */
CLEAR_PCI_TX_DESC_CONTENT(desc, TX_DESC_SIZE);
@@ -643,14 +635,16 @@ void rtl92ee_pre_fill_tx_bd_desc(struct ieee80211_hw *hw,
SET_TX_BUFF_DESC_LEN_0(tx_bd_desc, desc_size);
}
SET_TX_BUFF_DESC_PSB(tx_bd_desc, psblen);
- SET_TX_BUFF_DESC_ADDR_LOW_0(tx_bd_desc,
- rtlpci->tx_ring[queue_index].dma +
- (current_bd_desc * TX_DESC_SIZE));
+ SET_TX_BUFF_DESC_ADDR_LOW_0(tx_bd_desc, desc_dma_addr);
+ SET_TX_BUFF_DESC_ADDR_HIGH_0(tx_bd_desc, ((u64)desc_dma_addr >> 32),
+ dma64);
SET_TXBUFFER_DESC_LEN_WITH_OFFSET(tx_bd_desc, 1, pkt_len);
/* don't using extendsion mode. */
SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(tx_bd_desc, 1, 0);
SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(tx_bd_desc, 1, addr);
+ SET_TXBUFFER_DESC_ADD_HIGH_WITH_OFFSET(tx_bd_desc, 1,
+ ((u64)addr >> 32), dma64);
SET_TX_DESC_PKT_SIZE(desc, (u16)(pkt_len));
SET_TX_DESC_TX_BUFFER_SIZE(desc, (u16)(pkt_len));
@@ -918,6 +912,7 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
static bool over_run;
u32 tmp = 0;
u8 q_idx = *val;
+ bool dma64 = rtlpriv->cfg->mod_params->dma64;
if (istx) {
switch (desc_name) {
@@ -982,7 +977,12 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
MAX_RECEIVE_BUFFER_SIZE +
RX_DESC_SIZE);
- SET_RX_BUFFER_PHYSICAL_LOW(pdesc, *(u32 *)val);
+ SET_RX_BUFFER_PHYSICAL_LOW(pdesc, (*(dma_addr_t *)val) &
+ DMA_BIT_MASK(32));
+ SET_RX_BUFFER_PHYSICAL_HIGH(pdesc,
+ ((u64)(*(dma_addr_t *)val)
+ >> 32),
+ dma64);
break;
case HW_DESC_RXERO:
SET_RX_DESC_EOR(pdesc, 1);
@@ -996,9 +996,12 @@ void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
}
}
-u32 rtl92ee_get_desc(u8 *pdesc, bool istx, u8 desc_name)
+u64 rtl92ee_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name)
{
- u32 ret = 0;
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
+ u64 ret = 0;
+ bool dma64 = rtlpriv->cfg->mod_params->dma64;
if (istx) {
switch (desc_name) {
@@ -1007,6 +1010,8 @@ u32 rtl92ee_get_desc(u8 *pdesc, bool istx, u8 desc_name)
break;
case HW_DESC_TXBUFF_ADDR:
ret = GET_TXBUFFER_DESC_ADDR_LOW(pdesc, 1);
+ ret |= (u64)GET_TXBUFFER_DESC_ADDR_HIGH(pdesc, 1,
+ dma64) << 32;
break;
default:
WARN_ONCE(true,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h
index b0105c529010..48c16fff20c6 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/trx.h
@@ -26,24 +26,6 @@
#ifndef __RTL92E_TRX_H__
#define __RTL92E_TRX_H__
-#if (DMA_IS_64BIT == 1)
-#if (RTL8192EE_SEG_NUM == 2)
-#define TX_BD_DESC_SIZE 128
-#elif (RTL8192EE_SEG_NUM == 1)
-#define TX_BD_DESC_SIZE 64
-#elif (RTL8192EE_SEG_NUM == 0)
-#define TX_BD_DESC_SIZE 32
-#endif
-#else
-#if (RTL8192EE_SEG_NUM == 2)
-#define TX_BD_DESC_SIZE 64
-#elif (RTL8192EE_SEG_NUM == 1)
-#define TX_BD_DESC_SIZE 32
-#elif (RTL8192EE_SEG_NUM == 0)
-#define TX_BD_DESC_SIZE 16
-#endif
-#endif
-
#define TX_DESC_SIZE 64
#define RX_DRV_INFO_SIZE_UNIT 8
@@ -331,111 +313,34 @@
SET_BITS_TO_LE_4BYTE(__pdesc+(__set*16)+8, 0, 32, __val)
/* for Txfilldescroptor92ee, fill the desc content. */
-#if (DMA_IS_64BIT == 1)
-#define SET_TXBUFFER_DESC_LEN_WITH_OFFSET(__pdesc, __offset, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*16), 0, 16, __val)
-#define SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(__pdesc, __offset, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*16), 31, 1, __val)
-#define SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(__pdesc, __offset, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*16)+4, 0, 32, __val)
-#define SET_TXBUFFER_DESC_ADD_HIGT_WITH_OFFSET(__pdesc, __offset, __val)\
- SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*16)+8, 0, 32, __val)
-#define GET_TXBUFFER_DESC_ADDR_LOW(__pdesc, __offset) \
- LE_BITS_TO_4BYTE(__pdesc+(__offset*16)+4, 0, 32)
-#else
-#define SET_TXBUFFER_DESC_LEN_WITH_OFFSET(__pdesc, __offset, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*8), 0, 16, __val)
-#define SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(__pdesc, __offset, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*8), 31, 1, __val)
-#define SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(__pdesc, __offset, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+(__offset*8)+4, 0, 32, __val)
-#define SET_TXBUFFER_DESC_ADD_HIGT_WITH_OFFSET(__pdesc, __offset, __val)
-#define GET_TXBUFFER_DESC_ADDR_LOW(__pdesc, __offset) \
- LE_BITS_TO_4BYTE(__pdesc+(__offset*8)+4, 0, 32)
-#endif
+#define SET_TXBUFFER_DESC_LEN_WITH_OFFSET(__pdesc, __offset, __val) \
+ SET_BITS_TO_LE_4BYTE((__pdesc) + ((__offset) * 16), 0, 16, __val)
+#define SET_TXBUFFER_DESC_AMSDU_WITH_OFFSET(__pdesc, __offset, __val) \
+ SET_BITS_TO_LE_4BYTE((__pdesc) + ((__offset) * 16), 31, 1, __val)
+#define SET_TXBUFFER_DESC_ADD_LOW_WITH_OFFSET(__pdesc, __offset, __val) \
+ SET_BITS_TO_LE_4BYTE((__pdesc) + ((__offset) * 16) + 4, 0, 32, __val)
+#define SET_TXBUFFER_DESC_ADD_HIGH_WITH_OFFSET(pbd, off, val, dma64) \
+ (dma64 ? SET_BITS_TO_LE_4BYTE((pbd) + ((off) * 16) + 8, 0, 32, val) : 0)
+#define GET_TXBUFFER_DESC_ADDR_LOW(__pdesc, __offset) \
+ LE_BITS_TO_4BYTE((__pdesc) + ((__offset) * 16) + 4, 0, 32)
+#define GET_TXBUFFER_DESC_ADDR_HIGH(pbd, off, dma64) \
+ (dma64 ? LE_BITS_TO_4BYTE((pbd) + ((off) * 16) + 8, 0, 32) : 0)
/* Dword 0 */
-#define SET_TX_BUFF_DESC_LEN_0(__pdesc, __val) \
+#define SET_TX_BUFF_DESC_LEN_0(__pdesc, __val) \
SET_BITS_TO_LE_4BYTE(__pdesc, 0, 14, __val)
-#define SET_TX_BUFF_DESC_PSB(__pdesc, __val) \
+#define SET_TX_BUFF_DESC_PSB(__pdesc, __val) \
SET_BITS_TO_LE_4BYTE(__pdesc, 16, 15, __val)
-#define SET_TX_BUFF_DESC_OWN(__pdesc, __val) \
+#define SET_TX_BUFF_DESC_OWN(__pdesc, __val) \
SET_BITS_TO_LE_4BYTE(__pdesc, 31, 1, __val)
/* Dword 1 */
-#define SET_TX_BUFF_DESC_ADDR_LOW_0(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+4, 0, 32, __val)
-#if (DMA_IS_64BIT == 1)
+#define SET_TX_BUFF_DESC_ADDR_LOW_0(__pdesc, __val) \
+ SET_BITS_TO_LE_4BYTE((__pdesc) + 4, 0, 32, __val)
/* Dword 2 */
-#define SET_TX_BUFF_DESC_ADDR_HIGH_0(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+8, 0, 32, __val)
+#define SET_TX_BUFF_DESC_ADDR_HIGH_0(bdesc, val, dma64) \
+ SET_TXBUFFER_DESC_ADD_HIGH_WITH_OFFSET(bdesc, 0, val, dma64)
/* Dword 3 / RESERVED 0 */
-/* Dword 4 */
-#define SET_TX_BUFF_DESC_LEN_1(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+16, 0, 16, __val)
-#define SET_TX_BUFF_DESC_AMSDU_1(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+16, 31, 1, __val)
-/* Dword 5 */
-#define SET_TX_BUFF_DESC_ADDR_LOW_1(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+20, 0, 32, __val)
-/* Dword 6 */
-#define SET_TX_BUFF_DESC_ADDR_HIGH_1(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+24, 0, 32, __val)
-/* Dword 7 / RESERVED 0 */
-/* Dword 8 */
-#define SET_TX_BUFF_DESC_LEN_2(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+32, 0, 16, __val)
-#define SET_TX_BUFF_DESC_AMSDU_2(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+32, 31, 1, __val)
-/* Dword 9 */
-#define SET_TX_BUFF_DESC_ADDR_LOW_2(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+36, 0, 32, __val)
-/* Dword 10 */
-#define SET_TX_BUFF_DESC_ADDR_HIGH_2(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+40, 0, 32, __val)
-/* Dword 11 / RESERVED 0 */
-/* Dword 12 */
-#define SET_TX_BUFF_DESC_LEN_3(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+48, 0, 16, __val)
-#define SET_TX_BUFF_DESC_AMSDU_3(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+48, 31, 1, __val)
-/* Dword 13 */
-#define SET_TX_BUFF_DESC_ADDR_LOW_3(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+52, 0, 32, __val)
-/* Dword 14 */
-#define SET_TX_BUFF_DESC_ADDR_HIGH_3(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+56, 0, 32, __val)
-/* Dword 15 / RESERVED 0 */
-#else
-#define SET_TX_BUFF_DESC_ADDR_HIGH_0(__pdesc, __val)
-/* Dword 2 */
-#define SET_TX_BUFF_DESC_LEN_1(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+8, 0, 16, __val)
-#define SET_TX_BUFF_DESC_AMSDU_1(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+8, 31, 1, __val)
-/* Dword 3 */
-#define SET_TX_BUFF_DESC_ADDR_LOW_1(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+12, 0, 32, __val)
-#define SET_TX_BUFF_DESC_ADDR_HIGH_1(__pdesc, __val)
-/* Dword 4 */
-#define SET_TX_BUFF_DESC_LEN_2(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+16, 0, 16, __val)
-#define SET_TX_BUFF_DESC_AMSDU_2(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+16, 31, 1, __val)
-/* Dword 5 */
-#define SET_TX_BUFF_DESC_ADDR_LOW_2(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+20, 0, 32, __val)
-#define SET_TX_BUFF_DESC_ADDR_HIGH_2(__pdesc, __val)
-/* Dword 6 */
-#define SET_TX_BUFF_DESC_LEN_3(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+24, 0, 16, __val)
-#define SET_TX_BUFF_DESC_AMSDU_3(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+24, 31, 1, __val)
-/* Dword 7 */
-#define SET_TX_BUFF_DESC_ADDR_LOW_3(__pdesc, __val) \
- SET_BITS_TO_LE_4BYTE(__pdesc+28, 0, 32, __val)
-#define SET_TX_BUFF_DESC_ADDR_HIGH_3(__pdesc, __val)
-#endif
/* RX buffer */
@@ -463,8 +368,8 @@
SET_BITS_TO_LE_4BYTE(__status+4, 0, 32, __val)
/* DWORD 2 */
-#define SET_RX_BUFFER_PHYSICAL_HIGH(__status, __val) \
- SET_BITS_TO_LE_4BYTE(__status+8, 0, 32, __val)
+#define SET_RX_BUFFER_PHYSICAL_HIGH(__rx_status_desc, __val, dma64) \
+ (dma64 ? SET_BITS_TO_LE_4BYTE((__rx_status_desc) + 8, 0, 32, __val) : 0)
#define GET_RX_DESC_PKT_LEN(__pdesc) \
LE_BITS_TO_4BYTE(__pdesc, 0, 14)
@@ -850,7 +755,8 @@ bool rtl92ee_rx_query_desc(struct ieee80211_hw *hw,
void rtl92ee_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
u8 desc_name, u8 *val);
-u32 rtl92ee_get_desc(u8 *pdesc, bool istx, u8 desc_name);
+u64 rtl92ee_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name);
bool rtl92ee_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue, u16 index);
void rtl92ee_tx_polling(struct ieee80211_hw *hw, u8 hw_queue);
void rtl92ee_tx_fill_cmddesc(struct ieee80211_hw *hw, u8 *pdesc,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c
index 2c073a77b194..44f510a94b09 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/dm.c
@@ -295,7 +295,8 @@ static void _rtl92s_dm_refresh_rateadaptive_mask(struct ieee80211_hw *hw)
sta = rtl_find_sta(hw, mac->bssid);
if (sta)
rtlpriv->cfg->ops->update_rate_tbl(hw, sta,
- ra->ratr_state);
+ ra->ratr_state,
+ true);
rcu_read_unlock();
ra->pre_ratr_state = ra->ratr_state;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
index ba1bd782238b..66be79ca4247 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c
@@ -2129,7 +2129,7 @@ static void rtl92se_update_hal_rate_table(struct ieee80211_hw *hw,
static void rtl92se_update_hal_rate_mask(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level)
+ u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &(rtlpriv->phy);
@@ -2288,12 +2288,12 @@ static void rtl92se_update_hal_rate_mask(struct ieee80211_hw *hw,
}
void rtl92se_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl92se_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl92se_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
else
rtl92se_update_hal_rate_table(hw, sta);
}
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h
index 86bce1be83ce..3c93d30fcae7 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.h
@@ -59,7 +59,7 @@ void rtl92se_update_interrupt_mask(struct ieee80211_hw *hw,
void rtl92se_set_hw_reg(struct ieee80211_hw *hw, u8 variable,
u8 *val);
void rtl92se_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level);
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw);
void rtl92se_update_channel_access_setting(struct ieee80211_hw *hw);
bool rtl92se_gpio_radio_on_off_checking(struct ieee80211_hw *hw,
u8 *valid);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
index d7945b9db493..d55554b7fa9a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c
@@ -41,6 +41,7 @@
static void rtl92s_init_aspm_vars(struct ieee80211_hw *hw)
{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
/*close ASPM for AMD defaultly */
@@ -77,7 +78,7 @@ static void rtl92s_init_aspm_vars(struct ieee80211_hw *hw)
* 1 - Support ASPM,
* 2 - According to chipset.
*/
- rtlpci->const_support_pciaspm = 2;
+ rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support;
}
static void rtl92se_fw_cb(const struct firmware *firmware, void *context)
@@ -240,7 +241,7 @@ static bool rtl92se_is_tx_desc_closed(struct ieee80211_hw *hw, u8 hw_queue,
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue];
u8 *entry = (u8 *)(&ring->desc[ring->idx]);
- u8 own = (u8)rtl92se_get_desc(entry, true, HW_DESC_OWN);
+ u8 own = (u8)rtl92se_get_desc(hw, entry, true, HW_DESC_OWN);
if (own)
return false;
@@ -297,6 +298,7 @@ static struct rtl_mod_params rtl92se_mod_params = {
.inactiveps = true,
.swctrl_lps = true,
.fwctrl_lps = false,
+ .aspm_support = 2,
.debug_level = 0,
.debug_mask = 0,
};
@@ -422,10 +424,12 @@ module_param_named(debug_mask, rtl92se_mod_params.debug_mask, ullong, 0644);
module_param_named(ips, rtl92se_mod_params.inactiveps, bool, 0444);
module_param_named(swlps, rtl92se_mod_params.swctrl_lps, bool, 0444);
module_param_named(fwlps, rtl92se_mod_params.fwctrl_lps, bool, 0444);
+module_param_named(aspm, rtl92se_mod_params.aspm_support, int, 0444);
MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n");
MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 1)\n");
MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 0)\n");
+MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n");
MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c
index a01dbd31d1b4..e1904c39f147 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.c
@@ -610,7 +610,8 @@ void rtl92se_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
}
}
-u32 rtl92se_get_desc(u8 *desc, bool istx, u8 desc_name)
+u64 rtl92se_get_desc(struct ieee80211_hw *hw,
+ u8 *desc, bool istx, u8 desc_name)
{
u32 ret = 0;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h
index 728589138072..81a5445c04a3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/trx.h
@@ -38,7 +38,8 @@ bool rtl92se_rx_query_desc(struct ieee80211_hw *hw, struct rtl_stats *stats,
struct sk_buff *skb);
void rtl92se_set_desc(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
u8 desc_name, u8 *val);
-u32 rtl92se_get_desc(u8 *pdesc, bool istx, u8 desc_name);
+u64 rtl92se_get_desc(struct ieee80211_hw *hw,
+ u8 *desc, bool istx, u8 desc_name);
void rtl92se_tx_polling(struct ieee80211_hw *hw, u8 hw_queue);
#endif
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
index 5ac7b815648a..8cfd4993c90a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
@@ -1943,7 +1943,7 @@ static void rtl8723e_update_hal_rate_table(struct ieee80211_hw *hw,
static void rtl8723e_update_hal_rate_mask(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level)
+ u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &(rtlpriv->phy);
@@ -2074,12 +2074,13 @@ static void rtl8723e_update_hal_rate_mask(struct ieee80211_hw *hw,
}
void rtl8723e_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level,
+ bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl8723e_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl8723e_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
else
rtl8723e_update_hal_rate_table(hw, sta);
}
@@ -2103,7 +2104,7 @@ bool rtl8723e_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
struct rtl_phy *rtlphy = &(rtlpriv->phy);
- enum rf_pwrstate e_rfpowerstate_toset, cur_rfstate;
+ enum rf_pwrstate e_rfpowerstate_toset;
u8 u1tmp;
bool b_actuallyset = false;
@@ -2122,8 +2123,6 @@ bool rtl8723e_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid)
spin_unlock(&rtlpriv->locks.rf_ps_lock);
}
- cur_rfstate = ppsc->rfpwr_state;
-
rtl_write_byte(rtlpriv, REG_GPIO_IO_SEL_2,
rtl_read_byte(rtlpriv, REG_GPIO_IO_SEL_2)&~(BIT(1)));
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h
index 32c1ace97c3f..1e7063105c96 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.h
@@ -49,7 +49,8 @@ void rtl8723e_update_interrupt_mask(struct ieee80211_hw *hw,
u32 add_msr, u32 rm_msr);
void rtl8723e_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val);
void rtl8723e_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level);
+ struct ieee80211_sta *sta, u8 rssi_level,
+ bool update_bw);
void rtl8723e_update_channel_access_setting(struct ieee80211_hw *hw);
bool rtl8723e_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid);
void rtl8723e_enable_hw_security_config(struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
index 97b8bd294aa8..a545ea317323 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
@@ -46,6 +46,7 @@
static void rtl8723e_init_aspm_vars(struct ieee80211_hw *hw)
{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
/*close ASPM for AMD defaultly */
@@ -85,7 +86,7 @@ static void rtl8723e_init_aspm_vars(struct ieee80211_hw *hw)
* 1 - Support ASPM,
* 2 - According to chipset.
*/
- rtlpci->const_support_pciaspm = 1;
+ rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support;
}
int rtl8723e_init_sw_vars(struct ieee80211_hw *hw)
@@ -268,6 +269,7 @@ static struct rtl_mod_params rtl8723e_mod_params = {
.inactiveps = true,
.swctrl_lps = false,
.fwctrl_lps = true,
+ .aspm_support = 1,
.debug_level = 0,
.debug_mask = 0,
.msi_support = false,
@@ -389,6 +391,7 @@ module_param_named(ips, rtl8723e_mod_params.inactiveps, bool, 0444);
module_param_named(swlps, rtl8723e_mod_params.swctrl_lps, bool, 0444);
module_param_named(fwlps, rtl8723e_mod_params.fwctrl_lps, bool, 0444);
module_param_named(msi, rtl8723e_mod_params.msi_support, bool, 0444);
+module_param_named(aspm, rtl8723e_mod_params.aspm_support, int, 0444);
module_param_named(disable_watchdog, rtl8723e_mod_params.disable_watchdog,
bool, 0444);
MODULE_PARM_DESC(swenc, "Set to 1 for software crypto (default 0)\n");
@@ -396,6 +399,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n");
MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n");
MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n");
+MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n");
MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n");
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c
index f713c7249fed..23485602a9a1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.c
@@ -643,7 +643,8 @@ void rtl8723e_set_desc(struct ieee80211_hw *hw, u8 *pdesc,
}
}
-u32 rtl8723e_get_desc(u8 *pdesc, bool istx, u8 desc_name)
+u64 rtl8723e_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name)
{
u32 ret = 0;
@@ -686,7 +687,7 @@ bool rtl8723e_is_tx_desc_closed(struct ieee80211_hw *hw,
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue];
u8 *entry = (u8 *)(&ring->desc[ring->idx]);
- u8 own = (u8)rtl8723e_get_desc(entry, true, HW_DESC_OWN);
+ u8 own = (u8)rtl8723e_get_desc(hw, entry, true, HW_DESC_OWN);
/**
*beacon packet will only use the first
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h
index 43d4c791d563..985ce0b77ea5 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/trx.h
@@ -708,7 +708,8 @@ bool rtl8723e_rx_query_desc(struct ieee80211_hw *hw,
u8 *pdesc, struct sk_buff *skb);
void rtl8723e_set_desc(struct ieee80211_hw *hw,
u8 *pdesc, bool istx, u8 desc_name, u8 *val);
-u32 rtl8723e_get_desc(u8 *pdesc, bool istx, u8 desc_name);
+u64 rtl8723e_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name);
bool rtl8723e_is_tx_desc_closed(struct ieee80211_hw *hw,
u8 hw_queue, u16 index);
void rtl8723e_tx_polling(struct ieee80211_hw *hw, u8 hw_queue);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c
index 15c117e95a99..47e87a21ae27 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/dm.c
@@ -984,7 +984,8 @@ static void rtl8723be_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw)
sta = rtl_find_sta(hw, mac->bssid);
if (sta)
rtlpriv->cfg->ops->update_rate_tbl(hw, sta,
- p_ra->ratr_state);
+ p_ra->ratr_state,
+ true);
rcu_read_unlock();
p_ra->pre_ratr_state = p_ra->ratr_state;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
index 4d47b97adfed..239518bd31f1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c
@@ -60,6 +60,7 @@ static void _rtl8723be_return_beacon_queue_skb(struct ieee80211_hw *hw)
pci_unmap_single(rtlpci->pdev,
rtlpriv->cfg->ops->get_desc(
+ hw,
(u8 *)entry, true, HW_DESC_TXBUFF_ADDR),
skb->len, PCI_DMA_TODEVICE);
kfree_skb(skb);
@@ -2324,7 +2325,7 @@ static u8 _rtl8723be_mrate_idx_to_arfr_id(struct ieee80211_hw *hw,
static void rtl8723be_update_hal_rate_mask(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level)
+ u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &(rtlpriv->phy);
@@ -2440,7 +2441,7 @@ static void rtl8723be_update_hal_rate_mask(struct ieee80211_hw *hw,
rate_mask[0] = macid;
rate_mask[1] = _rtl8723be_mrate_idx_to_arfr_id(hw, ratr_index) |
(shortgi ? 0x80 : 0x00);
- rate_mask[2] = curtxbw_40mhz;
+ rate_mask[2] = curtxbw_40mhz | ((!update_bw) << 3);
rate_mask[3] = (u8)(ratr_bitmap & 0x000000ff);
rate_mask[4] = (u8)((ratr_bitmap & 0x0000ff00) >> 8);
@@ -2460,11 +2461,11 @@ static void rtl8723be_update_hal_rate_mask(struct ieee80211_hw *hw,
void rtl8723be_update_hal_rate_tbl(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level)
+ u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl8723be_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl8723be_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
}
void rtl8723be_update_channel_access_setting(struct ieee80211_hw *hw)
@@ -2486,7 +2487,7 @@ bool rtl8723be_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
struct rtl_phy *rtlphy = &(rtlpriv->phy);
- enum rf_pwrstate e_rfpowerstate_toset, cur_rfstate;
+ enum rf_pwrstate e_rfpowerstate_toset;
u8 u1tmp;
bool b_actuallyset = false;
@@ -2505,8 +2506,6 @@ bool rtl8723be_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid)
spin_unlock(&rtlpriv->locks.rf_ps_lock);
}
- cur_rfstate = ppsc->rfpwr_state;
-
rtl_write_byte(rtlpriv, REG_GPIO_IO_SEL_2,
rtl_read_byte(rtlpriv, REG_GPIO_IO_SEL_2) & ~(BIT(1)));
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h
index eae863d08de8..54d7afa7297e 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.h
@@ -46,7 +46,7 @@ void rtl8723be_update_interrupt_mask(struct ieee80211_hw *hw,
void rtl8723be_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val);
void rtl8723be_update_hal_rate_tbl(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level);
+ u8 rssi_level, bool update_bw);
void rtl8723be_update_channel_access_setting(struct ieee80211_hw *hw);
bool rtl8723be_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid);
void rtl8723be_enable_hw_security_config(struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
index 2b16a1467e78..6a42988aad65 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
@@ -46,6 +46,7 @@
static void rtl8723be_init_aspm_vars(struct ieee80211_hw *hw)
{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
/*close ASPM for AMD defaultly */
@@ -82,7 +83,7 @@ static void rtl8723be_init_aspm_vars(struct ieee80211_hw *hw)
* 1 - Support ASPM,
* 2 - According to chipset.
*/
- rtlpci->const_support_pciaspm = 1;
+ rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support;
}
int rtl8723be_init_sw_vars(struct ieee80211_hw *hw)
@@ -271,6 +272,7 @@ static struct rtl_mod_params rtl8723be_mod_params = {
.swctrl_lps = false,
.fwctrl_lps = true,
.msi_support = false,
+ .aspm_support = 1,
.disable_watchdog = false,
.debug_level = 0,
.debug_mask = 0,
@@ -396,6 +398,7 @@ module_param_named(ips, rtl8723be_mod_params.inactiveps, bool, 0444);
module_param_named(swlps, rtl8723be_mod_params.swctrl_lps, bool, 0444);
module_param_named(fwlps, rtl8723be_mod_params.fwctrl_lps, bool, 0444);
module_param_named(msi, rtl8723be_mod_params.msi_support, bool, 0444);
+module_param_named(aspm, rtl8723be_mod_params.aspm_support, int, 0444);
module_param_named(disable_watchdog, rtl8723be_mod_params.disable_watchdog,
bool, 0444);
module_param_named(ant_sel, rtl8723be_mod_params.ant_sel, int, 0444);
@@ -404,6 +407,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n");
MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n");
MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 0)\n");
+MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n");
MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
MODULE_PARM_DESC(disable_watchdog,
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c
index 0e8944119652..fd9b38aa08a1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.c
@@ -695,7 +695,8 @@ void rtl8723be_set_desc(struct ieee80211_hw *hw, u8 *pdesc,
}
}
-u32 rtl8723be_get_desc(u8 *pdesc, bool istx, u8 desc_name)
+u64 rtl8723be_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name)
{
u32 ret = 0;
@@ -738,7 +739,7 @@ bool rtl8723be_is_tx_desc_closed(struct ieee80211_hw *hw,
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue];
u8 *entry = (u8 *)(&ring->desc[ring->idx]);
- u8 own = (u8)rtl8723be_get_desc(entry, true, HW_DESC_OWN);
+ u8 own = (u8)rtl8723be_get_desc(hw, entry, true, HW_DESC_OWN);
/*beacon packet will only use the first
*descriptor defautly,and the own may not
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h
index 0274659f48ed..988bf0586674 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/trx.h
@@ -624,7 +624,8 @@ bool rtl8723be_rx_query_desc(struct ieee80211_hw *hw,
u8 *pdesc, struct sk_buff *skb);
void rtl8723be_set_desc(struct ieee80211_hw *hw, u8 *pdesc,
bool istx, u8 desc_name, u8 *val);
-u32 rtl8723be_get_desc(u8 *pdesc, bool istx, u8 desc_name);
+u64 rtl8723be_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name);
bool rtl8723be_is_tx_desc_closed(struct ieee80211_hw *hw,
u8 hw_queue, u16 index);
void rtl8723be_tx_polling(struct ieee80211_hw *hw, u8 hw_queue);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c
index ac573d69f6d6..efa7e1262461 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723com/fw_common.c
@@ -253,7 +253,8 @@ bool rtl8723_cmd_send_packet(struct ieee80211_hw *hw,
spin_lock_irqsave(&rtlpriv->locks.irq_th_lock, flags);
pdesc = &ring->desc[0];
- own = (u8) rtlpriv->cfg->ops->get_desc((u8 *)pdesc, true, HW_DESC_OWN);
+ own = (u8)rtlpriv->cfg->ops->get_desc(hw, (u8 *)pdesc, true,
+ HW_DESC_OWN);
rtlpriv->cfg->ops->fill_tx_cmddesc(hw, (u8 *)pdesc, 1, 1, skb);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c
index 32900c51f024..b11365a5ee1f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/dm.c
@@ -2592,7 +2592,7 @@ static void rtl8821ae_dm_refresh_rate_adaptive_mask(struct ieee80211_hw *hw)
sta = rtl_find_sta(hw, mac->bssid);
if (sta)
rtlpriv->cfg->ops->update_rate_tbl(hw,
- sta, p_ra->ratr_state);
+ sta, p_ra->ratr_state, true);
rcu_read_unlock();
p_ra->pre_ratr_state = p_ra->ratr_state;
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
index 1d431d4bf6d2..6408bc8456f3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c
@@ -57,6 +57,7 @@ static void _rtl8821ae_return_beacon_queue_skb(struct ieee80211_hw *hw)
pci_unmap_single(rtlpci->pdev,
rtlpriv->cfg->ops->get_desc(
+ hw,
(u8 *)entry, true, HW_DESC_TXBUFF_ADDR),
skb->len, PCI_DMA_TODEVICE);
kfree_skb(skb);
@@ -3598,7 +3599,7 @@ static bool _rtl8821ae_get_ra_shortgi(struct ieee80211_hw *hw, struct ieee80211_
}
static void rtl8821ae_update_hal_rate_mask(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_phy *rtlphy = &rtlpriv->phy;
@@ -3777,7 +3778,7 @@ static void rtl8821ae_update_hal_rate_mask(struct ieee80211_hw *hw,
rate_mask[0] = macid;
rate_mask[1] = ratr_index | (b_shortgi ? 0x80 : 0x00);
- rate_mask[2] = rtlphy->current_chan_bw
+ rate_mask[2] = rtlphy->current_chan_bw | ((!update_bw) << 3)
| _rtl8821ae_get_vht_eni(wirelessmode, ratr_bitmap)
| _rtl8821ae_get_ra_ldpc(hw, macid, sta_entry, wirelessmode);
@@ -3798,11 +3799,11 @@ static void rtl8821ae_update_hal_rate_mask(struct ieee80211_hw *hw,
}
void rtl8821ae_update_hal_rate_tbl(struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level)
+ struct ieee80211_sta *sta, u8 rssi_level, bool update_bw)
{
struct rtl_priv *rtlpriv = rtl_priv(hw);
if (rtlpriv->dm.useramask)
- rtl8821ae_update_hal_rate_mask(hw, sta, rssi_level);
+ rtl8821ae_update_hal_rate_mask(hw, sta, rssi_level, update_bw);
else
/*RT_TRACE(rtlpriv, COMP_RATR,DBG_LOUD,
"rtl8821ae_update_hal_rate_tbl() Error! 8821ae FW RA Only\n");*/
@@ -3845,7 +3846,7 @@ bool rtl8821ae_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid)
struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
struct rtl_phy *rtlphy = &rtlpriv->phy;
- enum rf_pwrstate e_rfpowerstate_toset, cur_rfstate;
+ enum rf_pwrstate e_rfpowerstate_toset;
u8 u1tmp = 0;
bool b_actuallyset = false;
@@ -3864,8 +3865,6 @@ bool rtl8821ae_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid)
spin_unlock(&rtlpriv->locks.rf_ps_lock);
}
- cur_rfstate = ppsc->rfpwr_state;
-
rtl_write_byte(rtlpriv, REG_GPIO_IO_SEL_2,
rtl_read_byte(rtlpriv,
REG_GPIO_IO_SEL_2) & ~(BIT(1)));
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h
index a3553e3abaa1..50fa9c718189 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.h
@@ -46,7 +46,7 @@ void rtl8821ae_update_interrupt_mask(struct ieee80211_hw *hw,
void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val);
void rtl8821ae_update_hal_rate_tbl(struct ieee80211_hw *hw,
struct ieee80211_sta *sta,
- u8 rssi_level);
+ u8 rssi_level, bool update_bw);
void rtl8821ae_update_channel_access_setting(struct ieee80211_hw *hw);
bool rtl8821ae_gpio_radio_on_off_checking(struct ieee80211_hw *hw, u8 *valid);
void rtl8821ae_enable_hw_security_config(struct ieee80211_hw *hw);
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
index 0894ef48ab87..ab5d462b1a3a 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
@@ -43,6 +43,7 @@
static void rtl8821ae_init_aspm_vars(struct ieee80211_hw *hw)
{
+ struct rtl_priv *rtlpriv = rtl_priv(hw);
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
/*close ASPM for AMD defaultly */
@@ -82,7 +83,7 @@ static void rtl8821ae_init_aspm_vars(struct ieee80211_hw *hw)
* 1 - Support ASPM,
* 2 - According to chipset.
*/
- rtlpci->const_support_pciaspm = 1;
+ rtlpci->const_support_pciaspm = rtlpriv->cfg->mod_params->aspm_support;
}
/*InitializeVariables8812E*/
@@ -313,6 +314,7 @@ static struct rtl_mod_params rtl8821ae_mod_params = {
.swctrl_lps = false,
.fwctrl_lps = true,
.msi_support = true,
+ .aspm_support = 1,
.int_clear = true,
.debug_level = 0,
.debug_mask = 0,
@@ -444,6 +446,7 @@ module_param_named(ips, rtl8821ae_mod_params.inactiveps, bool, 0444);
module_param_named(swlps, rtl8821ae_mod_params.swctrl_lps, bool, 0444);
module_param_named(fwlps, rtl8821ae_mod_params.fwctrl_lps, bool, 0444);
module_param_named(msi, rtl8821ae_mod_params.msi_support, bool, 0444);
+module_param_named(aspm, rtl8821ae_mod_params.aspm_support, int, 0444);
module_param_named(disable_watchdog, rtl8821ae_mod_params.disable_watchdog,
bool, 0444);
module_param_named(int_clear, rtl8821ae_mod_params.int_clear, bool, 0444);
@@ -452,6 +455,7 @@ MODULE_PARM_DESC(ips, "Set to 0 to not use link power save (default 1)\n");
MODULE_PARM_DESC(swlps, "Set to 1 to use SW control power save (default 0)\n");
MODULE_PARM_DESC(fwlps, "Set to 1 to use FW control power save (default 1)\n");
MODULE_PARM_DESC(msi, "Set to 1 to use MSI interrupts mode (default 1)\n");
+MODULE_PARM_DESC(aspm, "Set to 1 to enable ASPM (default 1)\n");
MODULE_PARM_DESC(debug_level, "Set debug level (0-5) (default 0)");
MODULE_PARM_DESC(debug_mask, "Set debug mask (default 0)");
MODULE_PARM_DESC(disable_watchdog, "Set to 1 to disable the watchdog (default 0)\n");
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c
index 749818929e8f..1e1bacf562f3 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.c
@@ -935,7 +935,8 @@ void rtl8821ae_set_desc(struct ieee80211_hw *hw, u8 *pdesc,
}
}
-u32 rtl8821ae_get_desc(u8 *pdesc, bool istx, u8 desc_name)
+u64 rtl8821ae_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name)
{
u32 ret = 0;
@@ -980,7 +981,7 @@ bool rtl8821ae_is_tx_desc_closed(struct ieee80211_hw *hw,
struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
struct rtl8192_tx_ring *ring = &rtlpci->tx_ring[hw_queue];
u8 *entry = (u8 *)(&ring->desc[ring->idx]);
- u8 own = (u8)rtl8821ae_get_desc(entry, true, HW_DESC_OWN);
+ u8 own = (u8)rtl8821ae_get_desc(hw, entry, true, HW_DESC_OWN);
/**
*beacon packet will only use the first
diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h
index 9843a616dcec..221dd2b29d3b 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/trx.h
@@ -620,7 +620,8 @@ bool rtl8821ae_rx_query_desc(struct ieee80211_hw *hw,
u8 *pdesc, struct sk_buff *skb);
void rtl8821ae_set_desc(struct ieee80211_hw *hw, u8 *pdesc,
bool istx, u8 desc_name, u8 *val);
-u32 rtl8821ae_get_desc(u8 *pdesc, bool istx, u8 desc_name);
+u64 rtl8821ae_get_desc(struct ieee80211_hw *hw,
+ u8 *pdesc, bool istx, u8 desc_name);
bool rtl8821ae_is_tx_desc_closed(struct ieee80211_hw *hw,
u8 hw_queue, u16 index);
void rtl8821ae_tx_polling(struct ieee80211_hw *hw, u8 hw_queue);
diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h
index 1ab1024330fb..7dfc73b554f1 100644
--- a/drivers/net/wireless/realtek/rtlwifi/wifi.h
+++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h
@@ -169,16 +169,14 @@ enum rtl8192c_h2c_cmd {
#define MAX_BASE_NUM_IN_PHY_REG_PG_24G 6
#define MAX_BASE_NUM_IN_PHY_REG_PG_5G 5
-#define RTL8192EE_SEG_NUM 1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */
+#define BUFDESC_SEG_NUM 1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */
#define DEL_SW_IDX_SZ 30
-#define BAND_NUM 3
/* For now, it's just for 8192ee
* but not OK yet, keep it 0
*/
-#define DMA_IS_64BIT 0
-#define RTL8192EE_SEG_NUM 1 /* 0:2 seg, 1: 4 seg, 2: 8 seg */
+#define RTL8192EE_SEG_NUM BUFDESC_SEG_NUM
enum rf_tx_num {
RF_1TX = 0,
@@ -561,6 +559,11 @@ enum rf_type {
RF_1T2R = 1,
RF_2T2R = 2,
RF_2T2R_GREEN = 3,
+ RF_2T3R = 4,
+ RF_2T4R = 5,
+ RF_3T3R = 6,
+ RF_3T4R = 7,
+ RF_4T4R = 8,
};
enum ht_channel_width {
@@ -1014,10 +1017,17 @@ struct init_gain {
};
struct wireless_stats {
- unsigned long txbytesunicast;
- unsigned long txbytesmulticast;
- unsigned long txbytesbroadcast;
- unsigned long rxbytesunicast;
+ u64 txbytesunicast;
+ u64 txbytesmulticast;
+ u64 txbytesbroadcast;
+ u64 rxbytesunicast;
+
+ u64 txbytesunicast_inperiod;
+ u64 rxbytesunicast_inperiod;
+ u32 txbytesunicast_inperiod_tp;
+ u32 rxbytesunicast_inperiod_tp;
+ u64 txbytesunicast_last;
+ u64 rxbytesunicast_last;
long rx_snr_db[4];
/*Correct smoothed ss in Dbm, only used
@@ -1323,6 +1333,7 @@ struct rtl_sta_info {
struct rtl_tid_data tids[MAX_TID_COUNT];
/* just used for ap adhoc or mesh*/
struct rssi_sta rssi_stat;
+ u8 rssi_level;
u16 wireless_mode;
u8 ratr_index;
u8 mimo_ps;
@@ -1736,21 +1747,6 @@ struct rtl_dm {
s8 swing_diff_2g;
s8 swing_diff_5g;
- u8 delta_swing_table_idx_24gccka_p[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24gccka_n[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24gcckb_p[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24gcckb_n[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24ga_p[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24ga_n[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24gb_p[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24gb_n[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_5ga_p[BAND_NUM][DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_5ga_n[BAND_NUM][DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_5gb_p[BAND_NUM][DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_5gb_n[BAND_NUM][DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24ga_p_8188e[DEL_SW_IDX_SZ];
- u8 delta_swing_table_idx_24ga_n_8188e[DEL_SW_IDX_SZ];
-
/* DMSP */
bool supp_phymode_switch;
@@ -2127,7 +2123,8 @@ struct rtl_hal_ops {
void (*get_hw_reg) (struct ieee80211_hw *hw, u8 variable, u8 *val);
void (*set_hw_reg) (struct ieee80211_hw *hw, u8 variable, u8 *val);
void (*update_rate_tbl) (struct ieee80211_hw *hw,
- struct ieee80211_sta *sta, u8 rssi_level);
+ struct ieee80211_sta *sta, u8 rssi_leve,
+ bool update_bw);
void (*pre_fill_tx_bd_desc)(struct ieee80211_hw *hw, u8 *tx_bd_desc,
u8 *desc, u8 queue_index,
struct sk_buff *skb, dma_addr_t addr);
@@ -2162,7 +2159,8 @@ struct rtl_hal_ops {
enum led_ctl_mode ledaction);
void (*set_desc)(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
u8 desc_name, u8 *val);
- u32 (*get_desc) (u8 *pdesc, bool istx, u8 desc_name);
+ u64 (*get_desc)(struct ieee80211_hw *hw, u8 *pdesc, bool istx,
+ u8 desc_name);
bool (*is_tx_desc_closed) (struct ieee80211_hw *hw,
u8 hw_queue, u16 index);
void (*tx_polling) (struct ieee80211_hw *hw, u8 hw_queue);
@@ -2261,6 +2259,12 @@ struct rtl_mod_params {
*/
bool msi_support;
+ /* default: 0 = dma 32 */
+ bool dma64;
+
+ /* default: 1 = enable aspm */
+ int aspm_support;
+
/* default 0: 1 means disable */
bool disable_watchdog;
@@ -2857,19 +2861,19 @@ value to host byte ordering.*/
cpu_to_le32( \
LE_BITS_CLEARED_TO_4BYTE(__pstart, __bitoffset, __bitlen) | \
((((u32)__val) & BIT_LEN_MASK_32(__bitlen)) << (__bitoffset)) \
- );
+ )
#define SET_BITS_TO_LE_2BYTE(__pstart, __bitoffset, __bitlen, __val) \
*((__le16 *)(__pstart)) = \
cpu_to_le16( \
LE_BITS_CLEARED_TO_2BYTE(__pstart, __bitoffset, __bitlen) | \
((((u16)__val) & BIT_LEN_MASK_16(__bitlen)) << (__bitoffset)) \
- );
+ )
#define SET_BITS_TO_LE_1BYTE(__pstart, __bitoffset, __bitlen, __val) \
*((u8 *)(__pstart)) = EF1BYTE \
( \
LE_BITS_CLEARED_TO_1BYTE(__pstart, __bitoffset, __bitlen) | \
((((u8)__val) & BIT_LEN_MASK_8(__bitlen)) << (__bitoffset)) \
- );
+ )
#define N_BYTE_ALIGMENT(__value, __aligment) ((__aligment == 1) ? \
(__value) : (((__value + __aligment - 1) / __aligment) * __aligment))
diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c
index 2b0516d2f63d..bc18a191aef9 100644
--- a/drivers/net/wireless/rsi/rsi_91x_core.c
+++ b/drivers/net/wireless/rsi/rsi_91x_core.c
@@ -95,6 +95,8 @@ static u32 rsi_get_num_pkts_dequeue(struct rsi_common *common, u8 q_num)
s16 txop = common->tx_qinfo[q_num].txop * 32;
__le16 r_txop;
struct ieee80211_rate rate;
+ struct ieee80211_hdr *wh;
+ struct ieee80211_vif *vif;
rate.bitrate = RSI_RATE_MCS0 * 5 * 10; /* Convert to Kbps */
if (q_num == VI_Q)
@@ -106,8 +108,10 @@ static u32 rsi_get_num_pkts_dequeue(struct rsi_common *common, u8 q_num)
return 0;
do {
+ wh = (struct ieee80211_hdr *)skb->data;
+ vif = rsi_get_vif(adapter, wh->addr2);
r_txop = ieee80211_generic_frame_duration(adapter->hw,
- adapter->vifs[0],
+ vif,
common->band,
skb->len, &rate);
txop -= le16_to_cpu(r_txop);
@@ -334,6 +338,21 @@ struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr)
return NULL;
}
+struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac)
+{
+ struct ieee80211_vif *vif;
+ int i;
+
+ for (i = 0; i < RSI_MAX_VIFS; i++) {
+ vif = adapter->vifs[i];
+ if (!vif)
+ continue;
+ if (!memcmp(vif->addr, mac, ETH_ALEN))
+ return vif;
+ }
+ return NULL;
+}
+
/**
* rsi_core_xmit() - This function transmits the packets received from mac80211
* @common: Pointer to the driver private structure.
@@ -346,8 +365,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb)
struct rsi_hw *adapter = common->priv;
struct ieee80211_tx_info *info;
struct skb_info *tx_params;
- struct ieee80211_hdr *wh;
- struct ieee80211_vif *vif = adapter->vifs[0];
+ struct ieee80211_hdr *wh = NULL;
+ struct ieee80211_vif *vif;
u8 q_num, tid = 0;
struct rsi_sta *rsta = NULL;
@@ -366,6 +385,11 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb)
wh = (struct ieee80211_hdr *)&skb->data[0];
tx_params->sta_id = 0;
+ vif = rsi_get_vif(adapter, wh->addr2);
+ if (!vif)
+ goto xmit_fail;
+ tx_params->vif = vif;
+ tx_params->vap_id = ((struct vif_priv *)vif->drv_priv)->vap_id;
if ((ieee80211_is_mgmt(wh->frame_control)) ||
(ieee80211_is_ctl(wh->frame_control)) ||
(ieee80211_is_qos_nullfunc(wh->frame_control))) {
@@ -383,7 +407,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb)
q_num = skb->priority;
tx_params->tid = tid;
- if ((vif->type == NL80211_IFTYPE_AP) &&
+ if (((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) &&
(!is_broadcast_ether_addr(wh->addr1)) &&
(!is_multicast_ether_addr(wh->addr1))) {
rsta = rsi_find_sta(common, wh->addr1);
diff --git a/drivers/net/wireless/rsi/rsi_91x_debugfs.c b/drivers/net/wireless/rsi/rsi_91x_debugfs.c
index e98eb55c26cc..8c6ca8e689e4 100644
--- a/drivers/net/wireless/rsi/rsi_91x_debugfs.c
+++ b/drivers/net/wireless/rsi/rsi_91x_debugfs.c
@@ -83,19 +83,12 @@ static int rsi_version_read(struct seq_file *seq, void *data)
{
struct rsi_common *common = seq->private;
- common->driver_ver.major = 0;
- common->driver_ver.minor = 1;
- common->driver_ver.release_num = 0;
- common->driver_ver.patch_num = 0;
- seq_printf(seq, "Driver : %x.%d.%d.%d\nLMAC : %d.%d.%d.%d\n",
- common->driver_ver.major,
- common->driver_ver.minor,
- common->driver_ver.release_num,
- common->driver_ver.patch_num,
- common->fw_ver.major,
- common->fw_ver.minor,
- common->fw_ver.release_num,
- common->fw_ver.patch_num);
+ seq_printf(seq, "LMAC : %d.%d.%d.%d\n",
+ common->lmac_ver.major,
+ common->lmac_ver.minor,
+ common->lmac_ver.release_num,
+ common->lmac_ver.patch_num);
+
return 0;
}
diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index 070dfd68bb83..689527d7007a 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -42,7 +42,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb)
struct ieee80211_hdr *wh = NULL;
struct ieee80211_tx_info *info;
struct ieee80211_conf *conf = &adapter->hw->conf;
- struct ieee80211_vif *vif = adapter->vifs[0];
+ struct ieee80211_vif *vif;
struct rsi_mgmt_desc *mgmt_desc;
struct skb_info *tx_params;
struct ieee80211_bss_conf *bss = NULL;
@@ -57,6 +57,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb)
info = IEEE80211_SKB_CB(skb);
tx_params = (struct skb_info *)info->driver_data;
+ vif = tx_params->vif;
/* Update header size */
header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc);
@@ -78,7 +79,7 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb)
tx_params->internal_hdr_size = header_size;
memset(&skb->data[0], 0, header_size);
- bss = &info->control.vif->bss_conf;
+ bss = &vif->bss_conf;
wh = (struct ieee80211_hdr *)&skb->data[header_size];
mgmt_desc = (struct rsi_mgmt_desc *)skb->data;
@@ -95,10 +96,10 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb)
mgmt_desc->seq_ctrl =
cpu_to_le16(IEEE80211_SEQ_TO_SN(le16_to_cpu(wh->seq_ctrl)));
- if (common->band == NL80211_BAND_2GHZ)
- mgmt_desc->rate_info = RSI_RATE_1;
+ if ((common->band == NL80211_BAND_2GHZ) && !common->p2p_enabled)
+ mgmt_desc->rate_info = cpu_to_le16(RSI_RATE_1);
else
- mgmt_desc->rate_info = RSI_RATE_6;
+ mgmt_desc->rate_info = cpu_to_le16(RSI_RATE_6);
if (conf_is_ht40(conf))
mgmt_desc->bbp_info = cpu_to_le16(FULL40M_ENABLE);
@@ -121,7 +122,8 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb)
xtend_desc->retry_cnt = PROBE_RESP_RETRY_CNT;
}
- if ((vif->type == NL80211_IFTYPE_AP) &&
+ if (((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) &&
(ieee80211_is_action(wh->frame_control))) {
struct rsi_sta *rsta = rsi_find_sta(common, wh->addr1);
@@ -130,6 +132,10 @@ static int rsi_prepare_mgmt_desc(struct rsi_common *common, struct sk_buff *skb)
else
return -EINVAL;
}
+ mgmt_desc->rate_info |=
+ cpu_to_le16((tx_params->vap_id << RSI_DESC_VAP_ID_OFST) &
+ RSI_DESC_VAP_ID_MASK);
+
return 0;
}
@@ -151,7 +157,8 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
u16 seq_num;
info = IEEE80211_SKB_CB(skb);
- bss = &info->control.vif->bss_conf;
+ vif = info->control.vif;
+ bss = &vif->bss_conf;
tx_params = (struct skb_info *)info->driver_data;
header_size = FRAME_DESC_SZ + sizeof(struct xtended_desc);
@@ -175,7 +182,6 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
xtend_desc = (struct xtended_desc *)&skb->data[FRAME_DESC_SZ];
wh = (struct ieee80211_hdr *)&skb->data[header_size];
seq_num = IEEE80211_SEQ_TO_SN(le16_to_cpu(wh->seq_ctrl));
- vif = adapter->vifs[0];
data_desc->xtend_desc_size = header_size - FRAME_DESC_SZ;
@@ -184,7 +190,8 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
data_desc->mac_flags |= cpu_to_le16(RSI_QOS_ENABLE);
}
- if ((vif->type == NL80211_IFTYPE_STATION) &&
+ if (((vif->type == NL80211_IFTYPE_STATION) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT)) &&
(adapter->ps_state == PS_ENABLED))
wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE);
@@ -240,17 +247,23 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
data_desc->frame_info |= cpu_to_le16(RSI_BROADCAST_PKT);
data_desc->sta_id = vap_id;
- if (vif->type == NL80211_IFTYPE_AP) {
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
if (common->band == NL80211_BAND_5GHZ)
data_desc->rate_info = cpu_to_le16(RSI_RATE_6);
else
data_desc->rate_info = cpu_to_le16(RSI_RATE_1);
}
}
- if ((vif->type == NL80211_IFTYPE_AP) &&
+ if (((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) &&
(ieee80211_has_moredata(wh->frame_control)))
data_desc->frame_info |= cpu_to_le16(MORE_DATA_PRESENT);
+ data_desc->rate_info |=
+ cpu_to_le16((tx_params->vap_id << RSI_DESC_VAP_ID_OFST) &
+ RSI_DESC_VAP_ID_MASK);
+
return 0;
}
@@ -258,7 +271,7 @@ static int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb)
int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
{
struct rsi_hw *adapter = common->priv;
- struct ieee80211_vif *vif = adapter->vifs[0];
+ struct ieee80211_vif *vif;
struct ieee80211_tx_info *info;
struct ieee80211_bss_conf *bss;
int status = -EINVAL;
@@ -271,9 +284,12 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
info = IEEE80211_SKB_CB(skb);
if (!info->control.vif)
goto err;
- bss = &info->control.vif->bss_conf;
+ vif = info->control.vif;
+ bss = &vif->bss_conf;
- if ((vif->type == NL80211_IFTYPE_STATION) && (!bss->assoc))
+ if (((vif->type == NL80211_IFTYPE_STATION) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT)) &&
+ (!bss->assoc))
goto err;
status = rsi_prepare_data_desc(common, skb);
@@ -306,21 +322,11 @@ int rsi_send_mgmt_pkt(struct rsi_common *common,
struct ieee80211_tx_info *info;
struct skb_info *tx_params;
int status = -E2BIG;
- u8 extnd_size;
info = IEEE80211_SKB_CB(skb);
tx_params = (struct skb_info *)info->driver_data;
- extnd_size = ((uintptr_t)skb->data & 0x3);
if (tx_params->flags & INTERNAL_MGMT_PKT) {
- skb->data[1] |= BIT(7); /* Immediate Wakeup bit*/
- if ((extnd_size) > skb_headroom(skb)) {
- rsi_dbg(ERR_ZONE, "%s: Unable to send pkt\n", __func__);
- dev_kfree_skb(skb);
- return -ENOSPC;
- }
- skb_push(skb, extnd_size);
- skb->data[extnd_size + 4] = extnd_size;
status = adapter->host_intf_ops->write_pkt(common->priv,
(u8 *)skb->data,
skb->len);
@@ -352,12 +358,23 @@ int rsi_prepare_beacon(struct rsi_common *common, struct sk_buff *skb)
struct rsi_data_desc *bcn_frm;
struct ieee80211_hw *hw = common->priv->hw;
struct ieee80211_conf *conf = &hw->conf;
+ struct ieee80211_vif *vif;
struct sk_buff *mac_bcn;
- u8 vap_id = 0;
- u16 tim_offset;
-
+ u8 vap_id = 0, i;
+ u16 tim_offset = 0;
+
+ for (i = 0; i < RSI_MAX_VIFS; i++) {
+ vif = adapter->vifs[i];
+ if (!vif)
+ continue;
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO))
+ break;
+ }
+ if (!vif)
+ return -EINVAL;
mac_bcn = ieee80211_beacon_get_tim(adapter->hw,
- adapter->vifs[adapter->sc_nvifs - 1],
+ vif,
&tim_offset, NULL);
if (!mac_bcn) {
rsi_dbg(ERR_ZONE, "Failed to get beacon from mac80211\n");
@@ -411,9 +428,8 @@ static void bl_cmd_timeout(unsigned long priv)
static int bl_start_cmd_timer(struct rsi_hw *adapter, u32 timeout)
{
- init_timer(&adapter->bl_cmd_timer);
- adapter->bl_cmd_timer.data = (unsigned long)adapter;
- adapter->bl_cmd_timer.function = (void *)&bl_cmd_timeout;
+ setup_timer(&adapter->bl_cmd_timer, (void *)&bl_cmd_timeout,
+ (unsigned long)adapter);
adapter->bl_cmd_timer.expires = (msecs_to_jiffies(timeout) + jiffies);
adapter->blcmd_timer_expired = false;
@@ -752,6 +768,7 @@ static int auto_fw_upgrade(struct rsi_hw *adapter, u8 *flash_content,
static int rsi_load_firmware(struct rsi_hw *adapter)
{
+ struct rsi_common *common = adapter->priv;
struct rsi_host_intf_ops *hif_ops = adapter->host_intf_ops;
const struct firmware *fw_entry = NULL;
u32 regout_val = 0, content_size;
@@ -827,6 +844,18 @@ static int rsi_load_firmware(struct rsi_hw *adapter)
content_size = fw_entry->size;
rsi_dbg(INFO_ZONE, "FW Length = %d bytes\n", content_size);
+ /* Get the firmware version */
+ common->lmac_ver.ver.info.fw_ver[0] =
+ flash_content[LMAC_VER_OFFSET] & 0xFF;
+ common->lmac_ver.ver.info.fw_ver[1] =
+ flash_content[LMAC_VER_OFFSET + 1] & 0xFF;
+ common->lmac_ver.major = flash_content[LMAC_VER_OFFSET + 2] & 0xFF;
+ common->lmac_ver.release_num =
+ flash_content[LMAC_VER_OFFSET + 3] & 0xFF;
+ common->lmac_ver.minor = flash_content[LMAC_VER_OFFSET + 4] & 0xFF;
+ common->lmac_ver.patch_num = 0;
+ rsi_print_version(common);
+
status = bl_write_header(adapter, flash_content, content_size);
if (status) {
rsi_dbg(ERR_ZONE,
diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index fa12c05d9e23..b1f5dbbde3cb 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -139,6 +139,32 @@ static const u32 rsi_max_ap_stas[16] = {
4, /* 14 - AP + BT Dual */
};
+static const struct ieee80211_iface_limit rsi_iface_limits[] = {
+ {
+ .max = 1,
+ .types = BIT(NL80211_IFTYPE_STATION),
+ },
+ {
+ .max = 1,
+ .types = BIT(NL80211_IFTYPE_AP) |
+ BIT(NL80211_IFTYPE_P2P_CLIENT) |
+ BIT(NL80211_IFTYPE_P2P_GO),
+ },
+ {
+ .max = 1,
+ .types = BIT(NL80211_IFTYPE_P2P_DEVICE),
+ },
+};
+
+static const struct ieee80211_iface_combination rsi_iface_combinations[] = {
+ {
+ .num_different_channels = 1,
+ .max_interfaces = 3,
+ .limits = rsi_iface_limits,
+ .n_limits = ARRAY_SIZE(rsi_iface_limits),
+ },
+};
+
/**
* rsi_is_cipher_wep() - This function determines if the cipher is WEP or not.
* @common: Pointer to the driver private structure.
@@ -329,6 +355,24 @@ static void rsi_mac80211_stop(struct ieee80211_hw *hw)
mutex_unlock(&common->mutex);
}
+static int rsi_map_intf_mode(enum nl80211_iftype vif_type)
+{
+ switch (vif_type) {
+ case NL80211_IFTYPE_STATION:
+ return RSI_OPMODE_STA;
+ case NL80211_IFTYPE_AP:
+ return RSI_OPMODE_AP;
+ case NL80211_IFTYPE_P2P_DEVICE:
+ return RSI_OPMODE_P2P_CLIENT;
+ case NL80211_IFTYPE_P2P_CLIENT:
+ return RSI_OPMODE_P2P_CLIENT;
+ case NL80211_IFTYPE_P2P_GO:
+ return RSI_OPMODE_P2P_GO;
+ default:
+ return RSI_OPMODE_UNSUPPORTED;
+ }
+}
+
/**
* rsi_mac80211_add_interface() - This function is called when a netdevice
* attached to the hardware is enabled.
@@ -342,54 +386,62 @@ static int rsi_mac80211_add_interface(struct ieee80211_hw *hw,
{
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
+ struct vif_priv *vif_info = (struct vif_priv *)vif->drv_priv;
enum opmode intf_mode;
- int ret = -EOPNOTSUPP;
+ enum vap_status vap_status;
+ int vap_idx = -1, i;
vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD;
mutex_lock(&common->mutex);
- if (adapter->sc_nvifs > 1) {
- mutex_unlock(&common->mutex);
- return -EOPNOTSUPP;
- }
-
- switch (vif->type) {
- case NL80211_IFTYPE_STATION:
- rsi_dbg(INFO_ZONE, "Station Mode");
- intf_mode = STA_OPMODE;
- break;
- case NL80211_IFTYPE_AP:
- rsi_dbg(INFO_ZONE, "AP Mode");
- intf_mode = AP_OPMODE;
- break;
- default:
+ intf_mode = rsi_map_intf_mode(vif->type);
+ if (intf_mode == RSI_OPMODE_UNSUPPORTED) {
rsi_dbg(ERR_ZONE,
"%s: Interface type %d not supported\n", __func__,
vif->type);
- goto out;
+ mutex_unlock(&common->mutex);
+ return -EOPNOTSUPP;
+ }
+ if ((vif->type == NL80211_IFTYPE_P2P_DEVICE) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO))
+ common->p2p_enabled = true;
+
+ /* Get free vap index */
+ for (i = 0; i < RSI_MAX_VIFS; i++) {
+ if (!adapter->vifs[i]) {
+ vap_idx = i;
+ break;
+ }
+ }
+ if (vap_idx < 0) {
+ rsi_dbg(ERR_ZONE, "Reject: Max VAPs reached\n");
+ mutex_unlock(&common->mutex);
+ return -EOPNOTSUPP;
}
+ vif_info->vap_id = vap_idx;
+ adapter->vifs[vap_idx] = vif;
+ adapter->sc_nvifs++;
+ vap_status = VAP_ADD;
- adapter->vifs[adapter->sc_nvifs++] = vif;
- ret = rsi_set_vap_capabilities(common, intf_mode, common->mac_addr,
- 0, VAP_ADD);
- if (ret) {
+ if (rsi_set_vap_capabilities(common, intf_mode, vif->addr,
+ vif_info->vap_id, vap_status)) {
rsi_dbg(ERR_ZONE, "Failed to set VAP capabilities\n");
- goto out;
+ mutex_unlock(&common->mutex);
+ return -EINVAL;
}
- if (vif->type == NL80211_IFTYPE_AP) {
- int i;
-
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
rsi_send_rx_filter_frame(common, DISALLOW_BEACONS);
common->min_rate = RSI_RATE_AUTO;
for (i = 0; i < common->max_stations; i++)
common->stations[i].sta = NULL;
}
-out:
mutex_unlock(&common->mutex);
- return ret;
+ return 0;
}
/**
@@ -406,6 +458,7 @@ static void rsi_mac80211_remove_interface(struct ieee80211_hw *hw,
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
enum opmode opmode;
+ int i;
rsi_dbg(INFO_ZONE, "Remove Interface Called\n");
@@ -416,23 +469,22 @@ static void rsi_mac80211_remove_interface(struct ieee80211_hw *hw,
return;
}
- switch (vif->type) {
- case NL80211_IFTYPE_STATION:
- opmode = STA_OPMODE;
- break;
- case NL80211_IFTYPE_AP:
- opmode = AP_OPMODE;
- break;
- default:
+ opmode = rsi_map_intf_mode(vif->type);
+ if (opmode == RSI_OPMODE_UNSUPPORTED) {
+ rsi_dbg(ERR_ZONE, "Opmode error : %d\n", opmode);
mutex_unlock(&common->mutex);
return;
}
- rsi_set_vap_capabilities(common, opmode, vif->addr,
- 0, VAP_DELETE);
- adapter->sc_nvifs--;
-
- if (!memcmp(adapter->vifs[0], vif, sizeof(struct ieee80211_vif)))
- adapter->vifs[0] = NULL;
+ for (i = 0; i < RSI_MAX_VIFS; i++) {
+ if (!adapter->vifs[i])
+ continue;
+ if (vif == adapter->vifs[i]) {
+ rsi_set_vap_capabilities(common, opmode, vif->addr,
+ i, VAP_DELETE);
+ adapter->sc_nvifs--;
+ adapter->vifs[i] = NULL;
+ }
+ }
mutex_unlock(&common->mutex);
}
@@ -451,35 +503,44 @@ static int rsi_channel_change(struct ieee80211_hw *hw)
int status = -EOPNOTSUPP;
struct ieee80211_channel *curchan = hw->conf.chandef.chan;
u16 channel = curchan->hw_value;
- struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf;
+ struct ieee80211_vif *vif;
+ struct ieee80211_bss_conf *bss;
+ bool assoc = false;
+ int i;
rsi_dbg(INFO_ZONE,
"%s: Set channel: %d MHz type: %d channel_no %d\n",
__func__, curchan->center_freq,
curchan->flags, channel);
- if (bss->assoc) {
+ for (i = 0; i < RSI_MAX_VIFS; i++) {
+ vif = adapter->vifs[i];
+ if (!vif)
+ continue;
+ if (vif->type == NL80211_IFTYPE_STATION) {
+ bss = &vif->bss_conf;
+ if (bss->assoc) {
+ assoc = true;
+ break;
+ }
+ }
+ }
+ if (assoc) {
if (!common->hw_data_qs_blocked &&
- (rsi_get_connected_channel(adapter) != channel)) {
+ (rsi_get_connected_channel(vif) != channel)) {
rsi_dbg(INFO_ZONE, "blk data q %d\n", channel);
if (!rsi_send_block_unblock_frame(common, true))
common->hw_data_qs_blocked = true;
}
}
- status = rsi_band_check(common);
+ status = rsi_band_check(common, curchan);
if (!status)
status = rsi_set_channel(adapter->priv, curchan);
- if (bss->assoc) {
+ if (assoc) {
if (common->hw_data_qs_blocked &&
- (rsi_get_connected_channel(adapter) == channel)) {
- rsi_dbg(INFO_ZONE, "unblk data q %d\n", channel);
- if (!rsi_send_block_unblock_frame(common, false))
- common->hw_data_qs_blocked = false;
- }
- } else {
- if (common->hw_data_qs_blocked) {
+ (rsi_get_connected_channel(vif) == channel)) {
rsi_dbg(INFO_ZONE, "unblk data q %d\n", channel);
if (!rsi_send_block_unblock_frame(common, false))
common->hw_data_qs_blocked = false;
@@ -531,7 +592,6 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw,
{
struct rsi_hw *adapter = hw->priv;
struct rsi_common *common = adapter->priv;
- struct ieee80211_vif *vif = adapter->vifs[0];
struct ieee80211_conf *conf = &hw->conf;
int status = -EOPNOTSUPP;
@@ -547,16 +607,30 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw,
}
/* Power save parameters */
- if ((changed & IEEE80211_CONF_CHANGE_PS) &&
- (vif->type == NL80211_IFTYPE_STATION)) {
+ if (changed & IEEE80211_CONF_CHANGE_PS) {
+ struct ieee80211_vif *vif;
unsigned long flags;
+ int i, set_ps = 1;
- spin_lock_irqsave(&adapter->ps_lock, flags);
- if (conf->flags & IEEE80211_CONF_PS)
- rsi_enable_ps(adapter);
- else
- rsi_disable_ps(adapter);
- spin_unlock_irqrestore(&adapter->ps_lock, flags);
+ for (i = 0; i < RSI_MAX_VIFS; i++) {
+ vif = adapter->vifs[i];
+ if (!vif)
+ continue;
+ /* Don't go to power save if AP vap exists */
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
+ set_ps = 0;
+ break;
+ }
+ }
+ if (set_ps) {
+ spin_lock_irqsave(&adapter->ps_lock, flags);
+ if (conf->flags & IEEE80211_CONF_PS)
+ rsi_enable_ps(adapter, vif);
+ else
+ rsi_disable_ps(adapter, vif);
+ spin_unlock_irqrestore(&adapter->ps_lock, flags);
+ }
}
/* RTS threshold */
@@ -580,16 +654,42 @@ static int rsi_mac80211_config(struct ieee80211_hw *hw,
*
* Return: Current connected AP's channel number is returned.
*/
-u16 rsi_get_connected_channel(struct rsi_hw *adapter)
+u16 rsi_get_connected_channel(struct ieee80211_vif *vif)
{
- struct ieee80211_vif *vif = adapter->vifs[0];
- if (vif) {
- struct ieee80211_bss_conf *bss = &vif->bss_conf;
- struct ieee80211_channel *channel = bss->chandef.chan;
- return channel->hw_value;
- }
+ struct ieee80211_bss_conf *bss;
+ struct ieee80211_channel *channel;
- return 0;
+ if (!vif)
+ return 0;
+
+ bss = &vif->bss_conf;
+ channel = bss->chandef.chan;
+
+ if (!channel)
+ return 0;
+
+ return channel->hw_value;
+}
+
+static void rsi_switch_channel(struct rsi_hw *adapter,
+ struct ieee80211_vif *vif)
+{
+ struct rsi_common *common = adapter->priv;
+ struct ieee80211_channel *channel;
+
+ if (common->iface_down)
+ return;
+ if (!vif)
+ return;
+
+ channel = vif->bss_conf.chandef.chan;
+
+ if (!channel)
+ return;
+
+ rsi_band_check(common, channel);
+ rsi_set_channel(common, channel);
+ rsi_dbg(INFO_ZONE, "Switched to channel - %d\n", channel->hw_value);
}
/**
@@ -626,12 +726,12 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw,
rsi_send_rx_filter_frame(common, rx_filter_word);
}
rsi_inform_bss_status(common,
- STA_OPMODE,
+ RSI_OPMODE_STA,
bss_conf->assoc,
bss_conf->bssid,
bss_conf->qos,
bss_conf->aid,
- NULL, 0);
+ NULL, 0, vif);
adapter->ps_info.dtim_interval_duration = bss->dtim_period;
adapter->ps_info.listen_interval = conf->listen_interval;
@@ -639,7 +739,7 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw,
if (bss->assoc) {
if (common->uapsd_bitmap) {
rsi_dbg(INFO_ZONE, "Configuring UAPSD\n");
- rsi_conf_uapsd(adapter);
+ rsi_conf_uapsd(adapter, vif);
}
} else {
common->uapsd_bitmap = 0;
@@ -656,7 +756,8 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw,
}
if ((changed & BSS_CHANGED_BEACON_ENABLED) &&
- (vif->type == NL80211_IFTYPE_AP)) {
+ ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO))) {
if (bss->enable_beacon) {
rsi_dbg(INFO_ZONE, "===> BEACON ENABLED <===\n");
common->beacon_enabled = 1;
@@ -775,7 +876,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw,
rsi_dbg(ERR_ZONE, "%s: Cipher 0x%x key_type: %d key_len: %d\n",
__func__, key->cipher, key_type, key->keylen);
- if (vif->type == NL80211_IFTYPE_AP) {
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
if (sta) {
rsta = rsi_find_sta(adapter->priv, sta->addr);
if (rsta)
@@ -791,7 +893,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw,
RSI_PAIRWISE_KEY,
key->keyidx,
key->cipher,
- sta_id);
+ sta_id,
+ vif);
if (status)
return status;
}
@@ -803,7 +906,8 @@ static int rsi_hal_key_config(struct ieee80211_hw *hw,
key_type,
key->keyidx,
key->cipher,
- sta_id);
+ sta_id,
+ vif);
}
/**
@@ -902,7 +1006,8 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
if (ssn != NULL)
seq_no = *ssn;
- if (vif->type == NL80211_IFTYPE_AP) {
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
rsta = rsi_find_sta(common, sta->addr);
if (!rsta) {
rsi_dbg(ERR_ZONE, "No station mapped\n");
@@ -936,9 +1041,11 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
break;
case IEEE80211_AMPDU_TX_START:
- if (vif->type == NL80211_IFTYPE_STATION)
+ if ((vif->type == NL80211_IFTYPE_STATION) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT))
common->vif_info[ii].seq_start = seq_no;
- else if (vif->type == NL80211_IFTYPE_AP)
+ else if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO))
rsta->seq_start[tid] = seq_no;
ieee80211_start_tx_ba_cb_irqsafe(vif, sta->addr, tid);
status = 0;
@@ -958,9 +1065,11 @@ static int rsi_mac80211_ampdu_action(struct ieee80211_hw *hw,
break;
case IEEE80211_AMPDU_TX_OPERATIONAL:
- if (vif->type == NL80211_IFTYPE_STATION)
+ if ((vif->type == NL80211_IFTYPE_STATION) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT))
seq_start = common->vif_info[ii].seq_start;
- else if (vif->type == NL80211_IFTYPE_AP)
+ else if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO))
seq_start = rsta->seq_start[tid];
status = rsi_send_aggregation_params_frame(common,
tid,
@@ -1039,9 +1148,9 @@ static int rsi_mac80211_set_rate_mask(struct ieee80211_hw *hw,
*/
static void rsi_perform_cqm(struct rsi_common *common,
u8 *bssid,
- s8 rssi)
+ s8 rssi,
+ struct ieee80211_vif *vif)
{
- struct rsi_hw *adapter = common->priv;
s8 last_event = common->cqm_info.last_cqm_event_rssi;
int thold = common->cqm_info.rssi_thold;
u32 hyst = common->cqm_info.rssi_hyst;
@@ -1057,7 +1166,7 @@ static void rsi_perform_cqm(struct rsi_common *common,
common->cqm_info.last_cqm_event_rssi = rssi;
rsi_dbg(INFO_ZONE, "CQM: Notifying event: %d\n", event);
- ieee80211_cqm_rssi_notify(adapter->vifs[0], event, rssi, GFP_KERNEL);
+ ieee80211_cqm_rssi_notify(vif, event, rssi, GFP_KERNEL);
return;
}
@@ -1077,7 +1186,9 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw,
struct rsi_common *common,
struct ieee80211_rx_status *rxs)
{
- struct ieee80211_bss_conf *bss = &common->priv->vifs[0]->bss_conf;
+ struct rsi_hw *adapter = common->priv;
+ struct ieee80211_vif *vif;
+ struct ieee80211_bss_conf *bss = NULL;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
struct skb_info *rx_params = (struct skb_info *)info->driver_data;
struct ieee80211_hdr *hdr;
@@ -1085,6 +1196,7 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw,
u8 hdrlen = 0;
u8 channel = rx_params->channel;
s32 freq;
+ int i;
hdr = ((struct ieee80211_hdr *)(skb->data));
hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -1113,10 +1225,21 @@ static void rsi_fill_rx_status(struct ieee80211_hw *hw,
rxs->flag |= RX_FLAG_IV_STRIPPED;
}
+ for (i = 0; i < RSI_MAX_VIFS; i++) {
+ vif = adapter->vifs[i];
+ if (!vif)
+ continue;
+ if (vif->type == NL80211_IFTYPE_STATION) {
+ bss = &vif->bss_conf;
+ break;
+ }
+ }
+ if (!bss)
+ return;
/* CQM only for connected AP beacons, the RSSI is a weighted avg */
if (bss->assoc && !(memcmp(bss->bssid, hdr->addr2, ETH_ALEN))) {
if (ieee80211_is_beacon(hdr->frame_control))
- rsi_perform_cqm(common, hdr->addr2, rxs->signal);
+ rsi_perform_cqm(common, hdr->addr2, rxs->signal, vif);
}
return;
@@ -1210,7 +1333,8 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw,
mutex_lock(&common->mutex);
- if (vif->type == NL80211_IFTYPE_AP) {
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
u8 cnt;
int sta_idx = -1;
int free_index = -1;
@@ -1259,8 +1383,9 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw,
/* Send peer notify to device */
rsi_dbg(INFO_ZONE, "Indicate bss status to device\n");
- rsi_inform_bss_status(common, AP_OPMODE, 1, sta->addr,
- sta->wme, sta->aid, sta, sta_idx);
+ rsi_inform_bss_status(common, RSI_OPMODE_AP, 1,
+ sta->addr, sta->wme, sta->aid,
+ sta, sta_idx, vif);
if (common->key) {
struct ieee80211_key_conf *key = common->key;
@@ -1273,14 +1398,16 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw,
RSI_PAIRWISE_KEY,
key->keyidx,
key->cipher,
- sta_idx);
+ sta_idx,
+ vif);
}
common->num_stations++;
}
}
- if (vif->type == NL80211_IFTYPE_STATION) {
+ if ((vif->type == NL80211_IFTYPE_STATION) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT)) {
rsi_set_min_rate(hw, sta, common);
if (sta->ht_cap.ht_supported) {
common->vif_info[0].is_ht = true;
@@ -1321,7 +1448,8 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw,
mutex_lock(&common->mutex);
- if (vif->type == NL80211_IFTYPE_AP) {
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
u8 sta_idx, cnt;
/* Send peer notify to device */
@@ -1332,9 +1460,10 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw,
if (!rsta->sta)
continue;
if (!memcmp(rsta->sta->addr, sta->addr, ETH_ALEN)) {
- rsi_inform_bss_status(common, AP_OPMODE, 0,
+ rsi_inform_bss_status(common, RSI_OPMODE_AP, 0,
sta->addr, sta->wme,
- sta->aid, sta, sta_idx);
+ sta->aid, sta, sta_idx,
+ vif);
rsta->sta = NULL;
rsta->sta_id = -1;
for (cnt = 0; cnt < IEEE80211_NUM_TIDS; cnt++)
@@ -1348,7 +1477,8 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw,
rsi_dbg(ERR_ZONE, "%s: No station found\n", __func__);
}
- if (vif->type == NL80211_IFTYPE_STATION) {
+ if ((vif->type == NL80211_IFTYPE_STATION) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT)) {
/* Resetting all the fields to default values */
memcpy((u8 *)bss->bssid, (u8 *)sta->addr, ETH_ALEN);
bss->qos = sta->wme;
@@ -1508,6 +1638,114 @@ static void rsi_mac80211_rfkill_poll(struct ieee80211_hw *hw)
mutex_unlock(&common->mutex);
}
+static void rsi_resume_conn_channel(struct rsi_common *common)
+{
+ struct rsi_hw *adapter = common->priv;
+ struct ieee80211_vif *vif;
+ int cnt;
+
+ for (cnt = 0; cnt < RSI_MAX_VIFS; cnt++) {
+ vif = adapter->vifs[cnt];
+ if (!vif)
+ continue;
+
+ if ((vif->type == NL80211_IFTYPE_AP) ||
+ (vif->type == NL80211_IFTYPE_P2P_GO)) {
+ rsi_switch_channel(adapter, vif);
+ break;
+ }
+ if (((vif->type == NL80211_IFTYPE_STATION) ||
+ (vif->type == NL80211_IFTYPE_P2P_CLIENT)) &&
+ vif->bss_conf.assoc) {
+ rsi_switch_channel(adapter, vif);
+ break;
+ }
+ }
+}
+
+void rsi_roc_timeout(unsigned long data)
+{
+ struct rsi_common *common = (struct rsi_common *)data;
+
+ rsi_dbg(INFO_ZONE, "Remain on channel expired\n");
+
+ mutex_lock(&common->mutex);
+ ieee80211_remain_on_channel_expired(common->priv->hw);
+
+ if (timer_pending(&common->roc_timer))
+ del_timer(&common->roc_timer);
+
+ rsi_resume_conn_channel(common);
+ mutex_unlock(&common->mutex);
+}
+
+static int rsi_mac80211_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+ struct ieee80211_channel *chan, int duration,
+ enum ieee80211_roc_type type)
+{
+ struct rsi_hw *adapter = (struct rsi_hw *)hw->priv;
+ struct rsi_common *common = (struct rsi_common *)adapter->priv;
+ int status = 0;
+
+ rsi_dbg(INFO_ZONE, "***** Remain on channel *****\n");
+
+ mutex_lock(&common->mutex);
+ rsi_dbg(INFO_ZONE, "%s: channel: %d duration: %dms\n",
+ __func__, chan->hw_value, duration);
+
+ if (timer_pending(&common->roc_timer)) {
+ rsi_dbg(INFO_ZONE, "Stop on-going ROC\n");
+ del_timer(&common->roc_timer);
+ }
+ common->roc_timer.expires = msecs_to_jiffies(duration) + jiffies;
+ add_timer(&common->roc_timer);
+
+ /* Configure band */
+ if (rsi_band_check(common, chan)) {
+ rsi_dbg(ERR_ZONE, "Failed to set band\n");
+ status = -EINVAL;
+ goto out;
+ }
+
+ /* Configure channel */
+ if (rsi_set_channel(common, chan)) {
+ rsi_dbg(ERR_ZONE, "Failed to set the channel\n");
+ status = -EINVAL;
+ goto out;
+ }
+
+ common->roc_vif = vif;
+ ieee80211_ready_on_channel(hw);
+ rsi_dbg(INFO_ZONE, "%s: Ready on channel :%d\n",
+ __func__, chan->hw_value);
+
+out:
+ mutex_unlock(&common->mutex);
+
+ return status;
+}
+
+static int rsi_mac80211_cancel_roc(struct ieee80211_hw *hw)
+{
+ struct rsi_hw *adapter = hw->priv;
+ struct rsi_common *common = adapter->priv;
+
+ rsi_dbg(INFO_ZONE, "Cancel remain on channel\n");
+
+ mutex_lock(&common->mutex);
+ if (!timer_pending(&common->roc_timer)) {
+ mutex_unlock(&common->mutex);
+ return 0;
+ }
+
+ del_timer(&common->roc_timer);
+
+ rsi_resume_conn_channel(common);
+ mutex_unlock(&common->mutex);
+
+ return 0;
+}
+
static const struct ieee80211_ops mac80211_ops = {
.tx = rsi_mac80211_tx,
.start = rsi_mac80211_start,
@@ -1527,6 +1765,8 @@ static const struct ieee80211_ops mac80211_ops = {
.set_antenna = rsi_mac80211_set_antenna,
.get_antenna = rsi_mac80211_get_antenna,
.rfkill_poll = rsi_mac80211_rfkill_poll,
+ .remain_on_channel = rsi_mac80211_roc,
+ .cancel_remain_on_channel = rsi_mac80211_cancel_roc,
};
/**
@@ -1581,7 +1821,11 @@ int rsi_mac80211_attach(struct rsi_common *common)
ether_addr_copy(hw->wiphy->addr_mask, addr_mask);
wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) |
- BIT(NL80211_IFTYPE_AP);
+ BIT(NL80211_IFTYPE_AP) |
+ BIT(NL80211_IFTYPE_P2P_DEVICE) |
+ BIT(NL80211_IFTYPE_P2P_CLIENT) |
+ BIT(NL80211_IFTYPE_P2P_GO);
+
wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
wiphy->retry_short = RETRY_SHORT;
wiphy->retry_long = RETRY_LONG;
@@ -1608,6 +1852,14 @@ int rsi_mac80211_attach(struct rsi_common *common)
wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
+ /* Wi-Fi direct parameters */
+ wiphy->flags |= WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL;
+ wiphy->flags |= WIPHY_FLAG_OFFCHAN_TX;
+ wiphy->max_remain_on_channel_duration = 10000;
+ hw->max_listen_interval = 10;
+ wiphy->iface_combinations = rsi_iface_combinations;
+ wiphy->n_iface_combinations = ARRAY_SIZE(rsi_iface_combinations);
+
status = ieee80211_register_hw(hw);
if (status)
return status;
diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c
index 3e1e80888d98..71b8cfb8252e 100644
--- a/drivers/net/wireless/rsi/rsi_91x_main.c
+++ b/drivers/net/wireless/rsi/rsi_91x_main.c
@@ -20,6 +20,7 @@
#include <linux/firmware.h>
#include "rsi_mgmt.h"
#include "rsi_common.h"
+#include "rsi_hal.h"
u32 rsi_zone_enabled = /* INFO_ZONE |
INIT_ZONE |
@@ -56,6 +57,30 @@ void rsi_dbg(u32 zone, const char *fmt, ...)
}
EXPORT_SYMBOL_GPL(rsi_dbg);
+static char *opmode_str(int oper_mode)
+{
+ switch (oper_mode) {
+ case RSI_DEV_OPMODE_WIFI_ALONE:
+ return "Wi-Fi alone";
+ }
+
+ return "Unknown";
+}
+
+void rsi_print_version(struct rsi_common *common)
+{
+ rsi_dbg(ERR_ZONE, "================================================\n");
+ rsi_dbg(ERR_ZONE, "================ RSI Version Info ==============\n");
+ rsi_dbg(ERR_ZONE, "================================================\n");
+ rsi_dbg(ERR_ZONE, "FW Version\t: %d.%d.%d\n",
+ common->lmac_ver.major, common->lmac_ver.minor,
+ common->lmac_ver.release_num);
+ rsi_dbg(ERR_ZONE, "Operating mode\t: %d [%s]",
+ common->oper_mode, opmode_str(common->oper_mode));
+ rsi_dbg(ERR_ZONE, "Firmware file\t: %s", common->priv->fw_file_name);
+ rsi_dbg(ERR_ZONE, "================================================\n");
+}
+
/**
* rsi_prepare_skb() - This function prepares the skb.
* @common: Pointer to the driver private structure.
@@ -74,6 +99,8 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common,
struct skb_info *rx_params;
struct sk_buff *skb = NULL;
u8 payload_offset;
+ struct ieee80211_vif *vif;
+ struct ieee80211_hdr *wh;
if (WARN(!pkt_len, "%s: Dummy pkt received", __func__))
return NULL;
@@ -92,11 +119,13 @@ static struct sk_buff *rsi_prepare_skb(struct rsi_common *common,
payload_offset = (extended_desc + FRAME_DESC_SZ);
skb_put(skb, pkt_len);
memcpy((skb->data), (buffer + payload_offset), skb->len);
+ wh = (struct ieee80211_hdr *)skb->data;
+ vif = rsi_get_vif(common->priv, wh->addr1);
info = IEEE80211_SKB_CB(skb);
rx_params = (struct skb_info *)info->driver_data;
rx_params->rssi = rsi_get_rssi(buffer);
- rx_params->channel = rsi_get_connected_channel(common->priv);
+ rx_params->channel = rsi_get_connected_channel(vif);
return skb;
}
@@ -233,6 +262,9 @@ struct rsi_hw *rsi_91x_init(void)
rsi_default_ps_params(adapter);
spin_lock_init(&adapter->ps_lock);
+ common->roc_timer.data = (unsigned long)common;
+ common->roc_timer.function = (void *)&rsi_roc_timeout;
+ init_timer(&common->roc_timer);
common->init_done = true;
return adapter;
diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
index f7b550f900c4..4b94190c9797 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c
@@ -460,12 +460,12 @@ static int rsi_hal_send_sta_notify_frame(struct rsi_common *common,
const unsigned char *bssid,
u8 qos_enable,
u16 aid,
- u16 sta_id)
+ u16 sta_id,
+ struct ieee80211_vif *vif)
{
- struct ieee80211_vif *vif = common->priv->vifs[0];
struct sk_buff *skb = NULL;
struct rsi_peer_notify *peer_notify;
- u16 vap_id = 0;
+ u16 vap_id = ((struct vif_priv *)vif->drv_priv)->vap_id;
int status;
u16 frame_len = sizeof(struct rsi_peer_notify);
@@ -482,9 +482,9 @@ static int rsi_hal_send_sta_notify_frame(struct rsi_common *common,
memset(skb->data, 0, frame_len);
peer_notify = (struct rsi_peer_notify *)skb->data;
- if (opmode == STA_OPMODE)
+ if (opmode == RSI_OPMODE_STA)
peer_notify->command = cpu_to_le16(PEER_TYPE_AP << 1);
- else if (opmode == AP_OPMODE)
+ else if (opmode == RSI_OPMODE_AP)
peer_notify->command = cpu_to_le16(PEER_TYPE_STA << 1);
switch (notify_event) {
@@ -716,9 +716,9 @@ int rsi_hal_load_key(struct rsi_common *common,
u8 key_type,
u8 key_id,
u32 cipher,
- s16 sta_id)
+ s16 sta_id,
+ struct ieee80211_vif *vif)
{
- struct ieee80211_vif *vif = common->priv->vifs[0];
struct sk_buff *skb = NULL;
struct rsi_set_key *set_key;
u16 key_descriptor = 0;
@@ -926,13 +926,13 @@ static int rsi_send_reset_mac(struct rsi_common *common)
*
* Return: 0 on success, corresponding error code on failure.
*/
-int rsi_band_check(struct rsi_common *common)
+int rsi_band_check(struct rsi_common *common,
+ struct ieee80211_channel *curchan)
{
struct rsi_hw *adapter = common->priv;
struct ieee80211_hw *hw = adapter->hw;
u8 prev_bw = common->channel_width;
u8 prev_ep = common->endpoint;
- struct ieee80211_channel *curchan = hw->conf.chandef.chan;
int status = 0;
if (common->band != curchan->band) {
@@ -1160,9 +1160,9 @@ static bool rsi_map_rates(u16 rate, int *offset)
*/
static int rsi_send_auto_rate_request(struct rsi_common *common,
struct ieee80211_sta *sta,
- u16 sta_id)
+ u16 sta_id,
+ struct ieee80211_vif *vif)
{
- struct ieee80211_vif *vif = common->priv->vifs[0];
struct sk_buff *skb;
struct rsi_auto_rate *auto_rate;
int ii = 0, jj = 0, kk = 0;
@@ -1318,33 +1318,36 @@ void rsi_inform_bss_status(struct rsi_common *common,
u8 qos_enable,
u16 aid,
struct ieee80211_sta *sta,
- u16 sta_id)
+ u16 sta_id,
+ struct ieee80211_vif *vif)
{
if (status) {
- if (opmode == STA_OPMODE)
+ if (opmode == RSI_OPMODE_STA)
common->hw_data_qs_blocked = true;
rsi_hal_send_sta_notify_frame(common,
opmode,
STA_CONNECTED,
addr,
qos_enable,
- aid, sta_id);
+ aid, sta_id,
+ vif);
if (common->min_rate == 0xffff)
- rsi_send_auto_rate_request(common, sta, sta_id);
- if (opmode == STA_OPMODE) {
+ rsi_send_auto_rate_request(common, sta, sta_id, vif);
+ if (opmode == RSI_OPMODE_STA) {
if (!rsi_send_block_unblock_frame(common, false))
common->hw_data_qs_blocked = false;
}
} else {
- if (opmode == STA_OPMODE)
+ if (opmode == RSI_OPMODE_STA)
common->hw_data_qs_blocked = true;
rsi_hal_send_sta_notify_frame(common,
opmode,
STA_DISCONNECTED,
addr,
qos_enable,
- aid, sta_id);
- if (opmode == STA_OPMODE)
+ aid, sta_id,
+ vif);
+ if (opmode == RSI_OPMODE_STA)
rsi_send_block_unblock_frame(common, true);
}
}
@@ -1471,10 +1474,11 @@ int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word)
return rsi_send_internal_mgmt_frame(common, skb);
}
-int rsi_send_ps_request(struct rsi_hw *adapter, bool enable)
+int rsi_send_ps_request(struct rsi_hw *adapter, bool enable,
+ struct ieee80211_vif *vif)
{
struct rsi_common *common = adapter->priv;
- struct ieee80211_bss_conf *bss = &adapter->vifs[0]->bss_conf;
+ struct ieee80211_bss_conf *bss = &vif->bss_conf;
struct rsi_request_ps *ps;
struct rsi_ps_info *ps_info;
struct sk_buff *skb;
diff --git a/drivers/net/wireless/rsi/rsi_91x_ps.c b/drivers/net/wireless/rsi/rsi_91x_ps.c
index 48c79f035c59..523f5329d2b7 100644
--- a/drivers/net/wireless/rsi/rsi_91x_ps.c
+++ b/drivers/net/wireless/rsi/rsi_91x_ps.c
@@ -67,7 +67,7 @@ void rsi_default_ps_params(struct rsi_hw *adapter)
ps_info->deep_sleep_wakeup_period = RSI_DEF_DS_WAKEUP_PERIOD;
}
-void rsi_enable_ps(struct rsi_hw *adapter)
+void rsi_enable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif)
{
if (adapter->ps_state != PS_NONE) {
rsi_dbg(ERR_ZONE,
@@ -76,7 +76,7 @@ void rsi_enable_ps(struct rsi_hw *adapter)
return;
}
- if (rsi_send_ps_request(adapter, true)) {
+ if (rsi_send_ps_request(adapter, true, vif)) {
rsi_dbg(ERR_ZONE,
"%s: Failed to send PS request to device\n",
__func__);
@@ -86,7 +86,8 @@ void rsi_enable_ps(struct rsi_hw *adapter)
rsi_modify_ps_state(adapter, PS_ENABLE_REQ_SENT);
}
-void rsi_disable_ps(struct rsi_hw *adapter)
+/* This function is used to disable power save */
+void rsi_disable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif)
{
if (adapter->ps_state != PS_ENABLED) {
rsi_dbg(ERR_ZONE,
@@ -95,7 +96,7 @@ void rsi_disable_ps(struct rsi_hw *adapter)
return;
}
- if (rsi_send_ps_request(adapter, false)) {
+ if (rsi_send_ps_request(adapter, false, vif)) {
rsi_dbg(ERR_ZONE,
"%s: Failed to send PS request to device\n",
__func__);
@@ -105,16 +106,16 @@ void rsi_disable_ps(struct rsi_hw *adapter)
rsi_modify_ps_state(adapter, PS_DISABLE_REQ_SENT);
}
-void rsi_conf_uapsd(struct rsi_hw *adapter)
+void rsi_conf_uapsd(struct rsi_hw *adapter, struct ieee80211_vif *vif)
{
int ret;
if (adapter->ps_state != PS_ENABLED)
return;
- ret = rsi_send_ps_request(adapter, false);
+ ret = rsi_send_ps_request(adapter, false, vif);
if (!ret)
- ret = rsi_send_ps_request(adapter, true);
+ ret = rsi_send_ps_request(adapter, true, vif);
if (ret)
rsi_dbg(ERR_ZONE,
"%s: Failed to send PS request to device\n",
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 8d3a4839b6ef..b3f8006c0e9b 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -1059,16 +1059,136 @@ static void rsi_disconnect(struct sdio_func *pfunction)
}
#ifdef CONFIG_PM
+static int rsi_set_sdio_pm_caps(struct rsi_hw *adapter)
+{
+ struct rsi_91x_sdiodev *dev =
+ (struct rsi_91x_sdiodev *)adapter->rsi_dev;
+ struct sdio_func *func = dev->pfunction;
+ int ret;
+
+ ret = sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER);
+ if (ret)
+ rsi_dbg(ERR_ZONE, "Set sdio keep pwr flag failed: %d\n", ret);
+
+ return ret;
+}
+
+static int rsi_sdio_disable_interrupts(struct sdio_func *pfunc)
+{
+ struct rsi_hw *adapter = sdio_get_drvdata(pfunc);
+ u8 isr_status = 0, data = 0;
+ int ret;
+ unsigned long t1;
+
+ rsi_dbg(INFO_ZONE, "Waiting for interrupts to be cleared..");
+ t1 = jiffies;
+ do {
+ rsi_sdio_read_register(adapter, RSI_FN1_INT_REGISTER,
+ &isr_status);
+ rsi_dbg(INFO_ZONE, ".");
+ } while ((isr_status) && (jiffies_to_msecs(jiffies - t1) < 20));
+ rsi_dbg(INFO_ZONE, "Interrupts cleared\n");
+
+ sdio_claim_host(pfunc);
+ ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data);
+ if (ret < 0) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to read int enable register\n",
+ __func__);
+ goto done;
+ }
+
+ data &= RSI_INT_ENABLE_MASK;
+ ret = rsi_cmd52writebyte(pfunc->card, RSI_INT_ENABLE_REGISTER, data);
+ if (ret < 0) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to write to int enable register\n",
+ __func__);
+ goto done;
+ }
+ ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data);
+ if (ret < 0) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to read int enable register\n",
+ __func__);
+ goto done;
+ }
+ rsi_dbg(INFO_ZONE, "int enable reg content = %x\n", data);
+
+done:
+ sdio_release_host(pfunc);
+ return ret;
+}
+
+static int rsi_sdio_enable_interrupts(struct sdio_func *pfunc)
+{
+ u8 data;
+ int ret;
+
+ sdio_claim_host(pfunc);
+ ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data);
+ if (ret < 0) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to read int enable register\n", __func__);
+ goto done;
+ }
+
+ data |= ~RSI_INT_ENABLE_MASK & 0xff;
+
+ ret = rsi_cmd52writebyte(pfunc->card, RSI_INT_ENABLE_REGISTER, data);
+ if (ret < 0) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to write to int enable register\n",
+ __func__);
+ goto done;
+ }
+
+ ret = rsi_cmd52readbyte(pfunc->card, RSI_INT_ENABLE_REGISTER, &data);
+ if (ret < 0) {
+ rsi_dbg(ERR_ZONE,
+ "%s: Failed to read int enable register\n", __func__);
+ goto done;
+ }
+ rsi_dbg(INFO_ZONE, "int enable reg content = %x\n", data);
+
+done:
+ sdio_release_host(pfunc);
+ return ret;
+}
+
static int rsi_suspend(struct device *dev)
{
- /* Not yet implemented */
- return -ENOSYS;
+ int ret;
+ struct sdio_func *pfunction = dev_to_sdio_func(dev);
+ struct rsi_hw *adapter = sdio_get_drvdata(pfunction);
+ struct rsi_common *common;
+
+ if (!adapter) {
+ rsi_dbg(ERR_ZONE, "Device is not ready\n");
+ return -ENODEV;
+ }
+ common = adapter->priv;
+ rsi_sdio_disable_interrupts(pfunction);
+
+ ret = rsi_set_sdio_pm_caps(adapter);
+ if (ret)
+ rsi_dbg(INFO_ZONE,
+ "Setting power management caps failed\n");
+ common->fsm_state = FSM_CARD_NOT_READY;
+
+ return 0;
}
static int rsi_resume(struct device *dev)
{
- /* Not yet implemented */
- return -ENOSYS;
+ struct sdio_func *pfunction = dev_to_sdio_func(dev);
+ struct rsi_hw *adapter = sdio_get_drvdata(pfunction);
+ struct rsi_common *common = adapter->priv;
+
+ common->fsm_state = FSM_MAC_INIT_DONE;
+ rsi_sdio_enable_interrupts(pfunction);
+
+ return 0;
}
static const struct dev_pm_ops rsi_pm_ops = {
diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c
index 81df09dd2636..08730227cd18 100644
--- a/drivers/net/wireless/rsi/rsi_91x_usb.c
+++ b/drivers/net/wireless/rsi/rsi_91x_usb.c
@@ -73,8 +73,7 @@ static int rsi_write_multiple(struct rsi_hw *adapter,
u8 *data,
u32 count)
{
- struct rsi_91x_usbdev *dev =
- (struct rsi_91x_usbdev *)adapter->rsi_dev;
+ struct rsi_91x_usbdev *dev;
if (!adapter)
return -ENODEV;
@@ -82,6 +81,7 @@ static int rsi_write_multiple(struct rsi_hw *adapter,
if (endpoint == 0)
return -EINVAL;
+ dev = (struct rsi_91x_usbdev *)adapter->rsi_dev;
if (dev->write_fail)
return -ENETDOWN;
diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h
index e579d694d13c..272e18d750ff 100644
--- a/drivers/net/wireless/rsi/rsi_common.h
+++ b/drivers/net/wireless/rsi/rsi_common.h
@@ -79,9 +79,11 @@ static inline int rsi_kill_thread(struct rsi_thread *handle)
}
void rsi_mac80211_detach(struct rsi_hw *hw);
-u16 rsi_get_connected_channel(struct rsi_hw *adapter);
+u16 rsi_get_connected_channel(struct ieee80211_vif *vif);
struct rsi_hw *rsi_91x_init(void);
void rsi_91x_deinit(struct rsi_hw *adapter);
int rsi_read_pkt(struct rsi_common *common, s32 rcv_pkt_len);
struct rsi_sta *rsi_find_sta(struct rsi_common *common, u8 *mac_addr);
+struct ieee80211_vif *rsi_get_vif(struct rsi_hw *adapter, u8 *mac);
+void rsi_roc_timeout(unsigned long data);
#endif
diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h
index 7c145053da6d..a09d36b6b765 100644
--- a/drivers/net/wireless/rsi/rsi_hal.h
+++ b/drivers/net/wireless/rsi/rsi_hal.h
@@ -101,6 +101,9 @@
#define BBP_INFO_40MHZ 0x6
+#define FW_FLASH_OFFSET 0x820
+#define LMAC_VER_OFFSET (FW_FLASH_OFFSET + 0x200)
+
struct bl_header {
__le32 flags;
__le32 image_no;
@@ -121,8 +124,7 @@ struct rsi_mgmt_desc {
u8 xtend_desc_size;
u8 header_len;
__le16 frame_info;
- u8 rate_info;
- u8 reserved1;
+ __le16 rate_info;
__le16 bbp_info;
__le16 seq_ctrl;
u8 reserved2;
diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h
index 2c18dde633ea..a118b7aaeca0 100644
--- a/drivers/net/wireless/rsi/rsi_main.h
+++ b/drivers/net/wireless/rsi/rsi_main.h
@@ -60,7 +60,7 @@ enum RSI_FSM_STATES {
extern u32 rsi_zone_enabled;
extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
-#define RSI_MAX_VIFS 1
+#define RSI_MAX_VIFS 3
#define NUM_EDCA_QUEUES 4
#define IEEE80211_ADDR_LEN 6
#define FRAME_DESC_SZ 16
@@ -113,8 +113,13 @@ extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...);
struct version_info {
u16 major;
u16 minor;
- u16 release_num;
- u16 patch_num;
+ u8 release_num;
+ u8 patch_num;
+ union {
+ struct {
+ u8 fw_ver[8];
+ } info;
+ } ver;
} __packed;
struct skb_info {
@@ -124,6 +129,8 @@ struct skb_info {
s8 tid;
s8 sta_id;
u8 internal_hdr_size;
+ struct ieee80211_vif *vif;
+ u8 vap_id;
};
enum edca_queue {
@@ -157,6 +164,7 @@ struct vif_priv {
bool is_ht;
bool sgi;
u16 seq_start;
+ int vap_id;
};
struct rsi_event {
@@ -196,8 +204,7 @@ struct rsi_common {
struct vif_priv vif_info[RSI_MAX_VIFS];
bool mgmt_q_block;
- struct version_info driver_ver;
- struct version_info fw_ver;
+ struct version_info lmac_ver;
struct rsi_thread tx_thread;
struct sk_buff_head tx_queue[NUM_EDCA_QUEUES + 2];
@@ -270,6 +277,11 @@ struct rsi_common {
int num_stations;
int max_stations;
struct ieee80211_key_conf *key;
+
+ /* Wi-Fi direct mode related */
+ bool p2p_enabled;
+ struct timer_list roc_timer;
+ struct ieee80211_vif *roc_vif;
};
enum host_intf {
@@ -326,6 +338,8 @@ struct rsi_hw {
int (*determine_event_timeout)(struct rsi_hw *adapter);
};
+void rsi_print_version(struct rsi_common *common);
+
struct rsi_host_intf_ops {
int (*read_pkt)(struct rsi_hw *adapter, u8 *pkt, u32 len);
int (*write_pkt)(struct rsi_hw *adapter, u8 *pkt, u32 len);
diff --git a/drivers/net/wireless/rsi/rsi_mgmt.h b/drivers/net/wireless/rsi/rsi_mgmt.h
index c6e1fa669a27..e21723013f8d 100644
--- a/drivers/net/wireless/rsi/rsi_mgmt.h
+++ b/drivers/net/wireless/rsi/rsi_mgmt.h
@@ -189,6 +189,8 @@
IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \
IEEE80211_WMM_IE_STA_QOSINFO_AC_BK)
+#define RSI_DESC_VAP_ID_MASK 0xC000u
+#define RSI_DESC_VAP_ID_OFST 14
#define RSI_DATA_DESC_MAC_BBP_INFO BIT(0)
#define RSI_DATA_DESC_NO_ACK_IND BIT(9)
#define RSI_DATA_DESC_QOS_EN BIT(12)
@@ -200,8 +202,11 @@
#define RSI_DATA_DESC_INSERT_SEQ_NO BIT(2)
enum opmode {
- AP_OPMODE = 0,
- STA_OPMODE,
+ RSI_OPMODE_UNSUPPORTED = -1,
+ RSI_OPMODE_AP = 0,
+ RSI_OPMODE_STA,
+ RSI_OPMODE_P2P_GO,
+ RSI_OPMODE_P2P_CLIENT
};
enum vap_status {
@@ -363,9 +368,9 @@ struct rsi_vap_caps {
u8 vif_type;
u8 channel_bw;
__le16 antenna_info;
+ __le16 token;
u8 radioid_macid;
u8 vap_id;
- __le16 reserved3;
u8 mac_addr[6];
__le16 keep_alive_period;
u8 bssid[6];
@@ -613,14 +618,16 @@ int rsi_send_aggregation_params_frame(struct rsi_common *common, u16 tid,
u16 ssn, u8 buf_size, u8 event,
u8 sta_id);
int rsi_hal_load_key(struct rsi_common *common, u8 *data, u16 key_len,
- u8 key_type, u8 key_id, u32 cipher, s16 sta_id);
+ u8 key_type, u8 key_id, u32 cipher, s16 sta_id,
+ struct ieee80211_vif *vif);
int rsi_set_channel(struct rsi_common *common,
struct ieee80211_channel *channel);
int rsi_send_vap_dynamic_update(struct rsi_common *common);
int rsi_send_block_unblock_frame(struct rsi_common *common, bool event);
void rsi_inform_bss_status(struct rsi_common *common, enum opmode opmode,
u8 status, const u8 *addr, u8 qos_enable, u16 aid,
- struct ieee80211_sta *sta, u16 sta_id);
+ struct ieee80211_sta *sta, u16 sta_id,
+ struct ieee80211_vif *vif);
void rsi_indicate_pkt_to_os(struct rsi_common *common, struct sk_buff *skb);
int rsi_mac80211_attach(struct rsi_common *common);
void rsi_indicate_tx_status(struct rsi_hw *common, struct sk_buff *skb,
@@ -630,8 +637,10 @@ void rsi_core_qos_processor(struct rsi_common *common);
void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb);
int rsi_send_mgmt_pkt(struct rsi_common *common, struct sk_buff *skb);
int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb);
-int rsi_band_check(struct rsi_common *common);
+int rsi_band_check(struct rsi_common *common, struct ieee80211_channel *chan);
int rsi_send_rx_filter_frame(struct rsi_common *common, u16 rx_filter_word);
int rsi_send_radio_params_update(struct rsi_common *common);
int rsi_set_antenna(struct rsi_common *common, u8 antenna);
+int rsi_send_ps_request(struct rsi_hw *adapter, bool enable,
+ struct ieee80211_vif *vif);
#endif
diff --git a/drivers/net/wireless/rsi/rsi_ps.h b/drivers/net/wireless/rsi/rsi_ps.h
index d8475873df36..98ff6a4ced57 100644
--- a/drivers/net/wireless/rsi/rsi_ps.h
+++ b/drivers/net/wireless/rsi/rsi_ps.h
@@ -55,10 +55,9 @@ struct rsi_ps_info {
} __packed;
char *str_psstate(enum ps_state state);
-void rsi_enable_ps(struct rsi_hw *adapter);
-void rsi_disable_ps(struct rsi_hw *adapter);
+void rsi_enable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif);
+void rsi_disable_ps(struct rsi_hw *adapter, struct ieee80211_vif *vif);
int rsi_handle_ps_confirm(struct rsi_hw *adapter, u8 *msg);
void rsi_default_ps_params(struct rsi_hw *hw);
-int rsi_send_ps_request(struct rsi_hw *adapter, bool enable);
-void rsi_conf_uapsd(struct rsi_hw *adapter);
+void rsi_conf_uapsd(struct rsi_hw *adapter, struct ieee80211_vif *vif);
#endif
diff --git a/drivers/net/wireless/rsi/rsi_sdio.h b/drivers/net/wireless/rsi/rsi_sdio.h
index 95e4bed57baf..49c549ba6682 100644
--- a/drivers/net/wireless/rsi/rsi_sdio.h
+++ b/drivers/net/wireless/rsi/rsi_sdio.h
@@ -48,6 +48,8 @@ enum sdio_interrupt_type {
#define RSI_DEVICE_BUFFER_STATUS_REGISTER 0xf3
#define RSI_FN1_INT_REGISTER 0xf9
+#define RSI_INT_ENABLE_REGISTER 0x04
+#define RSI_INT_ENABLE_MASK 0xfc
#define RSI_SD_REQUEST_MASTER 0x10000
/* FOR SD CARD ONLY */
diff --git a/drivers/net/wireless/st/cw1200/pm.c b/drivers/net/wireless/st/cw1200/pm.c
index d2202ae92bdd..ded23df1ac1d 100644
--- a/drivers/net/wireless/st/cw1200/pm.c
+++ b/drivers/net/wireless/st/cw1200/pm.c
@@ -91,7 +91,7 @@ struct cw1200_suspend_state {
u8 prev_ps_mode;
};
-static void cw1200_pm_stay_awake_tmo(unsigned long arg)
+static void cw1200_pm_stay_awake_tmo(struct timer_list *unused)
{
/* XXX what's the point of this ? */
}
@@ -101,8 +101,7 @@ int cw1200_pm_init(struct cw1200_pm_state *pm,
{
spin_lock_init(&pm->lock);
- setup_timer(&pm->stay_awake, cw1200_pm_stay_awake_tmo,
- (unsigned long)pm);
+ timer_setup(&pm->stay_awake, cw1200_pm_stay_awake_tmo, 0);
return 0;
}
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5b1d2e8402d9..a46a1e94505d 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -307,7 +307,7 @@ static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
return to_xenbus_device(vif->dev->dev.parent);
}
-void xenvif_tx_credit_callback(unsigned long data);
+void xenvif_tx_credit_callback(struct timer_list *t);
struct xenvif *xenvif_alloc(struct device *parent,
domid_t domid,
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 4491ca5aee90..d6dff347f896 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -520,8 +520,7 @@ int xenvif_init_queue(struct xenvif_queue *queue)
queue->credit_bytes = queue->remaining_credit = ~0UL;
queue->credit_usec = 0UL;
- init_timer(&queue->credit_timeout);
- queue->credit_timeout.function = xenvif_tx_credit_callback;
+ timer_setup(&queue->credit_timeout, xenvif_tx_credit_callback, 0);
queue->credit_window_start = get_jiffies_64();
queue->rx_queue_max = XENVIF_RX_QUEUE_BYTES;
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 5042ff8d449a..a27daa23c9dc 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -183,9 +183,9 @@ static void tx_add_credit(struct xenvif_queue *queue)
queue->rate_limited = false;
}
-void xenvif_tx_credit_callback(unsigned long data)
+void xenvif_tx_credit_callback(struct timer_list *t)
{
- struct xenvif_queue *queue = (struct xenvif_queue *)data;
+ struct xenvif_queue *queue = from_timer(queue, t, credit_timeout);
tx_add_credit(queue);
xenvif_napi_schedule_or_enable_events(queue);
}
@@ -700,8 +700,6 @@ static bool tx_credit_exceeded(struct xenvif_queue *queue, unsigned size)
/* Still too big to send right now? Set a callback. */
if (size > queue->remaining_credit) {
- queue->credit_timeout.data =
- (unsigned long)queue;
mod_timer(&queue->credit_timeout,
next_credit);
queue->credit_window_start = next_credit;
diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c
index c6fe2a4a7a6a..9671ded549f0 100644
--- a/drivers/pcmcia/electra_cf.c
+++ b/drivers/pcmcia/electra_cf.c
@@ -79,9 +79,9 @@ static int electra_cf_ss_init(struct pcmcia_socket *s)
}
/* the timer is primarily to kick this socket's pccardd */
-static void electra_cf_timer(unsigned long _cf)
+static void electra_cf_timer(struct timer_list *t)
{
- struct electra_cf_socket *cf = (void *) _cf;
+ struct electra_cf_socket *cf = from_timer(cf, t, timer);
int present = electra_cf_present(cf);
if (present != cf->present) {
@@ -95,7 +95,9 @@ static void electra_cf_timer(unsigned long _cf)
static irqreturn_t electra_cf_irq(int irq, void *_cf)
{
- electra_cf_timer((unsigned long)_cf);
+ struct electra_cf_socket *cf = _cf;
+
+ electra_cf_timer(&cf->timer);
return IRQ_HANDLED;
}
@@ -206,7 +208,7 @@ static int electra_cf_probe(struct platform_device *ofdev)
if (!cf)
return -ENOMEM;
- setup_timer(&cf->timer, electra_cf_timer, (unsigned long)cf);
+ timer_setup(&cf->timer, electra_cf_timer, 0);
cf->irq = 0;
cf->ofdev = ofdev;
@@ -305,7 +307,7 @@ static int electra_cf_probe(struct platform_device *ofdev)
cf->mem_phys, io.start, cf->irq);
cf->active = 1;
- electra_cf_timer((unsigned long)cf);
+ electra_cf_timer(&cf->timer);
return 0;
fail3:
diff --git a/drivers/s390/net/fsm.c b/drivers/s390/net/fsm.c
index e5dea67f902e..8c14c6c3ad3d 100644
--- a/drivers/s390/net/fsm.c
+++ b/drivers/s390/net/fsm.c
@@ -170,9 +170,7 @@ fsm_addtimer(fsm_timer *this, int millisec, int event, void *arg)
this->fi->name, this, millisec);
#endif
- init_timer(&this->tl);
- this->tl.function = (void *)fsm_expire_timer;
- this->tl.data = (long)this;
+ setup_timer(&this->tl, (void *)fsm_expire_timer, (long)this);
this->expire_event = event;
this->event_arg = arg;
this->tl.expires = jiffies + (millisec * HZ) / 1000;
@@ -191,9 +189,7 @@ fsm_modtimer(fsm_timer *this, int millisec, int event, void *arg)
#endif
del_timer(&this->tl);
- init_timer(&this->tl);
- this->tl.function = (void *)fsm_expire_timer;
- this->tl.data = (long)this;
+ setup_timer(&this->tl, (void *)fsm_expire_timer, (long)this);
this->expire_event = event;
this->event_arg = arg;
this->tl.expires = jiffies + (millisec * HZ) / 1000;
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 59e09854c4f7..dbdf09a1299a 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -182,6 +182,21 @@ struct qeth_sbp_info {
__u32 reflect_promisc_primary:1;
};
+struct qeth_vnicc_info {
+ /* supported/currently configured VNICCs; updated in IPA exchanges */
+ u32 sup_chars;
+ u32 cur_chars;
+ /* supported commands: bitmasks which VNICCs support respective cmd */
+ u32 set_char_sup;
+ u32 getset_timeout_sup;
+ /* timeout value for the learning characteristic */
+ u32 learning_timeout;
+ /* characteristics wanted/configured by user */
+ u32 wanted_chars;
+ /* has user explicitly enabled rx_bcast while online? */
+ bool rx_bcast_enabled;
+};
+
static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa,
enum qeth_ipa_funcs func)
{
@@ -216,20 +231,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa,
#define QETH_IDX_FUNC_LEVEL_OSD 0x0101
#define QETH_IDX_FUNC_LEVEL_IQD 0x4108
-#define QETH_MODELLIST_ARRAY \
- {{0x1731, 0x01, 0x1732, QETH_CARD_TYPE_OSD, QETH_MAX_QUEUES, 0}, \
- {0x1731, 0x05, 0x1732, QETH_CARD_TYPE_IQD, QETH_MAX_QUEUES, 0x103}, \
- {0x1731, 0x06, 0x1732, QETH_CARD_TYPE_OSN, QETH_MAX_QUEUES, 0}, \
- {0x1731, 0x02, 0x1732, QETH_CARD_TYPE_OSM, QETH_MAX_QUEUES, 0}, \
- {0x1731, 0x02, 0x1732, QETH_CARD_TYPE_OSX, QETH_MAX_QUEUES, 0}, \
- {0, 0, 0, 0, 0, 0} }
-#define QETH_CU_TYPE_IND 0
-#define QETH_CU_MODEL_IND 1
-#define QETH_DEV_TYPE_IND 2
-#define QETH_DEV_MODEL_IND 3
-#define QETH_QUEUE_NO_IND 4
-#define QETH_MULTICAST_IND 5
-
#define QETH_REAL_CARD 1
#define QETH_VLAN_CARD 2
#define QETH_BUFSIZE 4096
@@ -673,6 +674,7 @@ struct qeth_card_options {
struct qeth_routing_info route6;
struct qeth_ipa_info ipa6;
struct qeth_sbp_info sbp; /* SETBRIDGEPORT options */
+ struct qeth_vnicc_info vnicc; /* VNICC options */
int fake_broadcast;
int layer2;
int performance_stats;
@@ -946,13 +948,13 @@ int qeth_get_priority_queue(struct qeth_card *, struct sk_buff *, int, int);
int qeth_get_elements_no(struct qeth_card *card, struct sk_buff *skb,
int extra_elems, int data_offset);
int qeth_get_elements_for_frags(struct sk_buff *);
-int qeth_do_send_packet_fast(struct qeth_card *card,
- struct qeth_qdio_out_q *queue, struct sk_buff *skb,
+int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb,
struct qeth_hdr *hdr, unsigned int offset,
unsigned int hd_len);
int qeth_do_send_packet(struct qeth_card *card, struct qeth_qdio_out_q *queue,
struct sk_buff *skb, struct qeth_hdr *hdr,
- unsigned int hd_len, unsigned int offset, int elements);
+ unsigned int offset, unsigned int hd_len,
+ int elements_needed);
int qeth_do_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
int qeth_core_get_sset_count(struct net_device *, int);
void qeth_core_get_ethtool_stats(struct net_device *,
@@ -982,7 +984,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
__u16, __u16,
enum qeth_prot_versions);
int qeth_set_features(struct net_device *, netdev_features_t);
-int qeth_recover_features(struct net_device *);
+void qeth_recover_features(struct net_device *dev);
netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
int qeth_vm_request_mac(struct qeth_card *card);
int qeth_push_hdr(struct sk_buff *skb, struct qeth_hdr **hdr, unsigned int len);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index bae7440abc01..457a4b4e8212 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -52,7 +52,6 @@ EXPORT_SYMBOL_GPL(qeth_core_header_cache);
static struct kmem_cache *qeth_qdio_outbuf_cache;
static struct device *qeth_core_root_dev;
-static unsigned int known_devices[][6] = QETH_MODELLIST_ARRAY;
static struct lock_class_key qdio_out_skb_queue_key;
static struct mutex qeth_mod_mutex;
@@ -1386,6 +1385,7 @@ static void qeth_init_qdio_info(struct qeth_card *card)
QETH_DBF_TEXT(SETUP, 4, "intqdinf");
atomic_set(&card->qdio.state, QETH_QDIO_UNINITIALIZED);
/* inbound */
+ card->qdio.no_in_queues = 1;
card->qdio.in_buf_size = QETH_IN_BUF_SIZE_DEFAULT;
if (card->info.type == QETH_CARD_TYPE_IQD)
card->qdio.init_pool.buf_count = QETH_IN_BUF_COUNT_HSDEFAULT;
@@ -1519,34 +1519,17 @@ out:
return NULL;
}
-static int qeth_determine_card_type(struct qeth_card *card)
+static void qeth_determine_card_type(struct qeth_card *card)
{
- int i = 0;
-
QETH_DBF_TEXT(SETUP, 2, "detcdtyp");
card->qdio.do_prio_queueing = QETH_PRIOQ_DEFAULT;
card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
- while (known_devices[i][QETH_DEV_MODEL_IND]) {
- if ((CARD_RDEV(card)->id.dev_type ==
- known_devices[i][QETH_DEV_TYPE_IND]) &&
- (CARD_RDEV(card)->id.dev_model ==
- known_devices[i][QETH_DEV_MODEL_IND])) {
- card->info.type = known_devices[i][QETH_DEV_MODEL_IND];
- card->qdio.no_out_queues =
- known_devices[i][QETH_QUEUE_NO_IND];
- card->qdio.no_in_queues = 1;
- card->info.is_multicast_different =
- known_devices[i][QETH_MULTICAST_IND];
- qeth_update_from_chp_desc(card);
- return 0;
- }
- i++;
- }
- card->info.type = QETH_CARD_TYPE_UNKNOWN;
- dev_err(&card->gdev->dev, "The adapter hardware is of an "
- "unknown type\n");
- return -ENOENT;
+ card->info.type = CARD_RDEV(card)->id.driver_info;
+ card->qdio.no_out_queues = QETH_MAX_QUEUES;
+ if (card->info.type == QETH_CARD_TYPE_IQD)
+ card->info.is_multicast_different = 0x0103;
+ qeth_update_from_chp_desc(card);
}
static int qeth_clear_channel(struct qeth_channel *channel)
@@ -2090,7 +2073,6 @@ int qeth_send_control_data(struct qeth_card *card, int len,
spin_lock_irqsave(&card->lock, flags);
list_add_tail(&reply->list, &card->cmd_waiter_list);
spin_unlock_irqrestore(&card->lock, flags);
- QETH_DBF_HEX(CTRL, 2, iob->data, QETH_DBF_CTRL_LEN);
while (atomic_cmpxchg(&card->write.irq_pending, 0, 1)) ;
qeth_prepare_control_data(card, len, iob);
@@ -2233,23 +2215,15 @@ static int qeth_cm_setup(struct qeth_card *card)
static int qeth_get_initial_mtu_for_card(struct qeth_card *card)
{
switch (card->info.type) {
- case QETH_CARD_TYPE_UNKNOWN:
- return 1500;
case QETH_CARD_TYPE_IQD:
return card->info.max_mtu;
case QETH_CARD_TYPE_OSD:
- switch (card->info.link_type) {
- case QETH_LINK_TYPE_HSTR:
- case QETH_LINK_TYPE_LANE_TR:
- return 2000;
- default:
- return card->options.layer2 ? 1500 : 1492;
- }
- case QETH_CARD_TYPE_OSM:
case QETH_CARD_TYPE_OSX:
- return card->options.layer2 ? 1500 : 1492;
+ if (!card->options.layer2)
+ return ETH_DATA_LEN - 8; /* L3: allow for LLC + SNAP */
+ /* fall through */
default:
- return 1500;
+ return ETH_DATA_LEN;
}
}
@@ -2279,7 +2253,6 @@ static int qeth_mtu_is_valid(struct qeth_card *card, int mtu)
return ((mtu >= 576) &&
(mtu <= card->info.max_mtu));
case QETH_CARD_TYPE_OSN:
- case QETH_CARD_TYPE_UNKNOWN:
default:
return 1;
}
@@ -4040,35 +4013,23 @@ static int qeth_fill_buffer(struct qeth_qdio_out_q *queue,
return flush_cnt;
}
-int qeth_do_send_packet_fast(struct qeth_card *card,
- struct qeth_qdio_out_q *queue, struct sk_buff *skb,
+int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, struct sk_buff *skb,
struct qeth_hdr *hdr, unsigned int offset,
unsigned int hd_len)
{
- struct qeth_qdio_out_buffer *buffer;
- int index;
+ int index = queue->next_buf_to_fill;
+ struct qeth_qdio_out_buffer *buffer = queue->bufs[index];
- /* spin until we get the queue ... */
- while (atomic_cmpxchg(&queue->state, QETH_OUT_Q_UNLOCKED,
- QETH_OUT_Q_LOCKED) != QETH_OUT_Q_UNLOCKED);
- /* ... now we've got the queue */
- index = queue->next_buf_to_fill;
- buffer = queue->bufs[queue->next_buf_to_fill];
/*
* check if buffer is empty to make sure that we do not 'overtake'
* ourselves and try to fill a buffer that is already primed
*/
if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY)
- goto out;
- queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) %
- QDIO_MAX_BUFFERS_PER_Q;
- atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
+ return -EBUSY;
+ queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q;
qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len);
qeth_flush_buffers(queue, index, 1);
return 0;
-out:
- atomic_set(&queue->state, QETH_OUT_Q_UNLOCKED);
- return -EBUSY;
}
EXPORT_SYMBOL_GPL(qeth_do_send_packet_fast);
@@ -4923,7 +4884,6 @@ static void qeth_qdio_establish_cq(struct qeth_card *card,
if (card->options.cq == QETH_CQ_ENABLED) {
int offset = QDIO_MAX_BUFFERS_PER_Q *
(card->qdio.no_in_queues - 1);
- i = QDIO_MAX_BUFFERS_PER_Q * (card->qdio.no_in_queues - 1);
for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; ++i) {
in_sbal_ptrs[offset + i] = (struct qdio_buffer *)
virt_to_phys(card->qdio.c_q->bufs[i].buffer);
@@ -5209,49 +5169,27 @@ out:
}
EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
-static int qeth_create_skb_frag(struct qeth_qdio_buffer *qethbuffer,
- struct qdio_buffer_element *element,
- struct sk_buff **pskb, int offset, int *pfrag,
- int data_len)
+static void qeth_create_skb_frag(struct qdio_buffer_element *element,
+ struct sk_buff *skb, int offset, int data_len)
{
struct page *page = virt_to_page(element->addr);
- if (*pskb == NULL) {
- if (qethbuffer->rx_skb) {
- /* only if qeth_card.options.cq == QETH_CQ_ENABLED */
- *pskb = qethbuffer->rx_skb;
- qethbuffer->rx_skb = NULL;
- } else {
- *pskb = dev_alloc_skb(QETH_RX_PULL_LEN + ETH_HLEN);
- if (!(*pskb))
- return -ENOMEM;
- }
+ unsigned int next_frag;
- skb_reserve(*pskb, ETH_HLEN);
- if (data_len <= QETH_RX_PULL_LEN) {
- skb_put_data(*pskb, element->addr + offset, data_len);
- } else {
- get_page(page);
- skb_put_data(*pskb, element->addr + offset,
- QETH_RX_PULL_LEN);
- skb_fill_page_desc(*pskb, *pfrag, page,
- offset + QETH_RX_PULL_LEN,
- data_len - QETH_RX_PULL_LEN);
- (*pskb)->data_len += data_len - QETH_RX_PULL_LEN;
- (*pskb)->len += data_len - QETH_RX_PULL_LEN;
- (*pskb)->truesize += data_len - QETH_RX_PULL_LEN;
- (*pfrag)++;
- }
- } else {
- get_page(page);
- skb_fill_page_desc(*pskb, *pfrag, page, offset, data_len);
- (*pskb)->data_len += data_len;
- (*pskb)->len += data_len;
- (*pskb)->truesize += data_len;
- (*pfrag)++;
- }
+ /* first fill the linear space */
+ if (!skb->len) {
+ unsigned int linear = min(data_len, skb_tailroom(skb));
+ skb_put_data(skb, element->addr + offset, linear);
+ data_len -= linear;
+ if (!data_len)
+ return;
+ offset += linear;
+ /* fall through to add page frag for remaining data */
+ }
- return 0;
+ next_frag = skb_shinfo(skb)->nr_frags;
+ get_page(page);
+ skb_add_rx_frag(skb, next_frag, page, offset, data_len, data_len);
}
static inline int qeth_is_last_sbale(struct qdio_buffer_element *sbale)
@@ -5267,22 +5205,19 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
struct qdio_buffer_element *element = *__element;
struct qdio_buffer *buffer = qethbuffer->buffer;
int offset = *__offset;
- struct sk_buff *skb = NULL;
+ struct sk_buff *skb;
int skb_len = 0;
void *data_ptr;
int data_len;
int headroom = 0;
int use_rx_sg = 0;
- int frag = 0;
/* qeth_hdr must not cross element boundaries */
- if (element->length < offset + sizeof(struct qeth_hdr)) {
+ while (element->length < offset + sizeof(struct qeth_hdr)) {
if (qeth_is_last_sbale(element))
return NULL;
element++;
offset = 0;
- if (element->length < sizeof(struct qeth_hdr))
- return NULL;
}
*hdr = element->addr + offset;
@@ -5309,27 +5244,32 @@ struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
if (((skb_len >= card->options.rx_sg_cb) &&
(!(card->info.type == QETH_CARD_TYPE_OSN)) &&
(!atomic_read(&card->force_alloc_skb))) ||
- (card->options.cq == QETH_CQ_ENABLED)) {
+ (card->options.cq == QETH_CQ_ENABLED))
use_rx_sg = 1;
+
+ if (use_rx_sg && qethbuffer->rx_skb) {
+ /* QETH_CQ_ENABLED only: */
+ skb = qethbuffer->rx_skb;
+ qethbuffer->rx_skb = NULL;
} else {
- skb = dev_alloc_skb(skb_len + headroom);
- if (!skb)
- goto no_mem;
- if (headroom)
- skb_reserve(skb, headroom);
+ unsigned int linear = (use_rx_sg) ? QETH_RX_PULL_LEN : skb_len;
+
+ skb = dev_alloc_skb(linear + headroom);
}
+ if (!skb)
+ goto no_mem;
+ if (headroom)
+ skb_reserve(skb, headroom);
data_ptr = element->addr + offset;
while (skb_len) {
data_len = min(skb_len, (int)(element->length - offset));
if (data_len) {
- if (use_rx_sg) {
- if (qeth_create_skb_frag(qethbuffer, element,
- &skb, offset, &frag, data_len))
- goto no_mem;
- } else {
+ if (use_rx_sg)
+ qeth_create_skb_frag(element, skb, offset,
+ data_len);
+ else
skb_put_data(skb, data_ptr, data_len);
- }
}
skb_len -= data_len;
if (skb_len) {
@@ -5429,7 +5369,7 @@ int qeth_poll(struct napi_struct *napi, int budget)
}
}
- napi_complete(napi);
+ napi_complete_done(napi, work_done);
if (qdio_start_irq(card->data.ccwdev, 0))
napi_schedule(&card->napi);
out:
@@ -5737,11 +5677,7 @@ static int qeth_core_probe_device(struct ccwgroup_device *gdev)
gdev->cdev[1]->handler = qeth_irq;
gdev->cdev[2]->handler = qeth_irq;
- rc = qeth_determine_card_type(card);
- if (rc) {
- QETH_DBF_TEXT_(SETUP, 2, "3err%d", rc);
- goto err_card;
- }
+ qeth_determine_card_type(card);
rc = qeth_setup_card(card);
if (rc) {
QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc);
@@ -6416,32 +6352,29 @@ static int qeth_set_ipa_tso(struct qeth_card *card, int on)
return rc;
}
-/* try to restore device features on a device after recovery */
-int qeth_recover_features(struct net_device *dev)
+#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO)
+
+/**
+ * qeth_recover_features() - Restore device features after recovery
+ * @dev: the recovering net_device
+ *
+ * Caller must hold rtnl lock.
+ */
+void qeth_recover_features(struct net_device *dev)
{
+ netdev_features_t features = dev->features;
struct qeth_card *card = dev->ml_priv;
- netdev_features_t recover = dev->features;
- if (recover & NETIF_F_IP_CSUM) {
- if (qeth_set_ipa_csum(card, 1, IPA_OUTBOUND_CHECKSUM))
- recover ^= NETIF_F_IP_CSUM;
- }
- if (recover & NETIF_F_RXCSUM) {
- if (qeth_set_ipa_csum(card, 1, IPA_INBOUND_CHECKSUM))
- recover ^= NETIF_F_RXCSUM;
- }
- if (recover & NETIF_F_TSO) {
- if (qeth_set_ipa_tso(card, 1))
- recover ^= NETIF_F_TSO;
- }
-
- if (recover == dev->features)
- return 0;
+ /* force-off any feature that needs an IPA sequence.
+ * netdev_update_features() will restart them.
+ */
+ dev->features &= ~QETH_HW_FEATURES;
+ netdev_update_features(dev);
+ if (features == dev->features)
+ return;
dev_warn(&card->gdev->dev,
"Device recovery failed to restore all offload features\n");
- dev->features = recover;
- return -EIO;
}
EXPORT_SYMBOL_GPL(qeth_recover_features);
@@ -6498,8 +6431,7 @@ netdev_features_t qeth_fix_features(struct net_device *dev,
/* if the card isn't up, remove features that require hw changes */
if (card->state == CARD_STATE_DOWN ||
card->state == CARD_STATE_RECOVER)
- features = features & ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
- NETIF_F_TSO);
+ features &= ~QETH_HW_FEATURES;
QETH_DBF_HEX(SETUP, 2, &features, sizeof(features));
return features;
}
diff --git a/drivers/s390/net/qeth_core_mpc.c b/drivers/s390/net/qeth_core_mpc.c
index 6dd7d05e5693..5f8a2b834d39 100644
--- a/drivers/s390/net/qeth_core_mpc.c
+++ b/drivers/s390/net/qeth_core_mpc.c
@@ -167,7 +167,7 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = {
{IPA_RC_IP_TABLE_FULL, "Add Addr IP Table Full - ipv6"},
{IPA_RC_UNKNOWN_ERROR, "IPA command failed - reason unknown"},
{IPA_RC_UNSUPPORTED_COMMAND, "Command not supported"},
- {IPA_RC_TRACE_ALREADY_ACTIVE, "trace already active"},
+ {IPA_RC_VNICC_OOSEQ, "Command issued out of sequence"},
{IPA_RC_INVALID_FORMAT, "invalid format or length"},
{IPA_RC_DUP_IPV6_REMOTE, "ipv6 address already registered remote"},
{IPA_RC_SBP_IQD_NOT_CONFIGURED, "Not configured for bridgeport"},
@@ -193,6 +193,7 @@ static struct ipa_rc_msg qeth_ipa_rc_msg[] = {
{IPA_RC_L2_INVALID_VLAN_ID, "L2 invalid vlan id"},
{IPA_RC_L2_DUP_VLAN_ID, "L2 duplicate vlan id"},
{IPA_RC_L2_VLAN_ID_NOT_FOUND, "L2 vlan id not found"},
+ {IPA_RC_VNICC_VNICBP, "VNIC is BridgePort"},
{IPA_RC_SBP_OSA_NOT_CONFIGURED, "Not configured for bridgeport"},
{IPA_RC_SBP_OSA_OS_MISMATCH, "OS mismatch"},
{IPA_RC_SBP_OSA_ANO_DEV_PRIMARY, "Primary bridgeport exists already"},
@@ -253,6 +254,7 @@ static struct ipa_cmd_names qeth_ipa_cmd_names[] = {
{IPA_CMD_DELGMAC, "delgmac"},
{IPA_CMD_SETVLAN, "setvlan"},
{IPA_CMD_DELVLAN, "delvlan"},
+ {IPA_CMD_VNICC, "vnic_characteristics"},
{IPA_CMD_SETBRIDGEPORT_OSA, "set_bridge_port(osa)"},
{IPA_CMD_SETCCID, "setccid"},
{IPA_CMD_DELCCID, "delccid"},
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index 912e0107de8f..413f0f268b17 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -44,7 +44,6 @@ extern unsigned char IPA_PDU_HEADER[];
#define IPA_CMD_PRIM_VERSION_NO 0x01
enum qeth_card_types {
- QETH_CARD_TYPE_UNKNOWN = 0,
QETH_CARD_TYPE_OSD = 1,
QETH_CARD_TYPE_IQD = 5,
QETH_CARD_TYPE_OSN = 6,
@@ -90,6 +89,7 @@ enum qeth_ipa_cmds {
IPA_CMD_DELGMAC = 0x24,
IPA_CMD_SETVLAN = 0x25,
IPA_CMD_DELVLAN = 0x26,
+ IPA_CMD_VNICC = 0x2a,
IPA_CMD_SETBRIDGEPORT_OSA = 0x2b,
IPA_CMD_SETCCID = 0x41,
IPA_CMD_DELCCID = 0x42,
@@ -165,6 +165,8 @@ enum qeth_ipa_return_codes {
IPA_RC_L2_INVALID_VLAN_ID = 0x2015,
IPA_RC_L2_DUP_VLAN_ID = 0x2016,
IPA_RC_L2_VLAN_ID_NOT_FOUND = 0x2017,
+ IPA_RC_L2_VLAN_ID_NOT_ALLOWED = 0x2050,
+ IPA_RC_VNICC_VNICBP = 0x20B0,
IPA_RC_SBP_OSA_NOT_CONFIGURED = 0x2B0C,
IPA_RC_SBP_OSA_OS_MISMATCH = 0x2B10,
IPA_RC_SBP_OSA_ANO_DEV_PRIMARY = 0x2B14,
@@ -197,6 +199,9 @@ enum qeth_ipa_return_codes {
IPA_RC_ENOMEM = 0xfffe,
IPA_RC_FFFF = 0xffff
};
+/* for VNIC Characteristics */
+#define IPA_RC_VNICC_OOSEQ 0x0005
+
/* for SET_DIAGNOSTIC_ASSIST */
#define IPA_RC_INVALID_SUBCMD IPA_RC_IP_TABLE_FULL
#define IPA_RC_HARDWARE_AUTH_ERROR IPA_RC_UNKNOWN_ERROR
@@ -551,6 +556,71 @@ struct qeth_ipacmd_diagass {
__u8 cdata[64];
} __attribute__ ((packed));
+/* VNIC Characteristics IPA Command: *****************************************/
+/* IPA commands/sub commands for VNICC */
+#define IPA_VNICC_QUERY_CHARS 0x00000000L
+#define IPA_VNICC_QUERY_CMDS 0x00000001L
+#define IPA_VNICC_ENABLE 0x00000002L
+#define IPA_VNICC_DISABLE 0x00000004L
+#define IPA_VNICC_SET_TIMEOUT 0x00000008L
+#define IPA_VNICC_GET_TIMEOUT 0x00000010L
+
+/* VNICC flags */
+#define QETH_VNICC_FLOODING 0x80000000
+#define QETH_VNICC_MCAST_FLOODING 0x40000000
+#define QETH_VNICC_LEARNING 0x20000000
+#define QETH_VNICC_TAKEOVER_SETVMAC 0x10000000
+#define QETH_VNICC_TAKEOVER_LEARNING 0x08000000
+#define QETH_VNICC_BRIDGE_INVISIBLE 0x04000000
+#define QETH_VNICC_RX_BCAST 0x02000000
+
+/* VNICC default values */
+#define QETH_VNICC_ALL 0xff000000
+#define QETH_VNICC_DEFAULT QETH_VNICC_RX_BCAST
+/* default VNICC timeout in seconds */
+#define QETH_VNICC_DEFAULT_TIMEOUT 600
+
+/* VNICC header */
+struct qeth_ipacmd_vnicc_hdr {
+ u32 sup;
+ u32 cur;
+};
+
+/* VNICC sub command header */
+struct qeth_vnicc_sub_hdr {
+ u16 data_length;
+ u16 reserved;
+ u32 sub_command;
+};
+
+/* query supported commands for VNIC characteristic */
+struct qeth_vnicc_query_cmds {
+ u32 vnic_char;
+ u32 sup_cmds;
+};
+
+/* enable/disable VNIC characteristic */
+struct qeth_vnicc_set_char {
+ u32 vnic_char;
+};
+
+/* get/set timeout for VNIC characteristic */
+struct qeth_vnicc_getset_timeout {
+ u32 vnic_char;
+ u32 timeout;
+};
+
+/* complete VNICC IPA command message */
+struct qeth_ipacmd_vnicc {
+ struct qeth_ipacmd_vnicc_hdr hdr;
+ struct qeth_vnicc_sub_hdr sub_hdr;
+ union {
+ struct qeth_vnicc_query_cmds query_cmds;
+ struct qeth_vnicc_set_char set_char;
+ struct qeth_vnicc_getset_timeout getset_timeout;
+ };
+};
+
/* SETBRIDGEPORT IPA Command: *********************************************/
enum qeth_ipa_sbp_cmd {
IPA_SBP_QUERY_COMMANDS_SUPPORTED = 0x00000000L,
@@ -692,6 +762,7 @@ struct qeth_ipa_cmd {
struct qeth_ipacmd_diagass diagass;
struct qeth_ipacmd_setbridgeport sbp;
struct qeth_ipacmd_addr_change addrchange;
+ struct qeth_ipacmd_vnicc vnicc;
} data;
} __attribute__ ((packed));
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index d1ee9e30c68b..b22ed2a57acd 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -475,10 +475,8 @@ static ssize_t qeth_dev_isolation_store(struct device *dev,
return -EINVAL;
mutex_lock(&card->conf_mutex);
- /* check for unknown, too, in case we do not yet know who we are */
if (card->info.type != QETH_CARD_TYPE_OSD &&
- card->info.type != QETH_CARD_TYPE_OSX &&
- card->info.type != QETH_CARD_TYPE_UNKNOWN) {
+ card->info.type != QETH_CARD_TYPE_OSX) {
rc = -EOPNOTSUPP;
dev_err(&card->gdev->dev, "Adapter does not "
"support QDIO data connection isolation\n");
diff --git a/drivers/s390/net/qeth_l2.h b/drivers/s390/net/qeth_l2.h
index 0d59f9a45ea9..241df6b98ab4 100644
--- a/drivers/s390/net/qeth_l2.h
+++ b/drivers/s390/net/qeth_l2.h
@@ -14,6 +14,12 @@ int qeth_l2_create_device_attributes(struct device *);
void qeth_l2_remove_device_attributes(struct device *);
void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card);
+int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state);
+int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state);
+int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout);
+int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout);
+bool qeth_l2_vnicc_is_in_use(struct qeth_card *card);
+
struct qeth_mac {
u8 mac_addr[OSA_ADDR_LEN];
u8 is_uc:1;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 760b023eae95..d2537c09126d 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -33,24 +33,10 @@ static void qeth_bridge_state_change(struct qeth_card *card,
struct qeth_ipa_cmd *cmd);
static void qeth_bridge_host_event(struct qeth_card *card,
struct qeth_ipa_cmd *cmd);
-
-static int qeth_l2_verify_dev(struct net_device *dev)
-{
- struct qeth_card *card;
- unsigned long flags;
- int rc = 0;
-
- read_lock_irqsave(&qeth_core_card_list.rwlock, flags);
- list_for_each_entry(card, &qeth_core_card_list.list, list) {
- if (card->dev == dev) {
- rc = QETH_REAL_CARD;
- break;
- }
- }
- read_unlock_irqrestore(&qeth_core_card_list.rwlock, flags);
-
- return rc;
-}
+static void qeth_l2_vnicc_set_defaults(struct qeth_card *card);
+static void qeth_l2_vnicc_init(struct qeth_card *card);
+static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc,
+ u32 *timeout);
static struct net_device *qeth_l2_netdev_by_devno(unsigned char *read_dev_no)
{
@@ -74,7 +60,7 @@ static struct net_device *qeth_l2_netdev_by_devno(unsigned char *read_dev_no)
return ndev;
}
-static int qeth_setdel_makerc(struct qeth_card *card, int retcode)
+static int qeth_setdelmac_makerc(struct qeth_card *card, int retcode)
{
int rc;
@@ -124,8 +110,8 @@ static int qeth_l2_send_setdelmac(struct qeth_card *card, __u8 *mac,
cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
cmd->data.setdelmac.mac_length = OSA_ADDR_LEN;
memcpy(&cmd->data.setdelmac.mac, mac, OSA_ADDR_LEN);
- return qeth_setdel_makerc(card, qeth_send_ipa_cmd(card, iob,
- NULL, NULL));
+ return qeth_setdelmac_makerc(card, qeth_send_ipa_cmd(card, iob,
+ NULL, NULL));
}
static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac)
@@ -285,17 +271,40 @@ static void qeth_l2_fill_header(struct qeth_hdr *hdr, struct sk_buff *skb,
}
}
+static int qeth_setdelvlan_makerc(struct qeth_card *card, int retcode)
+{
+ if (retcode)
+ QETH_CARD_TEXT_(card, 2, "err%04x", retcode);
+
+ switch (retcode) {
+ case IPA_RC_SUCCESS:
+ return 0;
+ case IPA_RC_L2_INVALID_VLAN_ID:
+ return -EINVAL;
+ case IPA_RC_L2_DUP_VLAN_ID:
+ return -EEXIST;
+ case IPA_RC_L2_VLAN_ID_NOT_FOUND:
+ return -ENOENT;
+ case IPA_RC_L2_VLAN_ID_NOT_ALLOWED:
+ return -EPERM;
+ case -ENOMEM:
+ return -ENOMEM;
+ default:
+ return -EIO;
+ }
+}
+
static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card,
- struct qeth_reply *reply, unsigned long data)
+ struct qeth_reply *reply,
+ unsigned long data)
{
- struct qeth_ipa_cmd *cmd;
+ struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
QETH_CARD_TEXT(card, 2, "L2sdvcb");
- cmd = (struct qeth_ipa_cmd *) data;
if (cmd->hdr.return_code) {
- QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x. "
- "Continuing\n", cmd->data.setdelvlan.vlan_id,
- QETH_CARD_IFNAME(card), cmd->hdr.return_code);
+ QETH_DBF_MESSAGE(2, "Error in processing VLAN %i on %s: 0x%x.\n",
+ cmd->data.setdelvlan.vlan_id,
+ QETH_CARD_IFNAME(card), cmd->hdr.return_code);
QETH_CARD_TEXT_(card, 2, "L2VL%4x", cmd->hdr.command);
QETH_CARD_TEXT_(card, 2, "err%d", cmd->hdr.return_code);
}
@@ -303,7 +312,7 @@ static int qeth_l2_send_setdelvlan_cb(struct qeth_card *card,
}
static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
- enum qeth_ipa_cmds ipacmd)
+ enum qeth_ipa_cmds ipacmd)
{
struct qeth_ipa_cmd *cmd;
struct qeth_cmd_buffer *iob;
@@ -314,8 +323,8 @@ static int qeth_l2_send_setdelvlan(struct qeth_card *card, __u16 i,
return -ENOMEM;
cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
cmd->data.setdelvlan.vlan_id = i;
- return qeth_send_ipa_cmd(card, iob,
- qeth_l2_send_setdelvlan_cb, NULL);
+ return qeth_setdelvlan_makerc(card, qeth_send_ipa_cmd(card, iob,
+ qeth_l2_send_setdelvlan_cb, NULL));
}
static void qeth_l2_process_vlans(struct qeth_card *card)
@@ -339,10 +348,6 @@ static int qeth_l2_vlan_rx_add_vid(struct net_device *dev,
QETH_CARD_TEXT_(card, 4, "aid:%d", vid);
if (!vid)
return 0;
- if (card->info.type == QETH_CARD_TYPE_OSM) {
- QETH_CARD_TEXT(card, 3, "aidOSM");
- return 0;
- }
if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
QETH_CARD_TEXT(card, 3, "aidREC");
return 0;
@@ -372,10 +377,6 @@ static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev,
int rc = 0;
QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
- if (card->info.type == QETH_CARD_TYPE_OSM) {
- QETH_CARD_TEXT(card, 3, "kidOSM");
- return 0;
- }
if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
QETH_CARD_TEXT(card, 3, "kidREC");
return 0;
@@ -541,11 +542,6 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
QETH_CARD_TEXT(card, 3, "setmac");
- if (qeth_l2_verify_dev(dev) != QETH_REAL_CARD) {
- QETH_CARD_TEXT(card, 3, "setmcINV");
- return -EOPNOTSUPP;
- }
-
if (card->info.type == QETH_CARD_TYPE_OSN ||
card->info.type == QETH_CARD_TYPE_OSM ||
card->info.type == QETH_CARD_TYPE_OSX) {
@@ -694,7 +690,7 @@ static int qeth_l2_xmit_iqd(struct qeth_card *card, struct sk_buff *skb,
rc = -E2BIG;
goto out;
}
- rc = qeth_do_send_packet_fast(card, queue, skb, hdr, data_offset,
+ rc = qeth_do_send_packet_fast(queue, skb, hdr, data_offset,
sizeof(*hdr) + data_offset);
out:
if (rc)
@@ -919,6 +915,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev)
hash_init(card->mac_htable);
card->options.layer2 = 1;
card->info.hwtrap = 0;
+ qeth_l2_vnicc_set_defaults(card);
return 0;
}
@@ -1005,7 +1002,11 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
} else {
card->dev->ethtool_ops = &qeth_l2_ethtool_ops;
}
- card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+
+ if (card->info.type == QETH_CARD_TYPE_OSM)
+ card->dev->features |= NETIF_F_VLAN_CHALLENGED;
+ else
+ card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) {
card->dev->hw_features = NETIF_F_SG;
card->dev->vlan_features = NETIF_F_SG;
@@ -1045,9 +1046,14 @@ static int qeth_l2_start_ipassists(struct qeth_card *card)
static void qeth_l2_trace_features(struct qeth_card *card)
{
- QETH_CARD_TEXT(card, 2, "l2featur");
+ /* Set BridgePort features */
+ QETH_CARD_TEXT(card, 2, "featuSBP");
QETH_CARD_HEX(card, 2, &card->options.sbp.supported_funcs,
sizeof(card->options.sbp.supported_funcs));
+ /* VNIC Characteristics features */
+ QETH_CARD_TEXT(card, 2, "feaVNICC");
+ QETH_CARD_HEX(card, 2, &card->options.vnicc.sup_chars,
+ sizeof(card->options.vnicc.sup_chars));
}
static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
@@ -1072,8 +1078,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
if (card->options.sbp.supported_funcs)
dev_info(&card->gdev->dev,
"The device represents a Bridge Capable Port\n");
- qeth_trace_features(card);
- qeth_l2_trace_features(card);
if (!card->dev && qeth_l2_setup_netdev(card)) {
rc = -ENODEV;
@@ -1090,6 +1094,12 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
} else
card->info.hwtrap = 0;
+ /* for the rx_bcast characteristic, init VNICC after setmac */
+ qeth_l2_vnicc_init(card);
+
+ qeth_trace_features(card);
+ qeth_l2_trace_features(card);
+
qeth_l2_setup_bridgeport_attrs(card);
card->state = CARD_STATE_HARDSETUP;
@@ -1106,8 +1116,7 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
goto out_remove;
}
- if (card->info.type != QETH_CARD_TYPE_OSN &&
- card->info.type != QETH_CARD_TYPE_OSM)
+ if (card->info.type != QETH_CARD_TYPE_OSN)
qeth_l2_process_vlans(card);
netif_tx_disable(card->dev);
@@ -2039,6 +2048,454 @@ int qeth_bridgeport_an_set(struct qeth_card *card, int enable)
}
EXPORT_SYMBOL_GPL(qeth_bridgeport_an_set);
+static bool qeth_bridgeport_is_in_use(struct qeth_card *card)
+{
+ return (card->options.sbp.role || card->options.sbp.reflect_promisc ||
+ card->options.sbp.hostnotification);
+}
+
+/* VNIC Characteristics support */
+
+/* handle VNICC IPA command return codes; convert to error codes */
+static int qeth_l2_vnicc_makerc(struct qeth_card *card, int ipa_rc)
+{
+ int rc;
+
+ switch (ipa_rc) {
+ case IPA_RC_SUCCESS:
+ return ipa_rc;
+ case IPA_RC_L2_UNSUPPORTED_CMD:
+ case IPA_RC_NOTSUPP:
+ rc = -EOPNOTSUPP;
+ break;
+ case IPA_RC_VNICC_OOSEQ:
+ rc = -EALREADY;
+ break;
+ case IPA_RC_VNICC_VNICBP:
+ rc = -EBUSY;
+ break;
+ case IPA_RC_L2_ADDR_TABLE_FULL:
+ rc = -ENOSPC;
+ break;
+ case IPA_RC_L2_MAC_NOT_AUTH_BY_ADP:
+ rc = -EACCES;
+ break;
+ default:
+ rc = -EIO;
+ }
+
+ QETH_CARD_TEXT_(card, 2, "err%04x", ipa_rc);
+ return rc;
+}
+
+/* generic VNICC request call back control */
+struct _qeth_l2_vnicc_request_cbctl {
+ u32 sub_cmd;
+ struct {
+ u32 vnic_char;
+ u32 timeout;
+ } param;
+ struct {
+ union{
+ u32 *sup_cmds;
+ u32 *timeout;
+ };
+ } result;
+};
+
+/* generic VNICC request call back */
+static int qeth_l2_vnicc_request_cb(struct qeth_card *card,
+ struct qeth_reply *reply,
+ unsigned long data)
+{
+ struct _qeth_l2_vnicc_request_cbctl *cbctl =
+ (struct _qeth_l2_vnicc_request_cbctl *) reply->param;
+ struct qeth_ipa_cmd *cmd = (struct qeth_ipa_cmd *) data;
+ struct qeth_ipacmd_vnicc *rep = &cmd->data.vnicc;
+
+ QETH_CARD_TEXT(card, 2, "vniccrcb");
+ if (cmd->hdr.return_code)
+ return 0;
+ /* return results to caller */
+ card->options.vnicc.sup_chars = rep->hdr.sup;
+ card->options.vnicc.cur_chars = rep->hdr.cur;
+
+ if (cbctl->sub_cmd == IPA_VNICC_QUERY_CMDS)
+ *cbctl->result.sup_cmds = rep->query_cmds.sup_cmds;
+
+ if (cbctl->sub_cmd == IPA_VNICC_GET_TIMEOUT)
+ *cbctl->result.timeout = rep->getset_timeout.timeout;
+
+ return 0;
+}
+
+/* generic VNICC request */
+static int qeth_l2_vnicc_request(struct qeth_card *card,
+ struct _qeth_l2_vnicc_request_cbctl *cbctl)
+{
+ struct qeth_ipacmd_vnicc *req;
+ struct qeth_cmd_buffer *iob;
+ struct qeth_ipa_cmd *cmd;
+ int rc;
+
+ QETH_CARD_TEXT(card, 2, "vniccreq");
+
+ /* get new buffer for request */
+ iob = qeth_get_ipacmd_buffer(card, IPA_CMD_VNICC, 0);
+ if (!iob)
+ return -ENOMEM;
+
+ /* create header for request */
+ cmd = (struct qeth_ipa_cmd *)(iob->data + IPA_PDU_HEADER_SIZE);
+ req = &cmd->data.vnicc;
+
+ /* create sub command header for request */
+ req->sub_hdr.data_length = sizeof(req->sub_hdr);
+ req->sub_hdr.sub_command = cbctl->sub_cmd;
+
+ /* create sub command specific request fields */
+ switch (cbctl->sub_cmd) {
+ case IPA_VNICC_QUERY_CHARS:
+ break;
+ case IPA_VNICC_QUERY_CMDS:
+ req->sub_hdr.data_length += sizeof(req->query_cmds);
+ req->query_cmds.vnic_char = cbctl->param.vnic_char;
+ break;
+ case IPA_VNICC_ENABLE:
+ case IPA_VNICC_DISABLE:
+ req->sub_hdr.data_length += sizeof(req->set_char);
+ req->set_char.vnic_char = cbctl->param.vnic_char;
+ break;
+ case IPA_VNICC_SET_TIMEOUT:
+ req->getset_timeout.timeout = cbctl->param.timeout;
+ /* fallthrough */
+ case IPA_VNICC_GET_TIMEOUT:
+ req->sub_hdr.data_length += sizeof(req->getset_timeout);
+ req->getset_timeout.vnic_char = cbctl->param.vnic_char;
+ break;
+ default:
+ qeth_release_buffer(iob->channel, iob);
+ return -EOPNOTSUPP;
+ }
+
+ /* send request */
+ rc = qeth_send_ipa_cmd(card, iob, qeth_l2_vnicc_request_cb,
+ (void *) cbctl);
+
+ return qeth_l2_vnicc_makerc(card, rc);
+}
+
+/* VNICC query VNIC characteristics request */
+static int qeth_l2_vnicc_query_chars(struct qeth_card *card)
+{
+ struct _qeth_l2_vnicc_request_cbctl cbctl;
+
+ /* prepare callback control */
+ cbctl.sub_cmd = IPA_VNICC_QUERY_CHARS;
+
+ QETH_CARD_TEXT(card, 2, "vniccqch");
+ return qeth_l2_vnicc_request(card, &cbctl);
+}
+
+/* VNICC query sub commands request */
+static int qeth_l2_vnicc_query_cmds(struct qeth_card *card, u32 vnic_char,
+ u32 *sup_cmds)
+{
+ struct _qeth_l2_vnicc_request_cbctl cbctl;
+
+ /* prepare callback control */
+ cbctl.sub_cmd = IPA_VNICC_QUERY_CMDS;
+ cbctl.param.vnic_char = vnic_char;
+ cbctl.result.sup_cmds = sup_cmds;
+
+ QETH_CARD_TEXT(card, 2, "vniccqcm");
+ return qeth_l2_vnicc_request(card, &cbctl);
+}
+
+/* VNICC enable/disable characteristic request */
+static int qeth_l2_vnicc_set_char(struct qeth_card *card, u32 vnic_char,
+ u32 cmd)
+{
+ struct _qeth_l2_vnicc_request_cbctl cbctl;
+
+ /* prepare callback control */
+ cbctl.sub_cmd = cmd;
+ cbctl.param.vnic_char = vnic_char;
+
+ QETH_CARD_TEXT(card, 2, "vniccedc");
+ return qeth_l2_vnicc_request(card, &cbctl);
+}
+
+/* VNICC get/set timeout for characteristic request */
+static int qeth_l2_vnicc_getset_timeout(struct qeth_card *card, u32 vnicc,
+ u32 cmd, u32 *timeout)
+{
+ struct _qeth_l2_vnicc_request_cbctl cbctl;
+
+ /* prepare callback control */
+ cbctl.sub_cmd = cmd;
+ cbctl.param.vnic_char = vnicc;
+ if (cmd == IPA_VNICC_SET_TIMEOUT)
+ cbctl.param.timeout = *timeout;
+ if (cmd == IPA_VNICC_GET_TIMEOUT)
+ cbctl.result.timeout = timeout;
+
+ QETH_CARD_TEXT(card, 2, "vniccgst");
+ return qeth_l2_vnicc_request(card, &cbctl);
+}
+
+/* set current VNICC flag state; called from sysfs store function */
+int qeth_l2_vnicc_set_state(struct qeth_card *card, u32 vnicc, bool state)
+{
+ int rc = 0;
+ u32 cmd;
+
+ QETH_CARD_TEXT(card, 2, "vniccsch");
+
+ /* do not change anything if BridgePort is enabled */
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
+ /* check if characteristic and enable/disable are supported */
+ if (!(card->options.vnicc.sup_chars & vnicc) ||
+ !(card->options.vnicc.set_char_sup & vnicc))
+ return -EOPNOTSUPP;
+
+ /* set enable/disable command and store wanted characteristic */
+ if (state) {
+ cmd = IPA_VNICC_ENABLE;
+ card->options.vnicc.wanted_chars |= vnicc;
+ } else {
+ cmd = IPA_VNICC_DISABLE;
+ card->options.vnicc.wanted_chars &= ~vnicc;
+ }
+
+ /* do we need to do anything? */
+ if (card->options.vnicc.cur_chars == card->options.vnicc.wanted_chars)
+ return rc;
+
+ /* if card is not ready, simply stop here */
+ if (!qeth_card_hw_is_reachable(card)) {
+ if (state)
+ card->options.vnicc.cur_chars |= vnicc;
+ else
+ card->options.vnicc.cur_chars &= ~vnicc;
+ return rc;
+ }
+
+ rc = qeth_l2_vnicc_set_char(card, vnicc, cmd);
+ if (rc)
+ card->options.vnicc.wanted_chars =
+ card->options.vnicc.cur_chars;
+ else {
+ /* successful online VNICC change; handle special cases */
+ if (state && vnicc == QETH_VNICC_RX_BCAST)
+ card->options.vnicc.rx_bcast_enabled = true;
+ if (!state && vnicc == QETH_VNICC_LEARNING)
+ qeth_l2_vnicc_recover_timeout(card, vnicc,
+ &card->options.vnicc.learning_timeout);
+ }
+
+ return rc;
+}
+
+/* get current VNICC flag state; called from sysfs show function */
+int qeth_l2_vnicc_get_state(struct qeth_card *card, u32 vnicc, bool *state)
+{
+ int rc = 0;
+
+ QETH_CARD_TEXT(card, 2, "vniccgch");
+
+ /* do not get anything if BridgePort is enabled */
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
+ /* check if characteristic is supported */
+ if (!(card->options.vnicc.sup_chars & vnicc))
+ return -EOPNOTSUPP;
+
+ /* if card is ready, query current VNICC state */
+ if (qeth_card_hw_is_reachable(card))
+ rc = qeth_l2_vnicc_query_chars(card);
+
+ *state = (card->options.vnicc.cur_chars & vnicc) ? true : false;
+ return rc;
+}
+
+/* set VNICC timeout; called from sysfs store function. Currently, only learning
+ * supports timeout
+ */
+int qeth_l2_vnicc_set_timeout(struct qeth_card *card, u32 timeout)
+{
+ int rc = 0;
+
+ QETH_CARD_TEXT(card, 2, "vniccsto");
+
+ /* do not change anything if BridgePort is enabled */
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
+ /* check if characteristic and set_timeout are supported */
+ if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) ||
+ !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING))
+ return -EOPNOTSUPP;
+
+ /* do we need to do anything? */
+ if (card->options.vnicc.learning_timeout == timeout)
+ return rc;
+
+ /* if card is not ready, simply store the value internally and return */
+ if (!qeth_card_hw_is_reachable(card)) {
+ card->options.vnicc.learning_timeout = timeout;
+ return rc;
+ }
+
+ /* send timeout value to card; if successful, store value internally */
+ rc = qeth_l2_vnicc_getset_timeout(card, QETH_VNICC_LEARNING,
+ IPA_VNICC_SET_TIMEOUT, &timeout);
+ if (!rc)
+ card->options.vnicc.learning_timeout = timeout;
+
+ return rc;
+}
+
+/* get current VNICC timeout; called from sysfs show function. Currently, only
+ * learning supports timeout
+ */
+int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout)
+{
+ int rc = 0;
+
+ QETH_CARD_TEXT(card, 2, "vniccgto");
+
+ /* do not get anything if BridgePort is enabled */
+ if (qeth_bridgeport_is_in_use(card))
+ return -EBUSY;
+
+ /* check if characteristic and get_timeout are supported */
+ if (!(card->options.vnicc.sup_chars & QETH_VNICC_LEARNING) ||
+ !(card->options.vnicc.getset_timeout_sup & QETH_VNICC_LEARNING))
+ return -EOPNOTSUPP;
+ /* if card is ready, get timeout. Otherwise, just return stored value */
+ *timeout = card->options.vnicc.learning_timeout;
+ if (qeth_card_hw_is_reachable(card))
+ rc = qeth_l2_vnicc_getset_timeout(card, QETH_VNICC_LEARNING,
+ IPA_VNICC_GET_TIMEOUT,
+ timeout);
+
+ return rc;
+}
+
+/* check if VNICC is currently enabled */
+bool qeth_l2_vnicc_is_in_use(struct qeth_card *card)
+{
+ /* if everything is turned off, VNICC is not active */
+ if (!card->options.vnicc.cur_chars)
+ return false;
+ /* default values are only OK if rx_bcast was not enabled by user
+ * or the card is offline.
+ */
+ if (card->options.vnicc.cur_chars == QETH_VNICC_DEFAULT) {
+ if (!card->options.vnicc.rx_bcast_enabled ||
+ !qeth_card_hw_is_reachable(card))
+ return false;
+ }
+ return true;
+}
+
+/* recover user timeout setting */
+static bool qeth_l2_vnicc_recover_timeout(struct qeth_card *card, u32 vnicc,
+ u32 *timeout)
+{
+ if (card->options.vnicc.sup_chars & vnicc &&
+ card->options.vnicc.getset_timeout_sup & vnicc &&
+ !qeth_l2_vnicc_getset_timeout(card, vnicc, IPA_VNICC_SET_TIMEOUT,
+ timeout))
+ return false;
+ *timeout = QETH_VNICC_DEFAULT_TIMEOUT;
+ return true;
+}
+
+/* recover user characteristic setting */
+static bool qeth_l2_vnicc_recover_char(struct qeth_card *card, u32 vnicc,
+ bool enable)
+{
+ u32 cmd = enable ? IPA_VNICC_ENABLE : IPA_VNICC_DISABLE;
+
+ if (card->options.vnicc.sup_chars & vnicc &&
+ card->options.vnicc.set_char_sup & vnicc &&
+ !qeth_l2_vnicc_set_char(card, vnicc, cmd))
+ return false;
+ card->options.vnicc.wanted_chars &= ~vnicc;
+ card->options.vnicc.wanted_chars |= QETH_VNICC_DEFAULT & vnicc;
+ return true;
+}
+
+/* (re-)initialize VNICC */
+static void qeth_l2_vnicc_init(struct qeth_card *card)
+{
+ u32 *timeout = &card->options.vnicc.learning_timeout;
+ unsigned int chars_len, i;
+ unsigned long chars_tmp;
+ u32 sup_cmds, vnicc;
+ bool enable, error;
+
+ QETH_CARD_TEXT(card, 2, "vniccini");
+ /* reset rx_bcast */
+ card->options.vnicc.rx_bcast_enabled = 0;
+ /* initial query and storage of VNIC characteristics */
+ if (qeth_l2_vnicc_query_chars(card)) {
+ if (card->options.vnicc.wanted_chars != QETH_VNICC_DEFAULT ||
+ *timeout != QETH_VNICC_DEFAULT_TIMEOUT)
+ dev_err(&card->gdev->dev, "Configuring the VNIC characteristics failed\n");
+ /* fail quietly if user didn't change the default config */
+ card->options.vnicc.sup_chars = 0;
+ card->options.vnicc.cur_chars = 0;
+ card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT;
+ return;
+ }
+ /* get supported commands for each supported characteristic */
+ chars_tmp = card->options.vnicc.sup_chars;
+ chars_len = sizeof(card->options.vnicc.sup_chars) * BITS_PER_BYTE;
+ for_each_set_bit(i, &chars_tmp, chars_len) {
+ vnicc = BIT(i);
+ qeth_l2_vnicc_query_cmds(card, vnicc, &sup_cmds);
+ if (!(sup_cmds & IPA_VNICC_SET_TIMEOUT) ||
+ !(sup_cmds & IPA_VNICC_GET_TIMEOUT))
+ card->options.vnicc.getset_timeout_sup &= ~vnicc;
+ if (!(sup_cmds & IPA_VNICC_ENABLE) ||
+ !(sup_cmds & IPA_VNICC_DISABLE))
+ card->options.vnicc.set_char_sup &= ~vnicc;
+ }
+ /* enforce assumed default values and recover settings, if changed */
+ error = qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
+ timeout);
+ chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT;
+ chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE;
+ chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE;
+ for_each_set_bit(i, &chars_tmp, chars_len) {
+ vnicc = BIT(i);
+ enable = card->options.vnicc.wanted_chars & vnicc;
+ error |= qeth_l2_vnicc_recover_char(card, vnicc, enable);
+ }
+ if (error)
+ dev_err(&card->gdev->dev, "Configuring the VNIC characteristics failed\n");
+}
+
+/* configure default values of VNIC characteristics */
+static void qeth_l2_vnicc_set_defaults(struct qeth_card *card)
+{
+ /* characteristics values */
+ card->options.vnicc.sup_chars = QETH_VNICC_ALL;
+ card->options.vnicc.cur_chars = QETH_VNICC_DEFAULT;
+ card->options.vnicc.learning_timeout = QETH_VNICC_DEFAULT_TIMEOUT;
+ /* supported commands */
+ card->options.vnicc.set_char_sup = QETH_VNICC_ALL;
+ card->options.vnicc.getset_timeout_sup = QETH_VNICC_LEARNING;
+ /* settings wanted by users */
+ card->options.vnicc.wanted_chars = QETH_VNICC_DEFAULT;
+}
+
module_init(qeth_l2_init);
module_exit(qeth_l2_exit);
MODULE_AUTHOR("Frank Blaschka <frank.blaschka@de.ibm.com>");
diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c
index 9696baa49e2d..470a4e5f3c62 100644
--- a/drivers/s390/net/qeth_l2_sys.c
+++ b/drivers/s390/net/qeth_l2_sys.c
@@ -20,6 +20,9 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev,
if (!card)
return -EINVAL;
+ if (qeth_l2_vnicc_is_in_use(card))
+ return sprintf(buf, "n/a (VNIC characteristics)\n");
+
if (qeth_card_hw_is_reachable(card) &&
card->options.sbp.supported_funcs)
rc = qeth_bridgeport_query_ports(card,
@@ -60,6 +63,11 @@ static ssize_t qeth_bridge_port_role_state_show(struct device *dev,
static ssize_t qeth_bridge_port_role_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ struct qeth_card *card = dev_get_drvdata(dev);
+
+ if (qeth_l2_vnicc_is_in_use(card))
+ return sprintf(buf, "n/a (VNIC characteristics)\n");
+
return qeth_bridge_port_role_state_show(dev, attr, buf, 0);
}
@@ -83,7 +91,10 @@ static ssize_t qeth_bridge_port_role_store(struct device *dev,
mutex_lock(&card->conf_mutex);
- if (card->options.sbp.reflect_promisc) /* Forbid direct manipulation */
+ if (qeth_l2_vnicc_is_in_use(card))
+ rc = -EBUSY;
+ else if (card->options.sbp.reflect_promisc)
+ /* Forbid direct manipulation */
rc = -EPERM;
else if (qeth_card_hw_is_reachable(card)) {
rc = qeth_bridgeport_setrole(card, role);
@@ -103,6 +114,11 @@ static DEVICE_ATTR(bridge_role, 0644, qeth_bridge_port_role_show,
static ssize_t qeth_bridge_port_state_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ struct qeth_card *card = dev_get_drvdata(dev);
+
+ if (qeth_l2_vnicc_is_in_use(card))
+ return sprintf(buf, "n/a (VNIC characteristics)\n");
+
return qeth_bridge_port_role_state_show(dev, attr, buf, 1);
}
@@ -118,6 +134,9 @@ static ssize_t qeth_bridgeport_hostnotification_show(struct device *dev,
if (!card)
return -EINVAL;
+ if (qeth_l2_vnicc_is_in_use(card))
+ return sprintf(buf, "n/a (VNIC characteristics)\n");
+
enabled = card->options.sbp.hostnotification;
return sprintf(buf, "%d\n", enabled);
@@ -127,22 +146,21 @@ static ssize_t qeth_bridgeport_hostnotification_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
- int rc = 0;
- int enable;
+ bool enable;
+ int rc;
if (!card)
return -EINVAL;
- if (sysfs_streq(buf, "0"))
- enable = 0;
- else if (sysfs_streq(buf, "1"))
- enable = 1;
- else
- return -EINVAL;
+ rc = kstrtobool(buf, &enable);
+ if (rc)
+ return rc;
mutex_lock(&card->conf_mutex);
- if (qeth_card_hw_is_reachable(card)) {
+ if (qeth_l2_vnicc_is_in_use(card))
+ rc = -EBUSY;
+ else if (qeth_card_hw_is_reachable(card)) {
rc = qeth_bridgeport_an_set(card, enable);
if (!rc)
card->options.sbp.hostnotification = enable;
@@ -167,6 +185,9 @@ static ssize_t qeth_bridgeport_reflect_show(struct device *dev,
if (!card)
return -EINVAL;
+ if (qeth_l2_vnicc_is_in_use(card))
+ return sprintf(buf, "n/a (VNIC characteristics)\n");
+
if (card->options.sbp.reflect_promisc) {
if (card->options.sbp.reflect_promisc_primary)
state = "primary";
@@ -202,7 +223,9 @@ static ssize_t qeth_bridgeport_reflect_store(struct device *dev,
mutex_lock(&card->conf_mutex);
- if (card->options.sbp.role != QETH_SBP_ROLE_NONE)
+ if (qeth_l2_vnicc_is_in_use(card))
+ rc = -EBUSY;
+ else if (card->options.sbp.role != QETH_SBP_ROLE_NONE)
rc = -EPERM;
else {
card->options.sbp.reflect_promisc = enable;
@@ -231,16 +254,6 @@ static struct attribute_group qeth_l2_bridgeport_attr_group = {
.attrs = qeth_l2_bridgeport_attrs,
};
-int qeth_l2_create_device_attributes(struct device *dev)
-{
- return sysfs_create_group(&dev->kobj, &qeth_l2_bridgeport_attr_group);
-}
-
-void qeth_l2_remove_device_attributes(struct device *dev)
-{
- sysfs_remove_group(&dev->kobj, &qeth_l2_bridgeport_attr_group);
-}
-
/**
* qeth_l2_setup_bridgeport_attrs() - set/restore attrs when turning online.
* @card: qeth_card structure pointer
@@ -270,10 +283,168 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card)
qeth_bridgeport_an_set(card, 0);
}
+/* VNIC CHARS support */
+
+/* convert sysfs attr name to VNIC characteristic */
+static u32 qeth_l2_vnicc_sysfs_attr_to_char(const char *attr_name)
+{
+ if (sysfs_streq(attr_name, "flooding"))
+ return QETH_VNICC_FLOODING;
+ else if (sysfs_streq(attr_name, "mcast_flooding"))
+ return QETH_VNICC_MCAST_FLOODING;
+ else if (sysfs_streq(attr_name, "learning"))
+ return QETH_VNICC_LEARNING;
+ else if (sysfs_streq(attr_name, "takeover_setvmac"))
+ return QETH_VNICC_TAKEOVER_SETVMAC;
+ else if (sysfs_streq(attr_name, "takeover_learning"))
+ return QETH_VNICC_TAKEOVER_LEARNING;
+ else if (sysfs_streq(attr_name, "bridge_invisible"))
+ return QETH_VNICC_BRIDGE_INVISIBLE;
+ else if (sysfs_streq(attr_name, "rx_bcast"))
+ return QETH_VNICC_RX_BCAST;
+
+ return 0;
+}
+
+/* get current timeout setting */
+static ssize_t qeth_vnicc_timeout_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct qeth_card *card = dev_get_drvdata(dev);
+ u32 timeout;
+ int rc;
+
+ if (!card)
+ return -EINVAL;
+
+ rc = qeth_l2_vnicc_get_timeout(card, &timeout);
+ if (rc == -EBUSY)
+ return sprintf(buf, "n/a (BridgePort)\n");
+ if (rc == -EOPNOTSUPP)
+ return sprintf(buf, "n/a\n");
+ return rc ? rc : sprintf(buf, "%d\n", timeout);
+}
+
+/* change timeout setting */
+static ssize_t qeth_vnicc_timeout_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct qeth_card *card = dev_get_drvdata(dev);
+ u32 timeout;
+ int rc;
+
+ if (!card)
+ return -EINVAL;
+
+ rc = kstrtou32(buf, 10, &timeout);
+ if (rc)
+ return rc;
+
+ mutex_lock(&card->conf_mutex);
+ rc = qeth_l2_vnicc_set_timeout(card, timeout);
+ mutex_unlock(&card->conf_mutex);
+ return rc ? rc : count;
+}
+
+/* get current setting of characteristic */
+static ssize_t qeth_vnicc_char_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct qeth_card *card = dev_get_drvdata(dev);
+ bool state;
+ u32 vnicc;
+ int rc;
+
+ if (!card)
+ return -EINVAL;
+
+ vnicc = qeth_l2_vnicc_sysfs_attr_to_char(attr->attr.name);
+ rc = qeth_l2_vnicc_get_state(card, vnicc, &state);
+
+ if (rc == -EBUSY)
+ return sprintf(buf, "n/a (BridgePort)\n");
+ if (rc == -EOPNOTSUPP)
+ return sprintf(buf, "n/a\n");
+ return rc ? rc : sprintf(buf, "%d\n", state);
+}
+
+/* change setting of characteristic */
+static ssize_t qeth_vnicc_char_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct qeth_card *card = dev_get_drvdata(dev);
+ bool state;
+ u32 vnicc;
+ int rc;
+
+ if (!card)
+ return -EINVAL;
+
+ if (kstrtobool(buf, &state))
+ return -EINVAL;
+
+ vnicc = qeth_l2_vnicc_sysfs_attr_to_char(attr->attr.name);
+ mutex_lock(&card->conf_mutex);
+ rc = qeth_l2_vnicc_set_state(card, vnicc, state);
+ mutex_unlock(&card->conf_mutex);
+
+ return rc ? rc : count;
+}
+
+static DEVICE_ATTR(flooding, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store);
+static DEVICE_ATTR(mcast_flooding, 0644, qeth_vnicc_char_show,
+ qeth_vnicc_char_store);
+static DEVICE_ATTR(learning, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store);
+static DEVICE_ATTR(learning_timeout, 0644, qeth_vnicc_timeout_show,
+ qeth_vnicc_timeout_store);
+static DEVICE_ATTR(takeover_setvmac, 0644, qeth_vnicc_char_show,
+ qeth_vnicc_char_store);
+static DEVICE_ATTR(takeover_learning, 0644, qeth_vnicc_char_show,
+ qeth_vnicc_char_store);
+static DEVICE_ATTR(bridge_invisible, 0644, qeth_vnicc_char_show,
+ qeth_vnicc_char_store);
+static DEVICE_ATTR(rx_bcast, 0644, qeth_vnicc_char_show, qeth_vnicc_char_store);
+
+static struct attribute *qeth_l2_vnicc_attrs[] = {
+ &dev_attr_flooding.attr,
+ &dev_attr_mcast_flooding.attr,
+ &dev_attr_learning.attr,
+ &dev_attr_learning_timeout.attr,
+ &dev_attr_takeover_setvmac.attr,
+ &dev_attr_takeover_learning.attr,
+ &dev_attr_bridge_invisible.attr,
+ &dev_attr_rx_bcast.attr,
+ NULL,
+};
+
+static struct attribute_group qeth_l2_vnicc_attr_group = {
+ .attrs = qeth_l2_vnicc_attrs,
+ .name = "vnicc",
+};
+
+static const struct attribute_group *qeth_l2_only_attr_groups[] = {
+ &qeth_l2_bridgeport_attr_group,
+ &qeth_l2_vnicc_attr_group,
+ NULL,
+};
+
+int qeth_l2_create_device_attributes(struct device *dev)
+{
+ return sysfs_create_groups(&dev->kobj, qeth_l2_only_attr_groups);
+}
+
+void qeth_l2_remove_device_attributes(struct device *dev)
+{
+ sysfs_remove_groups(&dev->kobj, qeth_l2_only_attr_groups);
+}
+
const struct attribute_group *qeth_l2_attr_groups[] = {
&qeth_device_attr_group,
&qeth_device_blkt_group,
- /* l2 specific, see l2_{create,remove}_device_attributes(): */
+ /* l2 specific, see qeth_l2_only_attr_groups: */
&qeth_l2_bridgeport_attr_group,
+ &qeth_l2_vnicc_attr_group,
NULL,
};
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index ab661a431f7c..aadd384316a3 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1553,7 +1553,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card,
addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV4);
if (!addr)
- return;
+ goto out;
spin_lock_bh(&card->ip_lock);
@@ -1567,6 +1567,7 @@ static void qeth_l3_free_vlan_addresses4(struct qeth_card *card,
spin_unlock_bh(&card->ip_lock);
kfree(addr);
+out:
in_dev_put(in_dev);
}
@@ -1591,7 +1592,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card,
addr = qeth_l3_get_addr_buffer(QETH_PROT_IPV6);
if (!addr)
- return;
+ goto out;
spin_lock_bh(&card->ip_lock);
@@ -1606,6 +1607,7 @@ static void qeth_l3_free_vlan_addresses6(struct qeth_card *card,
spin_unlock_bh(&card->ip_lock);
kfree(addr);
+out:
in6_dev_put(in6_dev);
#endif /* CONFIG_QETH_IPV6 */
}
@@ -1646,13 +1648,12 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
return 0;
}
-static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_hdr *hdr, unsigned short *vlan_id)
+static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
+ struct qeth_hdr *hdr)
{
__u16 prot;
struct iphdr *ip_hdr;
unsigned char tg_addr[MAX_ADDR_LEN];
- int is_vlan = 0;
if (!(hdr->hdr.l3.flags & QETH_HDR_PASSTHRU)) {
prot = (hdr->hdr.l3.flags & QETH_HDR_IPV6) ? ETH_P_IPV6 :
@@ -1706,11 +1707,14 @@ static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
skb->protocol = eth_type_trans(skb, card->dev);
- if (hdr->hdr.l3.ext_flags &
- (QETH_HDR_EXT_VLAN_FRAME | QETH_HDR_EXT_INCLUDE_VLAN_TAG)) {
- *vlan_id = (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_VLAN_FRAME) ?
- hdr->hdr.l3.vlan_id : *((u16 *)&hdr->hdr.l3.dest_addr[12]);
- is_vlan = 1;
+ /* copy VLAN tag from hdr into skb */
+ if (!card->options.sniffer &&
+ (hdr->hdr.l3.ext_flags & (QETH_HDR_EXT_VLAN_FRAME |
+ QETH_HDR_EXT_INCLUDE_VLAN_TAG))) {
+ u16 tag = (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_VLAN_FRAME) ?
+ hdr->hdr.l3.vlan_id :
+ *((u16 *)&hdr->hdr.l3.dest_addr[12]);
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag);
}
if (card->dev->features & NETIF_F_RXCSUM) {
@@ -1724,7 +1728,6 @@ static int qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
skb->ip_summed = CHECKSUM_NONE;
} else
skb->ip_summed = CHECKSUM_NONE;
- return is_vlan;
}
static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
@@ -1733,8 +1736,6 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
int work_done = 0;
struct sk_buff *skb;
struct qeth_hdr *hdr;
- __u16 vlan_tag = 0;
- int is_vlan;
unsigned int len;
__u16 magic;
@@ -1764,12 +1765,8 @@ static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
card->dev->addr_len);
netif_receive_skb(skb);
} else {
- is_vlan = qeth_l3_rebuild_skb(card, skb, hdr,
- &vlan_tag);
+ qeth_l3_rebuild_skb(card, skb, hdr);
len = skb->len;
- if (is_vlan && !card->options.sniffer)
- __vlan_hwaccel_put_tag(skb,
- htons(ETH_P_8021Q), vlan_tag);
napi_gro_receive(&card->napi, skb);
}
break;
@@ -2771,8 +2768,8 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb,
rc = qeth_do_send_packet(card, queue, new_skb, hdr, hd_len,
hd_len, elements);
} else
- rc = qeth_do_send_packet_fast(card, queue, new_skb, hdr,
- data_offset, hd_len);
+ rc = qeth_do_send_packet_fast(queue, new_skb, hdr, data_offset,
+ hd_len);
if (!rc) {
card->stats.tx_packets++;
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index e8bcc314cc5f..0ad0f7f4b23f 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -1028,52 +1028,31 @@ static const struct attribute_group qeth_device_rxip_group = {
.attrs = qeth_rxip_device_attrs,
};
+static const struct attribute_group *qeth_l3_only_attr_groups[] = {
+ &qeth_l3_device_attr_group,
+ &qeth_device_ipato_group,
+ &qeth_device_vipa_group,
+ &qeth_device_rxip_group,
+ NULL,
+};
+
int qeth_l3_create_device_attributes(struct device *dev)
{
- int ret;
-
- ret = sysfs_create_group(&dev->kobj, &qeth_l3_device_attr_group);
- if (ret)
- return ret;
-
- ret = sysfs_create_group(&dev->kobj, &qeth_device_ipato_group);
- if (ret) {
- sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group);
- return ret;
- }
-
- ret = sysfs_create_group(&dev->kobj, &qeth_device_vipa_group);
- if (ret) {
- sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group);
- sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group);
- return ret;
- }
-
- ret = sysfs_create_group(&dev->kobj, &qeth_device_rxip_group);
- if (ret) {
- sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group);
- sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group);
- sysfs_remove_group(&dev->kobj, &qeth_device_vipa_group);
- return ret;
- }
- return 0;
+ return sysfs_create_groups(&dev->kobj, qeth_l3_only_attr_groups);
}
void qeth_l3_remove_device_attributes(struct device *dev)
{
- sysfs_remove_group(&dev->kobj, &qeth_l3_device_attr_group);
- sysfs_remove_group(&dev->kobj, &qeth_device_ipato_group);
- sysfs_remove_group(&dev->kobj, &qeth_device_vipa_group);
- sysfs_remove_group(&dev->kobj, &qeth_device_rxip_group);
+ sysfs_remove_groups(&dev->kobj, qeth_l3_only_attr_groups);
}
const struct attribute_group *qeth_l3_attr_groups[] = {
&qeth_device_attr_group,
&qeth_device_blkt_group,
- /* l3 specific, see l3_{create,remove}_device_attributes(): */
+ /* l3 specific, see qeth_l3_only_attr_groups: */
&qeth_l3_device_attr_group,
&qeth_device_ipato_group,
&qeth_device_vipa_group,
&qeth_device_rxip_group,
-NULL,
+ NULL,
};
diff --git a/drivers/scsi/qedi/Kconfig b/drivers/scsi/qedi/Kconfig
index 2ff753ce6e27..d1db92d24889 100644
--- a/drivers/scsi/qedi/Kconfig
+++ b/drivers/scsi/qedi/Kconfig
@@ -4,6 +4,7 @@ config QEDI
depends on QED
select SCSI_ISCSI_ATTRS
select QED_LL2
+ select QED_OOO
select QED_ISCSI
select ISCSI_BOOT_SYSFS
---help---
diff --git a/drivers/thunderbolt/Makefile b/drivers/thunderbolt/Makefile
index 4900febc6c8a..f2f0de27252b 100644
--- a/drivers/thunderbolt/Makefile
+++ b/drivers/thunderbolt/Makefile
@@ -1,3 +1,3 @@
obj-${CONFIG_THUNDERBOLT} := thunderbolt.o
thunderbolt-objs := nhi.o ctl.o tb.o switch.o cap.o path.o tunnel_pci.o eeprom.o
-thunderbolt-objs += domain.o dma_port.o icm.o
+thunderbolt-objs += domain.o dma_port.o icm.o property.o xdomain.o
diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c
index fb40dd0588b9..d079dbba2c03 100644
--- a/drivers/thunderbolt/ctl.c
+++ b/drivers/thunderbolt/ctl.c
@@ -289,20 +289,6 @@ static void tb_cfg_print_error(struct tb_ctl *ctl,
}
}
-static void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len)
-{
- int i;
- for (i = 0; i < len; i++)
- dst[i] = cpu_to_be32(src[i]);
-}
-
-static void be32_to_cpu_array(u32 *dst, __be32 *src, size_t len)
-{
- int i;
- for (i = 0; i < len; i++)
- dst[i] = be32_to_cpu(src[i]);
-}
-
static __be32 tb_crc(const void *data, size_t len)
{
return cpu_to_be32(~__crc32c_le(~0, data, len));
@@ -373,7 +359,7 @@ static int tb_ctl_tx(struct tb_ctl *ctl, const void *data, size_t len,
cpu_to_be32_array(pkg->buffer, data, len / 4);
*(__be32 *) (pkg->buffer + len) = tb_crc(pkg->buffer, len);
- res = ring_tx(ctl->tx, &pkg->frame);
+ res = tb_ring_tx(ctl->tx, &pkg->frame);
if (res) /* ring is stopped */
tb_ctl_pkg_free(pkg);
return res;
@@ -382,15 +368,15 @@ static int tb_ctl_tx(struct tb_ctl *ctl, const void *data, size_t len,
/**
* tb_ctl_handle_event() - acknowledge a plug event, invoke ctl->callback
*/
-static void tb_ctl_handle_event(struct tb_ctl *ctl, enum tb_cfg_pkg_type type,
+static bool tb_ctl_handle_event(struct tb_ctl *ctl, enum tb_cfg_pkg_type type,
struct ctl_pkg *pkg, size_t size)
{
- ctl->callback(ctl->callback_data, type, pkg->buffer, size);
+ return ctl->callback(ctl->callback_data, type, pkg->buffer, size);
}
static void tb_ctl_rx_submit(struct ctl_pkg *pkg)
{
- ring_rx(pkg->ctl->rx, &pkg->frame); /*
+ tb_ring_rx(pkg->ctl->rx, &pkg->frame); /*
* We ignore failures during stop.
* All rx packets are referenced
* from ctl->rx_packets, so we do
@@ -458,6 +444,8 @@ static void tb_ctl_rx_callback(struct tb_ring *ring, struct ring_frame *frame,
break;
case TB_CFG_PKG_EVENT:
+ case TB_CFG_PKG_XDOMAIN_RESP:
+ case TB_CFG_PKG_XDOMAIN_REQ:
if (*(__be32 *)(pkg->buffer + frame->size) != crc32) {
tb_ctl_err(pkg->ctl,
"RX: checksum mismatch, dropping packet\n");
@@ -465,8 +453,9 @@ static void tb_ctl_rx_callback(struct tb_ring *ring, struct ring_frame *frame,
}
/* Fall through */
case TB_CFG_PKG_ICM_EVENT:
- tb_ctl_handle_event(pkg->ctl, frame->eof, pkg, frame->size);
- goto rx;
+ if (tb_ctl_handle_event(pkg->ctl, frame->eof, pkg, frame->size))
+ goto rx;
+ break;
default:
break;
@@ -625,11 +614,12 @@ struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data)
if (!ctl->frame_pool)
goto err;
- ctl->tx = ring_alloc_tx(nhi, 0, 10, RING_FLAG_NO_SUSPEND);
+ ctl->tx = tb_ring_alloc_tx(nhi, 0, 10, RING_FLAG_NO_SUSPEND);
if (!ctl->tx)
goto err;
- ctl->rx = ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND);
+ ctl->rx = tb_ring_alloc_rx(nhi, 0, 10, RING_FLAG_NO_SUSPEND, 0xffff,
+ 0xffff, NULL, NULL);
if (!ctl->rx)
goto err;
@@ -662,9 +652,9 @@ void tb_ctl_free(struct tb_ctl *ctl)
return;
if (ctl->rx)
- ring_free(ctl->rx);
+ tb_ring_free(ctl->rx);
if (ctl->tx)
- ring_free(ctl->tx);
+ tb_ring_free(ctl->tx);
/* free RX packets */
for (i = 0; i < TB_CTL_RX_PKG_COUNT; i++)
@@ -683,8 +673,8 @@ void tb_ctl_start(struct tb_ctl *ctl)
{
int i;
tb_ctl_info(ctl, "control channel starting...\n");
- ring_start(ctl->tx); /* is used to ack hotplug packets, start first */
- ring_start(ctl->rx);
+ tb_ring_start(ctl->tx); /* is used to ack hotplug packets, start first */
+ tb_ring_start(ctl->rx);
for (i = 0; i < TB_CTL_RX_PKG_COUNT; i++)
tb_ctl_rx_submit(ctl->rx_packets[i]);
@@ -705,8 +695,8 @@ void tb_ctl_stop(struct tb_ctl *ctl)
ctl->running = false;
mutex_unlock(&ctl->request_queue_lock);
- ring_stop(ctl->rx);
- ring_stop(ctl->tx);
+ tb_ring_stop(ctl->rx);
+ tb_ring_stop(ctl->tx);
if (!list_empty(&ctl->request_queue))
tb_ctl_WARN(ctl, "dangling request in request_queue\n");
diff --git a/drivers/thunderbolt/ctl.h b/drivers/thunderbolt/ctl.h
index 36fd28b1c1c5..85c49dd301ea 100644
--- a/drivers/thunderbolt/ctl.h
+++ b/drivers/thunderbolt/ctl.h
@@ -8,6 +8,7 @@
#define _TB_CFG
#include <linux/kref.h>
+#include <linux/thunderbolt.h>
#include "nhi.h"
#include "tb_msgs.h"
@@ -15,7 +16,7 @@
/* control channel */
struct tb_ctl;
-typedef void (*event_cb)(void *data, enum tb_cfg_pkg_type type,
+typedef bool (*event_cb)(void *data, enum tb_cfg_pkg_type type,
const void *buf, size_t size);
struct tb_ctl *tb_ctl_alloc(struct tb_nhi *nhi, event_cb cb, void *cb_data);
diff --git a/drivers/thunderbolt/domain.c b/drivers/thunderbolt/domain.c
index 9f2dcd48974d..9b90115319ce 100644
--- a/drivers/thunderbolt/domain.c
+++ b/drivers/thunderbolt/domain.c
@@ -20,6 +20,98 @@
static DEFINE_IDA(tb_domain_ida);
+static bool match_service_id(const struct tb_service_id *id,
+ const struct tb_service *svc)
+{
+ if (id->match_flags & TBSVC_MATCH_PROTOCOL_KEY) {
+ if (strcmp(id->protocol_key, svc->key))
+ return false;
+ }
+
+ if (id->match_flags & TBSVC_MATCH_PROTOCOL_ID) {
+ if (id->protocol_id != svc->prtcid)
+ return false;
+ }
+
+ if (id->match_flags & TBSVC_MATCH_PROTOCOL_VERSION) {
+ if (id->protocol_version != svc->prtcvers)
+ return false;
+ }
+
+ if (id->match_flags & TBSVC_MATCH_PROTOCOL_VERSION) {
+ if (id->protocol_revision != svc->prtcrevs)
+ return false;
+ }
+
+ return true;
+}
+
+static const struct tb_service_id *__tb_service_match(struct device *dev,
+ struct device_driver *drv)
+{
+ struct tb_service_driver *driver;
+ const struct tb_service_id *ids;
+ struct tb_service *svc;
+
+ svc = tb_to_service(dev);
+ if (!svc)
+ return NULL;
+
+ driver = container_of(drv, struct tb_service_driver, driver);
+ if (!driver->id_table)
+ return NULL;
+
+ for (ids = driver->id_table; ids->match_flags != 0; ids++) {
+ if (match_service_id(ids, svc))
+ return ids;
+ }
+
+ return NULL;
+}
+
+static int tb_service_match(struct device *dev, struct device_driver *drv)
+{
+ return !!__tb_service_match(dev, drv);
+}
+
+static int tb_service_probe(struct device *dev)
+{
+ struct tb_service *svc = tb_to_service(dev);
+ struct tb_service_driver *driver;
+ const struct tb_service_id *id;
+
+ driver = container_of(dev->driver, struct tb_service_driver, driver);
+ id = __tb_service_match(dev, &driver->driver);
+
+ return driver->probe(svc, id);
+}
+
+static int tb_service_remove(struct device *dev)
+{
+ struct tb_service *svc = tb_to_service(dev);
+ struct tb_service_driver *driver;
+
+ driver = container_of(dev->driver, struct tb_service_driver, driver);
+ if (driver->remove)
+ driver->remove(svc);
+
+ return 0;
+}
+
+static void tb_service_shutdown(struct device *dev)
+{
+ struct tb_service_driver *driver;
+ struct tb_service *svc;
+
+ svc = tb_to_service(dev);
+ if (!svc || !dev->driver)
+ return;
+
+ driver = container_of(dev->driver, struct tb_service_driver, driver);
+ if (driver->shutdown)
+ driver->shutdown(svc);
+}
+
static const char * const tb_security_names[] = {
[TB_SECURITY_NONE] = "none",
[TB_SECURITY_USER] = "user",
@@ -52,6 +144,10 @@ static const struct attribute_group *domain_attr_groups[] = {
struct bus_type tb_bus_type = {
.name = "thunderbolt",
+ .match = tb_service_match,
+ .probe = tb_service_probe,
+ .remove = tb_service_remove,
+ .shutdown = tb_service_shutdown,
};
static void tb_domain_release(struct device *dev)
@@ -128,17 +224,26 @@ err_free:
return NULL;
}
-static void tb_domain_event_cb(void *data, enum tb_cfg_pkg_type type,
+static bool tb_domain_event_cb(void *data, enum tb_cfg_pkg_type type,
const void *buf, size_t size)
{
struct tb *tb = data;
if (!tb->cm_ops->handle_event) {
tb_warn(tb, "domain does not have event handler\n");
- return;
+ return true;
}
- tb->cm_ops->handle_event(tb, type, buf, size);
+ switch (type) {
+ case TB_CFG_PKG_XDOMAIN_REQ:
+ case TB_CFG_PKG_XDOMAIN_RESP:
+ return tb_xdomain_handle_request(tb, type, buf, size);
+
+ default:
+ tb->cm_ops->handle_event(tb, type, buf, size);
+ }
+
+ return true;
}
/**
@@ -443,9 +548,92 @@ int tb_domain_disconnect_pcie_paths(struct tb *tb)
return tb->cm_ops->disconnect_pcie_paths(tb);
}
+/**
+ * tb_domain_approve_xdomain_paths() - Enable DMA paths for XDomain
+ * @tb: Domain enabling the DMA paths
+ * @xd: XDomain DMA paths are created to
+ *
+ * Calls connection manager specific method to enable DMA paths to the
+ * XDomain in question.
+ *
+ * Return: 0% in case of success and negative errno otherwise. In
+ * particular returns %-ENOTSUPP if the connection manager
+ * implementation does not support XDomains.
+ */
+int tb_domain_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd)
+{
+ if (!tb->cm_ops->approve_xdomain_paths)
+ return -ENOTSUPP;
+
+ return tb->cm_ops->approve_xdomain_paths(tb, xd);
+}
+
+/**
+ * tb_domain_disconnect_xdomain_paths() - Disable DMA paths for XDomain
+ * @tb: Domain disabling the DMA paths
+ * @xd: XDomain whose DMA paths are disconnected
+ *
+ * Calls connection manager specific method to disconnect DMA paths to
+ * the XDomain in question.
+ *
+ * Return: 0% in case of success and negative errno otherwise. In
+ * particular returns %-ENOTSUPP if the connection manager
+ * implementation does not support XDomains.
+ */
+int tb_domain_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd)
+{
+ if (!tb->cm_ops->disconnect_xdomain_paths)
+ return -ENOTSUPP;
+
+ return tb->cm_ops->disconnect_xdomain_paths(tb, xd);
+}
+
+static int disconnect_xdomain(struct device *dev, void *data)
+{
+ struct tb_xdomain *xd;
+ struct tb *tb = data;
+ int ret = 0;
+
+ xd = tb_to_xdomain(dev);
+ if (xd && xd->tb == tb)
+ ret = tb_xdomain_disable_paths(xd);
+
+ return ret;
+}
+
+/**
+ * tb_domain_disconnect_all_paths() - Disconnect all paths for the domain
+ * @tb: Domain whose paths are disconnected
+ *
+ * This function can be used to disconnect all paths (PCIe, XDomain) for
+ * example in preparation for host NVM firmware upgrade. After this is
+ * called the paths cannot be established without resetting the switch.
+ *
+ * Return: %0 in case of success and negative errno otherwise.
+ */
+int tb_domain_disconnect_all_paths(struct tb *tb)
+{
+ int ret;
+
+ ret = tb_domain_disconnect_pcie_paths(tb);
+ if (ret)
+ return ret;
+
+ return bus_for_each_dev(&tb_bus_type, NULL, tb, disconnect_xdomain);
+}
+
int tb_domain_init(void)
{
- return bus_register(&tb_bus_type);
+ int ret;
+
+ ret = tb_xdomain_init();
+ if (ret)
+ return ret;
+ ret = bus_register(&tb_bus_type);
+ if (ret)
+ tb_xdomain_exit();
+
+ return ret;
}
void tb_domain_exit(void)
@@ -453,4 +641,5 @@ void tb_domain_exit(void)
bus_unregister(&tb_bus_type);
ida_destroy(&tb_domain_ida);
tb_switch_exit();
+ tb_xdomain_exit();
}
diff --git a/drivers/thunderbolt/icm.c b/drivers/thunderbolt/icm.c
index 53250fc057e1..ab02d13f40b7 100644
--- a/drivers/thunderbolt/icm.c
+++ b/drivers/thunderbolt/icm.c
@@ -60,6 +60,8 @@
* @get_route: Find a route string for given switch
* @device_connected: Handle device connected ICM message
* @device_disconnected: Handle device disconnected ICM message
+ * @xdomain_connected - Handle XDomain connected ICM message
+ * @xdomain_disconnected - Handle XDomain disconnected ICM message
*/
struct icm {
struct mutex request_lock;
@@ -74,6 +76,10 @@ struct icm {
const struct icm_pkg_header *hdr);
void (*device_disconnected)(struct tb *tb,
const struct icm_pkg_header *hdr);
+ void (*xdomain_connected)(struct tb *tb,
+ const struct icm_pkg_header *hdr);
+ void (*xdomain_disconnected)(struct tb *tb,
+ const struct icm_pkg_header *hdr);
};
struct icm_notification {
@@ -89,7 +95,10 @@ static inline struct tb *icm_to_tb(struct icm *icm)
static inline u8 phy_port_from_route(u64 route, u8 depth)
{
- return tb_switch_phy_port_from_link(route >> ((depth - 1) * 8));
+ u8 link;
+
+ link = depth ? route >> ((depth - 1) * 8) : route;
+ return tb_phy_port_from_link(link);
}
static inline u8 dual_link_from_link(u8 link)
@@ -320,6 +329,51 @@ static int icm_fr_challenge_switch_key(struct tb *tb, struct tb_switch *sw,
return 0;
}
+static int icm_fr_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd)
+{
+ struct icm_fr_pkg_approve_xdomain_response reply;
+ struct icm_fr_pkg_approve_xdomain request;
+ int ret;
+
+ memset(&request, 0, sizeof(request));
+ request.hdr.code = ICM_APPROVE_XDOMAIN;
+ request.link_info = xd->depth << ICM_LINK_INFO_DEPTH_SHIFT | xd->link;
+ memcpy(&request.remote_uuid, xd->remote_uuid, sizeof(*xd->remote_uuid));
+
+ request.transmit_path = xd->transmit_path;
+ request.transmit_ring = xd->transmit_ring;
+ request.receive_path = xd->receive_path;
+ request.receive_ring = xd->receive_ring;
+
+ memset(&reply, 0, sizeof(reply));
+ ret = icm_request(tb, &request, sizeof(request), &reply, sizeof(reply),
+ 1, ICM_TIMEOUT);
+ if (ret)
+ return ret;
+
+ if (reply.hdr.flags & ICM_FLAGS_ERROR)
+ return -EIO;
+
+ return 0;
+}
+
+static int icm_fr_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd)
+{
+ u8 phy_port;
+ u8 cmd;
+
+ phy_port = tb_phy_port_from_link(xd->link);
+ if (phy_port == 0)
+ cmd = NHI_MAILBOX_DISCONNECT_PA;
+ else
+ cmd = NHI_MAILBOX_DISCONNECT_PB;
+
+ nhi_mailbox_cmd(tb->nhi, cmd, 1);
+ usleep_range(10, 50);
+ nhi_mailbox_cmd(tb->nhi, cmd, 2);
+ return 0;
+}
+
static void remove_switch(struct tb_switch *sw)
{
struct tb_switch *parent_sw;
@@ -475,6 +529,141 @@ icm_fr_device_disconnected(struct tb *tb, const struct icm_pkg_header *hdr)
tb_switch_put(sw);
}
+static void remove_xdomain(struct tb_xdomain *xd)
+{
+ struct tb_switch *sw;
+
+ sw = tb_to_switch(xd->dev.parent);
+ tb_port_at(xd->route, sw)->xdomain = NULL;
+ tb_xdomain_remove(xd);
+}
+
+static void
+icm_fr_xdomain_connected(struct tb *tb, const struct icm_pkg_header *hdr)
+{
+ const struct icm_fr_event_xdomain_connected *pkg =
+ (const struct icm_fr_event_xdomain_connected *)hdr;
+ struct tb_xdomain *xd;
+ struct tb_switch *sw;
+ u8 link, depth;
+ bool approved;
+ u64 route;
+
+ /*
+ * After NVM upgrade adding root switch device fails because we
+ * initiated reset. During that time ICM might still send
+ * XDomain connected message which we ignore here.
+ */
+ if (!tb->root_switch)
+ return;
+
+ link = pkg->link_info & ICM_LINK_INFO_LINK_MASK;
+ depth = (pkg->link_info & ICM_LINK_INFO_DEPTH_MASK) >>
+ ICM_LINK_INFO_DEPTH_SHIFT;
+ approved = pkg->link_info & ICM_LINK_INFO_APPROVED;
+
+ if (link > ICM_MAX_LINK || depth > ICM_MAX_DEPTH) {
+ tb_warn(tb, "invalid topology %u.%u, ignoring\n", link, depth);
+ return;
+ }
+
+ route = get_route(pkg->local_route_hi, pkg->local_route_lo);
+
+ xd = tb_xdomain_find_by_uuid(tb, &pkg->remote_uuid);
+ if (xd) {
+ u8 xd_phy_port, phy_port;
+
+ xd_phy_port = phy_port_from_route(xd->route, xd->depth);
+ phy_port = phy_port_from_route(route, depth);
+
+ if (xd->depth == depth && xd_phy_port == phy_port) {
+ xd->link = link;
+ xd->route = route;
+ xd->is_unplugged = false;
+ tb_xdomain_put(xd);
+ return;
+ }
+
+ /*
+ * If we find an existing XDomain connection remove it
+ * now. We need to go through login handshake and
+ * everything anyway to be able to re-establish the
+ * connection.
+ */
+ remove_xdomain(xd);
+ tb_xdomain_put(xd);
+ }
+
+ /*
+ * Look if there already exists an XDomain in the same place
+ * than the new one and in that case remove it because it is
+ * most likely another host that got disconnected.
+ */
+ xd = tb_xdomain_find_by_link_depth(tb, link, depth);
+ if (!xd) {
+ u8 dual_link;
+
+ dual_link = dual_link_from_link(link);
+ if (dual_link)
+ xd = tb_xdomain_find_by_link_depth(tb, dual_link,
+ depth);
+ }
+ if (xd) {
+ remove_xdomain(xd);
+ tb_xdomain_put(xd);
+ }
+
+ /*
+ * If the user disconnected a switch during suspend and
+ * connected another host to the same port, remove the switch
+ * first.
+ */
+ sw = get_switch_at_route(tb->root_switch, route);
+ if (sw)
+ remove_switch(sw);
+
+ sw = tb_switch_find_by_link_depth(tb, link, depth);
+ if (!sw) {
+ tb_warn(tb, "no switch exists at %u.%u, ignoring\n", link,
+ depth);
+ return;
+ }
+
+ xd = tb_xdomain_alloc(sw->tb, &sw->dev, route,
+ &pkg->local_uuid, &pkg->remote_uuid);
+ if (!xd) {
+ tb_switch_put(sw);
+ return;
+ }
+
+ xd->link = link;
+ xd->depth = depth;
+
+ tb_port_at(route, sw)->xdomain = xd;
+
+ tb_xdomain_add(xd);
+ tb_switch_put(sw);
+}
+
+static void
+icm_fr_xdomain_disconnected(struct tb *tb, const struct icm_pkg_header *hdr)
+{
+ const struct icm_fr_event_xdomain_disconnected *pkg =
+ (const struct icm_fr_event_xdomain_disconnected *)hdr;
+ struct tb_xdomain *xd;
+
+ /*
+ * If the connection is through one or multiple devices, the
+ * XDomain device is removed along with them so it is fine if we
+ * cannot find it here.
+ */
+ xd = tb_xdomain_find_by_uuid(tb, &pkg->remote_uuid);
+ if (xd) {
+ remove_xdomain(xd);
+ tb_xdomain_put(xd);
+ }
+}
+
static struct pci_dev *get_upstream_port(struct pci_dev *pdev)
{
struct pci_dev *parent;
@@ -594,6 +783,12 @@ static void icm_handle_notification(struct work_struct *work)
case ICM_EVENT_DEVICE_DISCONNECTED:
icm->device_disconnected(tb, n->pkg);
break;
+ case ICM_EVENT_XDOMAIN_CONNECTED:
+ icm->xdomain_connected(tb, n->pkg);
+ break;
+ case ICM_EVENT_XDOMAIN_DISCONNECTED:
+ icm->xdomain_disconnected(tb, n->pkg);
+ break;
}
mutex_unlock(&tb->lock);
@@ -927,6 +1122,10 @@ static void icm_unplug_children(struct tb_switch *sw)
if (tb_is_upstream_port(port))
continue;
+ if (port->xdomain) {
+ port->xdomain->is_unplugged = true;
+ continue;
+ }
if (!port->remote)
continue;
@@ -943,6 +1142,13 @@ static void icm_free_unplugged_children(struct tb_switch *sw)
if (tb_is_upstream_port(port))
continue;
+
+ if (port->xdomain && port->xdomain->is_unplugged) {
+ tb_xdomain_remove(port->xdomain);
+ port->xdomain = NULL;
+ continue;
+ }
+
if (!port->remote)
continue;
@@ -1009,8 +1215,10 @@ static int icm_start(struct tb *tb)
tb->root_switch->no_nvm_upgrade = x86_apple_machine;
ret = tb_switch_add(tb->root_switch);
- if (ret)
+ if (ret) {
tb_switch_put(tb->root_switch);
+ tb->root_switch = NULL;
+ }
return ret;
}
@@ -1042,6 +1250,8 @@ static const struct tb_cm_ops icm_fr_ops = {
.add_switch_key = icm_fr_add_switch_key,
.challenge_switch_key = icm_fr_challenge_switch_key,
.disconnect_pcie_paths = icm_disconnect_pcie_paths,
+ .approve_xdomain_paths = icm_fr_approve_xdomain_paths,
+ .disconnect_xdomain_paths = icm_fr_disconnect_xdomain_paths,
};
struct tb *icm_probe(struct tb_nhi *nhi)
@@ -1064,6 +1274,8 @@ struct tb *icm_probe(struct tb_nhi *nhi)
icm->get_route = icm_fr_get_route;
icm->device_connected = icm_fr_device_connected;
icm->device_disconnected = icm_fr_device_disconnected;
+ icm->xdomain_connected = icm_fr_xdomain_connected;
+ icm->xdomain_disconnected = icm_fr_xdomain_disconnected;
tb->cm_ops = &icm_fr_ops;
break;
@@ -1077,6 +1289,8 @@ struct tb *icm_probe(struct tb_nhi *nhi)
icm->get_route = icm_ar_get_route;
icm->device_connected = icm_fr_device_connected;
icm->device_disconnected = icm_fr_device_disconnected;
+ icm->xdomain_connected = icm_fr_xdomain_connected;
+ icm->xdomain_disconnected = icm_fr_xdomain_disconnected;
tb->cm_ops = &icm_fr_ops;
break;
}
diff --git a/drivers/thunderbolt/nhi.c b/drivers/thunderbolt/nhi.c
index 05af126a2435..419a7a90bce0 100644
--- a/drivers/thunderbolt/nhi.c
+++ b/drivers/thunderbolt/nhi.c
@@ -22,6 +22,14 @@
#define RING_TYPE(ring) ((ring)->is_tx ? "TX ring" : "RX ring")
/*
+ * Used to enable end-to-end workaround for missing RX packets. Do not
+ * use this ring for anything else.
+ */
+#define RING_E2E_UNUSED_HOPID 2
+/* HopIDs 0-7 are reserved by the Thunderbolt protocol */
+#define RING_FIRST_USABLE_HOPID 8
+
+/*
* Minimal number of vectors when we use MSI-X. Two for control channel
* Rx/Tx and the rest four are for cross domain DMA paths.
*/
@@ -206,8 +214,10 @@ static void ring_work(struct work_struct *work)
struct tb_ring *ring = container_of(work, typeof(*ring), work);
struct ring_frame *frame;
bool canceled = false;
+ unsigned long flags;
LIST_HEAD(done);
- mutex_lock(&ring->lock);
+
+ spin_lock_irqsave(&ring->lock, flags);
if (!ring->running) {
/* Move all frames to done and mark them as canceled. */
@@ -229,30 +239,14 @@ static void ring_work(struct work_struct *work)
frame->eof = ring->descriptors[ring->tail].eof;
frame->sof = ring->descriptors[ring->tail].sof;
frame->flags = ring->descriptors[ring->tail].flags;
- if (frame->sof != 0)
- dev_WARN(&ring->nhi->pdev->dev,
- "%s %d got unexpected SOF: %#x\n",
- RING_TYPE(ring), ring->hop,
- frame->sof);
- /*
- * known flags:
- * raw not enabled, interupt not set: 0x2=0010
- * raw enabled: 0xa=1010
- * raw not enabled: 0xb=1011
- * partial frame (>MAX_FRAME_SIZE): 0xe=1110
- */
- if (frame->flags != 0xa)
- dev_WARN(&ring->nhi->pdev->dev,
- "%s %d got unexpected flags: %#x\n",
- RING_TYPE(ring), ring->hop,
- frame->flags);
}
ring->tail = (ring->tail + 1) % ring->size;
}
ring_write_descriptors(ring);
invoke_callback:
- mutex_unlock(&ring->lock); /* allow callbacks to schedule new work */
+ /* allow callbacks to schedule new work */
+ spin_unlock_irqrestore(&ring->lock, flags);
while (!list_empty(&done)) {
frame = list_first_entry(&done, typeof(*frame), list);
/*
@@ -260,29 +254,128 @@ invoke_callback:
* Do not hold on to it.
*/
list_del_init(&frame->list);
- frame->callback(ring, frame, canceled);
+ if (frame->callback)
+ frame->callback(ring, frame, canceled);
}
}
-int __ring_enqueue(struct tb_ring *ring, struct ring_frame *frame)
+int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame)
{
+ unsigned long flags;
int ret = 0;
- mutex_lock(&ring->lock);
+
+ spin_lock_irqsave(&ring->lock, flags);
if (ring->running) {
list_add_tail(&frame->list, &ring->queue);
ring_write_descriptors(ring);
} else {
ret = -ESHUTDOWN;
}
- mutex_unlock(&ring->lock);
+ spin_unlock_irqrestore(&ring->lock, flags);
return ret;
}
+EXPORT_SYMBOL_GPL(__tb_ring_enqueue);
+
+/**
+ * tb_ring_poll() - Poll one completed frame from the ring
+ * @ring: Ring to poll
+ *
+ * This function can be called when @start_poll callback of the @ring
+ * has been called. It will read one completed frame from the ring and
+ * return it to the caller. Returns %NULL if there is no more completed
+ * frames.
+ */
+struct ring_frame *tb_ring_poll(struct tb_ring *ring)
+{
+ struct ring_frame *frame = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ring->lock, flags);
+ if (!ring->running)
+ goto unlock;
+ if (ring_empty(ring))
+ goto unlock;
+
+ if (ring->descriptors[ring->tail].flags & RING_DESC_COMPLETED) {
+ frame = list_first_entry(&ring->in_flight, typeof(*frame),
+ list);
+ list_del_init(&frame->list);
+
+ if (!ring->is_tx) {
+ frame->size = ring->descriptors[ring->tail].length;
+ frame->eof = ring->descriptors[ring->tail].eof;
+ frame->sof = ring->descriptors[ring->tail].sof;
+ frame->flags = ring->descriptors[ring->tail].flags;
+ }
+
+ ring->tail = (ring->tail + 1) % ring->size;
+ }
+
+unlock:
+ spin_unlock_irqrestore(&ring->lock, flags);
+ return frame;
+}
+EXPORT_SYMBOL_GPL(tb_ring_poll);
+
+static void __ring_interrupt_mask(struct tb_ring *ring, bool mask)
+{
+ int idx = ring_interrupt_index(ring);
+ int reg = REG_RING_INTERRUPT_BASE + idx / 32 * 4;
+ int bit = idx % 32;
+ u32 val;
+
+ val = ioread32(ring->nhi->iobase + reg);
+ if (mask)
+ val &= ~BIT(bit);
+ else
+ val |= BIT(bit);
+ iowrite32(val, ring->nhi->iobase + reg);
+}
+
+/* Both @nhi->lock and @ring->lock should be held */
+static void __ring_interrupt(struct tb_ring *ring)
+{
+ if (!ring->running)
+ return;
+
+ if (ring->start_poll) {
+ __ring_interrupt_mask(ring, false);
+ ring->start_poll(ring->poll_data);
+ } else {
+ schedule_work(&ring->work);
+ }
+}
+
+/**
+ * tb_ring_poll_complete() - Re-start interrupt for the ring
+ * @ring: Ring to re-start the interrupt
+ *
+ * This will re-start (unmask) the ring interrupt once the user is done
+ * with polling.
+ */
+void tb_ring_poll_complete(struct tb_ring *ring)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ring->nhi->lock, flags);
+ spin_lock(&ring->lock);
+ if (ring->start_poll)
+ __ring_interrupt_mask(ring, false);
+ spin_unlock(&ring->lock);
+ spin_unlock_irqrestore(&ring->nhi->lock, flags);
+}
+EXPORT_SYMBOL_GPL(tb_ring_poll_complete);
static irqreturn_t ring_msix(int irq, void *data)
{
struct tb_ring *ring = data;
- schedule_work(&ring->work);
+ spin_lock(&ring->nhi->lock);
+ spin_lock(&ring->lock);
+ __ring_interrupt(ring);
+ spin_unlock(&ring->lock);
+ spin_unlock(&ring->nhi->lock);
+
return IRQ_HANDLED;
}
@@ -320,30 +413,81 @@ static void ring_release_msix(struct tb_ring *ring)
ring->irq = 0;
}
-static struct tb_ring *ring_alloc(struct tb_nhi *nhi, u32 hop, int size,
- bool transmit, unsigned int flags)
+static int nhi_alloc_hop(struct tb_nhi *nhi, struct tb_ring *ring)
+{
+ int ret = 0;
+
+ spin_lock_irq(&nhi->lock);
+
+ if (ring->hop < 0) {
+ unsigned int i;
+
+ /*
+ * Automatically allocate HopID from the non-reserved
+ * range 8 .. hop_count - 1.
+ */
+ for (i = RING_FIRST_USABLE_HOPID; i < nhi->hop_count; i++) {
+ if (ring->is_tx) {
+ if (!nhi->tx_rings[i]) {
+ ring->hop = i;
+ break;
+ }
+ } else {
+ if (!nhi->rx_rings[i]) {
+ ring->hop = i;
+ break;
+ }
+ }
+ }
+ }
+
+ if (ring->hop < 0 || ring->hop >= nhi->hop_count) {
+ dev_warn(&nhi->pdev->dev, "invalid hop: %d\n", ring->hop);
+ ret = -EINVAL;
+ goto err_unlock;
+ }
+ if (ring->is_tx && nhi->tx_rings[ring->hop]) {
+ dev_warn(&nhi->pdev->dev, "TX hop %d already allocated\n",
+ ring->hop);
+ ret = -EBUSY;
+ goto err_unlock;
+ } else if (!ring->is_tx && nhi->rx_rings[ring->hop]) {
+ dev_warn(&nhi->pdev->dev, "RX hop %d already allocated\n",
+ ring->hop);
+ ret = -EBUSY;
+ goto err_unlock;
+ }
+
+ if (ring->is_tx)
+ nhi->tx_rings[ring->hop] = ring;
+ else
+ nhi->rx_rings[ring->hop] = ring;
+
+err_unlock:
+ spin_unlock_irq(&nhi->lock);
+
+ return ret;
+}
+
+static struct tb_ring *tb_ring_alloc(struct tb_nhi *nhi, u32 hop, int size,
+ bool transmit, unsigned int flags,
+ u16 sof_mask, u16 eof_mask,
+ void (*start_poll)(void *),
+ void *poll_data)
{
struct tb_ring *ring = NULL;
dev_info(&nhi->pdev->dev, "allocating %s ring %d of size %d\n",
transmit ? "TX" : "RX", hop, size);
- mutex_lock(&nhi->lock);
- if (hop >= nhi->hop_count) {
- dev_WARN(&nhi->pdev->dev, "invalid hop: %d\n", hop);
- goto err;
- }
- if (transmit && nhi->tx_rings[hop]) {
- dev_WARN(&nhi->pdev->dev, "TX hop %d already allocated\n", hop);
- goto err;
- } else if (!transmit && nhi->rx_rings[hop]) {
- dev_WARN(&nhi->pdev->dev, "RX hop %d already allocated\n", hop);
- goto err;
- }
+ /* Tx Ring 2 is reserved for E2E workaround */
+ if (transmit && hop == RING_E2E_UNUSED_HOPID)
+ return NULL;
+
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (!ring)
- goto err;
+ return NULL;
- mutex_init(&ring->lock);
+ spin_lock_init(&ring->lock);
INIT_LIST_HEAD(&ring->queue);
INIT_LIST_HEAD(&ring->in_flight);
INIT_WORK(&ring->work, ring_work);
@@ -353,55 +497,88 @@ static struct tb_ring *ring_alloc(struct tb_nhi *nhi, u32 hop, int size,
ring->is_tx = transmit;
ring->size = size;
ring->flags = flags;
+ ring->sof_mask = sof_mask;
+ ring->eof_mask = eof_mask;
ring->head = 0;
ring->tail = 0;
ring->running = false;
-
- if (ring_request_msix(ring, flags & RING_FLAG_NO_SUSPEND))
- goto err;
+ ring->start_poll = start_poll;
+ ring->poll_data = poll_data;
ring->descriptors = dma_alloc_coherent(&ring->nhi->pdev->dev,
size * sizeof(*ring->descriptors),
&ring->descriptors_dma, GFP_KERNEL | __GFP_ZERO);
if (!ring->descriptors)
- goto err;
+ goto err_free_ring;
+
+ if (ring_request_msix(ring, flags & RING_FLAG_NO_SUSPEND))
+ goto err_free_descs;
+
+ if (nhi_alloc_hop(nhi, ring))
+ goto err_release_msix;
- if (transmit)
- nhi->tx_rings[hop] = ring;
- else
- nhi->rx_rings[hop] = ring;
- mutex_unlock(&nhi->lock);
return ring;
-err:
- if (ring)
- mutex_destroy(&ring->lock);
+err_release_msix:
+ ring_release_msix(ring);
+err_free_descs:
+ dma_free_coherent(&ring->nhi->pdev->dev,
+ ring->size * sizeof(*ring->descriptors),
+ ring->descriptors, ring->descriptors_dma);
+err_free_ring:
kfree(ring);
- mutex_unlock(&nhi->lock);
+
return NULL;
}
-struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size,
- unsigned int flags)
+/**
+ * tb_ring_alloc_tx() - Allocate DMA ring for transmit
+ * @nhi: Pointer to the NHI the ring is to be allocated
+ * @hop: HopID (ring) to allocate
+ * @size: Number of entries in the ring
+ * @flags: Flags for the ring
+ */
+struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size,
+ unsigned int flags)
{
- return ring_alloc(nhi, hop, size, true, flags);
+ return tb_ring_alloc(nhi, hop, size, true, flags, 0, 0, NULL, NULL);
}
+EXPORT_SYMBOL_GPL(tb_ring_alloc_tx);
-struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size,
- unsigned int flags)
+/**
+ * tb_ring_alloc_rx() - Allocate DMA ring for receive
+ * @nhi: Pointer to the NHI the ring is to be allocated
+ * @hop: HopID (ring) to allocate. Pass %-1 for automatic allocation.
+ * @size: Number of entries in the ring
+ * @flags: Flags for the ring
+ * @sof_mask: Mask of PDF values that start a frame
+ * @eof_mask: Mask of PDF values that end a frame
+ * @start_poll: If not %NULL the ring will call this function when an
+ * interrupt is triggered and masked, instead of callback
+ * in each Rx frame.
+ * @poll_data: Optional data passed to @start_poll
+ */
+struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size,
+ unsigned int flags, u16 sof_mask, u16 eof_mask,
+ void (*start_poll)(void *), void *poll_data)
{
- return ring_alloc(nhi, hop, size, false, flags);
+ return tb_ring_alloc(nhi, hop, size, false, flags, sof_mask, eof_mask,
+ start_poll, poll_data);
}
+EXPORT_SYMBOL_GPL(tb_ring_alloc_rx);
/**
- * ring_start() - enable a ring
+ * tb_ring_start() - enable a ring
*
- * Must not be invoked in parallel with ring_stop().
+ * Must not be invoked in parallel with tb_ring_stop().
*/
-void ring_start(struct tb_ring *ring)
+void tb_ring_start(struct tb_ring *ring)
{
- mutex_lock(&ring->nhi->lock);
- mutex_lock(&ring->lock);
+ u16 frame_size;
+ u32 flags;
+
+ spin_lock_irq(&ring->nhi->lock);
+ spin_lock(&ring->lock);
if (ring->nhi->going_away)
goto err;
if (ring->running) {
@@ -411,43 +588,65 @@ void ring_start(struct tb_ring *ring)
dev_info(&ring->nhi->pdev->dev, "starting %s %d\n",
RING_TYPE(ring), ring->hop);
+ if (ring->flags & RING_FLAG_FRAME) {
+ /* Means 4096 */
+ frame_size = 0;
+ flags = RING_FLAG_ENABLE;
+ } else {
+ frame_size = TB_FRAME_SIZE;
+ flags = RING_FLAG_ENABLE | RING_FLAG_RAW;
+ }
+
+ if (ring->flags & RING_FLAG_E2E && !ring->is_tx) {
+ u32 hop;
+
+ /*
+ * In order not to lose Rx packets we enable end-to-end
+ * workaround which transfers Rx credits to an unused Tx
+ * HopID.
+ */
+ hop = RING_E2E_UNUSED_HOPID << REG_RX_OPTIONS_E2E_HOP_SHIFT;
+ hop &= REG_RX_OPTIONS_E2E_HOP_MASK;
+ flags |= hop | RING_FLAG_E2E_FLOW_CONTROL;
+ }
+
ring_iowrite64desc(ring, ring->descriptors_dma, 0);
if (ring->is_tx) {
ring_iowrite32desc(ring, ring->size, 12);
ring_iowrite32options(ring, 0, 4); /* time releated ? */
- ring_iowrite32options(ring,
- RING_FLAG_ENABLE | RING_FLAG_RAW, 0);
+ ring_iowrite32options(ring, flags, 0);
} else {
- ring_iowrite32desc(ring,
- (TB_FRAME_SIZE << 16) | ring->size, 12);
- ring_iowrite32options(ring, 0xffffffff, 4); /* SOF EOF mask */
- ring_iowrite32options(ring,
- RING_FLAG_ENABLE | RING_FLAG_RAW, 0);
+ u32 sof_eof_mask = ring->sof_mask << 16 | ring->eof_mask;
+
+ ring_iowrite32desc(ring, (frame_size << 16) | ring->size, 12);
+ ring_iowrite32options(ring, sof_eof_mask, 4);
+ ring_iowrite32options(ring, flags, 0);
}
ring_interrupt_active(ring, true);
ring->running = true;
err:
- mutex_unlock(&ring->lock);
- mutex_unlock(&ring->nhi->lock);
+ spin_unlock(&ring->lock);
+ spin_unlock_irq(&ring->nhi->lock);
}
-
+EXPORT_SYMBOL_GPL(tb_ring_start);
/**
- * ring_stop() - shutdown a ring
+ * tb_ring_stop() - shutdown a ring
*
* Must not be invoked from a callback.
*
- * This method will disable the ring. Further calls to ring_tx/ring_rx will
- * return -ESHUTDOWN until ring_stop has been called.
+ * This method will disable the ring. Further calls to
+ * tb_ring_tx/tb_ring_rx will return -ESHUTDOWN until ring_stop has been
+ * called.
*
* All enqueued frames will be canceled and their callbacks will be executed
* with frame->canceled set to true (on the callback thread). This method
* returns only after all callback invocations have finished.
*/
-void ring_stop(struct tb_ring *ring)
+void tb_ring_stop(struct tb_ring *ring)
{
- mutex_lock(&ring->nhi->lock);
- mutex_lock(&ring->lock);
+ spin_lock_irq(&ring->nhi->lock);
+ spin_lock(&ring->lock);
dev_info(&ring->nhi->pdev->dev, "stopping %s %d\n",
RING_TYPE(ring), ring->hop);
if (ring->nhi->going_away)
@@ -468,8 +667,8 @@ void ring_stop(struct tb_ring *ring)
ring->running = false;
err:
- mutex_unlock(&ring->lock);
- mutex_unlock(&ring->nhi->lock);
+ spin_unlock(&ring->lock);
+ spin_unlock_irq(&ring->nhi->lock);
/*
* schedule ring->work to invoke callbacks on all remaining frames.
@@ -477,9 +676,10 @@ err:
schedule_work(&ring->work);
flush_work(&ring->work);
}
+EXPORT_SYMBOL_GPL(tb_ring_stop);
/*
- * ring_free() - free ring
+ * tb_ring_free() - free ring
*
* When this method returns all invocations of ring->callback will have
* finished.
@@ -488,9 +688,9 @@ err:
*
* Must NOT be called from ring_frame->callback!
*/
-void ring_free(struct tb_ring *ring)
+void tb_ring_free(struct tb_ring *ring)
{
- mutex_lock(&ring->nhi->lock);
+ spin_lock_irq(&ring->nhi->lock);
/*
* Dissociate the ring from the NHI. This also ensures that
* nhi_interrupt_work cannot reschedule ring->work.
@@ -504,6 +704,7 @@ void ring_free(struct tb_ring *ring)
dev_WARN(&ring->nhi->pdev->dev, "%s %d still running\n",
RING_TYPE(ring), ring->hop);
}
+ spin_unlock_irq(&ring->nhi->lock);
ring_release_msix(ring);
@@ -520,16 +721,15 @@ void ring_free(struct tb_ring *ring)
RING_TYPE(ring),
ring->hop);
- mutex_unlock(&ring->nhi->lock);
/**
* ring->work can no longer be scheduled (it is scheduled only
* by nhi_interrupt_work, ring_stop and ring_msix). Wait for it
* to finish before freeing the ring.
*/
flush_work(&ring->work);
- mutex_destroy(&ring->lock);
kfree(ring);
}
+EXPORT_SYMBOL_GPL(tb_ring_free);
/**
* nhi_mailbox_cmd() - Send a command through NHI mailbox
@@ -595,7 +795,7 @@ static void nhi_interrupt_work(struct work_struct *work)
int type = 0; /* current interrupt type 0: TX, 1: RX, 2: RX overflow */
struct tb_ring *ring;
- mutex_lock(&nhi->lock);
+ spin_lock_irq(&nhi->lock);
/*
* Starting at REG_RING_NOTIFY_BASE there are three status bitfields
@@ -630,10 +830,12 @@ static void nhi_interrupt_work(struct work_struct *work)
hop);
continue;
}
- /* we do not check ring->running, this is done in ring->work */
- schedule_work(&ring->work);
+
+ spin_lock(&ring->lock);
+ __ring_interrupt(ring);
+ spin_unlock(&ring->lock);
}
- mutex_unlock(&nhi->lock);
+ spin_unlock_irq(&nhi->lock);
}
static irqreturn_t nhi_msi(int irq, void *data)
@@ -651,6 +853,22 @@ static int nhi_suspend_noirq(struct device *dev)
return tb_domain_suspend_noirq(tb);
}
+static void nhi_enable_int_throttling(struct tb_nhi *nhi)
+{
+ /* Throttling is specified in 256ns increments */
+ u32 throttle = DIV_ROUND_UP(128 * NSEC_PER_USEC, 256);
+ unsigned int i;
+
+ /*
+ * Configure interrupt throttling for all vectors even if we
+ * only use few.
+ */
+ for (i = 0; i < MSIX_MAX_VECS; i++) {
+ u32 reg = REG_INT_THROTTLING_RATE + i * 4;
+ iowrite32(throttle, nhi->iobase + reg);
+ }
+}
+
static int nhi_resume_noirq(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -663,6 +881,8 @@ static int nhi_resume_noirq(struct device *dev)
*/
if (!pci_device_is_present(pdev))
tb->nhi->going_away = true;
+ else
+ nhi_enable_int_throttling(tb->nhi);
return tb_domain_resume_noirq(tb);
}
@@ -705,7 +925,6 @@ static void nhi_shutdown(struct tb_nhi *nhi)
devm_free_irq(&nhi->pdev->dev, nhi->pdev->irq, nhi);
flush_work(&nhi->interrupt_work);
}
- mutex_destroy(&nhi->lock);
ida_destroy(&nhi->msix_ida);
}
@@ -717,6 +936,8 @@ static int nhi_init_msi(struct tb_nhi *nhi)
/* In case someone left them on. */
nhi_disable_interrupts(nhi);
+ nhi_enable_int_throttling(nhi);
+
ida_init(&nhi->msix_ida);
/*
@@ -792,13 +1013,10 @@ static int nhi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return res;
}
- mutex_init(&nhi->lock);
+ spin_lock_init(&nhi->lock);
pci_set_master(pdev);
- /* magic value - clock related? */
- iowrite32(3906250 / 10000, nhi->iobase + 0x38c00);
-
tb = icm_probe(nhi);
if (!tb)
tb = tb_probe(nhi);
@@ -926,5 +1144,5 @@ static void __exit nhi_unload(void)
tb_domain_exit();
}
-module_init(nhi_init);
+fs_initcall(nhi_init);
module_exit(nhi_unload);
diff --git a/drivers/thunderbolt/nhi.h b/drivers/thunderbolt/nhi.h
index 5b5bb2c436be..771d09ca5dc5 100644
--- a/drivers/thunderbolt/nhi.h
+++ b/drivers/thunderbolt/nhi.h
@@ -7,144 +7,7 @@
#ifndef DSL3510_H_
#define DSL3510_H_
-#include <linux/idr.h>
-#include <linux/mutex.h>
-#include <linux/workqueue.h>
-
-/**
- * struct tb_nhi - thunderbolt native host interface
- * @lock: Must be held during ring creation/destruction. Is acquired by
- * interrupt_work when dispatching interrupts to individual rings.
- * @pdev: Pointer to the PCI device
- * @iobase: MMIO space of the NHI
- * @tx_rings: All Tx rings available on this host controller
- * @rx_rings: All Rx rings available on this host controller
- * @msix_ida: Used to allocate MSI-X vectors for rings
- * @going_away: The host controller device is about to disappear so when
- * this flag is set, avoid touching the hardware anymore.
- * @interrupt_work: Work scheduled to handle ring interrupt when no
- * MSI-X is used.
- * @hop_count: Number of rings (end point hops) supported by NHI.
- */
-struct tb_nhi {
- struct mutex lock;
- struct pci_dev *pdev;
- void __iomem *iobase;
- struct tb_ring **tx_rings;
- struct tb_ring **rx_rings;
- struct ida msix_ida;
- bool going_away;
- struct work_struct interrupt_work;
- u32 hop_count;
-};
-
-/**
- * struct tb_ring - thunderbolt TX or RX ring associated with a NHI
- * @lock: Lock serializing actions to this ring. Must be acquired after
- * nhi->lock.
- * @nhi: Pointer to the native host controller interface
- * @size: Size of the ring
- * @hop: Hop (DMA channel) associated with this ring
- * @head: Head of the ring (write next descriptor here)
- * @tail: Tail of the ring (complete next descriptor here)
- * @descriptors: Allocated descriptors for this ring
- * @queue: Queue holding frames to be transferred over this ring
- * @in_flight: Queue holding frames that are currently in flight
- * @work: Interrupt work structure
- * @is_tx: Is the ring Tx or Rx
- * @running: Is the ring running
- * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise.
- * @vector: MSI-X vector number the ring uses (only set if @irq is > 0)
- * @flags: Ring specific flags
- */
-struct tb_ring {
- struct mutex lock;
- struct tb_nhi *nhi;
- int size;
- int hop;
- int head;
- int tail;
- struct ring_desc *descriptors;
- dma_addr_t descriptors_dma;
- struct list_head queue;
- struct list_head in_flight;
- struct work_struct work;
- bool is_tx:1;
- bool running:1;
- int irq;
- u8 vector;
- unsigned int flags;
-};
-
-/* Leave ring interrupt enabled on suspend */
-#define RING_FLAG_NO_SUSPEND BIT(0)
-
-struct ring_frame;
-typedef void (*ring_cb)(struct tb_ring*, struct ring_frame*, bool canceled);
-
-/**
- * struct ring_frame - for use with ring_rx/ring_tx
- */
-struct ring_frame {
- dma_addr_t buffer_phy;
- ring_cb callback;
- struct list_head list;
- u32 size:12; /* TX: in, RX: out*/
- u32 flags:12; /* RX: out */
- u32 eof:4; /* TX:in, RX: out */
- u32 sof:4; /* TX:in, RX: out */
-};
-
-#define TB_FRAME_SIZE 0x100 /* minimum size for ring_rx */
-
-struct tb_ring *ring_alloc_tx(struct tb_nhi *nhi, int hop, int size,
- unsigned int flags);
-struct tb_ring *ring_alloc_rx(struct tb_nhi *nhi, int hop, int size,
- unsigned int flags);
-void ring_start(struct tb_ring *ring);
-void ring_stop(struct tb_ring *ring);
-void ring_free(struct tb_ring *ring);
-
-int __ring_enqueue(struct tb_ring *ring, struct ring_frame *frame);
-
-/**
- * ring_rx() - enqueue a frame on an RX ring
- *
- * frame->buffer, frame->buffer_phy and frame->callback have to be set. The
- * buffer must contain at least TB_FRAME_SIZE bytes.
- *
- * frame->callback will be invoked with frame->size, frame->flags, frame->eof,
- * frame->sof set once the frame has been received.
- *
- * If ring_stop is called after the packet has been enqueued frame->callback
- * will be called with canceled set to true.
- *
- * Return: Returns ESHUTDOWN if ring_stop has been called. Zero otherwise.
- */
-static inline int ring_rx(struct tb_ring *ring, struct ring_frame *frame)
-{
- WARN_ON(ring->is_tx);
- return __ring_enqueue(ring, frame);
-}
-
-/**
- * ring_tx() - enqueue a frame on an TX ring
- *
- * frame->buffer, frame->buffer_phy, frame->callback, frame->size, frame->eof
- * and frame->sof have to be set.
- *
- * frame->callback will be invoked with once the frame has been transmitted.
- *
- * If ring_stop is called after the packet has been enqueued frame->callback
- * will be called with canceled set to true.
- *
- * Return: Returns ESHUTDOWN if ring_stop has been called. Zero otherwise.
- */
-static inline int ring_tx(struct tb_ring *ring, struct ring_frame *frame)
-{
- WARN_ON(!ring->is_tx);
- return __ring_enqueue(ring, frame);
-}
+#include <linux/thunderbolt.h>
enum nhi_fw_mode {
NHI_FW_SAFE_MODE,
@@ -157,6 +20,8 @@ enum nhi_mailbox_cmd {
NHI_MAILBOX_SAVE_DEVS = 0x05,
NHI_MAILBOX_DISCONNECT_PCIE_PATHS = 0x06,
NHI_MAILBOX_DRV_UNLOADS = 0x07,
+ NHI_MAILBOX_DISCONNECT_PA = 0x10,
+ NHI_MAILBOX_DISCONNECT_PB = 0x11,
NHI_MAILBOX_ALLOW_ALL_DEVS = 0x23,
};
diff --git a/drivers/thunderbolt/nhi_regs.h b/drivers/thunderbolt/nhi_regs.h
index 09ed574e92ff..5ed6934e31e7 100644
--- a/drivers/thunderbolt/nhi_regs.h
+++ b/drivers/thunderbolt/nhi_regs.h
@@ -17,13 +17,6 @@ enum ring_flags {
RING_FLAG_ENABLE = 1 << 31,
};
-enum ring_desc_flags {
- RING_DESC_ISOCH = 0x1, /* TX only? */
- RING_DESC_COMPLETED = 0x2, /* set by NHI */
- RING_DESC_POSTED = 0x4, /* always set this */
- RING_DESC_INTERRUPT = 0x8, /* request an interrupt on completion */
-};
-
/**
* struct ring_desc - TX/RX ring entry
*
@@ -77,6 +70,8 @@ struct ring_desc {
* ..: unknown
*/
#define REG_RX_OPTIONS_BASE 0x29800
+#define REG_RX_OPTIONS_E2E_HOP_MASK GENMASK(22, 12)
+#define REG_RX_OPTIONS_E2E_HOP_SHIFT 12
/*
* three bitfields: tx, rx, rx overflow
@@ -95,6 +90,8 @@ struct ring_desc {
#define REG_RING_INTERRUPT_BASE 0x38200
#define RING_INTERRUPT_REG_COUNT(nhi) ((31 + 2 * nhi->hop_count) / 32)
+#define REG_INT_THROTTLING_RATE 0x38c00
+
/* Interrupt Vector Allocation */
#define REG_INT_VEC_ALLOC_BASE 0x38c40
#define REG_INT_VEC_ALLOC_BITS 4
diff --git a/drivers/thunderbolt/property.c b/drivers/thunderbolt/property.c
new file mode 100644
index 000000000000..8fe913a95b4a
--- /dev/null
+++ b/drivers/thunderbolt/property.c
@@ -0,0 +1,670 @@
+/*
+ * Thunderbolt XDomain property support
+ *
+ * Copyright (C) 2017, Intel Corporation
+ * Authors: Michael Jamet <michael.jamet@intel.com>
+ * Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/uuid.h>
+#include <linux/thunderbolt.h>
+
+struct tb_property_entry {
+ u32 key_hi;
+ u32 key_lo;
+ u16 length;
+ u8 reserved;
+ u8 type;
+ u32 value;
+};
+
+struct tb_property_rootdir_entry {
+ u32 magic;
+ u32 length;
+ struct tb_property_entry entries[];
+};
+
+struct tb_property_dir_entry {
+ u32 uuid[4];
+ struct tb_property_entry entries[];
+};
+
+#define TB_PROPERTY_ROOTDIR_MAGIC 0x55584401
+
+static struct tb_property_dir *__tb_property_parse_dir(const u32 *block,
+ size_t block_len, unsigned int dir_offset, size_t dir_len,
+ bool is_root);
+
+static inline void parse_dwdata(void *dst, const void *src, size_t dwords)
+{
+ be32_to_cpu_array(dst, src, dwords);
+}
+
+static inline void format_dwdata(void *dst, const void *src, size_t dwords)
+{
+ cpu_to_be32_array(dst, src, dwords);
+}
+
+static bool tb_property_entry_valid(const struct tb_property_entry *entry,
+ size_t block_len)
+{
+ switch (entry->type) {
+ case TB_PROPERTY_TYPE_DIRECTORY:
+ case TB_PROPERTY_TYPE_DATA:
+ case TB_PROPERTY_TYPE_TEXT:
+ if (entry->length > block_len)
+ return false;
+ if (entry->value + entry->length > block_len)
+ return false;
+ break;
+
+ case TB_PROPERTY_TYPE_VALUE:
+ if (entry->length != 1)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+static bool tb_property_key_valid(const char *key)
+{
+ return key && strlen(key) <= TB_PROPERTY_KEY_SIZE;
+}
+
+static struct tb_property *
+tb_property_alloc(const char *key, enum tb_property_type type)
+{
+ struct tb_property *property;
+
+ property = kzalloc(sizeof(*property), GFP_KERNEL);
+ if (!property)
+ return NULL;
+
+ strcpy(property->key, key);
+ property->type = type;
+ INIT_LIST_HEAD(&property->list);
+
+ return property;
+}
+
+static struct tb_property *tb_property_parse(const u32 *block, size_t block_len,
+ const struct tb_property_entry *entry)
+{
+ char key[TB_PROPERTY_KEY_SIZE + 1];
+ struct tb_property *property;
+ struct tb_property_dir *dir;
+
+ if (!tb_property_entry_valid(entry, block_len))
+ return NULL;
+
+ parse_dwdata(key, entry, 2);
+ key[TB_PROPERTY_KEY_SIZE] = '\0';
+
+ property = tb_property_alloc(key, entry->type);
+ if (!property)
+ return NULL;
+
+ property->length = entry->length;
+
+ switch (property->type) {
+ case TB_PROPERTY_TYPE_DIRECTORY:
+ dir = __tb_property_parse_dir(block, block_len, entry->value,
+ entry->length, false);
+ if (!dir) {
+ kfree(property);
+ return NULL;
+ }
+ property->value.dir = dir;
+ break;
+
+ case TB_PROPERTY_TYPE_DATA:
+ property->value.data = kcalloc(property->length, sizeof(u32),
+ GFP_KERNEL);
+ if (!property->value.data) {
+ kfree(property);
+ return NULL;
+ }
+ parse_dwdata(property->value.data, block + entry->value,
+ entry->length);
+ break;
+
+ case TB_PROPERTY_TYPE_TEXT:
+ property->value.text = kcalloc(property->length, sizeof(u32),
+ GFP_KERNEL);
+ if (!property->value.text) {
+ kfree(property);
+ return NULL;
+ }
+ parse_dwdata(property->value.text, block + entry->value,
+ entry->length);
+ /* Force null termination */
+ property->value.text[property->length * 4 - 1] = '\0';
+ break;
+
+ case TB_PROPERTY_TYPE_VALUE:
+ property->value.immediate = entry->value;
+ break;
+
+ default:
+ property->type = TB_PROPERTY_TYPE_UNKNOWN;
+ break;
+ }
+
+ return property;
+}
+
+static struct tb_property_dir *__tb_property_parse_dir(const u32 *block,
+ size_t block_len, unsigned int dir_offset, size_t dir_len, bool is_root)
+{
+ const struct tb_property_entry *entries;
+ size_t i, content_len, nentries;
+ unsigned int content_offset;
+ struct tb_property_dir *dir;
+
+ dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+ if (!dir)
+ return NULL;
+
+ if (is_root) {
+ content_offset = dir_offset + 2;
+ content_len = dir_len;
+ } else {
+ dir->uuid = kmemdup(&block[dir_offset], sizeof(*dir->uuid),
+ GFP_KERNEL);
+ content_offset = dir_offset + 4;
+ content_len = dir_len - 4; /* Length includes UUID */
+ }
+
+ entries = (const struct tb_property_entry *)&block[content_offset];
+ nentries = content_len / (sizeof(*entries) / 4);
+
+ INIT_LIST_HEAD(&dir->properties);
+
+ for (i = 0; i < nentries; i++) {
+ struct tb_property *property;
+
+ property = tb_property_parse(block, block_len, &entries[i]);
+ if (!property) {
+ tb_property_free_dir(dir);
+ return NULL;
+ }
+
+ list_add_tail(&property->list, &dir->properties);
+ }
+
+ return dir;
+}
+
+/**
+ * tb_property_parse_dir() - Parses properties from given property block
+ * @block: Property block to parse
+ * @block_len: Number of dword elements in the property block
+ *
+ * This function parses the XDomain properties data block into format that
+ * can be traversed using the helper functions provided by this module.
+ * Upon success returns the parsed directory. In case of error returns
+ * %NULL. The resulting &struct tb_property_dir needs to be released by
+ * calling tb_property_free_dir() when not needed anymore.
+ *
+ * The @block is expected to be root directory.
+ */
+struct tb_property_dir *tb_property_parse_dir(const u32 *block,
+ size_t block_len)
+{
+ const struct tb_property_rootdir_entry *rootdir =
+ (const struct tb_property_rootdir_entry *)block;
+
+ if (rootdir->magic != TB_PROPERTY_ROOTDIR_MAGIC)
+ return NULL;
+ if (rootdir->length > block_len)
+ return NULL;
+
+ return __tb_property_parse_dir(block, block_len, 0, rootdir->length,
+ true);
+}
+
+/**
+ * tb_property_create_dir() - Creates new property directory
+ * @uuid: UUID used to identify the particular directory
+ *
+ * Creates new, empty property directory. If @uuid is %NULL then the
+ * directory is assumed to be root directory.
+ */
+struct tb_property_dir *tb_property_create_dir(const uuid_t *uuid)
+{
+ struct tb_property_dir *dir;
+
+ dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+ if (!dir)
+ return NULL;
+
+ INIT_LIST_HEAD(&dir->properties);
+ if (uuid) {
+ dir->uuid = kmemdup(uuid, sizeof(*dir->uuid), GFP_KERNEL);
+ if (!dir->uuid) {
+ kfree(dir);
+ return NULL;
+ }
+ }
+
+ return dir;
+}
+EXPORT_SYMBOL_GPL(tb_property_create_dir);
+
+static void tb_property_free(struct tb_property *property)
+{
+ switch (property->type) {
+ case TB_PROPERTY_TYPE_DIRECTORY:
+ tb_property_free_dir(property->value.dir);
+ break;
+
+ case TB_PROPERTY_TYPE_DATA:
+ kfree(property->value.data);
+ break;
+
+ case TB_PROPERTY_TYPE_TEXT:
+ kfree(property->value.text);
+ break;
+
+ default:
+ break;
+ }
+
+ kfree(property);
+}
+
+/**
+ * tb_property_free_dir() - Release memory allocated for property directory
+ * @dir: Directory to release
+ *
+ * This will release all the memory the directory occupies including all
+ * descendants. It is OK to pass %NULL @dir, then the function does
+ * nothing.
+ */
+void tb_property_free_dir(struct tb_property_dir *dir)
+{
+ struct tb_property *property, *tmp;
+
+ if (!dir)
+ return;
+
+ list_for_each_entry_safe(property, tmp, &dir->properties, list) {
+ list_del(&property->list);
+ tb_property_free(property);
+ }
+ kfree(dir->uuid);
+ kfree(dir);
+}
+EXPORT_SYMBOL_GPL(tb_property_free_dir);
+
+static size_t tb_property_dir_length(const struct tb_property_dir *dir,
+ bool recurse, size_t *data_len)
+{
+ const struct tb_property *property;
+ size_t len = 0;
+
+ if (dir->uuid)
+ len += sizeof(*dir->uuid) / 4;
+ else
+ len += sizeof(struct tb_property_rootdir_entry) / 4;
+
+ list_for_each_entry(property, &dir->properties, list) {
+ len += sizeof(struct tb_property_entry) / 4;
+
+ switch (property->type) {
+ case TB_PROPERTY_TYPE_DIRECTORY:
+ if (recurse) {
+ len += tb_property_dir_length(
+ property->value.dir, recurse, data_len);
+ }
+ /* Reserve dword padding after each directory */
+ if (data_len)
+ *data_len += 1;
+ break;
+
+ case TB_PROPERTY_TYPE_DATA:
+ case TB_PROPERTY_TYPE_TEXT:
+ if (data_len)
+ *data_len += property->length;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return len;
+}
+
+static ssize_t __tb_property_format_dir(const struct tb_property_dir *dir,
+ u32 *block, unsigned int start_offset, size_t block_len)
+{
+ unsigned int data_offset, dir_end;
+ const struct tb_property *property;
+ struct tb_property_entry *entry;
+ size_t dir_len, data_len = 0;
+ int ret;
+
+ /*
+ * The structure of property block looks like following. Leaf
+ * data/text is included right after the directory and each
+ * directory follows each other (even nested ones).
+ *
+ * +----------+ <-- start_offset
+ * | header | <-- root directory header
+ * +----------+ ---
+ * | entry 0 | -^--------------------.
+ * +----------+ | |
+ * | entry 1 | -|--------------------|--.
+ * +----------+ | | |
+ * | entry 2 | -|-----------------. | |
+ * +----------+ | | | |
+ * : : | dir_len | | |
+ * . . | | | |
+ * : : | | | |
+ * +----------+ | | | |
+ * | entry n | v | | |
+ * +----------+ <-- data_offset | | |
+ * | data 0 | <------------------|--' |
+ * +----------+ | |
+ * | data 1 | <------------------|-----'
+ * +----------+ |
+ * | 00000000 | padding |
+ * +----------+ <-- dir_end <------'
+ * | UUID | <-- directory UUID (child directory)
+ * +----------+
+ * | entry 0 |
+ * +----------+
+ * | entry 1 |
+ * +----------+
+ * : :
+ * . .
+ * : :
+ * +----------+
+ * | entry n |
+ * +----------+
+ * | data 0 |
+ * +----------+
+ *
+ * We use dir_end to hold pointer to the end of the directory. It
+ * will increase as we add directories and each directory should be
+ * added starting from previous dir_end.
+ */
+ dir_len = tb_property_dir_length(dir, false, &data_len);
+ data_offset = start_offset + dir_len;
+ dir_end = start_offset + data_len + dir_len;
+
+ if (data_offset > dir_end)
+ return -EINVAL;
+ if (dir_end > block_len)
+ return -EINVAL;
+
+ /* Write headers first */
+ if (dir->uuid) {
+ struct tb_property_dir_entry *pe;
+
+ pe = (struct tb_property_dir_entry *)&block[start_offset];
+ memcpy(pe->uuid, dir->uuid, sizeof(pe->uuid));
+ entry = pe->entries;
+ } else {
+ struct tb_property_rootdir_entry *re;
+
+ re = (struct tb_property_rootdir_entry *)&block[start_offset];
+ re->magic = TB_PROPERTY_ROOTDIR_MAGIC;
+ re->length = dir_len - sizeof(*re) / 4;
+ entry = re->entries;
+ }
+
+ list_for_each_entry(property, &dir->properties, list) {
+ const struct tb_property_dir *child;
+
+ format_dwdata(entry, property->key, 2);
+ entry->type = property->type;
+
+ switch (property->type) {
+ case TB_PROPERTY_TYPE_DIRECTORY:
+ child = property->value.dir;
+ ret = __tb_property_format_dir(child, block, dir_end,
+ block_len);
+ if (ret < 0)
+ return ret;
+ entry->length = tb_property_dir_length(child, false,
+ NULL);
+ entry->value = dir_end;
+ dir_end = ret;
+ break;
+
+ case TB_PROPERTY_TYPE_DATA:
+ format_dwdata(&block[data_offset], property->value.data,
+ property->length);
+ entry->length = property->length;
+ entry->value = data_offset;
+ data_offset += entry->length;
+ break;
+
+ case TB_PROPERTY_TYPE_TEXT:
+ format_dwdata(&block[data_offset], property->value.text,
+ property->length);
+ entry->length = property->length;
+ entry->value = data_offset;
+ data_offset += entry->length;
+ break;
+
+ case TB_PROPERTY_TYPE_VALUE:
+ entry->length = property->length;
+ entry->value = property->value.immediate;
+ break;
+
+ default:
+ break;
+ }
+
+ entry++;
+ }
+
+ return dir_end;
+}
+
+/**
+ * tb_property_format_dir() - Formats directory to the packed XDomain format
+ * @dir: Directory to format
+ * @block: Property block where the packed data is placed
+ * @block_len: Length of the property block
+ *
+ * This function formats the directory to the packed format that can be
+ * then send over the thunderbolt fabric to receiving host. Returns %0 in
+ * case of success and negative errno on faulure. Passing %NULL in @block
+ * returns number of entries the block takes.
+ */
+ssize_t tb_property_format_dir(const struct tb_property_dir *dir, u32 *block,
+ size_t block_len)
+{
+ ssize_t ret;
+
+ if (!block) {
+ size_t dir_len, data_len = 0;
+
+ dir_len = tb_property_dir_length(dir, true, &data_len);
+ return dir_len + data_len;
+ }
+
+ ret = __tb_property_format_dir(dir, block, 0, block_len);
+ return ret < 0 ? ret : 0;
+}
+
+/**
+ * tb_property_add_immediate() - Add immediate property to directory
+ * @parent: Directory to add the property
+ * @key: Key for the property
+ * @value: Immediate value to store with the property
+ */
+int tb_property_add_immediate(struct tb_property_dir *parent, const char *key,
+ u32 value)
+{
+ struct tb_property *property;
+
+ if (!tb_property_key_valid(key))
+ return -EINVAL;
+
+ property = tb_property_alloc(key, TB_PROPERTY_TYPE_VALUE);
+ if (!property)
+ return -ENOMEM;
+
+ property->length = 1;
+ property->value.immediate = value;
+
+ list_add_tail(&property->list, &parent->properties);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tb_property_add_immediate);
+
+/**
+ * tb_property_add_data() - Adds arbitrary data property to directory
+ * @parent: Directory to add the property
+ * @key: Key for the property
+ * @buf: Data buffer to add
+ * @buflen: Number of bytes in the data buffer
+ *
+ * Function takes a copy of @buf and adds it to the directory.
+ */
+int tb_property_add_data(struct tb_property_dir *parent, const char *key,
+ const void *buf, size_t buflen)
+{
+ /* Need to pad to dword boundary */
+ size_t size = round_up(buflen, 4);
+ struct tb_property *property;
+
+ if (!tb_property_key_valid(key))
+ return -EINVAL;
+
+ property = tb_property_alloc(key, TB_PROPERTY_TYPE_DATA);
+ if (!property)
+ return -ENOMEM;
+
+ property->length = size / 4;
+ property->value.data = kzalloc(size, GFP_KERNEL);
+ memcpy(property->value.data, buf, buflen);
+
+ list_add_tail(&property->list, &parent->properties);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tb_property_add_data);
+
+/**
+ * tb_property_add_text() - Adds string property to directory
+ * @parent: Directory to add the property
+ * @key: Key for the property
+ * @text: String to add
+ *
+ * Function takes a copy of @text and adds it to the directory.
+ */
+int tb_property_add_text(struct tb_property_dir *parent, const char *key,
+ const char *text)
+{
+ /* Need to pad to dword boundary */
+ size_t size = round_up(strlen(text) + 1, 4);
+ struct tb_property *property;
+
+ if (!tb_property_key_valid(key))
+ return -EINVAL;
+
+ property = tb_property_alloc(key, TB_PROPERTY_TYPE_TEXT);
+ if (!property)
+ return -ENOMEM;
+
+ property->length = size / 4;
+ property->value.data = kzalloc(size, GFP_KERNEL);
+ strcpy(property->value.text, text);
+
+ list_add_tail(&property->list, &parent->properties);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tb_property_add_text);
+
+/**
+ * tb_property_add_dir() - Adds a directory to the parent directory
+ * @parent: Directory to add the property
+ * @key: Key for the property
+ * @dir: Directory to add
+ */
+int tb_property_add_dir(struct tb_property_dir *parent, const char *key,
+ struct tb_property_dir *dir)
+{
+ struct tb_property *property;
+
+ if (!tb_property_key_valid(key))
+ return -EINVAL;
+
+ property = tb_property_alloc(key, TB_PROPERTY_TYPE_DIRECTORY);
+ if (!property)
+ return -ENOMEM;
+
+ property->value.dir = dir;
+
+ list_add_tail(&property->list, &parent->properties);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tb_property_add_dir);
+
+/**
+ * tb_property_remove() - Removes property from a parent directory
+ * @property: Property to remove
+ *
+ * Note memory for @property is released as well so it is not allowed to
+ * touch the object after call to this function.
+ */
+void tb_property_remove(struct tb_property *property)
+{
+ list_del(&property->list);
+ kfree(property);
+}
+EXPORT_SYMBOL_GPL(tb_property_remove);
+
+/**
+ * tb_property_find() - Find a property from a directory
+ * @dir: Directory where the property is searched
+ * @key: Key to look for
+ * @type: Type of the property
+ *
+ * Finds and returns property from the given directory. Does not recurse
+ * into sub-directories. Returns %NULL if the property was not found.
+ */
+struct tb_property *tb_property_find(struct tb_property_dir *dir,
+ const char *key, enum tb_property_type type)
+{
+ struct tb_property *property;
+
+ list_for_each_entry(property, &dir->properties, list) {
+ if (property->type == type && !strcmp(property->key, key))
+ return property;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tb_property_find);
+
+/**
+ * tb_property_get_next() - Get next property from directory
+ * @dir: Directory holding properties
+ * @prev: Previous property in the directory (%NULL returns the first)
+ */
+struct tb_property *tb_property_get_next(struct tb_property_dir *dir,
+ struct tb_property *prev)
+{
+ if (prev) {
+ if (list_is_last(&prev->list, &dir->properties))
+ return NULL;
+ return list_next_entry(prev, list);
+ }
+ return list_first_entry_or_null(&dir->properties, struct tb_property,
+ list);
+}
+EXPORT_SYMBOL_GPL(tb_property_get_next);
diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 53f40c57df59..dfc357d33e1e 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -171,11 +171,11 @@ static int nvm_authenticate_host(struct tb_switch *sw)
/*
* Root switch NVM upgrade requires that we disconnect the
- * existing PCIe paths first (in case it is not in safe mode
+ * existing paths first (in case it is not in safe mode
* already).
*/
if (!sw->safe_mode) {
- ret = tb_domain_disconnect_pcie_paths(sw->tb);
+ ret = tb_domain_disconnect_all_paths(sw->tb);
if (ret)
return ret;
/*
@@ -1363,6 +1363,9 @@ void tb_switch_remove(struct tb_switch *sw)
if (sw->ports[i].remote)
tb_switch_remove(sw->ports[i].remote->sw);
sw->ports[i].remote = NULL;
+ if (sw->ports[i].xdomain)
+ tb_xdomain_remove(sw->ports[i].xdomain);
+ sw->ports[i].xdomain = NULL;
}
if (!sw->is_unplugged)
diff --git a/drivers/thunderbolt/tb.h b/drivers/thunderbolt/tb.h
index e0deee4f1eb0..74af9d4929ab 100644
--- a/drivers/thunderbolt/tb.h
+++ b/drivers/thunderbolt/tb.h
@@ -9,6 +9,7 @@
#include <linux/nvmem-provider.h>
#include <linux/pci.h>
+#include <linux/thunderbolt.h>
#include <linux/uuid.h>
#include "tb_regs.h"
@@ -39,23 +40,7 @@ struct tb_switch_nvm {
bool authenticating;
};
-/**
- * enum tb_security_level - Thunderbolt security level
- * @TB_SECURITY_NONE: No security, legacy mode
- * @TB_SECURITY_USER: User approval required at minimum
- * @TB_SECURITY_SECURE: One time saved key required at minimum
- * @TB_SECURITY_DPONLY: Only tunnel Display port (and USB)
- */
-enum tb_security_level {
- TB_SECURITY_NONE,
- TB_SECURITY_USER,
- TB_SECURITY_SECURE,
- TB_SECURITY_DPONLY,
-};
-
#define TB_SWITCH_KEY_SIZE 32
-/* Each physical port contains 2 links on modern controllers */
-#define TB_SWITCH_LINKS_PER_PHY_PORT 2
/**
* struct tb_switch - a thunderbolt switch
@@ -125,14 +110,25 @@ struct tb_switch {
/**
* struct tb_port - a thunderbolt port, part of a tb_switch
+ * @config: Cached port configuration read from registers
+ * @sw: Switch the port belongs to
+ * @remote: Remote port (%NULL if not connected)
+ * @xdomain: Remote host (%NULL if not connected)
+ * @cap_phy: Offset, zero if not found
+ * @port: Port number on switch
+ * @disabled: Disabled by eeprom
+ * @dual_link_port: If the switch is connected using two ports, points
+ * to the other port.
+ * @link_nr: Is this primary or secondary port on the dual_link.
*/
struct tb_port {
struct tb_regs_port_header config;
struct tb_switch *sw;
- struct tb_port *remote; /* remote port, NULL if not connected */
- int cap_phy; /* offset, zero if not found */
- u8 port; /* port number on switch */
- bool disabled; /* disabled by eeprom */
+ struct tb_port *remote;
+ struct tb_xdomain *xdomain;
+ int cap_phy;
+ u8 port;
+ bool disabled;
struct tb_port *dual_link_port;
u8 link_nr:1;
};
@@ -205,6 +201,8 @@ struct tb_path {
* @add_switch_key: Add key to switch
* @challenge_switch_key: Challenge switch using key
* @disconnect_pcie_paths: Disconnects PCIe paths before NVM update
+ * @approve_xdomain_paths: Approve (establish) XDomain DMA paths
+ * @disconnect_xdomain_paths: Disconnect XDomain DMA paths
*/
struct tb_cm_ops {
int (*driver_ready)(struct tb *tb);
@@ -221,33 +219,8 @@ struct tb_cm_ops {
int (*challenge_switch_key)(struct tb *tb, struct tb_switch *sw,
const u8 *challenge, u8 *response);
int (*disconnect_pcie_paths)(struct tb *tb);
-};
-
-/**
- * struct tb - main thunderbolt bus structure
- * @dev: Domain device
- * @lock: Big lock. Must be held when accessing any struct
- * tb_switch / struct tb_port.
- * @nhi: Pointer to the NHI structure
- * @ctl: Control channel for this domain
- * @wq: Ordered workqueue for all domain specific work
- * @root_switch: Root switch of this domain
- * @cm_ops: Connection manager specific operations vector
- * @index: Linux assigned domain number
- * @security_level: Current security level
- * @privdata: Private connection manager specific data
- */
-struct tb {
- struct device dev;
- struct mutex lock;
- struct tb_nhi *nhi;
- struct tb_ctl *ctl;
- struct workqueue_struct *wq;
- struct tb_switch *root_switch;
- const struct tb_cm_ops *cm_ops;
- int index;
- enum tb_security_level security_level;
- unsigned long privdata[0];
+ int (*approve_xdomain_paths)(struct tb *tb, struct tb_xdomain *xd);
+ int (*disconnect_xdomain_paths)(struct tb *tb, struct tb_xdomain *xd);
};
static inline void *tb_priv(struct tb *tb)
@@ -368,13 +341,14 @@ static inline int tb_port_write(struct tb_port *port, const void *buffer,
struct tb *icm_probe(struct tb_nhi *nhi);
struct tb *tb_probe(struct tb_nhi *nhi);
-extern struct bus_type tb_bus_type;
extern struct device_type tb_domain_type;
extern struct device_type tb_switch_type;
int tb_domain_init(void);
void tb_domain_exit(void);
void tb_switch_exit(void);
+int tb_xdomain_init(void);
+void tb_xdomain_exit(void);
struct tb *tb_domain_alloc(struct tb_nhi *nhi, size_t privsize);
int tb_domain_add(struct tb *tb);
@@ -387,6 +361,9 @@ int tb_domain_approve_switch(struct tb *tb, struct tb_switch *sw);
int tb_domain_approve_switch_key(struct tb *tb, struct tb_switch *sw);
int tb_domain_challenge_switch_key(struct tb *tb, struct tb_switch *sw);
int tb_domain_disconnect_pcie_paths(struct tb *tb);
+int tb_domain_approve_xdomain_paths(struct tb *tb, struct tb_xdomain *xd);
+int tb_domain_disconnect_xdomain_paths(struct tb *tb, struct tb_xdomain *xd);
+int tb_domain_disconnect_all_paths(struct tb *tb);
static inline void tb_domain_put(struct tb *tb)
{
@@ -409,11 +386,6 @@ struct tb_switch *tb_switch_find_by_link_depth(struct tb *tb, u8 link,
u8 depth);
struct tb_switch *tb_switch_find_by_uuid(struct tb *tb, const uuid_t *uuid);
-static inline unsigned int tb_switch_phy_port_from_link(unsigned int link)
-{
- return (link - 1) / TB_SWITCH_LINKS_PER_PHY_PORT;
-}
-
static inline void tb_switch_put(struct tb_switch *sw)
{
put_device(&sw->dev);
@@ -471,4 +443,14 @@ static inline u64 tb_downstream_route(struct tb_port *port)
| ((u64) port->port << (port->sw->config.depth * 8));
}
+bool tb_xdomain_handle_request(struct tb *tb, enum tb_cfg_pkg_type type,
+ const void *buf, size_t size);
+struct tb_xdomain *tb_xdomain_alloc(struct tb *tb, struct device *parent,
+ u64 route, const uuid_t *local_uuid,
+ const uuid_t *remote_uuid);
+void tb_xdomain_add(struct tb_xdomain *xd);
+void tb_xdomain_remove(struct tb_xdomain *xd);
+struct tb_xdomain *tb_xdomain_find_by_link_depth(struct tb *tb, u8 link,
+ u8 depth);
+
#endif
diff --git a/drivers/thunderbolt/tb_msgs.h b/drivers/thunderbolt/tb_msgs.h
index de6441e4a060..b0a092baa605 100644
--- a/drivers/thunderbolt/tb_msgs.h
+++ b/drivers/thunderbolt/tb_msgs.h
@@ -15,23 +15,6 @@
#include <linux/types.h>
#include <linux/uuid.h>
-enum tb_cfg_pkg_type {
- TB_CFG_PKG_READ = 1,
- TB_CFG_PKG_WRITE = 2,
- TB_CFG_PKG_ERROR = 3,
- TB_CFG_PKG_NOTIFY_ACK = 4,
- TB_CFG_PKG_EVENT = 5,
- TB_CFG_PKG_XDOMAIN_REQ = 6,
- TB_CFG_PKG_XDOMAIN_RESP = 7,
- TB_CFG_PKG_OVERRIDE = 8,
- TB_CFG_PKG_RESET = 9,
- TB_CFG_PKG_ICM_EVENT = 10,
- TB_CFG_PKG_ICM_CMD = 11,
- TB_CFG_PKG_ICM_RESP = 12,
- TB_CFG_PKG_PREPARE_TO_SLEEP = 0xd,
-
-};
-
enum tb_cfg_space {
TB_CFG_HOPS = 0,
TB_CFG_PORT = 1,
@@ -118,11 +101,14 @@ enum icm_pkg_code {
ICM_CHALLENGE_DEVICE = 0x5,
ICM_ADD_DEVICE_KEY = 0x6,
ICM_GET_ROUTE = 0xa,
+ ICM_APPROVE_XDOMAIN = 0x10,
};
enum icm_event_code {
ICM_EVENT_DEVICE_CONNECTED = 3,
ICM_EVENT_DEVICE_DISCONNECTED = 4,
+ ICM_EVENT_XDOMAIN_CONNECTED = 6,
+ ICM_EVENT_XDOMAIN_DISCONNECTED = 7,
};
struct icm_pkg_header {
@@ -130,7 +116,7 @@ struct icm_pkg_header {
u8 flags;
u8 packet_id;
u8 total_packets;
-} __packed;
+};
#define ICM_FLAGS_ERROR BIT(0)
#define ICM_FLAGS_NO_KEY BIT(1)
@@ -139,20 +125,20 @@ struct icm_pkg_header {
struct icm_pkg_driver_ready {
struct icm_pkg_header hdr;
-} __packed;
+};
struct icm_pkg_driver_ready_response {
struct icm_pkg_header hdr;
u8 romver;
u8 ramver;
u16 security_level;
-} __packed;
+};
/* Falcon Ridge & Alpine Ridge common messages */
struct icm_fr_pkg_get_topology {
struct icm_pkg_header hdr;
-} __packed;
+};
#define ICM_GET_TOPOLOGY_PACKETS 14
@@ -167,7 +153,7 @@ struct icm_fr_pkg_get_topology_response {
u32 reserved[2];
u32 ports[16];
u32 port_hop_info[16];
-} __packed;
+};
#define ICM_SWITCH_USED BIT(0)
#define ICM_SWITCH_UPSTREAM_PORT_MASK GENMASK(7, 1)
@@ -184,7 +170,7 @@ struct icm_fr_event_device_connected {
u8 connection_id;
u16 link_info;
u32 ep_name[55];
-} __packed;
+};
#define ICM_LINK_INFO_LINK_MASK 0x7
#define ICM_LINK_INFO_DEPTH_SHIFT 4
@@ -197,13 +183,32 @@ struct icm_fr_pkg_approve_device {
u8 connection_key;
u8 connection_id;
u16 reserved;
-} __packed;
+};
struct icm_fr_event_device_disconnected {
struct icm_pkg_header hdr;
u16 reserved;
u16 link_info;
-} __packed;
+};
+
+struct icm_fr_event_xdomain_connected {
+ struct icm_pkg_header hdr;
+ u16 reserved;
+ u16 link_info;
+ uuid_t remote_uuid;
+ uuid_t local_uuid;
+ u32 local_route_hi;
+ u32 local_route_lo;
+ u32 remote_route_hi;
+ u32 remote_route_lo;
+};
+
+struct icm_fr_event_xdomain_disconnected {
+ struct icm_pkg_header hdr;
+ u16 reserved;
+ u16 link_info;
+ uuid_t remote_uuid;
+};
struct icm_fr_pkg_add_device_key {
struct icm_pkg_header hdr;
@@ -212,7 +217,7 @@ struct icm_fr_pkg_add_device_key {
u8 connection_id;
u16 reserved;
u32 key[8];
-} __packed;
+};
struct icm_fr_pkg_add_device_key_response {
struct icm_pkg_header hdr;
@@ -220,7 +225,7 @@ struct icm_fr_pkg_add_device_key_response {
u8 connection_key;
u8 connection_id;
u16 reserved;
-} __packed;
+};
struct icm_fr_pkg_challenge_device {
struct icm_pkg_header hdr;
@@ -229,7 +234,7 @@ struct icm_fr_pkg_challenge_device {
u8 connection_id;
u16 reserved;
u32 challenge[8];
-} __packed;
+};
struct icm_fr_pkg_challenge_device_response {
struct icm_pkg_header hdr;
@@ -239,7 +244,29 @@ struct icm_fr_pkg_challenge_device_response {
u16 reserved;
u32 challenge[8];
u32 response[8];
-} __packed;
+};
+
+struct icm_fr_pkg_approve_xdomain {
+ struct icm_pkg_header hdr;
+ u16 reserved;
+ u16 link_info;
+ uuid_t remote_uuid;
+ u16 transmit_path;
+ u16 transmit_ring;
+ u16 receive_path;
+ u16 receive_ring;
+};
+
+struct icm_fr_pkg_approve_xdomain_response {
+ struct icm_pkg_header hdr;
+ u16 reserved;
+ u16 link_info;
+ uuid_t remote_uuid;
+ u16 transmit_path;
+ u16 transmit_ring;
+ u16 receive_path;
+ u16 receive_ring;
+};
/* Alpine Ridge only messages */
@@ -247,7 +274,7 @@ struct icm_ar_pkg_get_route {
struct icm_pkg_header hdr;
u16 reserved;
u16 link_info;
-} __packed;
+};
struct icm_ar_pkg_get_route_response {
struct icm_pkg_header hdr;
@@ -255,6 +282,85 @@ struct icm_ar_pkg_get_route_response {
u16 link_info;
u32 route_hi;
u32 route_lo;
-} __packed;
+};
+
+/* XDomain messages */
+
+struct tb_xdomain_header {
+ u32 route_hi;
+ u32 route_lo;
+ u32 length_sn;
+};
+
+#define TB_XDOMAIN_LENGTH_MASK GENMASK(5, 0)
+#define TB_XDOMAIN_SN_MASK GENMASK(28, 27)
+#define TB_XDOMAIN_SN_SHIFT 27
+
+enum tb_xdp_type {
+ UUID_REQUEST_OLD = 1,
+ UUID_RESPONSE = 2,
+ PROPERTIES_REQUEST,
+ PROPERTIES_RESPONSE,
+ PROPERTIES_CHANGED_REQUEST,
+ PROPERTIES_CHANGED_RESPONSE,
+ ERROR_RESPONSE,
+ UUID_REQUEST = 12,
+};
+
+struct tb_xdp_header {
+ struct tb_xdomain_header xd_hdr;
+ uuid_t uuid;
+ u32 type;
+};
+
+struct tb_xdp_properties {
+ struct tb_xdp_header hdr;
+ uuid_t src_uuid;
+ uuid_t dst_uuid;
+ u16 offset;
+ u16 reserved;
+};
+
+struct tb_xdp_properties_response {
+ struct tb_xdp_header hdr;
+ uuid_t src_uuid;
+ uuid_t dst_uuid;
+ u16 offset;
+ u16 data_length;
+ u32 generation;
+ u32 data[0];
+};
+
+/*
+ * Max length of data array single XDomain property response is allowed
+ * to carry.
+ */
+#define TB_XDP_PROPERTIES_MAX_DATA_LENGTH \
+ (((256 - 4 - sizeof(struct tb_xdp_properties_response))) / 4)
+
+/* Maximum size of the total property block in dwords we allow */
+#define TB_XDP_PROPERTIES_MAX_LENGTH 500
+
+struct tb_xdp_properties_changed {
+ struct tb_xdp_header hdr;
+ uuid_t src_uuid;
+};
+
+struct tb_xdp_properties_changed_response {
+ struct tb_xdp_header hdr;
+};
+
+enum tb_xdp_error {
+ ERROR_SUCCESS,
+ ERROR_UNKNOWN_PACKET,
+ ERROR_UNKNOWN_DOMAIN,
+ ERROR_NOT_SUPPORTED,
+ ERROR_NOT_READY,
+};
+
+struct tb_xdp_error_response {
+ struct tb_xdp_header hdr;
+ u32 error;
+};
#endif
diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
new file mode 100644
index 000000000000..ff8d91189e99
--- /dev/null
+++ b/drivers/thunderbolt/xdomain.c
@@ -0,0 +1,1579 @@
+/*
+ * Thunderbolt XDomain discovery protocol support
+ *
+ * Copyright (C) 2017, Intel Corporation
+ * Authors: Michael Jamet <michael.jamet@intel.com>
+ * Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/kmod.h>
+#include <linux/module.h>
+#include <linux/utsname.h>
+#include <linux/uuid.h>
+#include <linux/workqueue.h>
+
+#include "tb.h"
+
+#define XDOMAIN_DEFAULT_TIMEOUT 5000 /* ms */
+#define XDOMAIN_PROPERTIES_RETRIES 60
+#define XDOMAIN_PROPERTIES_CHANGED_RETRIES 10
+
+struct xdomain_request_work {
+ struct work_struct work;
+ struct tb_xdp_header *pkg;
+ struct tb *tb;
+};
+
+/* Serializes access to the properties and protocol handlers below */
+static DEFINE_MUTEX(xdomain_lock);
+
+/* Properties exposed to the remote domains */
+static struct tb_property_dir *xdomain_property_dir;
+static u32 *xdomain_property_block;
+static u32 xdomain_property_block_len;
+static u32 xdomain_property_block_gen;
+
+/* Additional protocol handlers */
+static LIST_HEAD(protocol_handlers);
+
+/* UUID for XDomain discovery protocol: b638d70e-42ff-40bb-97c2-90e2c0b2ff07 */
+static const uuid_t tb_xdp_uuid =
+ UUID_INIT(0xb638d70e, 0x42ff, 0x40bb,
+ 0x97, 0xc2, 0x90, 0xe2, 0xc0, 0xb2, 0xff, 0x07);
+
+static bool tb_xdomain_match(const struct tb_cfg_request *req,
+ const struct ctl_pkg *pkg)
+{
+ switch (pkg->frame.eof) {
+ case TB_CFG_PKG_ERROR:
+ return true;
+
+ case TB_CFG_PKG_XDOMAIN_RESP: {
+ const struct tb_xdp_header *res_hdr = pkg->buffer;
+ const struct tb_xdp_header *req_hdr = req->request;
+ u32 req_seq, res_seq;
+
+ if (pkg->frame.size < req->response_size / 4)
+ return false;
+
+ /* Make sure route matches */
+ if ((res_hdr->xd_hdr.route_hi & ~BIT(31)) !=
+ req_hdr->xd_hdr.route_hi)
+ return false;
+ if ((res_hdr->xd_hdr.route_lo) != req_hdr->xd_hdr.route_lo)
+ return false;
+
+ /* Then check that the sequence number matches */
+ res_seq = res_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK;
+ res_seq >>= TB_XDOMAIN_SN_SHIFT;
+ req_seq = req_hdr->xd_hdr.length_sn & TB_XDOMAIN_SN_MASK;
+ req_seq >>= TB_XDOMAIN_SN_SHIFT;
+ if (res_seq != req_seq)
+ return false;
+
+ /* Check that the XDomain protocol matches */
+ if (!uuid_equal(&res_hdr->uuid, &req_hdr->uuid))
+ return false;
+
+ return true;
+ }
+
+ default:
+ return false;
+ }
+}
+
+static bool tb_xdomain_copy(struct tb_cfg_request *req,
+ const struct ctl_pkg *pkg)
+{
+ memcpy(req->response, pkg->buffer, req->response_size);
+ req->result.err = 0;
+ return true;
+}
+
+static void response_ready(void *data)
+{
+ tb_cfg_request_put(data);
+}
+
+static int __tb_xdomain_response(struct tb_ctl *ctl, const void *response,
+ size_t size, enum tb_cfg_pkg_type type)
+{
+ struct tb_cfg_request *req;
+
+ req = tb_cfg_request_alloc();
+ if (!req)
+ return -ENOMEM;
+
+ req->match = tb_xdomain_match;
+ req->copy = tb_xdomain_copy;
+ req->request = response;
+ req->request_size = size;
+ req->request_type = type;
+
+ return tb_cfg_request(ctl, req, response_ready, req);
+}
+
+/**
+ * tb_xdomain_response() - Send a XDomain response message
+ * @xd: XDomain to send the message
+ * @response: Response to send
+ * @size: Size of the response
+ * @type: PDF type of the response
+ *
+ * This can be used to send a XDomain response message to the other
+ * domain. No response for the message is expected.
+ *
+ * Return: %0 in case of success and negative errno in case of failure
+ */
+int tb_xdomain_response(struct tb_xdomain *xd, const void *response,
+ size_t size, enum tb_cfg_pkg_type type)
+{
+ return __tb_xdomain_response(xd->tb->ctl, response, size, type);
+}
+EXPORT_SYMBOL_GPL(tb_xdomain_response);
+
+static int __tb_xdomain_request(struct tb_ctl *ctl, const void *request,
+ size_t request_size, enum tb_cfg_pkg_type request_type, void *response,
+ size_t response_size, enum tb_cfg_pkg_type response_type,
+ unsigned int timeout_msec)
+{
+ struct tb_cfg_request *req;
+ struct tb_cfg_result res;
+
+ req = tb_cfg_request_alloc();
+ if (!req)
+ return -ENOMEM;
+
+ req->match = tb_xdomain_match;
+ req->copy = tb_xdomain_copy;
+ req->request = request;
+ req->request_size = request_size;
+ req->request_type = request_type;
+ req->response = response;
+ req->response_size = response_size;
+ req->response_type = response_type;
+
+ res = tb_cfg_request_sync(ctl, req, timeout_msec);
+
+ tb_cfg_request_put(req);
+
+ return res.err == 1 ? -EIO : res.err;
+}
+
+/**
+ * tb_xdomain_request() - Send a XDomain request
+ * @xd: XDomain to send the request
+ * @request: Request to send
+ * @request_size: Size of the request in bytes
+ * @request_type: PDF type of the request
+ * @response: Response is copied here
+ * @response_size: Expected size of the response in bytes
+ * @response_type: Expected PDF type of the response
+ * @timeout_msec: Timeout in milliseconds to wait for the response
+ *
+ * This function can be used to send XDomain control channel messages to
+ * the other domain. The function waits until the response is received
+ * or when timeout triggers. Whichever comes first.
+ *
+ * Return: %0 in case of success and negative errno in case of failure
+ */
+int tb_xdomain_request(struct tb_xdomain *xd, const void *request,
+ size_t request_size, enum tb_cfg_pkg_type request_type,
+ void *response, size_t response_size,
+ enum tb_cfg_pkg_type response_type, unsigned int timeout_msec)
+{
+ return __tb_xdomain_request(xd->tb->ctl, request, request_size,
+ request_type, response, response_size,
+ response_type, timeout_msec);
+}
+EXPORT_SYMBOL_GPL(tb_xdomain_request);
+
+static inline void tb_xdp_fill_header(struct tb_xdp_header *hdr, u64 route,
+ u8 sequence, enum tb_xdp_type type, size_t size)
+{
+ u32 length_sn;
+
+ length_sn = (size - sizeof(hdr->xd_hdr)) / 4;
+ length_sn |= (sequence << TB_XDOMAIN_SN_SHIFT) & TB_XDOMAIN_SN_MASK;
+
+ hdr->xd_hdr.route_hi = upper_32_bits(route);
+ hdr->xd_hdr.route_lo = lower_32_bits(route);
+ hdr->xd_hdr.length_sn = length_sn;
+ hdr->type = type;
+ memcpy(&hdr->uuid, &tb_xdp_uuid, sizeof(tb_xdp_uuid));
+}
+
+static int tb_xdp_handle_error(const struct tb_xdp_header *hdr)
+{
+ const struct tb_xdp_error_response *error;
+
+ if (hdr->type != ERROR_RESPONSE)
+ return 0;
+
+ error = (const struct tb_xdp_error_response *)hdr;
+
+ switch (error->error) {
+ case ERROR_UNKNOWN_PACKET:
+ case ERROR_UNKNOWN_DOMAIN:
+ return -EIO;
+ case ERROR_NOT_SUPPORTED:
+ return -ENOTSUPP;
+ case ERROR_NOT_READY:
+ return -EAGAIN;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int tb_xdp_error_response(struct tb_ctl *ctl, u64 route, u8 sequence,
+ enum tb_xdp_error error)
+{
+ struct tb_xdp_error_response res;
+
+ memset(&res, 0, sizeof(res));
+ tb_xdp_fill_header(&res.hdr, route, sequence, ERROR_RESPONSE,
+ sizeof(res));
+ res.error = error;
+
+ return __tb_xdomain_response(ctl, &res, sizeof(res),
+ TB_CFG_PKG_XDOMAIN_RESP);
+}
+
+static int tb_xdp_properties_request(struct tb_ctl *ctl, u64 route,
+ const uuid_t *src_uuid, const uuid_t *dst_uuid, int retry,
+ u32 **block, u32 *generation)
+{
+ struct tb_xdp_properties_response *res;
+ struct tb_xdp_properties req;
+ u16 data_len, len;
+ size_t total_size;
+ u32 *data = NULL;
+ int ret;
+
+ total_size = sizeof(*res) + TB_XDP_PROPERTIES_MAX_DATA_LENGTH * 4;
+ res = kzalloc(total_size, GFP_KERNEL);
+ if (!res)
+ return -ENOMEM;
+
+ memset(&req, 0, sizeof(req));
+ tb_xdp_fill_header(&req.hdr, route, retry % 4, PROPERTIES_REQUEST,
+ sizeof(req));
+ memcpy(&req.src_uuid, src_uuid, sizeof(*src_uuid));
+ memcpy(&req.dst_uuid, dst_uuid, sizeof(*dst_uuid));
+
+ len = 0;
+ data_len = 0;
+
+ do {
+ ret = __tb_xdomain_request(ctl, &req, sizeof(req),
+ TB_CFG_PKG_XDOMAIN_REQ, res,
+ total_size, TB_CFG_PKG_XDOMAIN_RESP,
+ XDOMAIN_DEFAULT_TIMEOUT);
+ if (ret)
+ goto err;
+
+ ret = tb_xdp_handle_error(&res->hdr);
+ if (ret)
+ goto err;
+
+ /*
+ * Package length includes the whole payload without the
+ * XDomain header. Validate first that the package is at
+ * least size of the response structure.
+ */
+ len = res->hdr.xd_hdr.length_sn & TB_XDOMAIN_LENGTH_MASK;
+ if (len < sizeof(*res) / 4) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ len += sizeof(res->hdr.xd_hdr) / 4;
+ len -= sizeof(*res) / 4;
+
+ if (res->offset != req.offset) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * First time allocate block that has enough space for
+ * the whole properties block.
+ */
+ if (!data) {
+ data_len = res->data_length;
+ if (data_len > TB_XDP_PROPERTIES_MAX_LENGTH) {
+ ret = -E2BIG;
+ goto err;
+ }
+
+ data = kcalloc(data_len, sizeof(u32), GFP_KERNEL);
+ if (!data) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+
+ memcpy(data + req.offset, res->data, len * 4);
+ req.offset += len;
+ } while (!data_len || req.offset < data_len);
+
+ *block = data;
+ *generation = res->generation;
+
+ kfree(res);
+
+ return data_len;
+
+err:
+ kfree(data);
+ kfree(res);
+
+ return ret;
+}
+
+static int tb_xdp_properties_response(struct tb *tb, struct tb_ctl *ctl,
+ u64 route, u8 sequence, const uuid_t *src_uuid,
+ const struct tb_xdp_properties *req)
+{
+ struct tb_xdp_properties_response *res;
+ size_t total_size;
+ u16 len;
+ int ret;
+
+ /*
+ * Currently we expect all requests to be directed to us. The
+ * protocol supports forwarding, though which we might add
+ * support later on.
+ */
+ if (!uuid_equal(src_uuid, &req->dst_uuid)) {
+ tb_xdp_error_response(ctl, route, sequence,
+ ERROR_UNKNOWN_DOMAIN);
+ return 0;
+ }
+
+ mutex_lock(&xdomain_lock);
+
+ if (req->offset >= xdomain_property_block_len) {
+ mutex_unlock(&xdomain_lock);
+ return -EINVAL;
+ }
+
+ len = xdomain_property_block_len - req->offset;
+ len = min_t(u16, len, TB_XDP_PROPERTIES_MAX_DATA_LENGTH);
+ total_size = sizeof(*res) + len * 4;
+
+ res = kzalloc(total_size, GFP_KERNEL);
+ if (!res) {
+ mutex_unlock(&xdomain_lock);
+ return -ENOMEM;
+ }
+
+ tb_xdp_fill_header(&res->hdr, route, sequence, PROPERTIES_RESPONSE,
+ total_size);
+ res->generation = xdomain_property_block_gen;
+ res->data_length = xdomain_property_block_len;
+ res->offset = req->offset;
+ uuid_copy(&res->src_uuid, src_uuid);
+ uuid_copy(&res->dst_uuid, &req->src_uuid);
+ memcpy(res->data, &xdomain_property_block[req->offset], len * 4);
+
+ mutex_unlock(&xdomain_lock);
+
+ ret = __tb_xdomain_response(ctl, res, total_size,
+ TB_CFG_PKG_XDOMAIN_RESP);
+
+ kfree(res);
+ return ret;
+}
+
+static int tb_xdp_properties_changed_request(struct tb_ctl *ctl, u64 route,
+ int retry, const uuid_t *uuid)
+{
+ struct tb_xdp_properties_changed_response res;
+ struct tb_xdp_properties_changed req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ tb_xdp_fill_header(&req.hdr, route, retry % 4,
+ PROPERTIES_CHANGED_REQUEST, sizeof(req));
+ uuid_copy(&req.src_uuid, uuid);
+
+ memset(&res, 0, sizeof(res));
+ ret = __tb_xdomain_request(ctl, &req, sizeof(req),
+ TB_CFG_PKG_XDOMAIN_REQ, &res, sizeof(res),
+ TB_CFG_PKG_XDOMAIN_RESP,
+ XDOMAIN_DEFAULT_TIMEOUT);
+ if (ret)
+ return ret;
+
+ return tb_xdp_handle_error(&res.hdr);
+}
+
+static int
+tb_xdp_properties_changed_response(struct tb_ctl *ctl, u64 route, u8 sequence)
+{
+ struct tb_xdp_properties_changed_response res;
+
+ memset(&res, 0, sizeof(res));
+ tb_xdp_fill_header(&res.hdr, route, sequence,
+ PROPERTIES_CHANGED_RESPONSE, sizeof(res));
+ return __tb_xdomain_response(ctl, &res, sizeof(res),
+ TB_CFG_PKG_XDOMAIN_RESP);
+}
+
+/**
+ * tb_register_protocol_handler() - Register protocol handler
+ * @handler: Handler to register
+ *
+ * This allows XDomain service drivers to hook into incoming XDomain
+ * messages. After this function is called the service driver needs to
+ * be able to handle calls to callback whenever a package with the
+ * registered protocol is received.
+ */
+int tb_register_protocol_handler(struct tb_protocol_handler *handler)
+{
+ if (!handler->uuid || !handler->callback)
+ return -EINVAL;
+ if (uuid_equal(handler->uuid, &tb_xdp_uuid))
+ return -EINVAL;
+
+ mutex_lock(&xdomain_lock);
+ list_add_tail(&handler->list, &protocol_handlers);
+ mutex_unlock(&xdomain_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tb_register_protocol_handler);
+
+/**
+ * tb_unregister_protocol_handler() - Unregister protocol handler
+ * @handler: Handler to unregister
+ *
+ * Removes the previously registered protocol handler.
+ */
+void tb_unregister_protocol_handler(struct tb_protocol_handler *handler)
+{
+ mutex_lock(&xdomain_lock);
+ list_del_init(&handler->list);
+ mutex_unlock(&xdomain_lock);
+}
+EXPORT_SYMBOL_GPL(tb_unregister_protocol_handler);
+
+static void tb_xdp_handle_request(struct work_struct *work)
+{
+ struct xdomain_request_work *xw = container_of(work, typeof(*xw), work);
+ const struct tb_xdp_header *pkg = xw->pkg;
+ const struct tb_xdomain_header *xhdr = &pkg->xd_hdr;
+ struct tb *tb = xw->tb;
+ struct tb_ctl *ctl = tb->ctl;
+ const uuid_t *uuid;
+ int ret = 0;
+ u32 sequence;
+ u64 route;
+
+ route = ((u64)xhdr->route_hi << 32 | xhdr->route_lo) & ~BIT_ULL(63);
+ sequence = xhdr->length_sn & TB_XDOMAIN_SN_MASK;
+ sequence >>= TB_XDOMAIN_SN_SHIFT;
+
+ mutex_lock(&tb->lock);
+ if (tb->root_switch)
+ uuid = tb->root_switch->uuid;
+ else
+ uuid = NULL;
+ mutex_unlock(&tb->lock);
+
+ if (!uuid) {
+ tb_xdp_error_response(ctl, route, sequence, ERROR_NOT_READY);
+ goto out;
+ }
+
+ switch (pkg->type) {
+ case PROPERTIES_REQUEST:
+ ret = tb_xdp_properties_response(tb, ctl, route, sequence, uuid,
+ (const struct tb_xdp_properties *)pkg);
+ break;
+
+ case PROPERTIES_CHANGED_REQUEST: {
+ const struct tb_xdp_properties_changed *xchg =
+ (const struct tb_xdp_properties_changed *)pkg;
+ struct tb_xdomain *xd;
+
+ ret = tb_xdp_properties_changed_response(ctl, route, sequence);
+
+ /*
+ * Since the properties have been changed, let's update
+ * the xdomain related to this connection as well in
+ * case there is a change in services it offers.
+ */
+ xd = tb_xdomain_find_by_uuid_locked(tb, &xchg->src_uuid);
+ if (xd) {
+ queue_delayed_work(tb->wq, &xd->get_properties_work,
+ msecs_to_jiffies(50));
+ tb_xdomain_put(xd);
+ }
+
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ if (ret) {
+ tb_warn(tb, "failed to send XDomain response for %#x\n",
+ pkg->type);
+ }
+
+out:
+ kfree(xw->pkg);
+ kfree(xw);
+}
+
+static void
+tb_xdp_schedule_request(struct tb *tb, const struct tb_xdp_header *hdr,
+ size_t size)
+{
+ struct xdomain_request_work *xw;
+
+ xw = kmalloc(sizeof(*xw), GFP_KERNEL);
+ if (!xw)
+ return;
+
+ INIT_WORK(&xw->work, tb_xdp_handle_request);
+ xw->pkg = kmemdup(hdr, size, GFP_KERNEL);
+ xw->tb = tb;
+
+ queue_work(tb->wq, &xw->work);
+}
+
+/**
+ * tb_register_service_driver() - Register XDomain service driver
+ * @drv: Driver to register
+ *
+ * Registers new service driver from @drv to the bus.
+ */
+int tb_register_service_driver(struct tb_service_driver *drv)
+{
+ drv->driver.bus = &tb_bus_type;
+ return driver_register(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(tb_register_service_driver);
+
+/**
+ * tb_unregister_service_driver() - Unregister XDomain service driver
+ * @xdrv: Driver to unregister
+ *
+ * Unregisters XDomain service driver from the bus.
+ */
+void tb_unregister_service_driver(struct tb_service_driver *drv)
+{
+ driver_unregister(&drv->driver);
+}
+EXPORT_SYMBOL_GPL(tb_unregister_service_driver);
+
+static ssize_t key_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_service *svc = container_of(dev, struct tb_service, dev);
+
+ /*
+ * It should be null terminated but anything else is pretty much
+ * allowed.
+ */
+ return sprintf(buf, "%*pEp\n", (int)strlen(svc->key), svc->key);
+}
+static DEVICE_ATTR_RO(key);
+
+static int get_modalias(struct tb_service *svc, char *buf, size_t size)
+{
+ return snprintf(buf, size, "tbsvc:k%sp%08Xv%08Xr%08X", svc->key,
+ svc->prtcid, svc->prtcvers, svc->prtcrevs);
+}
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_service *svc = container_of(dev, struct tb_service, dev);
+
+ /* Full buffer size except new line and null termination */
+ get_modalias(svc, buf, PAGE_SIZE - 2);
+ return sprintf(buf, "%s\n", buf);
+}
+static DEVICE_ATTR_RO(modalias);
+
+static ssize_t prtcid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_service *svc = container_of(dev, struct tb_service, dev);
+
+ return sprintf(buf, "%u\n", svc->prtcid);
+}
+static DEVICE_ATTR_RO(prtcid);
+
+static ssize_t prtcvers_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_service *svc = container_of(dev, struct tb_service, dev);
+
+ return sprintf(buf, "%u\n", svc->prtcvers);
+}
+static DEVICE_ATTR_RO(prtcvers);
+
+static ssize_t prtcrevs_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_service *svc = container_of(dev, struct tb_service, dev);
+
+ return sprintf(buf, "%u\n", svc->prtcrevs);
+}
+static DEVICE_ATTR_RO(prtcrevs);
+
+static ssize_t prtcstns_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_service *svc = container_of(dev, struct tb_service, dev);
+
+ return sprintf(buf, "0x%08x\n", svc->prtcstns);
+}
+static DEVICE_ATTR_RO(prtcstns);
+
+static struct attribute *tb_service_attrs[] = {
+ &dev_attr_key.attr,
+ &dev_attr_modalias.attr,
+ &dev_attr_prtcid.attr,
+ &dev_attr_prtcvers.attr,
+ &dev_attr_prtcrevs.attr,
+ &dev_attr_prtcstns.attr,
+ NULL,
+};
+
+static struct attribute_group tb_service_attr_group = {
+ .attrs = tb_service_attrs,
+};
+
+static const struct attribute_group *tb_service_attr_groups[] = {
+ &tb_service_attr_group,
+ NULL,
+};
+
+static int tb_service_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ struct tb_service *svc = container_of(dev, struct tb_service, dev);
+ char modalias[64];
+
+ get_modalias(svc, modalias, sizeof(modalias));
+ return add_uevent_var(env, "MODALIAS=%s", modalias);
+}
+
+static void tb_service_release(struct device *dev)
+{
+ struct tb_service *svc = container_of(dev, struct tb_service, dev);
+ struct tb_xdomain *xd = tb_service_parent(svc);
+
+ ida_simple_remove(&xd->service_ids, svc->id);
+ kfree(svc->key);
+ kfree(svc);
+}
+
+struct device_type tb_service_type = {
+ .name = "thunderbolt_service",
+ .groups = tb_service_attr_groups,
+ .uevent = tb_service_uevent,
+ .release = tb_service_release,
+};
+EXPORT_SYMBOL_GPL(tb_service_type);
+
+static int remove_missing_service(struct device *dev, void *data)
+{
+ struct tb_xdomain *xd = data;
+ struct tb_service *svc;
+
+ svc = tb_to_service(dev);
+ if (!svc)
+ return 0;
+
+ if (!tb_property_find(xd->properties, svc->key,
+ TB_PROPERTY_TYPE_DIRECTORY))
+ device_unregister(dev);
+
+ return 0;
+}
+
+static int find_service(struct device *dev, void *data)
+{
+ const struct tb_property *p = data;
+ struct tb_service *svc;
+
+ svc = tb_to_service(dev);
+ if (!svc)
+ return 0;
+
+ return !strcmp(svc->key, p->key);
+}
+
+static int populate_service(struct tb_service *svc,
+ struct tb_property *property)
+{
+ struct tb_property_dir *dir = property->value.dir;
+ struct tb_property *p;
+
+ /* Fill in standard properties */
+ p = tb_property_find(dir, "prtcid", TB_PROPERTY_TYPE_VALUE);
+ if (p)
+ svc->prtcid = p->value.immediate;
+ p = tb_property_find(dir, "prtcvers", TB_PROPERTY_TYPE_VALUE);
+ if (p)
+ svc->prtcvers = p->value.immediate;
+ p = tb_property_find(dir, "prtcrevs", TB_PROPERTY_TYPE_VALUE);
+ if (p)
+ svc->prtcrevs = p->value.immediate;
+ p = tb_property_find(dir, "prtcstns", TB_PROPERTY_TYPE_VALUE);
+ if (p)
+ svc->prtcstns = p->value.immediate;
+
+ svc->key = kstrdup(property->key, GFP_KERNEL);
+ if (!svc->key)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void enumerate_services(struct tb_xdomain *xd)
+{
+ struct tb_service *svc;
+ struct tb_property *p;
+ struct device *dev;
+
+ /*
+ * First remove all services that are not available anymore in
+ * the updated property block.
+ */
+ device_for_each_child_reverse(&xd->dev, xd, remove_missing_service);
+
+ /* Then re-enumerate properties creating new services as we go */
+ tb_property_for_each(xd->properties, p) {
+ if (p->type != TB_PROPERTY_TYPE_DIRECTORY)
+ continue;
+
+ /* If the service exists already we are fine */
+ dev = device_find_child(&xd->dev, p, find_service);
+ if (dev) {
+ put_device(dev);
+ continue;
+ }
+
+ svc = kzalloc(sizeof(*svc), GFP_KERNEL);
+ if (!svc)
+ break;
+
+ if (populate_service(svc, p)) {
+ kfree(svc);
+ break;
+ }
+
+ svc->id = ida_simple_get(&xd->service_ids, 0, 0, GFP_KERNEL);
+ svc->dev.bus = &tb_bus_type;
+ svc->dev.type = &tb_service_type;
+ svc->dev.parent = &xd->dev;
+ dev_set_name(&svc->dev, "%s.%d", dev_name(&xd->dev), svc->id);
+
+ if (device_register(&svc->dev)) {
+ put_device(&svc->dev);
+ break;
+ }
+ }
+}
+
+static int populate_properties(struct tb_xdomain *xd,
+ struct tb_property_dir *dir)
+{
+ const struct tb_property *p;
+
+ /* Required properties */
+ p = tb_property_find(dir, "deviceid", TB_PROPERTY_TYPE_VALUE);
+ if (!p)
+ return -EINVAL;
+ xd->device = p->value.immediate;
+
+ p = tb_property_find(dir, "vendorid", TB_PROPERTY_TYPE_VALUE);
+ if (!p)
+ return -EINVAL;
+ xd->vendor = p->value.immediate;
+
+ kfree(xd->device_name);
+ xd->device_name = NULL;
+ kfree(xd->vendor_name);
+ xd->vendor_name = NULL;
+
+ /* Optional properties */
+ p = tb_property_find(dir, "deviceid", TB_PROPERTY_TYPE_TEXT);
+ if (p)
+ xd->device_name = kstrdup(p->value.text, GFP_KERNEL);
+ p = tb_property_find(dir, "vendorid", TB_PROPERTY_TYPE_TEXT);
+ if (p)
+ xd->vendor_name = kstrdup(p->value.text, GFP_KERNEL);
+
+ return 0;
+}
+
+/* Called with @xd->lock held */
+static void tb_xdomain_restore_paths(struct tb_xdomain *xd)
+{
+ if (!xd->resume)
+ return;
+
+ xd->resume = false;
+ if (xd->transmit_path) {
+ dev_dbg(&xd->dev, "re-establishing DMA path\n");
+ tb_domain_approve_xdomain_paths(xd->tb, xd);
+ }
+}
+
+static void tb_xdomain_get_properties(struct work_struct *work)
+{
+ struct tb_xdomain *xd = container_of(work, typeof(*xd),
+ get_properties_work.work);
+ struct tb_property_dir *dir;
+ struct tb *tb = xd->tb;
+ bool update = false;
+ u32 *block = NULL;
+ u32 gen = 0;
+ int ret;
+
+ ret = tb_xdp_properties_request(tb->ctl, xd->route, xd->local_uuid,
+ xd->remote_uuid, xd->properties_retries,
+ &block, &gen);
+ if (ret < 0) {
+ if (xd->properties_retries-- > 0) {
+ queue_delayed_work(xd->tb->wq, &xd->get_properties_work,
+ msecs_to_jiffies(1000));
+ } else {
+ /* Give up now */
+ dev_err(&xd->dev,
+ "failed read XDomain properties from %pUb\n",
+ xd->remote_uuid);
+ }
+ return;
+ }
+
+ xd->properties_retries = XDOMAIN_PROPERTIES_RETRIES;
+
+ mutex_lock(&xd->lock);
+
+ /* Only accept newer generation properties */
+ if (xd->properties && gen <= xd->property_block_gen) {
+ /*
+ * On resume it is likely that the properties block is
+ * not changed (unless the other end added or removed
+ * services). However, we need to make sure the existing
+ * DMA paths are restored properly.
+ */
+ tb_xdomain_restore_paths(xd);
+ goto err_free_block;
+ }
+
+ dir = tb_property_parse_dir(block, ret);
+ if (!dir) {
+ dev_err(&xd->dev, "failed to parse XDomain properties\n");
+ goto err_free_block;
+ }
+
+ ret = populate_properties(xd, dir);
+ if (ret) {
+ dev_err(&xd->dev, "missing XDomain properties in response\n");
+ goto err_free_dir;
+ }
+
+ /* Release the existing one */
+ if (xd->properties) {
+ tb_property_free_dir(xd->properties);
+ update = true;
+ }
+
+ xd->properties = dir;
+ xd->property_block_gen = gen;
+
+ tb_xdomain_restore_paths(xd);
+
+ mutex_unlock(&xd->lock);
+
+ kfree(block);
+
+ /*
+ * Now the device should be ready enough so we can add it to the
+ * bus and let userspace know about it. If the device is already
+ * registered, we notify the userspace that it has changed.
+ */
+ if (!update) {
+ if (device_add(&xd->dev)) {
+ dev_err(&xd->dev, "failed to add XDomain device\n");
+ return;
+ }
+ } else {
+ kobject_uevent(&xd->dev.kobj, KOBJ_CHANGE);
+ }
+
+ enumerate_services(xd);
+ return;
+
+err_free_dir:
+ tb_property_free_dir(dir);
+err_free_block:
+ kfree(block);
+ mutex_unlock(&xd->lock);
+}
+
+static void tb_xdomain_properties_changed(struct work_struct *work)
+{
+ struct tb_xdomain *xd = container_of(work, typeof(*xd),
+ properties_changed_work.work);
+ int ret;
+
+ ret = tb_xdp_properties_changed_request(xd->tb->ctl, xd->route,
+ xd->properties_changed_retries, xd->local_uuid);
+ if (ret) {
+ if (xd->properties_changed_retries-- > 0)
+ queue_delayed_work(xd->tb->wq,
+ &xd->properties_changed_work,
+ msecs_to_jiffies(1000));
+ return;
+ }
+
+ xd->properties_changed_retries = XDOMAIN_PROPERTIES_CHANGED_RETRIES;
+}
+
+static ssize_t device_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev);
+
+ return sprintf(buf, "%#x\n", xd->device);
+}
+static DEVICE_ATTR_RO(device);
+
+static ssize_t
+device_name_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev);
+ int ret;
+
+ if (mutex_lock_interruptible(&xd->lock))
+ return -ERESTARTSYS;
+ ret = sprintf(buf, "%s\n", xd->device_name ? xd->device_name : "");
+ mutex_unlock(&xd->lock);
+
+ return ret;
+}
+static DEVICE_ATTR_RO(device_name);
+
+static ssize_t vendor_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev);
+
+ return sprintf(buf, "%#x\n", xd->vendor);
+}
+static DEVICE_ATTR_RO(vendor);
+
+static ssize_t
+vendor_name_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev);
+ int ret;
+
+ if (mutex_lock_interruptible(&xd->lock))
+ return -ERESTARTSYS;
+ ret = sprintf(buf, "%s\n", xd->vendor_name ? xd->vendor_name : "");
+ mutex_unlock(&xd->lock);
+
+ return ret;
+}
+static DEVICE_ATTR_RO(vendor_name);
+
+static ssize_t unique_id_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev);
+
+ return sprintf(buf, "%pUb\n", xd->remote_uuid);
+}
+static DEVICE_ATTR_RO(unique_id);
+
+static struct attribute *xdomain_attrs[] = {
+ &dev_attr_device.attr,
+ &dev_attr_device_name.attr,
+ &dev_attr_unique_id.attr,
+ &dev_attr_vendor.attr,
+ &dev_attr_vendor_name.attr,
+ NULL,
+};
+
+static struct attribute_group xdomain_attr_group = {
+ .attrs = xdomain_attrs,
+};
+
+static const struct attribute_group *xdomain_attr_groups[] = {
+ &xdomain_attr_group,
+ NULL,
+};
+
+static void tb_xdomain_release(struct device *dev)
+{
+ struct tb_xdomain *xd = container_of(dev, struct tb_xdomain, dev);
+
+ put_device(xd->dev.parent);
+
+ tb_property_free_dir(xd->properties);
+ ida_destroy(&xd->service_ids);
+
+ kfree(xd->local_uuid);
+ kfree(xd->remote_uuid);
+ kfree(xd->device_name);
+ kfree(xd->vendor_name);
+ kfree(xd);
+}
+
+static void start_handshake(struct tb_xdomain *xd)
+{
+ xd->properties_retries = XDOMAIN_PROPERTIES_RETRIES;
+ xd->properties_changed_retries = XDOMAIN_PROPERTIES_CHANGED_RETRIES;
+
+ /* Start exchanging properties with the other host */
+ queue_delayed_work(xd->tb->wq, &xd->properties_changed_work,
+ msecs_to_jiffies(100));
+ queue_delayed_work(xd->tb->wq, &xd->get_properties_work,
+ msecs_to_jiffies(1000));
+}
+
+static void stop_handshake(struct tb_xdomain *xd)
+{
+ xd->properties_retries = 0;
+ xd->properties_changed_retries = 0;
+
+ cancel_delayed_work_sync(&xd->get_properties_work);
+ cancel_delayed_work_sync(&xd->properties_changed_work);
+}
+
+static int __maybe_unused tb_xdomain_suspend(struct device *dev)
+{
+ stop_handshake(tb_to_xdomain(dev));
+ return 0;
+}
+
+static int __maybe_unused tb_xdomain_resume(struct device *dev)
+{
+ struct tb_xdomain *xd = tb_to_xdomain(dev);
+
+ /*
+ * Ask tb_xdomain_get_properties() restore any existing DMA
+ * paths after properties are re-read.
+ */
+ xd->resume = true;
+ start_handshake(xd);
+
+ return 0;
+}
+
+static const struct dev_pm_ops tb_xdomain_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(tb_xdomain_suspend, tb_xdomain_resume)
+};
+
+struct device_type tb_xdomain_type = {
+ .name = "thunderbolt_xdomain",
+ .release = tb_xdomain_release,
+ .pm = &tb_xdomain_pm_ops,
+};
+EXPORT_SYMBOL_GPL(tb_xdomain_type);
+
+/**
+ * tb_xdomain_alloc() - Allocate new XDomain object
+ * @tb: Domain where the XDomain belongs
+ * @parent: Parent device (the switch through the connection to the
+ * other domain is reached).
+ * @route: Route string used to reach the other domain
+ * @local_uuid: Our local domain UUID
+ * @remote_uuid: UUID of the other domain
+ *
+ * Allocates new XDomain structure and returns pointer to that. The
+ * object must be released by calling tb_xdomain_put().
+ */
+struct tb_xdomain *tb_xdomain_alloc(struct tb *tb, struct device *parent,
+ u64 route, const uuid_t *local_uuid,
+ const uuid_t *remote_uuid)
+{
+ struct tb_xdomain *xd;
+
+ xd = kzalloc(sizeof(*xd), GFP_KERNEL);
+ if (!xd)
+ return NULL;
+
+ xd->tb = tb;
+ xd->route = route;
+ ida_init(&xd->service_ids);
+ mutex_init(&xd->lock);
+ INIT_DELAYED_WORK(&xd->get_properties_work, tb_xdomain_get_properties);
+ INIT_DELAYED_WORK(&xd->properties_changed_work,
+ tb_xdomain_properties_changed);
+
+ xd->local_uuid = kmemdup(local_uuid, sizeof(uuid_t), GFP_KERNEL);
+ if (!xd->local_uuid)
+ goto err_free;
+
+ xd->remote_uuid = kmemdup(remote_uuid, sizeof(uuid_t), GFP_KERNEL);
+ if (!xd->remote_uuid)
+ goto err_free_local_uuid;
+
+ device_initialize(&xd->dev);
+ xd->dev.parent = get_device(parent);
+ xd->dev.bus = &tb_bus_type;
+ xd->dev.type = &tb_xdomain_type;
+ xd->dev.groups = xdomain_attr_groups;
+ dev_set_name(&xd->dev, "%u-%llx", tb->index, route);
+
+ return xd;
+
+err_free_local_uuid:
+ kfree(xd->local_uuid);
+err_free:
+ kfree(xd);
+
+ return NULL;
+}
+
+/**
+ * tb_xdomain_add() - Add XDomain to the bus
+ * @xd: XDomain to add
+ *
+ * This function starts XDomain discovery protocol handshake and
+ * eventually adds the XDomain to the bus. After calling this function
+ * the caller needs to call tb_xdomain_remove() in order to remove and
+ * release the object regardless whether the handshake succeeded or not.
+ */
+void tb_xdomain_add(struct tb_xdomain *xd)
+{
+ /* Start exchanging properties with the other host */
+ start_handshake(xd);
+}
+
+static int unregister_service(struct device *dev, void *data)
+{
+ device_unregister(dev);
+ return 0;
+}
+
+/**
+ * tb_xdomain_remove() - Remove XDomain from the bus
+ * @xd: XDomain to remove
+ *
+ * This will stop all ongoing configuration work and remove the XDomain
+ * along with any services from the bus. When the last reference to @xd
+ * is released the object will be released as well.
+ */
+void tb_xdomain_remove(struct tb_xdomain *xd)
+{
+ stop_handshake(xd);
+
+ device_for_each_child_reverse(&xd->dev, xd, unregister_service);
+
+ if (!device_is_registered(&xd->dev))
+ put_device(&xd->dev);
+ else
+ device_unregister(&xd->dev);
+}
+
+/**
+ * tb_xdomain_enable_paths() - Enable DMA paths for XDomain connection
+ * @xd: XDomain connection
+ * @transmit_path: HopID of the transmit path the other end is using to
+ * send packets
+ * @transmit_ring: DMA ring used to receive packets from the other end
+ * @receive_path: HopID of the receive path the other end is using to
+ * receive packets
+ * @receive_ring: DMA ring used to send packets to the other end
+ *
+ * The function enables DMA paths accordingly so that after successful
+ * return the caller can send and receive packets using high-speed DMA
+ * path.
+ *
+ * Return: %0 in case of success and negative errno in case of error
+ */
+int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path,
+ u16 transmit_ring, u16 receive_path,
+ u16 receive_ring)
+{
+ int ret;
+
+ mutex_lock(&xd->lock);
+
+ if (xd->transmit_path) {
+ ret = xd->transmit_path == transmit_path ? 0 : -EBUSY;
+ goto exit_unlock;
+ }
+
+ xd->transmit_path = transmit_path;
+ xd->transmit_ring = transmit_ring;
+ xd->receive_path = receive_path;
+ xd->receive_ring = receive_ring;
+
+ ret = tb_domain_approve_xdomain_paths(xd->tb, xd);
+
+exit_unlock:
+ mutex_unlock(&xd->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tb_xdomain_enable_paths);
+
+/**
+ * tb_xdomain_disable_paths() - Disable DMA paths for XDomain connection
+ * @xd: XDomain connection
+ *
+ * This does the opposite of tb_xdomain_enable_paths(). After call to
+ * this the caller is not expected to use the rings anymore.
+ *
+ * Return: %0 in case of success and negative errno in case of error
+ */
+int tb_xdomain_disable_paths(struct tb_xdomain *xd)
+{
+ int ret = 0;
+
+ mutex_lock(&xd->lock);
+ if (xd->transmit_path) {
+ xd->transmit_path = 0;
+ xd->transmit_ring = 0;
+ xd->receive_path = 0;
+ xd->receive_ring = 0;
+
+ ret = tb_domain_disconnect_xdomain_paths(xd->tb, xd);
+ }
+ mutex_unlock(&xd->lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tb_xdomain_disable_paths);
+
+struct tb_xdomain_lookup {
+ const uuid_t *uuid;
+ u8 link;
+ u8 depth;
+};
+
+static struct tb_xdomain *switch_find_xdomain(struct tb_switch *sw,
+ const struct tb_xdomain_lookup *lookup)
+{
+ int i;
+
+ for (i = 1; i <= sw->config.max_port_number; i++) {
+ struct tb_port *port = &sw->ports[i];
+ struct tb_xdomain *xd;
+
+ if (tb_is_upstream_port(port))
+ continue;
+
+ if (port->xdomain) {
+ xd = port->xdomain;
+
+ if (lookup->uuid) {
+ if (uuid_equal(xd->remote_uuid, lookup->uuid))
+ return xd;
+ } else if (lookup->link == xd->link &&
+ lookup->depth == xd->depth) {
+ return xd;
+ }
+ } else if (port->remote) {
+ xd = switch_find_xdomain(port->remote->sw, lookup);
+ if (xd)
+ return xd;
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * tb_xdomain_find_by_uuid() - Find an XDomain by UUID
+ * @tb: Domain where the XDomain belongs to
+ * @uuid: UUID to look for
+ *
+ * Finds XDomain by walking through the Thunderbolt topology below @tb.
+ * The returned XDomain will have its reference count increased so the
+ * caller needs to call tb_xdomain_put() when it is done with the
+ * object.
+ *
+ * This will find all XDomains including the ones that are not yet added
+ * to the bus (handshake is still in progress).
+ *
+ * The caller needs to hold @tb->lock.
+ */
+struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid)
+{
+ struct tb_xdomain_lookup lookup;
+ struct tb_xdomain *xd;
+
+ memset(&lookup, 0, sizeof(lookup));
+ lookup.uuid = uuid;
+
+ xd = switch_find_xdomain(tb->root_switch, &lookup);
+ if (xd) {
+ get_device(&xd->dev);
+ return xd;
+ }
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(tb_xdomain_find_by_uuid);
+
+/**
+ * tb_xdomain_find_by_link_depth() - Find an XDomain by link and depth
+ * @tb: Domain where the XDomain belongs to
+ * @link: Root switch link number
+ * @depth: Depth in the link
+ *
+ * Finds XDomain by walking through the Thunderbolt topology below @tb.
+ * The returned XDomain will have its reference count increased so the
+ * caller needs to call tb_xdomain_put() when it is done with the
+ * object.
+ *
+ * This will find all XDomains including the ones that are not yet added
+ * to the bus (handshake is still in progress).
+ *
+ * The caller needs to hold @tb->lock.
+ */
+struct tb_xdomain *tb_xdomain_find_by_link_depth(struct tb *tb, u8 link,
+ u8 depth)
+{
+ struct tb_xdomain_lookup lookup;
+ struct tb_xdomain *xd;
+
+ memset(&lookup, 0, sizeof(lookup));
+ lookup.link = link;
+ lookup.depth = depth;
+
+ xd = switch_find_xdomain(tb->root_switch, &lookup);
+ if (xd) {
+ get_device(&xd->dev);
+ return xd;
+ }
+
+ return NULL;
+}
+
+bool tb_xdomain_handle_request(struct tb *tb, enum tb_cfg_pkg_type type,
+ const void *buf, size_t size)
+{
+ const struct tb_protocol_handler *handler, *tmp;
+ const struct tb_xdp_header *hdr = buf;
+ unsigned int length;
+ int ret = 0;
+
+ /* We expect the packet is at least size of the header */
+ length = hdr->xd_hdr.length_sn & TB_XDOMAIN_LENGTH_MASK;
+ if (length != size / 4 - sizeof(hdr->xd_hdr) / 4)
+ return true;
+ if (length < sizeof(*hdr) / 4 - sizeof(hdr->xd_hdr) / 4)
+ return true;
+
+ /*
+ * Handle XDomain discovery protocol packets directly here. For
+ * other protocols (based on their UUID) we call registered
+ * handlers in turn.
+ */
+ if (uuid_equal(&hdr->uuid, &tb_xdp_uuid)) {
+ if (type == TB_CFG_PKG_XDOMAIN_REQ) {
+ tb_xdp_schedule_request(tb, hdr, size);
+ return true;
+ }
+ return false;
+ }
+
+ mutex_lock(&xdomain_lock);
+ list_for_each_entry_safe(handler, tmp, &protocol_handlers, list) {
+ if (!uuid_equal(&hdr->uuid, handler->uuid))
+ continue;
+
+ mutex_unlock(&xdomain_lock);
+ ret = handler->callback(buf, size, handler->data);
+ mutex_lock(&xdomain_lock);
+
+ if (ret)
+ break;
+ }
+ mutex_unlock(&xdomain_lock);
+
+ return ret > 0;
+}
+
+static int rebuild_property_block(void)
+{
+ u32 *block, len;
+ int ret;
+
+ ret = tb_property_format_dir(xdomain_property_dir, NULL, 0);
+ if (ret < 0)
+ return ret;
+
+ len = ret;
+
+ block = kcalloc(len, sizeof(u32), GFP_KERNEL);
+ if (!block)
+ return -ENOMEM;
+
+ ret = tb_property_format_dir(xdomain_property_dir, block, len);
+ if (ret) {
+ kfree(block);
+ return ret;
+ }
+
+ kfree(xdomain_property_block);
+ xdomain_property_block = block;
+ xdomain_property_block_len = len;
+ xdomain_property_block_gen++;
+
+ return 0;
+}
+
+static int update_xdomain(struct device *dev, void *data)
+{
+ struct tb_xdomain *xd;
+
+ xd = tb_to_xdomain(dev);
+ if (xd) {
+ queue_delayed_work(xd->tb->wq, &xd->properties_changed_work,
+ msecs_to_jiffies(50));
+ }
+
+ return 0;
+}
+
+static void update_all_xdomains(void)
+{
+ bus_for_each_dev(&tb_bus_type, NULL, NULL, update_xdomain);
+}
+
+static bool remove_directory(const char *key, const struct tb_property_dir *dir)
+{
+ struct tb_property *p;
+
+ p = tb_property_find(xdomain_property_dir, key,
+ TB_PROPERTY_TYPE_DIRECTORY);
+ if (p && p->value.dir == dir) {
+ tb_property_remove(p);
+ return true;
+ }
+ return false;
+}
+
+/**
+ * tb_register_property_dir() - Register property directory to the host
+ * @key: Key (name) of the directory to add
+ * @dir: Directory to add
+ *
+ * Service drivers can use this function to add new property directory
+ * to the host available properties. The other connected hosts are
+ * notified so they can re-read properties of this host if they are
+ * interested.
+ *
+ * Return: %0 on success and negative errno on failure
+ */
+int tb_register_property_dir(const char *key, struct tb_property_dir *dir)
+{
+ int ret;
+
+ if (WARN_ON(!xdomain_property_dir))
+ return -EAGAIN;
+
+ if (!key || strlen(key) > 8)
+ return -EINVAL;
+
+ mutex_lock(&xdomain_lock);
+ if (tb_property_find(xdomain_property_dir, key,
+ TB_PROPERTY_TYPE_DIRECTORY)) {
+ ret = -EEXIST;
+ goto err_unlock;
+ }
+
+ ret = tb_property_add_dir(xdomain_property_dir, key, dir);
+ if (ret)
+ goto err_unlock;
+
+ ret = rebuild_property_block();
+ if (ret) {
+ remove_directory(key, dir);
+ goto err_unlock;
+ }
+
+ mutex_unlock(&xdomain_lock);
+ update_all_xdomains();
+ return 0;
+
+err_unlock:
+ mutex_unlock(&xdomain_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(tb_register_property_dir);
+
+/**
+ * tb_unregister_property_dir() - Removes property directory from host
+ * @key: Key (name) of the directory
+ * @dir: Directory to remove
+ *
+ * This will remove the existing directory from this host and notify the
+ * connected hosts about the change.
+ */
+void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir)
+{
+ int ret = 0;
+
+ mutex_lock(&xdomain_lock);
+ if (remove_directory(key, dir))
+ ret = rebuild_property_block();
+ mutex_unlock(&xdomain_lock);
+
+ if (!ret)
+ update_all_xdomains();
+}
+EXPORT_SYMBOL_GPL(tb_unregister_property_dir);
+
+int tb_xdomain_init(void)
+{
+ int ret;
+
+ xdomain_property_dir = tb_property_create_dir(NULL);
+ if (!xdomain_property_dir)
+ return -ENOMEM;
+
+ /*
+ * Initialize standard set of properties without any service
+ * directories. Those will be added by service drivers
+ * themselves when they are loaded.
+ */
+ tb_property_add_immediate(xdomain_property_dir, "vendorid",
+ PCI_VENDOR_ID_INTEL);
+ tb_property_add_text(xdomain_property_dir, "vendorid", "Intel Corp.");
+ tb_property_add_immediate(xdomain_property_dir, "deviceid", 0x1);
+ tb_property_add_text(xdomain_property_dir, "deviceid",
+ utsname()->nodename);
+ tb_property_add_immediate(xdomain_property_dir, "devicerv", 0x80000100);
+
+ ret = rebuild_property_block();
+ if (ret) {
+ tb_property_free_dir(xdomain_property_dir);
+ xdomain_property_dir = NULL;
+ }
+
+ return ret;
+}
+
+void tb_xdomain_exit(void)
+{
+ kfree(xdomain_property_block);
+ tb_property_free_dir(xdomain_property_dir);
+}
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 58585ec8699e..68677d930e20 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -436,8 +436,8 @@ static bool vhost_exceeds_maxpend(struct vhost_net *net)
struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
struct vhost_virtqueue *vq = &nvq->vq;
- return (nvq->upend_idx + vq->num - VHOST_MAX_PEND) % UIO_MAXIOV
- == nvq->done_idx;
+ return (nvq->upend_idx + UIO_MAXIOV - nvq->done_idx) % UIO_MAXIOV >
+ min_t(unsigned int, VHOST_MAX_PEND, vq->num >> 2);
}
/* Expects to be always run from workqueue - which acts as
@@ -480,11 +480,6 @@ static void handle_tx(struct vhost_net *net)
if (zcopy)
vhost_zerocopy_signal_used(net, vq);
- /* If more outstanding DMAs, queue the work.
- * Handle upend_idx wrap around
- */
- if (unlikely(vhost_exceeds_maxpend(net)))
- break;
head = vhost_net_tx_get_vq_desc(net, vq, vq->iov,
ARRAY_SIZE(vq->iov),
@@ -519,8 +514,7 @@ static void handle_tx(struct vhost_net *net)
len = msg_data_left(&msg);
zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
- && (nvq->upend_idx + 1) % UIO_MAXIOV !=
- nvq->done_idx
+ && !vhost_exceeds_maxpend(net)
&& vhost_net_tx_select_zcopy(net);
/* use msg_control to pass vhost zerocopy ubuf info to skb */
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 82e16556afea..3f03f7888302 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -100,6 +100,7 @@ struct afs_call {
bool send_pages; /* T if data from mapping should be sent */
bool need_attention; /* T if RxRPC poked us */
bool async; /* T if asynchronous */
+ bool upgrade; /* T to request service upgrade */
u16 service_id; /* RxRPC service ID to call */
__be16 port; /* target UDP port */
u32 operation_ID; /* operation ID for an incoming call */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 0bf191f0dbaf..bb1e2caa1720 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -387,7 +387,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
tx_total_len, gfp,
(async ?
afs_wake_up_async_call :
- afs_wake_up_call_waiter));
+ afs_wake_up_call_waiter),
+ call->upgrade);
call->key = NULL;
if (IS_ERR(rxcall)) {
ret = PTR_ERR(rxcall);
@@ -406,7 +407,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
call->request_size);
msg.msg_control = NULL;
msg.msg_controllen = 0;
- msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
+ msg.msg_flags = MSG_WAITALL | (call->send_pages ? MSG_MORE : 0);
/* We have to change the state *before* sending the last packet as
* rxrpc might give us the reply before it returns from sending the
@@ -443,7 +444,7 @@ error_do_abort:
abort_code = 0;
offset = 0;
rxrpc_kernel_recv_data(afs_socket, rxcall, NULL, 0, &offset,
- false, &abort_code);
+ false, &abort_code, &call->service_id);
ret = call->type->abort_to_error(abort_code);
}
error_kill_call:
@@ -471,7 +472,8 @@ static void afs_deliver_to_call(struct afs_call *call)
size_t offset = 0;
ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
NULL, 0, &offset, false,
- &call->abort_code);
+ &call->abort_code,
+ &call->service_id);
trace_afs_recv_data(call, 0, offset, false, ret);
if (ret == -EINPROGRESS || ret == -EAGAIN)
@@ -536,15 +538,26 @@ call_complete:
*/
static int afs_wait_for_call_to_complete(struct afs_call *call)
{
+ signed long rtt2, timeout;
int ret;
+ u64 rtt;
+ u32 life, last_life;
DECLARE_WAITQUEUE(myself, current);
_enter("");
+ rtt = rxrpc_kernel_get_rtt(afs_socket, call->rxcall);
+ rtt2 = nsecs_to_jiffies64(rtt) * 2;
+ if (rtt2 < 2)
+ rtt2 = 2;
+
+ timeout = rtt2;
+ last_life = rxrpc_kernel_check_life(afs_socket, call->rxcall);
+
add_wait_queue(&call->waitq, &myself);
for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_UNINTERRUPTIBLE);
/* deliver any messages that are in the queue */
if (call->state < AFS_CALL_COMPLETE && call->need_attention) {
@@ -554,10 +567,20 @@ static int afs_wait_for_call_to_complete(struct afs_call *call)
continue;
}
- if (call->state == AFS_CALL_COMPLETE ||
- signal_pending(current))
+ if (call->state == AFS_CALL_COMPLETE)
break;
- schedule();
+
+ life = rxrpc_kernel_check_life(afs_socket, call->rxcall);
+ if (timeout == 0 &&
+ life == last_life && signal_pending(current))
+ break;
+
+ if (life != last_life) {
+ timeout = rtt2;
+ last_life = life;
+ }
+
+ timeout = schedule_timeout(timeout);
}
remove_wait_queue(&call->waitq, &myself);
@@ -851,7 +874,8 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall,
buf, count, &call->offset,
- want_more, &call->abort_code);
+ want_more, &call->abort_code,
+ &call->service_id);
trace_afs_recv_data(call, count, call->offset, want_more, ret);
if (ret == 0 || ret == -EAGAIN)
return ret;
diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index 2b038442c352..3ce61342fa31 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -135,6 +135,7 @@ enum virtchnl_ops {
VIRTCHNL_OP_SET_RSS_HENA = 26,
VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27,
VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28,
+ VIRTCHNL_OP_REQUEST_QUEUES = 29,
};
/* This macro is used to generate a compilation error if a structure
@@ -235,6 +236,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource);
#define VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008
#define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010
#define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020
+#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES 0x00000040
#define VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000
#define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000
#define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000
@@ -325,6 +327,21 @@ struct virtchnl_vsi_queue_config_info {
struct virtchnl_queue_pair_info qpair[1];
};
+/* VIRTCHNL_OP_REQUEST_QUEUES
+ * VF sends this message to request the PF to allocate additional queues to
+ * this VF. Each VF gets a guaranteed number of queues on init but asking for
+ * additional queues must be negotiated. This is a best effort request as it
+ * is possible the PF does not have enough queues left to support the request.
+ * If the PF cannot support the number requested it will respond with the
+ * maximum number it is able to support. If the request is successful, PF will
+ * then reset the VF to institute required changes.
+ */
+
+/* VF resource request */
+struct virtchnl_vf_res_request {
+ u16 num_queue_pairs;
+};
+
VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info);
/* VIRTCHNL_OP_CONFIG_IRQ_MAP
@@ -691,6 +708,9 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode,
case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING:
break;
+ case VIRTCHNL_OP_REQUEST_QUEUES:
+ valid_len = sizeof(struct virtchnl_vf_res_request);
+ break;
/* These are always errors coming from the VF. */
case VIRTCHNL_OP_EVENT:
case VIRTCHNL_OP_UNKNOWN:
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index d41d40ac3efd..359b6f5d3d90 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -14,27 +14,46 @@ struct bpf_sock_ops_kern;
extern struct static_key_false cgroup_bpf_enabled_key;
#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+struct bpf_prog_list {
+ struct list_head node;
+ struct bpf_prog *prog;
+};
+
+struct bpf_prog_array;
+
struct cgroup_bpf {
- /*
- * Store two sets of bpf_prog pointers, one for programs that are
- * pinned directly to this cgroup, and one for those that are effective
- * when this cgroup is accessed.
+ /* array of effective progs in this cgroup */
+ struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE];
+
+ /* attached progs to this cgroup and attach flags
+ * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will
+ * have either zero or one element
+ * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS
*/
- struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE];
- struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE];
- bool disallow_override[MAX_BPF_ATTACH_TYPE];
+ struct list_head progs[MAX_BPF_ATTACH_TYPE];
+ u32 flags[MAX_BPF_ATTACH_TYPE];
+
+ /* temp storage for effective prog array used by prog_attach/detach */
+ struct bpf_prog_array __rcu *inactive;
};
void cgroup_bpf_put(struct cgroup *cgrp);
-void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
-
-int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
- struct bpf_prog *prog, enum bpf_attach_type type,
- bool overridable);
-
-/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
-int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type, bool overridable);
+int cgroup_bpf_inherit(struct cgroup *cgrp);
+
+int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, u32 flags);
+int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, u32 flags);
+int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
+
+/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
+int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, u32 flags);
+int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, u32 flags);
+int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+ union bpf_attr __user *uattr);
int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
@@ -96,8 +115,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
-static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
- struct cgroup *parent) {}
+static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f1af7d63d678..1e334b248ff6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -56,6 +56,10 @@ struct bpf_map {
struct work_struct work;
atomic_t usercnt;
struct bpf_map *inner_map_meta;
+ char name[BPF_OBJ_NAME_LEN];
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
};
/* function argument constraints */
@@ -137,6 +141,7 @@ enum bpf_reg_type {
PTR_TO_MAP_VALUE, /* reg points to map element value */
PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */
PTR_TO_STACK, /* reg == frame_pointer + offset */
+ PTR_TO_PACKET_META, /* skb->data - meta_len */
PTR_TO_PACKET, /* reg points to skb->data */
PTR_TO_PACKET_END, /* skb->data + headlen */
};
@@ -155,6 +160,11 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size)
aux->ctx_field_size = size;
}
+struct bpf_prog_ops {
+ int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
+ union bpf_attr __user *uattr);
+};
+
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
@@ -170,8 +180,6 @@ struct bpf_verifier_ops {
const struct bpf_insn *src,
struct bpf_insn *dst,
struct bpf_prog *prog, u32 *target_size);
- int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr,
- union bpf_attr __user *uattr);
};
struct bpf_prog_aux {
@@ -182,10 +190,15 @@ struct bpf_prog_aux {
u32 id;
struct latch_tree_node ksym_tnode;
struct list_head ksym_lnode;
- const struct bpf_verifier_ops *ops;
+ const struct bpf_prog_ops *ops;
struct bpf_map **used_maps;
struct bpf_prog *prog;
struct user_struct *user;
+ u64 load_time; /* ns since boottime */
+ char name[BPF_OBJ_NAME_LEN];
+#ifdef CONFIG_SECURITY
+ void *security;
+#endif
union {
struct work_struct work;
struct rcu_head rcu;
@@ -237,17 +250,59 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
union bpf_attr __user *uattr);
+/* an array of programs to be executed under rcu_lock.
+ *
+ * Typical usage:
+ * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN);
+ *
+ * the structure returned by bpf_prog_array_alloc() should be populated
+ * with program pointers and the last pointer must be NULL.
+ * The user has to keep refcnt on the program and make sure the program
+ * is removed from the array before bpf_prog_put().
+ * The 'struct bpf_prog_array *' should only be replaced with xchg()
+ * since other cpus are walking the array of pointers in parallel.
+ */
+struct bpf_prog_array {
+ struct rcu_head rcu;
+ struct bpf_prog *progs[0];
+};
+
+struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags);
+void bpf_prog_array_free(struct bpf_prog_array __rcu *progs);
+int bpf_prog_array_length(struct bpf_prog_array __rcu *progs);
+int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+ __u32 __user *prog_ids, u32 cnt);
+
+#define BPF_PROG_RUN_ARRAY(array, ctx, func) \
+ ({ \
+ struct bpf_prog **_prog; \
+ u32 _ret = 1; \
+ rcu_read_lock(); \
+ _prog = rcu_dereference(array)->progs; \
+ for (; *_prog; _prog++) \
+ _ret &= func(*_prog, ctx); \
+ rcu_read_unlock(); \
+ _ret; \
+ })
+
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
-#define BPF_PROG_TYPE(_id, _ops) \
- extern const struct bpf_verifier_ops _ops;
+extern const struct file_operations bpf_map_fops;
+extern const struct file_operations bpf_prog_fops;
+
+#define BPF_PROG_TYPE(_id, _name) \
+ extern const struct bpf_prog_ops _name ## _prog_ops; \
+ extern const struct bpf_verifier_ops _name ## _verifier_ops;
#define BPF_MAP_TYPE(_id, _ops) \
extern const struct bpf_map_ops _ops;
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
+extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
+extern const struct bpf_verifier_ops xdp_analyzer_ops;
+
struct bpf_prog *bpf_prog_get(u32 ufd);
struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type);
struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
@@ -269,11 +324,11 @@ void bpf_map_area_free(void *base);
extern int sysctl_unprivileged_bpf_disabled;
-int bpf_map_new_fd(struct bpf_map *map);
+int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
-int bpf_obj_get_user(const char __user *pathname);
+int bpf_obj_get_user(const char __user *pathname, int flags);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
@@ -292,6 +347,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file,
void *key, void *value, u64 map_flags);
int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value);
+int bpf_get_file_flag(int flags);
+
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
* Best-effort only. No barriers here, since it _will_ race with concurrent
@@ -316,6 +373,13 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
void __dev_map_flush(struct bpf_map *map);
+struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
+void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
+void __cpu_map_flush(struct bpf_map *map);
+struct xdp_buff;
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+ struct net_device *dev_rx);
+
/* Return map's numa specified by userspace */
static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
{
@@ -323,7 +387,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
attr->numa_node : NUMA_NO_NODE;
}
-#else
+#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
return ERR_PTR(-EOPNOTSUPP);
@@ -368,7 +432,7 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages)
{
}
-static inline int bpf_obj_get_user(const char __user *pathname)
+static inline int bpf_obj_get_user(const char __user *pathname, int flags)
{
return -EOPNOTSUPP;
}
@@ -386,6 +450,28 @@ static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index)
static inline void __dev_map_flush(struct bpf_map *map)
{
}
+
+static inline
+struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+ return NULL;
+}
+
+static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index)
+{
+}
+
+static inline void __cpu_map_flush(struct bpf_map *map)
+{
+}
+
+struct xdp_buff;
+static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
+ struct xdp_buff *xdp,
+ struct net_device *dev_rx)
+{
+ return 0;
+}
#endif /* CONFIG_BPF_SYSCALL */
#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 6f1a567667b8..36418ad43245 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -1,22 +1,22 @@
/* internal file - do not include directly */
#ifdef CONFIG_NET
-BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
+BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout)
+BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
+BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
#endif
#ifdef CONFIG_BPF_EVENTS
-BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint_prog_ops)
-BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event_prog_ops)
+BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
+BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
+BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
#endif
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
@@ -41,4 +41,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
#ifdef CONFIG_STREAM_PARSER
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
#endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops)
#endif
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index b8d200f60a40..feeaea93d959 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -115,6 +115,21 @@ struct bpf_insn_aux_data {
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
+#define BPF_VERIFIER_TMP_LOG_SIZE 1024
+
+struct bpf_verifer_log {
+ u32 level;
+ char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
+ char __user *ubuf;
+ u32 len_used;
+ u32 len_total;
+};
+
+static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log)
+{
+ return log->len_used >= log->len_total - 1;
+}
+
struct bpf_verifier_env;
struct bpf_ext_analyzer_ops {
int (*insn_hook)(struct bpf_verifier_env *env,
@@ -126,6 +141,7 @@ struct bpf_ext_analyzer_ops {
*/
struct bpf_verifier_env {
struct bpf_prog *prog; /* eBPF program being verified */
+ const struct bpf_verifier_ops *ops;
struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
int stack_size; /* number of states to be processed */
bool strict_alignment; /* perform strict pointer alignment checks */
@@ -139,6 +155,8 @@ struct bpf_verifier_env {
bool allow_ptr_leaks;
bool seen_direct_write;
struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
+
+ struct bpf_verifer_log log;
};
int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index abcda9b458ab..9ac9e3e3d1e5 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -63,6 +63,7 @@
#define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000
#define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000
#define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000
+#define PHY_BRCM_EN_MASTER_MODE 0x00010000
/* Broadcom BCM7xxx specific workarounds */
#define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff)
diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h
index 89f67c1c3160..805d16654459 100644
--- a/include/linux/byteorder/generic.h
+++ b/include/linux/byteorder/generic.h
@@ -170,4 +170,20 @@ static inline void be64_add_cpu(__be64 *var, u64 val)
*var = cpu_to_be64(be64_to_cpu(*var) + val);
}
+static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ dst[i] = cpu_to_be32(src[i]);
+}
+
+static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ dst[i] = be32_to_cpu(src[i]);
+}
+
#endif /* _LINUX_BYTEORDER_GENERIC_H */
diff --git a/include/linux/connector.h b/include/linux/connector.h
index f8fe8637d771..032102b19645 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -22,7 +22,7 @@
#define __CONNECTOR_H
-#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/list.h>
#include <linux/workqueue.h>
@@ -49,7 +49,7 @@ struct cn_callback_id {
struct cn_callback_entry {
struct list_head callback_entry;
- atomic_t refcnt;
+ refcount_t refcnt;
struct cn_queue_dev *pdev;
struct cn_callback_id id;
diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
index a4be70398ce1..f69f98541953 100644
--- a/include/linux/dynamic_queue_limits.h
+++ b/include/linux/dynamic_queue_limits.h
@@ -98,7 +98,7 @@ void dql_completed(struct dql *dql, unsigned int count);
void dql_reset(struct dql *dql);
/* Initialize dql state */
-int dql_init(struct dql *dql, unsigned hold_time);
+void dql_init(struct dql *dql, unsigned int hold_time);
#endif /* _KERNEL_ */
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 4587a4c36923..c77fa3529e15 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -163,6 +163,16 @@ extern int
__ethtool_get_link_ksettings(struct net_device *dev,
struct ethtool_link_ksettings *link_ksettings);
+/**
+ * ethtool_intersect_link_masks - Given two link masks, AND them together
+ * @dst: first mask and where result is stored
+ * @src: second mask to intersect with
+ *
+ * Given two link mode masks, AND them together and save the result in dst.
+ */
+void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
+ struct ethtool_link_ksettings *src);
+
void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
u32 legacy_u32);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 818a0b26249e..cdd78a7beaae 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -481,30 +481,36 @@ struct sk_filter {
struct bpf_prog *prog;
};
-#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi)
+#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi)
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
struct bpf_skb_data_end {
struct qdisc_skb_cb qdisc_cb;
+ void *data_meta;
void *data_end;
};
struct xdp_buff {
void *data;
void *data_end;
+ void *data_meta;
void *data_hard_start;
};
-/* compute the linear packet data range [data, data_end) which
- * will be accessed by cls_bpf, act_bpf and lwt programs
+/* Compute the linear packet data range [data, data_end) which
+ * will be accessed by various program types (cls_bpf, act_bpf,
+ * lwt, ...). Subsystems allowing direct data access must (!)
+ * ensure that cb[] area can be written to when BPF program is
+ * invoked (otherwise cb[] save/restore is necessary).
*/
-static inline void bpf_compute_data_end(struct sk_buff *skb)
+static inline void bpf_compute_data_pointers(struct sk_buff *skb)
{
struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb));
- cb->data_end = skb->data + skb_headlen(skb);
+ cb->data_meta = skb->data - skb_metadata_len(skb);
+ cb->data_end = skb->data + skb_headlen(skb);
}
static inline u8 *bpf_skb_cb(struct sk_buff *skb)
@@ -725,8 +731,22 @@ int xdp_do_redirect(struct net_device *dev,
struct bpf_prog *prog);
void xdp_do_flush_map(void);
+/* Drivers not supporting XDP metadata can use this helper, which
+ * rejects any room expansion for metadata as a result.
+ */
+static __always_inline void
+xdp_set_data_meta_invalid(struct xdp_buff *xdp)
+{
+ xdp->data_meta = xdp->data + 1;
+}
+
+static __always_inline bool
+xdp_data_meta_unsupported(const struct xdp_buff *xdp)
+{
+ return unlikely(xdp->data_meta > xdp->data);
+}
+
void bpf_warn_invalid_xdp_action(u32 act);
-void bpf_warn_invalid_xdp_redirect(u32 ifindex);
struct sock *do_sk_redirect_map(struct sk_buff *skb);
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 55a604ad459f..ee6657a0ed69 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2445,6 +2445,7 @@ enum ieee80211_sa_query_action {
#define WLAN_OUI_TYPE_MICROSOFT_WPA 1
#define WLAN_OUI_TYPE_MICROSOFT_WMM 2
#define WLAN_OUI_TYPE_MICROSOFT_WPS 4
+#define WLAN_OUI_TYPE_MICROSOFT_TPC 8
/*
* WMM/802.11e Tspec Element
diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index 3355efc89781..6756fea18b69 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -31,7 +31,7 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb)
return (struct arphdr *)skb_network_header(skb);
}
-static inline int arp_hdr_len(struct net_device *dev)
+static inline unsigned int arp_hdr_len(const struct net_device *dev)
{
switch (dev->type) {
#if IS_ENABLED(CONFIG_FIREWIRE_NET)
diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 3cd18ac0697f..02639ebea2f0 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -49,6 +49,7 @@ struct br_ip_list {
#define BR_MULTICAST_TO_UNICAST BIT(12)
#define BR_VLAN_TUNNEL BIT(13)
#define BR_BCAST_FLOOD BIT(14)
+#define BR_NEIGH_SUPPRESS BIT(15)
#define BR_DEFAULT_AGEING_TIME (300 * HZ)
@@ -63,6 +64,7 @@ int br_multicast_list_adjacent(struct net_device *dev,
bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto);
bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto);
bool br_multicast_enabled(const struct net_device *dev);
+bool br_multicast_router(const struct net_device *dev);
#else
static inline int br_multicast_list_adjacent(struct net_device *dev,
struct list_head *br_ip_list)
@@ -83,6 +85,10 @@ static inline bool br_multicast_enabled(const struct net_device *dev)
{
return false;
}
+static inline bool br_multicast_router(const struct net_device *dev)
+{
+ return false;
+}
#endif
#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index c9ec1343d187..10e319f41fb1 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -72,7 +72,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
extern void macvlan_common_setup(struct net_device *dev);
extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[]);
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack);
extern void macvlan_count_rx(const struct macvlan_dev *vlan,
unsigned int len, bool success,
diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h
index bbcdb0a767d8..a118ee4a8428 100644
--- a/include/linux/if_phonet.h
+++ b/include/linux/if_phonet.h
@@ -10,5 +10,5 @@
#include <uapi/linux/if_phonet.h>
-extern struct header_ops phonet_header_ops;
+extern const struct header_ops phonet_header_ops;
#endif
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index fb3f809e34e4..681dff30940b 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -154,6 +154,7 @@ struct in_ifaddr {
struct in_validator_info {
__be32 ivi_addr;
struct in_device *ivi_dev;
+ struct netlink_ext_ack *extack;
};
int register_inetaddr_notifier(struct notifier_block *nb);
@@ -179,6 +180,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst,
__be32 local, int scope);
struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
__be32 mask);
+struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr);
static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa)
{
return !((addr^ifa->ifa_address)&ifa->ifa_mask);
diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h
index c9258124e417..7161d8e7ee79 100644
--- a/include/linux/lsm_hooks.h
+++ b/include/linux/lsm_hooks.h
@@ -1351,6 +1351,40 @@
* @inode we wish to get the security context of.
* @ctx is a pointer in which to place the allocated security context.
* @ctxlen points to the place to put the length of @ctx.
+ *
+ * Security hooks for using the eBPF maps and programs functionalities through
+ * eBPF syscalls.
+ *
+ * @bpf:
+ * Do a initial check for all bpf syscalls after the attribute is copied
+ * into the kernel. The actual security module can implement their own
+ * rules to check the specific cmd they need.
+ *
+ * @bpf_map:
+ * Do a check when the kernel generate and return a file descriptor for
+ * eBPF maps.
+ *
+ * @map: bpf map that we want to access
+ * @mask: the access flags
+ *
+ * @bpf_prog:
+ * Do a check when the kernel generate and return a file descriptor for
+ * eBPF programs.
+ *
+ * @prog: bpf prog that userspace want to use.
+ *
+ * @bpf_map_alloc_security:
+ * Initialize the security field inside bpf map.
+ *
+ * @bpf_map_free_security:
+ * Clean up the security information stored inside bpf map.
+ *
+ * @bpf_prog_alloc_security:
+ * Initialize the security field inside bpf program.
+ *
+ * @bpf_prog_free_security:
+ * Clean up the security information stored inside bpf prog.
+ *
*/
union security_list_options {
int (*binder_set_context_mgr)(struct task_struct *mgr);
@@ -1682,6 +1716,17 @@ union security_list_options {
struct audit_context *actx);
void (*audit_rule_free)(void *lsmrule);
#endif /* CONFIG_AUDIT */
+
+#ifdef CONFIG_BPF_SYSCALL
+ int (*bpf)(int cmd, union bpf_attr *attr,
+ unsigned int size);
+ int (*bpf_map)(struct bpf_map *map, fmode_t fmode);
+ int (*bpf_prog)(struct bpf_prog *prog);
+ int (*bpf_map_alloc_security)(struct bpf_map *map);
+ void (*bpf_map_free_security)(struct bpf_map *map);
+ int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux);
+ void (*bpf_prog_free_security)(struct bpf_prog_aux *aux);
+#endif /* CONFIG_BPF_SYSCALL */
};
struct security_hook_heads {
@@ -1901,6 +1946,15 @@ struct security_hook_heads {
struct list_head audit_rule_match;
struct list_head audit_rule_free;
#endif /* CONFIG_AUDIT */
+#ifdef CONFIG_BPF_SYSCALL
+ struct list_head bpf;
+ struct list_head bpf_map;
+ struct list_head bpf_prog;
+ struct list_head bpf_map_alloc_security;
+ struct list_head bpf_map_free_security;
+ struct list_head bpf_prog_alloc_security;
+ struct list_head bpf_prog_free_security;
+#endif /* CONFIG_BPF_SYSCALL */
} __randomize_layout;
/*
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b0a57e043fa3..a9b5fed8f7c6 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -40,7 +40,7 @@
#include <linux/cpu_rmap.h>
#include <linux/crash_dump.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
#include <linux/timecounter.h>
@@ -751,7 +751,7 @@ struct mlx4_cq {
int cqn;
unsigned vector;
- atomic_t refcount;
+ refcount_t refcount;
struct completion free;
struct {
struct list_head list;
@@ -768,7 +768,7 @@ struct mlx4_qp {
int qpn;
- atomic_t refcount;
+ refcount_t refcount;
struct completion free;
u8 usage;
};
@@ -781,7 +781,7 @@ struct mlx4_srq {
int max_gs;
int wqe_shift;
- atomic_t refcount;
+ refcount_t refcount;
struct completion free;
};
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 95898847c7d4..6a57ec2f1ef7 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -35,7 +35,7 @@
#include <rdma/ib_verbs.h>
#include <linux/mlx5/driver.h>
-
+#include <linux/refcount.h>
struct mlx5_core_cq {
u32 cqn;
@@ -43,7 +43,7 @@ struct mlx5_core_cq {
__be32 *set_ci_db;
__be32 *arm_db;
struct mlx5_uars_page *uar;
- atomic_t refcount;
+ refcount_t refcount;
struct completion free;
unsigned vector;
unsigned int irqn;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 401c8972cc3a..08c77b7e59cb 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -49,6 +49,8 @@
#include <linux/mlx5/device.h>
#include <linux/mlx5/doorbell.h>
#include <linux/mlx5/srq.h>
+#include <linux/timecounter.h>
+#include <linux/ptp_clock_kernel.h>
enum {
MLX5_BOARD_ID_LEN = 64,
@@ -760,6 +762,27 @@ struct mlx5_rsvd_gids {
struct ida ida;
};
+#define MAX_PIN_NUM 8
+struct mlx5_pps {
+ u8 pin_caps[MAX_PIN_NUM];
+ struct work_struct out_work;
+ u64 start[MAX_PIN_NUM];
+ u8 enabled;
+};
+
+struct mlx5_clock {
+ rwlock_t lock;
+ struct cyclecounter cycles;
+ struct timecounter tc;
+ struct hwtstamp_config hwtstamp_config;
+ u32 nominal_c_mult;
+ unsigned long overflow_period;
+ struct delayed_work overflow_work;
+ struct ptp_clock *ptp;
+ struct ptp_clock_info ptp_info;
+ struct mlx5_pps pps_info;
+};
+
struct mlx5_core_dev {
struct pci_dev *pdev;
/* sync pci state */
@@ -800,6 +823,7 @@ struct mlx5_core_dev {
#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap;
#endif
+ struct mlx5_clock clock;
};
struct mlx5_db {
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 2657f9f51536..d9fb7abad445 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -686,5 +686,31 @@ struct fsl_mc_device_id {
const char obj_type[16];
};
+/**
+ * struct tb_service_id - Thunderbolt service identifiers
+ * @match_flags: Flags used to match the structure
+ * @protocol_key: Protocol key the service supports
+ * @protocol_id: Protocol id the service supports
+ * @protocol_version: Version of the protocol
+ * @protocol_revision: Revision of the protocol software
+ * @driver_data: Driver specific data
+ *
+ * Thunderbolt XDomain services are exposed as devices where each device
+ * carries the protocol information the service supports. Thunderbolt
+ * XDomain service drivers match against that information.
+ */
+struct tb_service_id {
+ __u32 match_flags;
+ char protocol_key[8 + 1];
+ __u32 protocol_id;
+ __u32 protocol_version;
+ __u32 protocol_revision;
+ kernel_ulong_t driver_data;
+};
+
+#define TBSVC_MATCH_PROTOCOL_KEY 0x0001
+#define TBSVC_MATCH_PROTOCOL_ID 0x0002
+#define TBSVC_MATCH_PROTOCOL_VERSION 0x0004
+#define TBSVC_MATCH_PROTOCOL_REVISION 0x0008
#endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index d7f63339ef0b..8242d05df35e 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -5,6 +5,8 @@
#include <linux/pim.h>
#include <linux/rhashtable.h>
#include <net/sock.h>
+#include <net/fib_rules.h>
+#include <net/fib_notifier.h>
#include <uapi/linux/mroute.h>
#ifdef CONFIG_IP_MROUTE
@@ -18,6 +20,7 @@ int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *);
int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg);
int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg);
int ip_mr_init(void);
+bool ipmr_rule_default(const struct fib_rule *rule);
#else
static inline int ip_mroute_setsockopt(struct sock *sock, int optname,
char __user *optval, unsigned int optlen)
@@ -45,10 +48,16 @@ static inline int ip_mroute_opt(int opt)
{
return 0;
}
+
+static inline bool ipmr_rule_default(const struct fib_rule *rule)
+{
+ return true;
+}
#endif
struct vif_device {
struct net_device *dev; /* Device we are using */
+ struct netdev_phys_item_id dev_parent_id; /* Device parent ID */
unsigned long bytes_in,bytes_out;
unsigned long pkt_in,pkt_out; /* Statistics */
unsigned long rate_limit; /* Traffic shaping (NI) */
@@ -58,6 +67,14 @@ struct vif_device {
int link; /* Physical interface index */
};
+struct vif_entry_notifier_info {
+ struct fib_notifier_info info;
+ struct net_device *dev;
+ vifi_t vif_index;
+ unsigned short vif_flags;
+ u32 tb_id;
+};
+
#define VIFF_STATIC 0x8000
#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
@@ -81,9 +98,11 @@ struct mr_table {
/* mfc_flags:
* MFC_STATIC - the entry was added statically (not by a routing daemon)
+ * MFC_OFFLOAD - the entry was offloaded to the hardware
*/
enum {
MFC_STATIC = BIT(0),
+ MFC_OFFLOAD = BIT(1),
};
struct mfc_cache_cmp_arg {
@@ -109,6 +128,7 @@ struct mfc_cache_cmp_arg {
* @wrong_if: number of wrong source interface hits
* @lastuse: time of last use of the group (traffic or update)
* @ttls: OIF TTL threshold array
+ * @refcount: reference count for this entry
* @list: global entry list
* @rcu: used for entry destruction
*/
@@ -138,14 +158,40 @@ struct mfc_cache {
unsigned long wrong_if;
unsigned long lastuse;
unsigned char ttls[MAXVIFS];
+ refcount_t refcount;
} res;
} mfc_un;
struct list_head list;
struct rcu_head rcu;
};
+struct mfc_entry_notifier_info {
+ struct fib_notifier_info info;
+ struct mfc_cache *mfc;
+ u32 tb_id;
+};
+
struct rtmsg;
int ipmr_get_route(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr,
struct rtmsg *rtm, u32 portid);
+
+#ifdef CONFIG_IP_MROUTE
+void ipmr_cache_free(struct mfc_cache *mfc_cache);
+#else
+static inline void ipmr_cache_free(struct mfc_cache *mfc_cache)
+{
+}
+#endif
+
+static inline void ipmr_cache_put(struct mfc_cache *c)
+{
+ if (refcount_dec_and_test(&c->mfc_un.res.refcount))
+ ipmr_cache_free(c);
+}
+static inline void ipmr_cache_hold(struct mfc_cache *c)
+{
+ refcount_inc(&c->mfc_un.res.refcount);
+}
+
#endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2eaac7d75af4..6c7960c8338a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -55,7 +55,7 @@
struct netpoll_info;
struct device;
struct phy_device;
-struct dsa_switch_tree;
+struct dsa_port;
/* 802.11 specific */
struct wireless_dev;
@@ -775,6 +775,7 @@ enum tc_setup_type {
TC_SETUP_CLSFLOWER,
TC_SETUP_CLSMATCHALL,
TC_SETUP_CLSBPF,
+ TC_SETUP_BLOCK,
};
/* These structures hold the attributes of xdp state that are being passed
@@ -826,6 +827,11 @@ struct xfrmdev_ops {
};
#endif
+struct dev_ifalias {
+ struct rcu_head rcuhead;
+ char ifalias[];
+};
+
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@ -1241,7 +1247,8 @@ struct net_device_ops {
u32 flow_id);
#endif
int (*ndo_add_slave)(struct net_device *dev,
- struct net_device *slave_dev);
+ struct net_device *slave_dev,
+ struct netlink_ext_ack *extack);
int (*ndo_del_slave)(struct net_device *dev,
struct net_device *slave_dev);
netdev_features_t (*ndo_fix_features)(struct net_device *dev,
@@ -1632,7 +1639,7 @@ enum netdev_priv_flags {
struct net_device {
char name[IFNAMSIZ];
struct hlist_node name_hlist;
- char *ifalias;
+ struct dev_ifalias __rcu *ifalias;
/*
* I/O specific fields
* FIXME: Merge these and struct ifmap into one
@@ -1752,7 +1759,7 @@ struct net_device {
struct vlan_info __rcu *vlan_info;
#endif
#if IS_ENABLED(CONFIG_NET_DSA)
- struct dsa_switch_tree *dsa_ptr;
+ struct dsa_port *dsa_ptr;
#endif
#if IS_ENABLED(CONFIG_TIPC)
struct tipc_bearer __rcu *tipc_ptr;
@@ -2304,7 +2311,8 @@ int register_netdevice_notifier(struct notifier_block *nb);
int unregister_netdevice_notifier(struct notifier_block *nb);
struct netdev_notifier_info {
- struct net_device *dev;
+ struct net_device *dev;
+ struct netlink_ext_ack *extack;
};
struct netdev_notifier_change_info {
@@ -2329,6 +2337,7 @@ static inline void netdev_notifier_info_init(struct netdev_notifier_info *info,
struct net_device *dev)
{
info->dev = dev;
+ info->extack = NULL;
}
static inline struct net_device *
@@ -2337,6 +2346,12 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info)
return info->dev;
}
+static inline struct netlink_ext_ack *
+netdev_notifier_info_to_extack(const struct netdev_notifier_info *info)
+{
+ return info->extack;
+}
+
int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
@@ -3246,6 +3261,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
int netif_rx(struct sk_buff *skb);
int netif_rx_ni(struct sk_buff *skb);
int netif_receive_skb(struct sk_buff *skb);
+int netif_receive_skb_core(struct sk_buff *skb);
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
void napi_gro_flush(struct napi_struct *napi, bool flush_old);
struct sk_buff *napi_get_frags(struct napi_struct *napi);
@@ -3275,6 +3291,7 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags,
unsigned int gchanges);
int dev_change_name(struct net_device *, const char *);
int dev_set_alias(struct net_device *, const char *, size_t);
+int dev_get_alias(const struct net_device *, char *, size_t);
int dev_change_net_namespace(struct net_device *, struct net *, const char *);
int __dev_set_mtu(struct net_device *, int);
int dev_set_mtu(struct net_device *, int);
@@ -3907,10 +3924,12 @@ void *netdev_adjacent_get_private(struct list_head *adj_list);
void *netdev_lower_get_first_private_rcu(struct net_device *dev);
struct net_device *netdev_master_upper_dev_get(struct net_device *dev);
struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev);
-int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev);
+int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev,
+ struct netlink_ext_ack *extack);
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
- void *upper_priv, void *upper_info);
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev);
void netdev_adjacent_rename_links(struct net_device *dev, char *oldname);
diff --git a/include/linux/once.h b/include/linux/once.h
index 9c98aaa87cbc..724724918e8b 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -5,7 +5,7 @@
#include <linux/jump_label.h>
bool __do_once_start(bool *done, unsigned long *flags);
-void __do_once_done(bool *done, struct static_key *once_key,
+void __do_once_done(bool *done, struct static_key_true *once_key,
unsigned long *flags);
/* Call a function exactly once. The idea of DO_ONCE() is to perform
@@ -38,8 +38,8 @@ void __do_once_done(bool *done, struct static_key *once_key,
({ \
bool ___ret = false; \
static bool ___done = false; \
- static struct static_key ___once_key = STATIC_KEY_INIT_TRUE; \
- if (static_key_true(&___once_key)) { \
+ static DEFINE_STATIC_KEY_TRUE(___once_key); \
+ if (static_branch_unlikely(&___once_key)) { \
unsigned long ___flags; \
___ret = __do_once_start(&___done, &___flags); \
if (unlikely(___ret)) { \
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8e22f24ded6a..79b18a20cf5d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -806,6 +806,7 @@ struct perf_output_handle {
struct bpf_perf_event_data_kern {
struct pt_regs *regs;
struct perf_sample_data *data;
+ struct perf_event *event;
};
#ifdef CONFIG_CGROUP_PERF
@@ -884,7 +885,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr,
void *context);
extern void perf_pmu_migrate_context(struct pmu *pmu,
int src_cpu, int dst_cpu);
-int perf_event_read_local(struct perf_event *event, u64 *value);
+int perf_event_read_local(struct perf_event *event, u64 *value,
+ u64 *enabled, u64 *running);
extern u64 perf_event_read_value(struct perf_event *event,
u64 *enabled, u64 *running);
@@ -1286,7 +1288,8 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event *
{
return ERR_PTR(-EINVAL);
}
-static inline int perf_event_read_local(struct perf_event *event, u64 *value)
+static inline int perf_event_read_local(struct perf_event *event, u64 *value,
+ u64 *enabled, u64 *running)
{
return -EINVAL;
}
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index dd7a3b86bb9e..e755954d85fd 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -64,6 +64,7 @@ enum qed_ll2_roce_flavor_type {
enum qed_ll2_tx_dest {
QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */
QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */
+ QED_LL2_TX_DEST_DROP, /* Light L2 Drop the TX packet */
QED_LL2_TX_DEST_MAX
};
@@ -101,6 +102,7 @@ struct qed_ll2_comp_rx_data {
void *cookie;
dma_addr_t rx_buf_addr;
u16 parse_flags;
+ u16 err_flags;
u16 vlan;
bool b_last_packet;
u8 connection_handle;
@@ -149,11 +151,16 @@ void (*qed_ll2_release_tx_packet_cb)(void *cxt,
dma_addr_t first_frag_addr,
bool b_last_fragment, bool b_last_packet);
+typedef
+void (*qed_ll2_slowpath_cb)(void *cxt, u8 connection_handle,
+ u32 opaque_data_0, u32 opaque_data_1);
+
struct qed_ll2_cbs {
qed_ll2_complete_rx_packet_cb rx_comp_cb;
qed_ll2_release_rx_packet_cb rx_release_cb;
qed_ll2_complete_tx_packet_cb tx_comp_cb;
qed_ll2_release_tx_packet_cb tx_release_cb;
+ qed_ll2_slowpath_cb slowpath_cb;
void *cookie;
};
@@ -170,6 +177,7 @@ struct qed_ll2_acquire_data_inputs {
enum qed_ll2_tx_dest tx_dest;
enum qed_ll2_error_handle ai_err_packet_too_big;
enum qed_ll2_error_handle ai_err_no_buf;
+ bool secondary_queue;
u8 gsi_enable;
};
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index dea59c8eec54..1251638e60d3 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -17,9 +17,11 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
u32 id, long expires, u32 error);
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags);
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags, int *new_nsid);
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned change, u32 event,
- gfp_t flags);
+ gfp_t flags, int *new_nsid);
void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev,
gfp_t flags);
diff --git a/include/linux/security.h b/include/linux/security.h
index ce6265960d6c..18800b0911e5 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -31,6 +31,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/bpf.h>
struct linux_binprm;
struct cred;
@@ -1730,6 +1731,50 @@ static inline void securityfs_remove(struct dentry *dentry)
#endif
+#ifdef CONFIG_BPF_SYSCALL
+#ifdef CONFIG_SECURITY
+extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size);
+extern int security_bpf_map(struct bpf_map *map, fmode_t fmode);
+extern int security_bpf_prog(struct bpf_prog *prog);
+extern int security_bpf_map_alloc(struct bpf_map *map);
+extern void security_bpf_map_free(struct bpf_map *map);
+extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux);
+extern void security_bpf_prog_free(struct bpf_prog_aux *aux);
+#else
+static inline int security_bpf(int cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+ return 0;
+}
+
+static inline int security_bpf_map(struct bpf_map *map, fmode_t fmode)
+{
+ return 0;
+}
+
+static inline int security_bpf_prog(struct bpf_prog *prog)
+{
+ return 0;
+}
+
+static inline int security_bpf_map_alloc(struct bpf_map *map)
+{
+ return 0;
+}
+
+static inline void security_bpf_map_free(struct bpf_map *map)
+{ }
+
+static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
+{
+ return 0;
+}
+
+static inline void security_bpf_prog_free(struct bpf_prog_aux *aux)
+{ }
+#endif /* CONFIG_SECURITY */
+#endif /* CONFIG_BPF_SYSCALL */
+
#ifdef CONFIG_SECURITY
static inline char *alloc_secdata(void)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 72299ef00061..03634ec2f918 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -489,8 +489,9 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
* the end of the header data, ie. at skb->end.
*/
struct skb_shared_info {
- unsigned short _unused;
- unsigned char nr_frags;
+ __u8 __unused;
+ __u8 meta_len;
+ __u8 nr_frags;
__u8 tx_flags;
unsigned short gso_size;
/* Warning: this field is not always filled in (UFO)! */
@@ -616,6 +617,7 @@ typedef unsigned char *sk_buff_data_t;
* @nf_trace: netfilter packet trace flag
* @protocol: Packet protocol from driver
* @destructor: Destruct function
+ * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue)
* @_nfct: Associated connection, if any (with nfctinfo bits)
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @skb_iif: ifindex of device we arrived on
@@ -661,8 +663,12 @@ struct sk_buff {
struct sk_buff *prev;
union {
- ktime_t tstamp;
- u64 skb_mstamp;
+ struct net_device *dev;
+ /* Some protocols might use this space to store information,
+ * while device pointer would be NULL.
+ * UDP receive path is one user.
+ */
+ unsigned long dev_scratch;
};
};
struct rb_node rbnode; /* used in netem & tcp stack */
@@ -670,12 +676,8 @@ struct sk_buff {
struct sock *sk;
union {
- struct net_device *dev;
- /* Some protocols might use this space to store information,
- * while device pointer would be NULL.
- * UDP receive path is one user.
- */
- unsigned long dev_scratch;
+ ktime_t tstamp;
+ u64 skb_mstamp;
};
/*
* This is the control buffer. It is free to use for every
@@ -685,8 +687,14 @@ struct sk_buff {
*/
char cb[48] __aligned(8);
- unsigned long _skb_refdst;
- void (*destructor)(struct sk_buff *skb);
+ union {
+ struct {
+ unsigned long _skb_refdst;
+ void (*destructor)(struct sk_buff *skb);
+ };
+ struct list_head tcp_tsorted_anchor;
+ };
+
#ifdef CONFIG_XFRM
struct sec_path *sp;
#endif
@@ -771,6 +779,7 @@ struct sk_buff {
__u8 remcsum_offload:1;
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
+ __u8 offload_mr_fwd_mark:1;
#endif
#ifdef CONFIG_NET_CLS_ACT
__u8 tc_skip_classify:1;
@@ -1457,27 +1466,8 @@ static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri)
}
/**
- * skb_header_release - release reference to header
- * @skb: buffer to operate on
- *
- * Drop a reference to the header part of the buffer. This is done
- * by acquiring a payload reference. You must not read from the header
- * part of skb->data after this.
- * Note : Check if you can use __skb_header_release() instead.
- */
-static inline void skb_header_release(struct sk_buff *skb)
-{
- BUG_ON(skb->nohdr);
- skb->nohdr = 1;
- atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref);
-}
-
-/**
* __skb_header_release - release reference to header
* @skb: buffer to operate on
- *
- * Variant of skb_header_release() assuming skb is private to caller.
- * We can avoid one atomic operation.
*/
static inline void __skb_header_release(struct sk_buff *skb)
{
@@ -3168,6 +3158,12 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
return __skb_grow(skb, len);
}
+#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode)
+#define skb_rb_first(root) rb_to_skb(rb_first(root))
+#define skb_rb_last(root) rb_to_skb(rb_last(root))
+#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode))
+#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode))
+
#define skb_queue_walk(queue, skb) \
for (skb = (queue)->next; \
skb != (struct sk_buff *)(queue); \
@@ -3182,6 +3178,18 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len)
for (; skb != (struct sk_buff *)(queue); \
skb = skb->next)
+#define skb_rbtree_walk(skb, root) \
+ for (skb = skb_rb_first(root); skb != NULL; \
+ skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from(skb) \
+ for (; skb != NULL; \
+ skb = skb_rb_next(skb))
+
+#define skb_rbtree_walk_from_safe(skb, tmp) \
+ for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \
+ skb = tmp)
+
#define skb_queue_walk_from_safe(queue, skb, tmp) \
for (tmp = skb->next; \
skb != (struct sk_buff *)(queue); \
@@ -3419,6 +3427,69 @@ static inline ktime_t net_invalid_timestamp(void)
return 0;
}
+static inline u8 skb_metadata_len(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->meta_len;
+}
+
+static inline void *skb_metadata_end(const struct sk_buff *skb)
+{
+ return skb_mac_header(skb);
+}
+
+static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
+ const struct sk_buff *skb_b,
+ u8 meta_len)
+{
+ const void *a = skb_metadata_end(skb_a);
+ const void *b = skb_metadata_end(skb_b);
+ /* Using more efficient varaiant than plain call to memcmp(). */
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
+ u64 diffs = 0;
+
+ switch (meta_len) {
+#define __it(x, op) (x -= sizeof(u##op))
+#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
+ case 32: diffs |= __it_diff(a, b, 64);
+ case 24: diffs |= __it_diff(a, b, 64);
+ case 16: diffs |= __it_diff(a, b, 64);
+ case 8: diffs |= __it_diff(a, b, 64);
+ break;
+ case 28: diffs |= __it_diff(a, b, 64);
+ case 20: diffs |= __it_diff(a, b, 64);
+ case 12: diffs |= __it_diff(a, b, 64);
+ case 4: diffs |= __it_diff(a, b, 32);
+ break;
+ }
+ return diffs;
+#else
+ return memcmp(a - meta_len, b - meta_len, meta_len);
+#endif
+}
+
+static inline bool skb_metadata_differs(const struct sk_buff *skb_a,
+ const struct sk_buff *skb_b)
+{
+ u8 len_a = skb_metadata_len(skb_a);
+ u8 len_b = skb_metadata_len(skb_b);
+
+ if (!(len_a | len_b))
+ return false;
+
+ return len_a != len_b ?
+ true : __skb_metadata_differs(skb_a, skb_b, len_a);
+}
+
+static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len)
+{
+ skb_shinfo(skb)->meta_len = meta_len;
+}
+
+static inline void skb_metadata_clear(struct sk_buff *skb)
+{
+ skb_metadata_set(skb, 0);
+}
+
struct sk_buff *skb_clone_sk(struct sk_buff *skb);
#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4aa40ef02d32..1d2c44e09e31 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -191,6 +191,7 @@ struct tcp_sock {
u32 tsoffset; /* timestamp offset */
struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
+ struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */
u32 snd_wl1; /* Sequence for window update */
u32 snd_wnd; /* The window we expect to receive */
diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h
new file mode 100644
index 000000000000..7b69853188b1
--- /dev/null
+++ b/include/linux/thunderbolt.h
@@ -0,0 +1,598 @@
+/*
+ * Thunderbolt service API
+ *
+ * Copyright (C) 2014 Andreas Noever <andreas.noever@gmail.com>
+ * Copyright (C) 2017, Intel Corporation
+ * Authors: Michael Jamet <michael.jamet@intel.com>
+ * Mika Westerberg <mika.westerberg@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef THUNDERBOLT_H_
+#define THUNDERBOLT_H_
+
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pci.h>
+#include <linux/uuid.h>
+#include <linux/workqueue.h>
+
+enum tb_cfg_pkg_type {
+ TB_CFG_PKG_READ = 1,
+ TB_CFG_PKG_WRITE = 2,
+ TB_CFG_PKG_ERROR = 3,
+ TB_CFG_PKG_NOTIFY_ACK = 4,
+ TB_CFG_PKG_EVENT = 5,
+ TB_CFG_PKG_XDOMAIN_REQ = 6,
+ TB_CFG_PKG_XDOMAIN_RESP = 7,
+ TB_CFG_PKG_OVERRIDE = 8,
+ TB_CFG_PKG_RESET = 9,
+ TB_CFG_PKG_ICM_EVENT = 10,
+ TB_CFG_PKG_ICM_CMD = 11,
+ TB_CFG_PKG_ICM_RESP = 12,
+ TB_CFG_PKG_PREPARE_TO_SLEEP = 13,
+};
+
+/**
+ * enum tb_security_level - Thunderbolt security level
+ * @TB_SECURITY_NONE: No security, legacy mode
+ * @TB_SECURITY_USER: User approval required at minimum
+ * @TB_SECURITY_SECURE: One time saved key required at minimum
+ * @TB_SECURITY_DPONLY: Only tunnel Display port (and USB)
+ */
+enum tb_security_level {
+ TB_SECURITY_NONE,
+ TB_SECURITY_USER,
+ TB_SECURITY_SECURE,
+ TB_SECURITY_DPONLY,
+};
+
+/**
+ * struct tb - main thunderbolt bus structure
+ * @dev: Domain device
+ * @lock: Big lock. Must be held when accessing any struct
+ * tb_switch / struct tb_port.
+ * @nhi: Pointer to the NHI structure
+ * @ctl: Control channel for this domain
+ * @wq: Ordered workqueue for all domain specific work
+ * @root_switch: Root switch of this domain
+ * @cm_ops: Connection manager specific operations vector
+ * @index: Linux assigned domain number
+ * @security_level: Current security level
+ * @privdata: Private connection manager specific data
+ */
+struct tb {
+ struct device dev;
+ struct mutex lock;
+ struct tb_nhi *nhi;
+ struct tb_ctl *ctl;
+ struct workqueue_struct *wq;
+ struct tb_switch *root_switch;
+ const struct tb_cm_ops *cm_ops;
+ int index;
+ enum tb_security_level security_level;
+ unsigned long privdata[0];
+};
+
+extern struct bus_type tb_bus_type;
+extern struct device_type tb_service_type;
+extern struct device_type tb_xdomain_type;
+
+#define TB_LINKS_PER_PHY_PORT 2
+
+static inline unsigned int tb_phy_port_from_link(unsigned int link)
+{
+ return (link - 1) / TB_LINKS_PER_PHY_PORT;
+}
+
+/**
+ * struct tb_property_dir - XDomain property directory
+ * @uuid: Directory UUID or %NULL if root directory
+ * @properties: List of properties in this directory
+ *
+ * User needs to provide serialization if needed.
+ */
+struct tb_property_dir {
+ const uuid_t *uuid;
+ struct list_head properties;
+};
+
+enum tb_property_type {
+ TB_PROPERTY_TYPE_UNKNOWN = 0x00,
+ TB_PROPERTY_TYPE_DIRECTORY = 0x44,
+ TB_PROPERTY_TYPE_DATA = 0x64,
+ TB_PROPERTY_TYPE_TEXT = 0x74,
+ TB_PROPERTY_TYPE_VALUE = 0x76,
+};
+
+#define TB_PROPERTY_KEY_SIZE 8
+
+/**
+ * struct tb_property - XDomain property
+ * @list: Used to link properties together in a directory
+ * @key: Key for the property (always terminated).
+ * @type: Type of the property
+ * @length: Length of the property data in dwords
+ * @value: Property value
+ *
+ * Users use @type to determine which field in @value is filled.
+ */
+struct tb_property {
+ struct list_head list;
+ char key[TB_PROPERTY_KEY_SIZE + 1];
+ enum tb_property_type type;
+ size_t length;
+ union {
+ struct tb_property_dir *dir;
+ u8 *data;
+ char *text;
+ u32 immediate;
+ } value;
+};
+
+struct tb_property_dir *tb_property_parse_dir(const u32 *block,
+ size_t block_len);
+ssize_t tb_property_format_dir(const struct tb_property_dir *dir, u32 *block,
+ size_t block_len);
+struct tb_property_dir *tb_property_create_dir(const uuid_t *uuid);
+void tb_property_free_dir(struct tb_property_dir *dir);
+int tb_property_add_immediate(struct tb_property_dir *parent, const char *key,
+ u32 value);
+int tb_property_add_data(struct tb_property_dir *parent, const char *key,
+ const void *buf, size_t buflen);
+int tb_property_add_text(struct tb_property_dir *parent, const char *key,
+ const char *text);
+int tb_property_add_dir(struct tb_property_dir *parent, const char *key,
+ struct tb_property_dir *dir);
+void tb_property_remove(struct tb_property *tb_property);
+struct tb_property *tb_property_find(struct tb_property_dir *dir,
+ const char *key, enum tb_property_type type);
+struct tb_property *tb_property_get_next(struct tb_property_dir *dir,
+ struct tb_property *prev);
+
+#define tb_property_for_each(dir, property) \
+ for (property = tb_property_get_next(dir, NULL); \
+ property; \
+ property = tb_property_get_next(dir, property))
+
+int tb_register_property_dir(const char *key, struct tb_property_dir *dir);
+void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir);
+
+/**
+ * struct tb_xdomain - Cross-domain (XDomain) connection
+ * @dev: XDomain device
+ * @tb: Pointer to the domain
+ * @remote_uuid: UUID of the remote domain (host)
+ * @local_uuid: Cached local UUID
+ * @route: Route string the other domain can be reached
+ * @vendor: Vendor ID of the remote domain
+ * @device: Device ID of the demote domain
+ * @lock: Lock to serialize access to the following fields of this structure
+ * @vendor_name: Name of the vendor (or %NULL if not known)
+ * @device_name: Name of the device (or %NULL if not known)
+ * @is_unplugged: The XDomain is unplugged
+ * @resume: The XDomain is being resumed
+ * @transmit_path: HopID which the remote end expects us to transmit
+ * @transmit_ring: Local ring (hop) where outgoing packets are pushed
+ * @receive_path: HopID which we expect the remote end to transmit
+ * @receive_ring: Local ring (hop) where incoming packets arrive
+ * @service_ids: Used to generate IDs for the services
+ * @properties: Properties exported by the remote domain
+ * @property_block_gen: Generation of @properties
+ * @properties_lock: Lock protecting @properties.
+ * @get_properties_work: Work used to get remote domain properties
+ * @properties_retries: Number of times left to read properties
+ * @properties_changed_work: Work used to notify the remote domain that
+ * our properties have changed
+ * @properties_changed_retries: Number of times left to send properties
+ * changed notification
+ * @link: Root switch link the remote domain is connected (ICM only)
+ * @depth: Depth in the chain the remote domain is connected (ICM only)
+ *
+ * This structure represents connection across two domains (hosts).
+ * Each XDomain contains zero or more services which are exposed as
+ * &struct tb_service objects.
+ *
+ * Service drivers may access this structure if they need to enumerate
+ * non-standard properties but they need hold @lock when doing so
+ * because properties can be changed asynchronously in response to
+ * changes in the remote domain.
+ */
+struct tb_xdomain {
+ struct device dev;
+ struct tb *tb;
+ uuid_t *remote_uuid;
+ const uuid_t *local_uuid;
+ u64 route;
+ u16 vendor;
+ u16 device;
+ struct mutex lock;
+ const char *vendor_name;
+ const char *device_name;
+ bool is_unplugged;
+ bool resume;
+ u16 transmit_path;
+ u16 transmit_ring;
+ u16 receive_path;
+ u16 receive_ring;
+ struct ida service_ids;
+ struct tb_property_dir *properties;
+ u32 property_block_gen;
+ struct delayed_work get_properties_work;
+ int properties_retries;
+ struct delayed_work properties_changed_work;
+ int properties_changed_retries;
+ u8 link;
+ u8 depth;
+};
+
+int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path,
+ u16 transmit_ring, u16 receive_path,
+ u16 receive_ring);
+int tb_xdomain_disable_paths(struct tb_xdomain *xd);
+struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid);
+
+static inline struct tb_xdomain *
+tb_xdomain_find_by_uuid_locked(struct tb *tb, const uuid_t *uuid)
+{
+ struct tb_xdomain *xd;
+
+ mutex_lock(&tb->lock);
+ xd = tb_xdomain_find_by_uuid(tb, uuid);
+ mutex_unlock(&tb->lock);
+
+ return xd;
+}
+
+static inline struct tb_xdomain *tb_xdomain_get(struct tb_xdomain *xd)
+{
+ if (xd)
+ get_device(&xd->dev);
+ return xd;
+}
+
+static inline void tb_xdomain_put(struct tb_xdomain *xd)
+{
+ if (xd)
+ put_device(&xd->dev);
+}
+
+static inline bool tb_is_xdomain(const struct device *dev)
+{
+ return dev->type == &tb_xdomain_type;
+}
+
+static inline struct tb_xdomain *tb_to_xdomain(struct device *dev)
+{
+ if (tb_is_xdomain(dev))
+ return container_of(dev, struct tb_xdomain, dev);
+ return NULL;
+}
+
+int tb_xdomain_response(struct tb_xdomain *xd, const void *response,
+ size_t size, enum tb_cfg_pkg_type type);
+int tb_xdomain_request(struct tb_xdomain *xd, const void *request,
+ size_t request_size, enum tb_cfg_pkg_type request_type,
+ void *response, size_t response_size,
+ enum tb_cfg_pkg_type response_type,
+ unsigned int timeout_msec);
+
+/**
+ * tb_protocol_handler - Protocol specific handler
+ * @uuid: XDomain messages with this UUID are dispatched to this handler
+ * @callback: Callback called with the XDomain message. Returning %1
+ * here tells the XDomain core that the message was handled
+ * by this handler and should not be forwared to other
+ * handlers.
+ * @data: Data passed with the callback
+ * @list: Handlers are linked using this
+ *
+ * Thunderbolt services can hook into incoming XDomain requests by
+ * registering protocol handler. Only limitation is that the XDomain
+ * discovery protocol UUID cannot be registered since it is handled by
+ * the core XDomain code.
+ *
+ * The @callback must check that the message is really directed to the
+ * service the driver implements.
+ */
+struct tb_protocol_handler {
+ const uuid_t *uuid;
+ int (*callback)(const void *buf, size_t size, void *data);
+ void *data;
+ struct list_head list;
+};
+
+int tb_register_protocol_handler(struct tb_protocol_handler *handler);
+void tb_unregister_protocol_handler(struct tb_protocol_handler *handler);
+
+/**
+ * struct tb_service - Thunderbolt service
+ * @dev: XDomain device
+ * @id: ID of the service (shown in sysfs)
+ * @key: Protocol key from the properties directory
+ * @prtcid: Protocol ID from the properties directory
+ * @prtcvers: Protocol version from the properties directory
+ * @prtcrevs: Protocol software revision from the properties directory
+ * @prtcstns: Protocol settings mask from the properties directory
+ *
+ * Each domain exposes set of services it supports as collection of
+ * properties. For each service there will be one corresponding
+ * &struct tb_service. Service drivers are bound to these.
+ */
+struct tb_service {
+ struct device dev;
+ int id;
+ const char *key;
+ u32 prtcid;
+ u32 prtcvers;
+ u32 prtcrevs;
+ u32 prtcstns;
+};
+
+static inline struct tb_service *tb_service_get(struct tb_service *svc)
+{
+ if (svc)
+ get_device(&svc->dev);
+ return svc;
+}
+
+static inline void tb_service_put(struct tb_service *svc)
+{
+ if (svc)
+ put_device(&svc->dev);
+}
+
+static inline bool tb_is_service(const struct device *dev)
+{
+ return dev->type == &tb_service_type;
+}
+
+static inline struct tb_service *tb_to_service(struct device *dev)
+{
+ if (tb_is_service(dev))
+ return container_of(dev, struct tb_service, dev);
+ return NULL;
+}
+
+/**
+ * tb_service_driver - Thunderbolt service driver
+ * @driver: Driver structure
+ * @probe: Called when the driver is probed
+ * @remove: Called when the driver is removed (optional)
+ * @shutdown: Called at shutdown time to stop the service (optional)
+ * @id_table: Table of service identifiers the driver supports
+ */
+struct tb_service_driver {
+ struct device_driver driver;
+ int (*probe)(struct tb_service *svc, const struct tb_service_id *id);
+ void (*remove)(struct tb_service *svc);
+ void (*shutdown)(struct tb_service *svc);
+ const struct tb_service_id *id_table;
+};
+
+#define TB_SERVICE(key, id) \
+ .match_flags = TBSVC_MATCH_PROTOCOL_KEY | \
+ TBSVC_MATCH_PROTOCOL_ID, \
+ .protocol_key = (key), \
+ .protocol_id = (id)
+
+int tb_register_service_driver(struct tb_service_driver *drv);
+void tb_unregister_service_driver(struct tb_service_driver *drv);
+
+static inline void *tb_service_get_drvdata(const struct tb_service *svc)
+{
+ return dev_get_drvdata(&svc->dev);
+}
+
+static inline void tb_service_set_drvdata(struct tb_service *svc, void *data)
+{
+ dev_set_drvdata(&svc->dev, data);
+}
+
+static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc)
+{
+ return tb_to_xdomain(svc->dev.parent);
+}
+
+/**
+ * struct tb_nhi - thunderbolt native host interface
+ * @lock: Must be held during ring creation/destruction. Is acquired by
+ * interrupt_work when dispatching interrupts to individual rings.
+ * @pdev: Pointer to the PCI device
+ * @iobase: MMIO space of the NHI
+ * @tx_rings: All Tx rings available on this host controller
+ * @rx_rings: All Rx rings available on this host controller
+ * @msix_ida: Used to allocate MSI-X vectors for rings
+ * @going_away: The host controller device is about to disappear so when
+ * this flag is set, avoid touching the hardware anymore.
+ * @interrupt_work: Work scheduled to handle ring interrupt when no
+ * MSI-X is used.
+ * @hop_count: Number of rings (end point hops) supported by NHI.
+ */
+struct tb_nhi {
+ spinlock_t lock;
+ struct pci_dev *pdev;
+ void __iomem *iobase;
+ struct tb_ring **tx_rings;
+ struct tb_ring **rx_rings;
+ struct ida msix_ida;
+ bool going_away;
+ struct work_struct interrupt_work;
+ u32 hop_count;
+};
+
+/**
+ * struct tb_ring - thunderbolt TX or RX ring associated with a NHI
+ * @lock: Lock serializing actions to this ring. Must be acquired after
+ * nhi->lock.
+ * @nhi: Pointer to the native host controller interface
+ * @size: Size of the ring
+ * @hop: Hop (DMA channel) associated with this ring
+ * @head: Head of the ring (write next descriptor here)
+ * @tail: Tail of the ring (complete next descriptor here)
+ * @descriptors: Allocated descriptors for this ring
+ * @queue: Queue holding frames to be transferred over this ring
+ * @in_flight: Queue holding frames that are currently in flight
+ * @work: Interrupt work structure
+ * @is_tx: Is the ring Tx or Rx
+ * @running: Is the ring running
+ * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise.
+ * @vector: MSI-X vector number the ring uses (only set if @irq is > 0)
+ * @flags: Ring specific flags
+ * @sof_mask: Bit mask used to detect start of frame PDF
+ * @eof_mask: Bit mask used to detect end of frame PDF
+ * @start_poll: Called when ring interrupt is triggered to start
+ * polling. Passing %NULL keeps the ring in interrupt mode.
+ * @poll_data: Data passed to @start_poll
+ */
+struct tb_ring {
+ spinlock_t lock;
+ struct tb_nhi *nhi;
+ int size;
+ int hop;
+ int head;
+ int tail;
+ struct ring_desc *descriptors;
+ dma_addr_t descriptors_dma;
+ struct list_head queue;
+ struct list_head in_flight;
+ struct work_struct work;
+ bool is_tx:1;
+ bool running:1;
+ int irq;
+ u8 vector;
+ unsigned int flags;
+ u16 sof_mask;
+ u16 eof_mask;
+ void (*start_poll)(void *data);
+ void *poll_data;
+};
+
+/* Leave ring interrupt enabled on suspend */
+#define RING_FLAG_NO_SUSPEND BIT(0)
+/* Configure the ring to be in frame mode */
+#define RING_FLAG_FRAME BIT(1)
+/* Enable end-to-end flow control */
+#define RING_FLAG_E2E BIT(2)
+
+struct ring_frame;
+typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled);
+
+/**
+ * enum ring_desc_flags - Flags for DMA ring descriptor
+ * %RING_DESC_ISOCH: Enable isonchronous DMA (Tx only)
+ * %RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only)
+ * %RING_DESC_COMPLETED: Descriptor completed (set by NHI)
+ * %RING_DESC_POSTED: Always set this
+ * %RING_DESC_BUFFER_OVERRUN: RX buffer overrun
+ * %RING_DESC_INTERRUPT: Request an interrupt on completion
+ */
+enum ring_desc_flags {
+ RING_DESC_ISOCH = 0x1,
+ RING_DESC_CRC_ERROR = 0x1,
+ RING_DESC_COMPLETED = 0x2,
+ RING_DESC_POSTED = 0x4,
+ RING_DESC_BUFFER_OVERRUN = 0x04,
+ RING_DESC_INTERRUPT = 0x8,
+};
+
+/**
+ * struct ring_frame - For use with ring_rx/ring_tx
+ * @buffer_phy: DMA mapped address of the frame
+ * @callback: Callback called when the frame is finished (optional)
+ * @list: Frame is linked to a queue using this
+ * @size: Size of the frame in bytes (%0 means %4096)
+ * @flags: Flags for the frame (see &enum ring_desc_flags)
+ * @eof: End of frame protocol defined field
+ * @sof: Start of frame protocol defined field
+ */
+struct ring_frame {
+ dma_addr_t buffer_phy;
+ ring_cb callback;
+ struct list_head list;
+ u32 size:12;
+ u32 flags:12;
+ u32 eof:4;
+ u32 sof:4;
+};
+
+/* Minimum size for ring_rx */
+#define TB_FRAME_SIZE 0x100
+
+struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size,
+ unsigned int flags);
+struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size,
+ unsigned int flags, u16 sof_mask, u16 eof_mask,
+ void (*start_poll)(void *), void *poll_data);
+void tb_ring_start(struct tb_ring *ring);
+void tb_ring_stop(struct tb_ring *ring);
+void tb_ring_free(struct tb_ring *ring);
+
+int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame);
+
+/**
+ * tb_ring_rx() - enqueue a frame on an RX ring
+ * @ring: Ring to enqueue the frame
+ * @frame: Frame to enqueue
+ *
+ * @frame->buffer, @frame->buffer_phy have to be set. The buffer must
+ * contain at least %TB_FRAME_SIZE bytes.
+ *
+ * @frame->callback will be invoked with @frame->size, @frame->flags,
+ * @frame->eof, @frame->sof set once the frame has been received.
+ *
+ * If ring_stop() is called after the packet has been enqueued
+ * @frame->callback will be called with canceled set to true.
+ *
+ * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise.
+ */
+static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame)
+{
+ WARN_ON(ring->is_tx);
+ return __tb_ring_enqueue(ring, frame);
+}
+
+/**
+ * tb_ring_tx() - enqueue a frame on an TX ring
+ * @ring: Ring the enqueue the frame
+ * @frame: Frame to enqueue
+ *
+ * @frame->buffer, @frame->buffer_phy, @frame->size, @frame->eof and
+ * @frame->sof have to be set.
+ *
+ * @frame->callback will be invoked with once the frame has been transmitted.
+ *
+ * If ring_stop() is called after the packet has been enqueued @frame->callback
+ * will be called with canceled set to true.
+ *
+ * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise.
+ */
+static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame)
+{
+ WARN_ON(!ring->is_tx);
+ return __tb_ring_enqueue(ring, frame);
+}
+
+/* Used only when the ring is in polling mode */
+struct ring_frame *tb_ring_poll(struct tb_ring *ring);
+void tb_ring_poll_complete(struct tb_ring *ring);
+
+/**
+ * tb_ring_dma_device() - Return device used for DMA mapping
+ * @ring: Ring whose DMA device is retrieved
+ *
+ * Use this function when you are mapping DMA for buffers that are
+ * passed to the ring for sending/receiving.
+ */
+static inline struct device *tb_ring_dma_device(struct tb_ring *ring)
+{
+ return &ring->nhi->pdev->dev;
+}
+
+#endif /* THUNDERBOLT_H_ */
diff --git a/include/net/act_api.h b/include/net/act_api.h
index b944e0eb93be..f5e8c9048fb0 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -93,8 +93,7 @@ struct tc_action_ops {
int (*walk)(struct net *, struct sk_buff *,
struct netlink_callback *, int, const struct tc_action_ops *);
void (*stats_update)(struct tc_action *, u64, u32, u64);
- int (*get_dev)(const struct tc_action *a, struct net *net,
- struct net_device **mirred_dev);
+ struct net_device *(*get_dev)(const struct tc_action *a);
};
struct tc_action_net {
@@ -175,4 +174,38 @@ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
#endif
}
+typedef int tc_setup_cb_t(enum tc_setup_type type,
+ void *type_data, void *cb_priv);
+
+#ifdef CONFIG_NET_CLS_ACT
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv);
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv);
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+ enum tc_setup_type type, void *type_data,
+ bool err_stop);
+#else
+static inline
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ return 0;
+}
+
+static inline
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+}
+
+static inline
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+ enum tc_setup_type type, void *type_data,
+ bool err_stop)
+{
+ return 0;
+}
+#endif
+
#endif
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index f44ff2476758..b8b16437c6d5 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -55,6 +55,7 @@ struct prefix_info {
struct in6_validator_info {
struct in6_addr i6vi_addr;
struct inet6_dev *i6vi_dev;
+ struct netlink_ext_ack *extack;
};
#define IN6_ADDR_HSIZE_SHIFT 4
@@ -94,8 +95,8 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
u32 banned_flags);
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
u32 banned_flags);
-int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard);
+bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard);
void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr);
void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 3ac79150291f..2b3a6eec4570 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -49,17 +49,19 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
unsigned long,
s64,
gfp_t,
- rxrpc_notify_rx_t);
+ rxrpc_notify_rx_t,
+ bool);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
rxrpc_notify_end_tx_t);
int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
- void *, size_t, size_t *, bool, u32 *);
+ void *, size_t, size_t *, bool, u32 *, u16 *);
bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
u32, int, const char *);
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *);
+u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
@@ -67,5 +69,6 @@ int rxrpc_kernel_retry_call(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *, struct key *);
int rxrpc_kernel_check_call(struct socket *, struct rxrpc_call *,
enum rxrpc_call_completion *, u32 *);
+u32 rxrpc_kernel_check_life(struct socket *, struct rxrpc_call *);
#endif /* _NET_RXRPC_H */
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index f9fb566e75cf..9324ac2d9ff2 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -22,11 +22,13 @@
#include "vsock_addr.h"
-/* vsock-specific sock->sk_state constants */
-#define VSOCK_SS_LISTEN 255
-
#define LAST_RESERVED_PORT 1023
+#define VSOCK_HASH_SIZE 251
+extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+extern spinlock_t vsock_table_lock;
+
#define vsock_sk(__sk) ((struct vsock_sock *)__sk)
#define sk_vsock(__vsk) (&(__vsk)->sk)
@@ -175,6 +177,18 @@ const struct vsock_transport *vsock_core_get_transport(void);
/**** UTILS ****/
+/* vsock_table_lock must be held */
+static inline bool __vsock_in_bound_table(struct vsock_sock *vsk)
+{
+ return !list_empty(&vsk->bound_table);
+}
+
+/* vsock_table_lock must be held */
+static inline bool __vsock_in_connected_table(struct vsock_sock *vsk)
+{
+ return !list_empty(&vsk->connected_table);
+}
+
void vsock_release_pending(struct sock *pending);
void vsock_add_pending(struct sock *listener, struct sock *pending);
void vsock_remove_pending(struct sock *listener, struct sock *pending);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index b2e68657a216..2860cc66c2bb 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -596,7 +596,8 @@ void bond_destroy_sysfs(struct bond_net *net);
void bond_prepare_sysfs_group(struct bonding *bond);
int bond_sysfs_slave_add(struct slave *slave);
void bond_sysfs_slave_del(struct slave *slave);
-int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev);
+int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack);
int bond_release(struct net_device *bond_dev, struct net_device *slave_dev);
u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb);
int bond_set_carrier(struct bonding *bond);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f12fa5245a45..8b8118a7fadb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4347,19 +4347,6 @@ static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
}
/**
- * ieee80211_data_from_8023 - convert an 802.3 frame to 802.11
- * @skb: the 802.3 frame
- * @addr: the device MAC address
- * @iftype: the virtual interface type
- * @bssid: the network bssid (used only for iftype STATION and ADHOC)
- * @qos: build 802.11 QoS data frame
- * Return: 0 on success, or a negative error code.
- */
-int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype, const u8 *bssid,
- bool qos);
-
-/**
* ieee80211_amsdu_to_8023s - decode an IEEE 802.11n A-MSDU frame
*
* Decode an IEEE 802.11 A-MSDU and convert it to a list of 802.3 frames.
@@ -5441,9 +5428,6 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid,
* @req_ie_len: association request IEs length
* @resp_ie: association response IEs (may be %NULL)
* @resp_ie_len: assoc response IEs length
- * @authorized: true if the 802.1X authentication was done by the driver or is
- * not needed (e.g., when Fast Transition protocol was used), false
- * otherwise. Ignored for networks that don't use 802.1X authentication.
*/
struct cfg80211_roam_info {
struct ieee80211_channel *channel;
@@ -5453,7 +5437,6 @@ struct cfg80211_roam_info {
size_t req_ie_len;
const u8 *resp_ie;
size_t resp_ie_len;
- bool authorized;
};
/**
@@ -5478,6 +5461,23 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
gfp_t gfp);
/**
+ * cfg80211_port_authorized - notify cfg80211 of successful security association
+ *
+ * @dev: network device
+ * @bssid: the BSSID of the AP
+ * @gfp: allocation flags
+ *
+ * This function should be called by a driver that supports 4 way handshake
+ * offload after a security association was successfully established (i.e.,
+ * the 4 way handshake was completed successfully). The call to this function
+ * should be preceded with a call to cfg80211_connect_result(),
+ * cfg80211_connect_done(), cfg80211_connect_bss() or cfg80211_roamed() to
+ * indicate the 802.11 association.
+ */
+void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+ gfp_t gfp);
+
+/**
* cfg80211_disconnected - notify cfg80211 that connection was dropped
*
* @dev: network device
@@ -5934,7 +5934,8 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
* @ies: the IE buffer
* @ielen: the length of the IE buffer
* @ids: an array with element IDs that are allowed before
- * the split
+ * the split. A WLAN_EID_EXTENSION value means that the next
+ * EID in the list is a sub-element of the EXTENSION IE.
* @n_ids: the size of the element ID array
* @after_ric: array IE types that come after the RIC element
* @n_after_ric: size of the @after_ric array
@@ -5965,7 +5966,8 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
* @ies: the IE buffer
* @ielen: the length of the IE buffer
* @ids: an array with element IDs that are allowed before
- * the split
+ * the split. A WLAN_EID_EXTENSION value means that the next
+ * EID in the list is a sub-element of the EXTENSION IE.
* @n_ids: the size of the element ID array
* @offset: offset where to start splitting in the buffer
*
diff --git a/include/net/dn.h b/include/net/dn.h
index 913b73d239f5..4394f7d5cfe8 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -122,13 +122,6 @@ struct dn_scp /* Session Control Port */
unsigned long keepalive;
void (*keepalive_fxn)(struct sock *sk);
- /*
- * This stuff is for the fast timer for delayed acks
- */
- struct timer_list delack_timer;
- int delack_pending;
- void (*delack_fxn)(struct sock *sk);
-
};
static inline struct dn_scp *DN_SK(struct sock *sk)
diff --git a/include/net/dn_nsp.h b/include/net/dn_nsp.h
index 3a3e33d18456..413a15e5339c 100644
--- a/include/net/dn_nsp.h
+++ b/include/net/dn_nsp.h
@@ -17,7 +17,6 @@
void dn_nsp_send_data_ack(struct sock *sk);
void dn_nsp_send_oth_ack(struct sock *sk);
-void dn_nsp_delayed_ack(struct sock *sk);
void dn_send_conn_ack(struct sock *sk);
void dn_send_conn_conf(struct sock *sk, gfp_t gfp);
void dn_nsp_send_disc(struct sock *sk, unsigned char type,
diff --git a/include/net/dsa.h b/include/net/dsa.h
index dd44d6ce1097..38961ef91d3d 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -130,11 +130,6 @@ struct dsa_switch_tree {
*/
struct dsa_platform_data *pd;
- /* Copy of tag_ops->rcv for faster access in hot path */
- struct sk_buff * (*rcv)(struct sk_buff *skb,
- struct net_device *dev,
- struct packet_type *pt);
-
/*
* The switch port to which the CPU is attached.
*/
@@ -144,12 +139,6 @@ struct dsa_switch_tree {
* Data for the individual switch chips.
*/
struct dsa_switch *ds[DSA_MAX_SWITCHES];
-
- /*
- * Tagging protocol operations for adding and removing an
- * encapsulation tag.
- */
- const struct dsa_device_ops *tag_ops;
};
/* TC matchall action types, only mirroring for now */
@@ -175,11 +164,26 @@ struct dsa_mall_tc_entry {
struct dsa_port {
+ /* A CPU port is physically connected to a master device.
+ * A user port exposed to userspace has a slave device.
+ */
+ union {
+ struct net_device *master;
+ struct net_device *slave;
+ };
+
+ /* CPU port tagging operations used by master or slave devices */
+ const struct dsa_device_ops *tag_ops;
+
+ /* Copies for faster access in master receive hot path */
+ struct dsa_switch_tree *dst;
+ struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt);
+
struct dsa_switch *ds;
unsigned int index;
const char *name;
struct dsa_port *cpu_dp;
- struct net_device *netdev;
struct device_node *dn;
unsigned int ageing_time;
u8 stp_state;
@@ -188,7 +192,6 @@ struct dsa_port {
/*
* Original copy of the master netdev ethtool_ops
*/
- struct ethtool_ops ethtool_ops;
const struct ethtool_ops *orig_ethtool_ops;
};
@@ -266,6 +269,11 @@ static inline bool dsa_is_normal_port(struct dsa_switch *ds, int p)
return !dsa_is_cpu_port(ds, p) && !dsa_is_dsa_port(ds, p);
}
+static inline const struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p)
+{
+ return &ds->ports[p];
+}
+
static inline u8 dsa_upstream_port(struct dsa_switch *ds)
{
struct dsa_switch_tree *dst = ds->dst;
@@ -295,7 +303,6 @@ struct dsa_switch_ops {
enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds);
int (*setup)(struct dsa_switch *ds);
- int (*set_addr)(struct dsa_switch *ds, u8 *addr);
u32 (*get_phy_flags)(struct dsa_switch *ds, int port);
/*
@@ -475,4 +482,54 @@ static inline int dsa_switch_resume(struct dsa_switch *ds)
}
#endif /* CONFIG_PM_SLEEP */
+enum dsa_notifier_type {
+ DSA_PORT_REGISTER,
+ DSA_PORT_UNREGISTER,
+};
+
+struct dsa_notifier_info {
+ struct net_device *dev;
+};
+
+struct dsa_notifier_register_info {
+ struct dsa_notifier_info info; /* must be first */
+ struct net_device *master;
+ unsigned int port_number;
+ unsigned int switch_number;
+};
+
+static inline struct net_device *
+dsa_notifier_info_to_dev(const struct dsa_notifier_info *info)
+{
+ return info->dev;
+}
+
+#if IS_ENABLED(CONFIG_NET_DSA)
+int register_dsa_notifier(struct notifier_block *nb);
+int unregister_dsa_notifier(struct notifier_block *nb);
+int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ struct dsa_notifier_info *info);
+#else
+static inline int register_dsa_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int unregister_dsa_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+
+static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ struct dsa_notifier_info *info)
+{
+ return NOTIFY_DONE;
+}
+#endif
+
+/* Broadcom tag specific helpers to insert and extract queue/port number */
+#define BRCM_TAG_SET_PORT_QUEUE(p, q) ((p) << 8 | q)
+#define BRCM_TAG_GET_PORT(v) ((v) >> 8)
+#define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
+
#endif
diff --git a/include/net/dst.h b/include/net/dst.h
index 06a6765da074..5047e8053d6c 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -101,7 +101,7 @@ struct dst_entry {
union {
struct dst_entry *next;
struct rtable __rcu *rt_next;
- struct rt6_info *rt6_next;
+ struct rt6_info __rcu *rt6_next;
struct dn_route __rcu *dn_next;
};
};
@@ -255,17 +255,18 @@ static inline void dst_hold(struct dst_entry *dst)
WARN_ON(atomic_inc_not_zero(&dst->__refcnt) == 0);
}
-static inline void dst_use(struct dst_entry *dst, unsigned long time)
+static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
{
- dst_hold(dst);
- dst->__use++;
- dst->lastuse = time;
+ if (time != dst->lastuse) {
+ dst->__use++;
+ dst->lastuse = time;
+ }
}
-static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
+static inline void dst_hold_and_use(struct dst_entry *dst, unsigned long time)
{
- dst->__use++;
- dst->lastuse = time;
+ dst_hold(dst);
+ dst_use_noref(dst, time);
}
static inline struct dst_entry *dst_clone(struct dst_entry *dst)
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index a803129a4849..87a0bb8d449f 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -24,7 +24,7 @@ struct metadata_dst {
} u;
};
-static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
+static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb)
{
struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb);
@@ -34,7 +34,8 @@ static inline struct metadata_dst *skb_metadata_dst(struct sk_buff *skb)
return NULL;
}
-static inline struct ip_tunnel_info *skb_tunnel_info(struct sk_buff *skb)
+static inline struct ip_tunnel_info *
+skb_tunnel_info(const struct sk_buff *skb)
{
struct metadata_dst *md_dst = skb_metadata_dst(skb);
struct dst_entry *dst;
@@ -86,6 +87,7 @@ static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a,
void metadata_dst_free(struct metadata_dst *);
struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type,
gfp_t flags);
+void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst);
struct metadata_dst __percpu *
metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags);
diff --git a/include/net/fib_notifier.h b/include/net/fib_notifier.h
index 669b9716dc7a..54cd6b839d2f 100644
--- a/include/net/fib_notifier.h
+++ b/include/net/fib_notifier.h
@@ -20,6 +20,8 @@ enum fib_event_type {
FIB_EVENT_RULE_DEL,
FIB_EVENT_NH_ADD,
FIB_EVENT_NH_DEL,
+ FIB_EVENT_VIF_ADD,
+ FIB_EVENT_VIF_DEL,
};
struct fib_notifier_ops {
diff --git a/include/net/fq.h b/include/net/fq.h
index 6d8521a30c5c..ac944a686840 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -90,6 +90,13 @@ typedef void fq_skb_free_t(struct fq *,
struct fq_flow *,
struct sk_buff *);
+/* Return %true to filter (drop) the frame. */
+typedef bool fq_skb_filter_t(struct fq *,
+ struct fq_tin *,
+ struct fq_flow *,
+ struct sk_buff *,
+ void *);
+
typedef struct fq_flow *fq_flow_get_default_t(struct fq *,
struct fq_tin *,
int idx,
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index 4e6131cd3f43..8b237e4afee6 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -12,24 +12,22 @@
/* functions that are embedded into includer */
-static struct sk_buff *fq_flow_dequeue(struct fq *fq,
- struct fq_flow *flow)
+static void fq_adjust_removal(struct fq *fq,
+ struct fq_flow *flow,
+ struct sk_buff *skb)
{
struct fq_tin *tin = flow->tin;
- struct fq_flow *i;
- struct sk_buff *skb;
-
- lockdep_assert_held(&fq->lock);
-
- skb = __skb_dequeue(&flow->queue);
- if (!skb)
- return NULL;
tin->backlog_bytes -= skb->len;
tin->backlog_packets--;
flow->backlog -= skb->len;
fq->backlog--;
fq->memory_usage -= skb->truesize;
+}
+
+static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
+{
+ struct fq_flow *i;
if (flow->backlog == 0) {
list_del_init(&flow->backlogchain);
@@ -43,6 +41,21 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq,
list_move_tail(&flow->backlogchain,
&i->backlogchain);
}
+}
+
+static struct sk_buff *fq_flow_dequeue(struct fq *fq,
+ struct fq_flow *flow)
+{
+ struct sk_buff *skb;
+
+ lockdep_assert_held(&fq->lock);
+
+ skb = __skb_dequeue(&flow->queue);
+ if (!skb)
+ return NULL;
+
+ fq_adjust_removal(fq, flow, skb);
+ fq_rejigger_backlog(fq, flow);
return skb;
}
@@ -188,6 +201,45 @@ static void fq_tin_enqueue(struct fq *fq,
}
}
+static void fq_flow_filter(struct fq *fq,
+ struct fq_flow *flow,
+ fq_skb_filter_t filter_func,
+ void *filter_data,
+ fq_skb_free_t free_func)
+{
+ struct fq_tin *tin = flow->tin;
+ struct sk_buff *skb, *tmp;
+
+ lockdep_assert_held(&fq->lock);
+
+ skb_queue_walk_safe(&flow->queue, skb, tmp) {
+ if (!filter_func(fq, tin, flow, skb, filter_data))
+ continue;
+
+ __skb_unlink(skb, &flow->queue);
+ fq_adjust_removal(fq, flow, skb);
+ free_func(fq, tin, flow, skb);
+ }
+
+ fq_rejigger_backlog(fq, flow);
+}
+
+static void fq_tin_filter(struct fq *fq,
+ struct fq_tin *tin,
+ fq_skb_filter_t filter_func,
+ void *filter_data,
+ fq_skb_free_t free_func)
+{
+ struct fq_flow *flow;
+
+ lockdep_assert_held(&fq->lock);
+
+ list_for_each_entry(flow, &tin->new_flows, flowchain)
+ fq_flow_filter(fq, flow, filter_func, filter_data, free_func);
+ list_for_each_entry(flow, &tin->old_flows, flowchain)
+ fq_flow_filter(fq, flow, filter_func, filter_data, free_func);
+}
+
static void fq_flow_reset(struct fq *fq,
struct fq_flow *flow,
fq_skb_free_t free_func)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 13e4c89a8231..0358745ea059 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -169,9 +169,9 @@ enum inet_csk_ack_state_t {
};
void inet_csk_init_xmit_timers(struct sock *sk,
- void (*retransmit_handler)(unsigned long),
- void (*delack_handler)(unsigned long),
- void (*keepalive_handler)(unsigned long));
+ void (*retransmit_handler)(struct timer_list *),
+ void (*delack_handler)(struct timer_list *),
+ void (*keepalive_handler)(struct timer_list *));
void inet_csk_clear_xmit_timers(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index dce2d586d9ce..f5ff16d72fe6 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -133,11 +133,6 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
return 1;
}
-static inline void IP6_ECN_clear(struct ipv6hdr *iph)
-{
- *(__be32*)iph &= ~htonl(INET_ECN_MASK << 20);
-}
-
static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
{
dscp &= ~INET_ECN_MASK;
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index fc59e0775e00..c695807ca707 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -95,7 +95,7 @@ struct inet_frags {
void (*constructor)(struct inet_frag_queue *q,
const void *arg);
void (*destructor)(struct inet_frag_queue *);
- void (*frag_expire)(unsigned long data);
+ void (*frag_expire)(struct timer_list *t);
struct kmem_cache *frags_cachep;
const char *frags_cache_name;
};
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index d060d711a624..10c913816032 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -29,6 +29,14 @@
#define FIB6_TABLE_HASHSZ 1
#endif
+#define RT6_DEBUG 2
+
+#if RT6_DEBUG >= 3
+#define RT6_TRACE(x...) pr_debug(x)
+#else
+#define RT6_TRACE(x...) do { ; } while (0)
+#endif
+
struct rt6_info;
struct fib6_config {
@@ -60,25 +68,30 @@ struct fib6_config {
};
struct fib6_node {
- struct fib6_node *parent;
- struct fib6_node *left;
- struct fib6_node *right;
+ struct fib6_node __rcu *parent;
+ struct fib6_node __rcu *left;
+ struct fib6_node __rcu *right;
#ifdef CONFIG_IPV6_SUBTREES
- struct fib6_node *subtree;
+ struct fib6_node __rcu *subtree;
#endif
- struct rt6_info *leaf;
+ struct rt6_info __rcu *leaf;
__u16 fn_bit; /* bit key */
__u16 fn_flags;
int fn_sernum;
- struct rt6_info *rr_ptr;
+ struct rt6_info __rcu *rr_ptr;
struct rcu_head rcu;
};
+struct fib6_gc_args {
+ int timeout;
+ int more;
+};
+
#ifndef CONFIG_IPV6_SUBTREES
#define FIB6_SUBTREE(fn) NULL
#else
-#define FIB6_SUBTREE(fn) ((fn)->subtree)
+#define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1))
#endif
struct mx6_config {
@@ -98,6 +111,22 @@ struct rt6key {
struct fib6_table;
+struct rt6_exception_bucket {
+ struct hlist_head chain;
+ int depth;
+};
+
+struct rt6_exception {
+ struct hlist_node hlist;
+ struct rt6_info *rt6i;
+ unsigned long stamp;
+ struct rcu_head rcu;
+};
+
+#define FIB6_EXCEPTION_BUCKET_SIZE_SHIFT 10
+#define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT)
+#define FIB6_MAX_DEPTH 5
+
struct rt6_info {
struct dst_entry dst;
@@ -134,14 +163,25 @@ struct rt6_info {
struct inet6_dev *rt6i_idev;
struct rt6_info * __percpu *rt6i_pcpu;
+ struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
u32 rt6i_metric;
u32 rt6i_pmtu;
/* more non-fragment space at head required */
unsigned short rt6i_nfheader_len;
u8 rt6i_protocol;
+ u8 exception_bucket_flushed:1,
+ unused:7;
};
+#define for_each_fib6_node_rt_rcu(fn) \
+ for (rt = rcu_dereference((fn)->leaf); rt; \
+ rt = rcu_dereference(rt->dst.rt6_next))
+
+#define for_each_fib6_walker_rt(w) \
+ for (rt = (w)->leaf; rt; \
+ rt = rcu_dereference_protected(rt->dst.rt6_next, 1))
+
static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst)
{
return ((struct rt6_info *)dst)->rt6i_idev;
@@ -188,6 +228,8 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt,
if (fn) {
*cookie = fn->fn_sernum;
+ /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */
+ smp_rmb();
status = true;
}
@@ -248,7 +290,6 @@ struct fib6_walker {
struct fib6_node *root, *node;
struct rt6_info *leaf;
enum fib6_walk_state state;
- bool prune;
unsigned int skip;
unsigned int count;
int (*func)(struct fib6_walker *);
@@ -256,12 +297,15 @@ struct fib6_walker {
};
struct rt6_statistics {
- __u32 fib_nodes;
- __u32 fib_route_nodes;
- __u32 fib_rt_alloc; /* permanent routes */
- __u32 fib_rt_entries; /* rt entries in table */
- __u32 fib_rt_cache; /* cache routes */
- __u32 fib_discarded_routes;
+ __u32 fib_nodes; /* all fib6 nodes */
+ __u32 fib_route_nodes; /* intermediate nodes */
+ __u32 fib_rt_entries; /* rt entries in fib table */
+ __u32 fib_rt_cache; /* cached rt entries in exception table */
+ __u32 fib_discarded_routes; /* total number of routes delete */
+
+ /* The following stats are not protected by any lock */
+ atomic_t fib_rt_alloc; /* total number of routes alloced */
+ atomic_t fib_rt_uncache; /* rt entries in uncached list */
};
#define RTN_TL_ROOT 0x0001
@@ -277,7 +321,7 @@ struct rt6_statistics {
struct fib6_table {
struct hlist_node tb6_hlist;
u32 tb6_id;
- rwlock_t tb6_lock;
+ spinlock_t tb6_lock;
struct fib6_node tb6_root;
struct inet_peer_base tb6_peers;
unsigned int flags;
@@ -325,7 +369,8 @@ struct fib6_node *fib6_lookup(struct fib6_node *root,
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len);
+ const struct in6_addr *saddr, int src_len,
+ bool exact_match);
void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),
void *arg);
@@ -358,6 +403,8 @@ void __net_exit fib6_notifier_exit(struct net *net);
unsigned int fib6_tables_seq_read(struct net *net);
int fib6_tables_dump(struct net *net, struct notifier_block *nb);
+void fib6_update_sernum(struct rt6_info *rt);
+
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
int fib6_rules_init(void);
void fib6_rules_cleanup(void);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index ee96f402cb75..a0087fb9864b 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -95,6 +95,11 @@ int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack);
int ip6_ins_rt(struct rt6_info *);
int ip6_del_rt(struct rt6_info *);
+void rt6_flush_exceptions(struct rt6_info *rt);
+int rt6_remove_exception_rt(struct rt6_info *rt);
+void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args,
+ unsigned long now);
+
static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt,
const struct in6_addr *daddr,
unsigned int prefs,
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 1a7f7e424320..f80524396c06 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -122,9 +122,6 @@ struct fib_info {
#define fib_rtt fib_metrics->metrics[RTAX_RTT-1]
#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
int fib_nhs;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- int fib_weight;
-#endif
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].nh_dev
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 992652856fe8..b41a1e057fce 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -258,7 +258,8 @@ int ip_tunnel_get_iflink(const struct net_device *dev);
int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);
-void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops);
+void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
+ struct rtnl_link_ops *ops);
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 6eac5cf8f1e6..3cda3b521c36 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -300,8 +300,8 @@ static inline void fl6_sock_release(struct ip6_flowlabel *fl)
void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info);
-int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
- struct icmp6hdr *thdr, int len);
+void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len);
int ip6_ra_control(struct sock *sk, int sel);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 885690fa39c8..cc9073e45be9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5441,8 +5441,14 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
*/
void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn);
+/**
+ * ieee80211_manage_rx_ba_offl - helper to queue an RX BA work
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback
+ * @addr: station mac address
+ * @tid: the rx tid
+ */
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, const u8 *addr,
- unsigned int bit);
+ unsigned int tid);
/**
* ieee80211_start_rx_ba_session_offl - start a Rx BA session
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 9816df225af3..2492000e1035 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -190,8 +190,8 @@ struct neigh_hash_table {
struct neigh_table {
int family;
- int entry_size;
- int key_len;
+ unsigned int entry_size;
+ unsigned int key_len;
__be16 protocol;
__u32 (*hash)(const void *pkey,
const struct net_device *dev,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 20d061c805e3..2c4222a5d102 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -36,6 +36,8 @@ struct inet_timewait_death_row {
int sysctl_max_tw_buckets;
};
+struct tcp_fastopen_context;
+
struct netns_ipv4 {
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -52,6 +54,7 @@ struct netns_ipv4 {
struct fib_table __rcu *fib_main;
struct fib_table __rcu *fib_default;
#endif
+ bool fib_has_custom_local_routes;
#ifdef CONFIG_IP_ROUTE_CLASSID
int fib_num_tclassid_users;
#endif
@@ -127,6 +130,12 @@ struct netns_ipv4 {
int sysctl_tcp_timestamps;
struct inet_timewait_death_row tcp_death_row;
int sysctl_max_syn_backlog;
+ int sysctl_tcp_fastopen;
+ struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
+ spinlock_t tcp_fastopen_ctx_lock;
+ unsigned int sysctl_tcp_fastopen_blackhole_timeout;
+ atomic_t tfo_active_disable_times;
+ unsigned long tfo_active_disable_stamp;
#ifdef CONFIG_NET_L3_MASTER_DEV
int sysctl_udp_l3mdev_accept;
@@ -162,6 +171,9 @@ struct netns_ipv4 {
struct fib_notifier_ops *notifier_ops;
unsigned int fib_seq; /* protected by rtnl_mutex */
+ struct fib_notifier_ops *ipmr_notifier_ops;
+ unsigned int ipmr_seq; /* protected by rtnl_mutex */
+
atomic_t rt_genid;
};
#endif
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 2544f9760a42..2ea1ed341ef8 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -89,6 +89,11 @@ struct netns_ipv6 {
atomic_t fib6_sernum;
struct seg6_pernet_data *seg6_data;
struct fib_notifier_ops *notifier_ops;
+ struct {
+ struct hlist_head head;
+ spinlock_t lock;
+ u32 seq;
+ } ip6addrlbl_table;
};
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h
index 039cc29cb4a8..51e1a2a45d02 100644
--- a/include/net/phonet/phonet.h
+++ b/include/net/phonet/phonet.h
@@ -108,8 +108,10 @@ struct phonet_protocol {
int sock_type;
};
-int phonet_proto_register(unsigned int protocol, struct phonet_protocol *pp);
-void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp);
+int phonet_proto_register(unsigned int protocol,
+ const struct phonet_protocol *pp);
+void phonet_proto_unregister(unsigned int protocol,
+ const struct phonet_protocol *pp);
int phonet_sysctl_init(void);
void phonet_sysctl_exit(void);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e80edd8879ef..04caa246e747 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -17,20 +17,72 @@ struct tcf_walker {
int register_tcf_proto_ops(struct tcf_proto_ops *ops);
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
+enum tcf_block_binder_type {
+ TCF_BLOCK_BINDER_TYPE_UNSPEC,
+ TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
+ TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
+};
+
+struct tcf_block_ext_info {
+ enum tcf_block_binder_type binder_type;
+};
+
+struct tcf_block_cb;
+
#ifdef CONFIG_NET_CLS
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
bool create);
void tcf_chain_put(struct tcf_chain *chain);
int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain);
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q);
+int tcf_block_get_ext(struct tcf_block **p_block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct tcf_block_ext_info *ei);
void tcf_block_put(struct tcf_block *block);
+void tcf_block_put_ext(struct tcf_block *block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct tcf_block_ext_info *ei);
+
+static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
+{
+ return block->q;
+}
+
+static inline struct net_device *tcf_block_dev(struct tcf_block *block)
+{
+ return tcf_block_q(block)->dev_queue->dev;
+}
+
+void *tcf_block_cb_priv(struct tcf_block_cb *block_cb);
+struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident);
+void tcf_block_cb_incref(struct tcf_block_cb *block_cb);
+unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb);
+struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv);
+int tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv);
+void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb);
+void tcf_block_cb_unregister(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident);
+
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode);
#else
static inline
int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain)
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
+{
+ return 0;
+}
+
+static inline
+int tcf_block_get_ext(struct tcf_block **p_block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
{
return 0;
}
@@ -39,6 +91,87 @@ static inline void tcf_block_put(struct tcf_block *block)
{
}
+static inline
+void tcf_block_put_ext(struct tcf_block *block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+}
+
+static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
+{
+ return NULL;
+}
+
+static inline struct net_device *tcf_block_dev(struct tcf_block *block)
+{
+ return NULL;
+}
+
+static inline
+int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb,
+ void *cb_priv)
+{
+ return 0;
+}
+
+static inline
+void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb,
+ void *cb_priv)
+{
+}
+
+static inline
+void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
+{
+ return NULL;
+}
+
+static inline
+struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{
+ return NULL;
+}
+
+static inline
+void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
+{
+}
+
+static inline
+unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
+{
+ return 0;
+}
+
+static inline
+struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ return NULL;
+}
+
+static inline
+int tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ return 0;
+}
+
+static inline
+void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+{
+}
+
+static inline
+void tcf_block_cb_unregister(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{
+}
+
static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode)
{
@@ -53,36 +186,43 @@ __cls_set_class(unsigned long *clp, unsigned long cl)
}
static inline unsigned long
-cls_set_class(struct tcf_proto *tp, unsigned long *clp,
- unsigned long cl)
+cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl)
{
unsigned long old_cl;
-
- tcf_tree_lock(tp);
+
+ sch_tree_lock(q);
old_cl = __cls_set_class(clp, cl);
- tcf_tree_unlock(tp);
-
+ sch_tree_unlock(q);
return old_cl;
}
static inline void
tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base)
{
+ struct Qdisc *q = tp->chain->block->q;
unsigned long cl;
- cl = tp->q->ops->cl_ops->bind_tcf(tp->q, base, r->classid);
- cl = cls_set_class(tp, &r->class, cl);
+ /* Check q as it is not set for shared blocks. In that case,
+ * setting class is not supported.
+ */
+ if (!q)
+ return;
+ cl = q->ops->cl_ops->bind_tcf(q, base, r->classid);
+ cl = cls_set_class(q, &r->class, cl);
if (cl)
- tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+ q->ops->cl_ops->unbind_tcf(q, cl);
}
static inline void
tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
{
+ struct Qdisc *q = tp->chain->block->q;
unsigned long cl;
+ if (!q)
+ return;
if ((cl = __cls_set_class(&r->class, 0)) != 0)
- tp->q->ops->cl_ops->unbind_tcf(tp->q, cl);
+ q->ops->cl_ops->unbind_tcf(q, cl);
}
struct tcf_exts {
@@ -204,8 +344,6 @@ void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
-int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
- struct net_device **hw_dev);
/**
* struct tcf_pkt_info - packet information
@@ -405,11 +543,24 @@ tcf_match_indev(struct sk_buff *skb, int ifindex)
}
#endif /* CONFIG_NET_CLS_IND */
+int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
+ enum tc_setup_type type, void *type_data, bool err_stop);
+
+enum tc_block_command {
+ TC_BLOCK_BIND,
+ TC_BLOCK_UNBIND,
+};
+
+struct tc_block_offload {
+ enum tc_block_command command;
+ enum tcf_block_binder_type binder_type;
+ struct tcf_block *block;
+};
+
struct tc_cls_common_offload {
u32 chain_index;
__be16 protocol;
u32 prio;
- u32 classid;
};
static inline void
@@ -419,7 +570,6 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common,
cls_common->chain_index = tp->chain->index;
cls_common->protocol = tp->protocol;
cls_common->prio = tp->prio;
- cls_common->classid = tp->classid;
}
struct tc_cls_u32_knode {
@@ -514,7 +664,6 @@ struct tc_cls_flower_offload {
struct fl_flow_key *mask;
struct fl_flow_key *key;
struct tcf_exts *exts;
- bool egress_dev;
};
enum tc_matchall_command {
@@ -546,6 +695,15 @@ struct tc_cls_bpf_offload {
u32 gen_flags;
};
+struct tc_mqprio_qopt_offload {
+ /* struct tc_mqprio_qopt must always be the first element */
+ struct tc_mqprio_qopt qopt;
+ u16 mode;
+ u16 shaper;
+ u32 flags;
+ u64 min_rate[TC_QOPT_MAX_QUEUE];
+ u64 max_rate[TC_QOPT_MAX_QUEUE];
+};
/* This structure holds cookie structure that is passed from user
* to the kernel for actions and classifiers
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 259bc191ba59..b8ecafce4ba1 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -4,7 +4,9 @@
#include <linux/jiffies.h>
#include <linux/ktime.h>
#include <linux/if_vlan.h>
+#include <linux/netdevice.h>
#include <net/sch_generic.h>
+#include <net/net_namespace.h>
#include <uapi/linux/pkt_sched.h>
#define DEFAULT_TX_QUEUE_LEN 1000
@@ -133,17 +135,9 @@ static inline unsigned int psched_mtu(const struct net_device *dev)
return dev->mtu + dev->hard_header_len;
}
-static inline bool is_classid_clsact_ingress(u32 classid)
+static inline struct net *qdisc_net(struct Qdisc *q)
{
- /* This also returns true for ingress qdisc */
- return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) &&
- TC_H_MIN(classid) != TC_H_MIN(TC_H_MIN_EGRESS);
-}
-
-static inline bool is_classid_clsact_egress(u32 classid)
-{
- return TC_H_MAJ(classid) == TC_H_MAJ(TC_H_CLSACT) &&
- TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_EGRESS);
+ return dev_net(q->dev_queue->dev);
}
#endif
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 23e22054aa60..347015515a7d 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -150,6 +150,8 @@ struct fastopen_queue {
spinlock_t lock;
int qlen; /* # of pending (TCP_SYN_RECV) reqs */
int max_qlen; /* != 0 iff TFO is currently enabled */
+
+ struct tcp_fastopen_context __rcu *ctx; /* cipher context for cookie */
};
/** struct request_sock_queue - queue of request_socks
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 21837ca68ecc..e3ca8e2e3103 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -93,9 +93,6 @@ struct rtnl_link_ops {
int slave_maxtype;
const struct nla_policy *slave_policy;
- int (*slave_validate)(struct nlattr *tb[],
- struct nlattr *data[],
- struct netlink_ext_ack *extack);
int (*slave_changelink)(struct net_device *dev,
struct net_device *slave_dev,
struct nlattr *tb[],
@@ -154,8 +151,6 @@ struct rtnl_af_ops {
size_t (*get_stats_af_size)(const struct net_device *dev);
};
-void __rtnl_af_unregister(struct rtnl_af_ops *ops);
-
void rtnl_af_register(struct rtnl_af_ops *ops);
void rtnl_af_unregister(struct rtnl_af_ops *ops);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 135f5a2dd931..031dffd5836c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -93,7 +93,6 @@ struct Qdisc {
unsigned long state;
struct Qdisc *next_sched;
struct sk_buff *skb_bad_txq;
- struct rcu_head rcu_head;
int padded;
refcount_t refcnt;
@@ -271,6 +270,9 @@ struct tcf_chain {
struct tcf_block {
struct list_head chain_list;
+ struct net *net;
+ struct Qdisc *q;
+ struct list_head cb_list;
};
static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz)
@@ -358,9 +360,6 @@ static inline void sch_tree_unlock(const struct Qdisc *q)
spin_unlock_bh(qdisc_root_sleeping_lock(q));
}
-#define tcf_tree_lock(tp) sch_tree_lock((tp)->q)
-#define tcf_tree_unlock(tp) sch_tree_unlock((tp)->q)
-
extern struct Qdisc noop_qdisc;
extern struct Qdisc_ops noop_qdisc_ops;
extern struct Qdisc_ops pfifo_fast_ops;
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
new file mode 100644
index 000000000000..c676550a4c7d
--- /dev/null
+++ b/include/net/sctp/stream_sched.h
@@ -0,0 +1,72 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * These are definitions used by the stream schedulers, defined in RFC
+ * draft ndata (https://tools.ietf.org/html/draft-ietf-tsvwg-sctp-ndata-11)
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresses:
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#ifndef __sctp_stream_sched_h__
+#define __sctp_stream_sched_h__
+
+struct sctp_sched_ops {
+ /* Property handling for a given stream */
+ int (*set)(struct sctp_stream *stream, __u16 sid, __u16 value,
+ gfp_t gfp);
+ int (*get)(struct sctp_stream *stream, __u16 sid, __u16 *value);
+
+ /* Init the specific scheduler */
+ int (*init)(struct sctp_stream *stream);
+ /* Init a stream */
+ int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
+ /* Frees the entire thing */
+ void (*free)(struct sctp_stream *stream);
+
+ /* Enqueue a chunk */
+ void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg);
+ /* Dequeue a chunk */
+ struct sctp_chunk *(*dequeue)(struct sctp_outq *q);
+ /* Called only if the chunk fit the packet */
+ void (*dequeue_done)(struct sctp_outq *q, struct sctp_chunk *chunk);
+ /* Sched all chunks already enqueued */
+ void (*sched_all)(struct sctp_stream *steam);
+ /* Unched all chunks already enqueued */
+ void (*unsched_all)(struct sctp_stream *steam);
+};
+
+int sctp_sched_set_sched(struct sctp_association *asoc,
+ enum sctp_sched_type sched);
+int sctp_sched_get_sched(struct sctp_association *asoc);
+int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
+ __u16 value, gfp_t gfp);
+int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
+ __u16 *value);
+void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch);
+
+void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch);
+int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
+struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream);
+
+#endif /* __sctp_stream_sched_h__ */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0477945de1a3..16f949eef52f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -380,6 +380,7 @@ struct sctp_sender_hb_info {
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
gfp_t gfp);
+int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid);
void sctp_stream_free(struct sctp_stream *stream);
void sctp_stream_clear(struct sctp_stream *stream);
void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new);
@@ -529,8 +530,12 @@ struct sctp_chunk {
/* How many times this chunk have been sent, for prsctp RTX policy */
int sent_count;
- /* This is our link to the per-transport transmitted list. */
- struct list_head transmitted_list;
+ union {
+ /* This is our link to the per-transport transmitted list. */
+ struct list_head transmitted_list;
+ /* List in specific stream outq */
+ struct list_head stream_list;
+ };
/* This field is used by chunks that hold fragmented data.
* For the first fragment this is the list that holds the rest of
@@ -640,6 +645,11 @@ void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *,
union sctp_addr *);
const union sctp_addr *sctp_source(const struct sctp_chunk *chunk);
+static inline __u16 sctp_chunk_stream_no(struct sctp_chunk *ch)
+{
+ return ntohs(ch->subh.data_hdr->stream);
+}
+
enum {
SCTP_ADDR_NEW, /* new address added to assoc/ep */
SCTP_ADDR_SRC, /* address can be used as source */
@@ -1012,6 +1022,9 @@ struct sctp_outq {
/* Data pending that has never been transmitted. */
struct list_head out_chunk_list;
+ /* Stream scheduler being used */
+ struct sctp_sched_ops *sched;
+
unsigned int out_qlen; /* Total length of queued data chunks. */
/* Error of send failed, may used in SCTP_SEND_FAILED event. */
@@ -1315,11 +1328,37 @@ struct sctp_inithdr_host {
__u32 initial_tsn;
};
+struct sctp_stream_priorities {
+ /* List of priorities scheduled */
+ struct list_head prio_sched;
+ /* List of streams scheduled */
+ struct list_head active;
+ /* The next stream stream in line */
+ struct sctp_stream_out_ext *next;
+ __u16 prio;
+};
+
+struct sctp_stream_out_ext {
+ __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
+ __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
+ struct list_head outq; /* chunks enqueued by this stream */
+ union {
+ struct {
+ /* Scheduled streams list */
+ struct list_head prio_list;
+ struct sctp_stream_priorities *prio_head;
+ };
+ /* Fields used by RR scheduler */
+ struct {
+ struct list_head rr_list;
+ };
+ };
+};
+
struct sctp_stream_out {
__u16 ssn;
__u8 state;
- __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1];
- __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1];
+ struct sctp_stream_out_ext *ext;
};
struct sctp_stream_in {
@@ -1331,6 +1370,22 @@ struct sctp_stream {
struct sctp_stream_in *in;
__u16 outcnt;
__u16 incnt;
+ /* Current stream being sent, if any */
+ struct sctp_stream_out *out_curr;
+ union {
+ /* Fields used by priority scheduler */
+ struct {
+ /* List of priorities scheduled */
+ struct list_head prio_list;
+ };
+ /* Fields used by RR scheduler */
+ struct {
+ /* List of streams scheduled */
+ struct list_head rr_list;
+ /* The next stream stream in line */
+ struct sctp_stream_out_ext *rr_next;
+ };
+ };
};
#define SCTP_STREAM_CLOSED 0x00
diff --git a/include/net/sock.h b/include/net/sock.h
index a6b9a8d1a6df..4827094f1db4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -60,7 +60,7 @@
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/cgroup-defs.h>
-
+#include <linux/rbtree.h>
#include <linux/filter.h>
#include <linux/rculist_nulls.h>
#include <linux/poll.h>
@@ -397,7 +397,10 @@ struct sock {
int sk_wmem_queued;
refcount_t sk_wmem_alloc;
unsigned long sk_tsq_flags;
- struct sk_buff *sk_send_head;
+ union {
+ struct sk_buff *sk_send_head;
+ struct rb_root tcp_rtx_queue;
+ };
struct sk_buff_head sk_write_queue;
__s32 sk_peek_off;
int sk_write_pending;
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index d767b7991887..d756fbe46625 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -51,6 +51,7 @@ enum switchdev_attr_id {
SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
+ SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
};
struct switchdev_attr {
diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index 41afe1ce7b16..d979a0d48f9e 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -33,6 +33,11 @@ static inline bool __is_tcf_gact_act(const struct tc_action *a, int act,
return false;
}
+static inline bool is_tcf_gact_ok(const struct tc_action *a)
+{
+ return __is_tcf_gact_act(a, TC_ACT_OK, false);
+}
+
static inline bool is_tcf_gact_shot(const struct tc_action *a)
{
return __is_tcf_gact_act(a, TC_ACT_SHOT, false);
diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h
index 30ba459ddd34..c7fb99c3f76c 100644
--- a/include/net/tc_act/tc_ife.h
+++ b/include/net/tc_act/tc_ife.h
@@ -6,12 +6,18 @@
#include <linux/rtnetlink.h>
#include <linux/module.h>
-struct tcf_ife_info {
- struct tc_action common;
+struct tcf_ife_params {
u8 eth_dst[ETH_ALEN];
u8 eth_src[ETH_ALEN];
u16 eth_type;
u16 flags;
+
+ struct rcu_head rcu;
+};
+
+struct tcf_ife_info {
+ struct tc_action common;
+ struct tcf_ife_params __rcu *params;
/* list of metaids allowed */
struct list_head metalist;
};
@@ -40,7 +46,7 @@ struct tcf_meta_ops {
struct module *owner;
};
-#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ifemeta" __stringify_1(metan))
+#define MODULE_ALIAS_IFE_META(metan) MODULE_ALIAS("ife-meta-" metan)
int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi);
int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi);
diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h
index 604bc31e23ab..21a656569840 100644
--- a/include/net/tc_act/tc_mirred.h
+++ b/include/net/tc_act/tc_mirred.h
@@ -10,6 +10,7 @@ struct tcf_mirred {
int tcfm_ifindex;
bool tcfm_mac_header_xmit;
struct net_device __rcu *tcfm_dev;
+ struct net *net;
struct list_head tcfm_list;
};
#define to_mirred(a) ((struct tcf_mirred *)a)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b1ef98ebce53..2c13484704cb 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -240,7 +240,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
/* sysctl variables for tcp */
-extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
@@ -417,6 +416,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_disable_fack(struct tcp_sock *tp);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
+void tcp_init_transfer(struct sock *sk, int bpf_op);
unsigned int tcp_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int tcp_getsockopt(struct sock *sk, int level, int optname,
@@ -551,7 +551,13 @@ void tcp_xmit_retransmit_queue(struct sock *);
void tcp_simple_retransmit(struct sock *);
void tcp_enter_recovery(struct sock *sk, bool ece_ack);
int tcp_trim_head(struct sock *, struct sk_buff *, u32);
-int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
+enum tcp_queue {
+ TCP_FRAG_IN_WRITE_QUEUE,
+ TCP_FRAG_IN_RTX_QUEUE,
+};
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, u32 len,
+ unsigned int mss_now, gfp_t gfp);
void tcp_send_probe0(struct sock *);
void tcp_send_partial(struct sock *);
@@ -796,12 +802,6 @@ struct tcp_skb_cb {
u16 tcp_gso_segs;
u16 tcp_gso_size;
};
-
- /* Used to stash the receive timestamp while this skb is in the
- * out of order queue, as skb->tstamp is overwritten by the
- * rbnode.
- */
- ktime_t swtstamp;
};
__u8 tcp_flags; /* TCP header flags. (tcp[13]) */
@@ -1560,14 +1560,15 @@ struct tcp_fastopen_request {
int copied; /* queued in tcp_connect() */
};
void tcp_free_fastopen_req(struct tcp_sock *tp);
-
-extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-int tcp_fastopen_reset_cipher(void *key, unsigned int len);
+void tcp_fastopen_destroy_cipher(struct sock *sk);
+void tcp_fastopen_ctx_destroy(struct net *net);
+int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
+ void *key, unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc);
-void tcp_fastopen_init_key_once(bool publish);
+void tcp_fastopen_init_key_once(struct net *net);
bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
@@ -1600,52 +1601,46 @@ enum tcp_chrono {
void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type);
void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type);
-/* write queue abstraction */
-static inline void tcp_write_queue_purge(struct sock *sk)
+/* This helper is needed, because skb->tcp_tsorted_anchor uses
+ * the same memory storage than skb->destructor/_skb_refdst
+ */
+static inline void tcp_skb_tsorted_anchor_cleanup(struct sk_buff *skb)
{
- struct sk_buff *skb;
-
- tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
- while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL)
- sk_wmem_free_skb(sk, skb);
- sk_mem_reclaim(sk);
- tcp_clear_all_retrans_hints(tcp_sk(sk));
+ skb->destructor = NULL;
+ skb->_skb_refdst = 0UL;
}
-static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
-{
- return skb_peek(&sk->sk_write_queue);
+#define tcp_skb_tsorted_save(skb) { \
+ unsigned long _save = skb->_skb_refdst; \
+ skb->_skb_refdst = 0UL;
+
+#define tcp_skb_tsorted_restore(skb) \
+ skb->_skb_refdst = _save; \
}
-static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
+void tcp_write_queue_purge(struct sock *sk);
+
+static inline struct sk_buff *tcp_rtx_queue_head(const struct sock *sk)
{
- return skb_peek_tail(&sk->sk_write_queue);
+ return skb_rb_first(&sk->tcp_rtx_queue);
}
-static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk,
- const struct sk_buff *skb)
+static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
{
- return skb_queue_next(&sk->sk_write_queue, skb);
+ return skb_peek(&sk->sk_write_queue);
}
-static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk,
- const struct sk_buff *skb)
+static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk)
{
- return skb_queue_prev(&sk->sk_write_queue, skb);
+ return skb_peek_tail(&sk->sk_write_queue);
}
-#define tcp_for_write_queue(skb, sk) \
- skb_queue_walk(&(sk)->sk_write_queue, skb)
-
-#define tcp_for_write_queue_from(skb, sk) \
- skb_queue_walk_from(&(sk)->sk_write_queue, skb)
-
#define tcp_for_write_queue_from_safe(skb, tmp, sk) \
skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
static inline struct sk_buff *tcp_send_head(const struct sock *sk)
{
- return sk->sk_send_head;
+ return skb_peek(&sk->sk_write_queue);
}
static inline bool tcp_skb_is_last(const struct sock *sk,
@@ -1654,29 +1649,30 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
return skb_queue_is_last(&sk->sk_write_queue, skb);
}
-static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb)
+static inline bool tcp_write_queue_empty(const struct sock *sk)
{
- if (tcp_skb_is_last(sk, skb))
- sk->sk_send_head = NULL;
- else
- sk->sk_send_head = tcp_write_queue_next(sk, skb);
+ return skb_queue_empty(&sk->sk_write_queue);
+}
+
+static inline bool tcp_rtx_queue_empty(const struct sock *sk)
+{
+ return RB_EMPTY_ROOT(&sk->tcp_rtx_queue);
+}
+
+static inline bool tcp_rtx_and_write_queues_empty(const struct sock *sk)
+{
+ return tcp_rtx_queue_empty(sk) && tcp_write_queue_empty(sk);
}
static inline void tcp_check_send_head(struct sock *sk, struct sk_buff *skb_unlinked)
{
- if (sk->sk_send_head == skb_unlinked) {
- sk->sk_send_head = NULL;
+ if (tcp_write_queue_empty(sk))
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
- }
+
if (tcp_sk(sk)->highest_sack == skb_unlinked)
tcp_sk(sk)->highest_sack = NULL;
}
-static inline void tcp_init_send_head(struct sock *sk)
-{
- sk->sk_send_head = NULL;
-}
-
static inline void __tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb)
{
__skb_queue_tail(&sk->sk_write_queue, skb);
@@ -1687,8 +1683,7 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb
__tcp_add_write_queue_tail(sk, skb);
/* Queue it, remembering where we must start sending. */
- if (sk->sk_send_head == NULL) {
- sk->sk_send_head = skb;
+ if (sk->sk_write_queue.next == skb) {
tcp_chrono_start(sk, TCP_CHRONO_BUSY);
if (tcp_sk(sk)->highest_sack == NULL)
@@ -1696,38 +1691,33 @@ static inline void tcp_add_write_queue_tail(struct sock *sk, struct sk_buff *skb
}
}
-static inline void __tcp_add_write_queue_head(struct sock *sk, struct sk_buff *skb)
-{
- __skb_queue_head(&sk->sk_write_queue, skb);
-}
-
-/* Insert buff after skb on the write queue of sk. */
-static inline void tcp_insert_write_queue_after(struct sk_buff *skb,
- struct sk_buff *buff,
- struct sock *sk)
-{
- __skb_queue_after(&sk->sk_write_queue, skb, buff);
-}
-
/* Insert new before skb on the write queue of sk. */
static inline void tcp_insert_write_queue_before(struct sk_buff *new,
struct sk_buff *skb,
struct sock *sk)
{
__skb_queue_before(&sk->sk_write_queue, skb, new);
-
- if (sk->sk_send_head == skb)
- sk->sk_send_head = new;
}
static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk)
{
+ tcp_skb_tsorted_anchor_cleanup(skb);
__skb_unlink(skb, &sk->sk_write_queue);
}
-static inline bool tcp_write_queue_empty(struct sock *sk)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb);
+
+static inline void tcp_rtx_queue_unlink(struct sk_buff *skb, struct sock *sk)
{
- return skb_queue_empty(&sk->sk_write_queue);
+ tcp_skb_tsorted_anchor_cleanup(skb);
+ rb_erase(&skb->rbnode, &sk->tcp_rtx_queue);
+}
+
+static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct sock *sk)
+{
+ list_del(&skb->tcp_tsorted_anchor);
+ tcp_rtx_queue_unlink(skb, sk);
+ sk_wmem_free_skb(sk, skb);
}
static inline void tcp_push_pending_frames(struct sock *sk)
@@ -1756,8 +1746,9 @@ static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
static inline void tcp_advance_highest_sack(struct sock *sk, struct sk_buff *skb)
{
- tcp_sk(sk)->highest_sack = tcp_skb_is_last(sk, skb) ? NULL :
- tcp_write_queue_next(sk, skb);
+ struct sk_buff *next = skb_rb_next(skb);
+
+ tcp_sk(sk)->highest_sack = next ?: tcp_send_head(sk);
}
static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
@@ -1767,7 +1758,9 @@ static inline struct sk_buff *tcp_highest_sack(struct sock *sk)
static inline void tcp_highest_sack_reset(struct sock *sk)
{
- tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+ struct sk_buff *skb = tcp_rtx_queue_head(sk);
+
+ tcp_sk(sk)->highest_sack = skb ?: tcp_send_head(sk);
}
/* Called when old skb is about to be deleted (to be combined with new skb) */
@@ -1937,7 +1930,7 @@ extern void tcp_rack_reo_timeout(struct sock *sk);
/* At how many usecs into the future should the RTO fire? */
static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
- const struct sk_buff *skb = tcp_write_queue_head(sk);
+ const struct sk_buff *skb = tcp_rtx_queue_head(sk);
u32 rto = inet_csk(sk)->icsk_rto;
u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
diff --git a/include/trace/events/bpf.h b/include/trace/events/bpf.h
index 52c8425d144b..1fb58faa4a44 100644
--- a/include/trace/events/bpf.h
+++ b/include/trace/events/bpf.h
@@ -4,6 +4,9 @@
#if !defined(_TRACE_BPF_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_BPF_H
+/* These are only used within the BPF_SYSCALL code */
+#ifdef CONFIG_BPF_SYSCALL
+
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/fs.h>
@@ -345,7 +348,7 @@ TRACE_EVENT(bpf_map_next_key,
__print_hex(__get_dynamic_array(nxt), __entry->key_len),
__entry->key_trunc ? " ..." : "")
);
-
+#endif /* CONFIG_BPF_SYSCALL */
#endif /* _TRACE_BPF_H */
#include <trace/define_trace.h>
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index d60096cddb2a..b34bed17abc7 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -12,9 +12,9 @@
TRACE_EVENT(fib6_table_lookup,
TP_PROTO(const struct net *net, const struct rt6_info *rt,
- u32 tb_id, const struct flowi6 *flp),
+ struct fib6_table *table, const struct flowi6 *flp),
- TP_ARGS(net, rt, tb_id, flp),
+ TP_ARGS(net, rt, table, flp),
TP_STRUCT__entry(
__field( u32, tb_id )
@@ -34,7 +34,7 @@ TRACE_EVENT(fib6_table_lookup,
TP_fast_assign(
struct in6_addr *in6;
- __entry->tb_id = tb_id;
+ __entry->tb_id = table->tb6_id;
__entry->oif = flp->flowi6_oif;
__entry->iif = flp->flowi6_iif;
__entry->tos = ip6_tclass(flp->flowlabel);
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
new file mode 100644
index 000000000000..c3220d914475
--- /dev/null
+++ b/include/trace/events/tcp.h
@@ -0,0 +1,70 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tcp
+
+#if !defined(_TRACE_TCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TCP_H
+
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/tracepoint.h>
+#include <net/ipv6.h>
+
+TRACE_EVENT(tcp_retransmit_skb,
+
+ TP_PROTO(struct sock *sk, struct sk_buff *skb),
+
+ TP_ARGS(sk, skb),
+
+ TP_STRUCT__entry(
+ __field(void *, skbaddr)
+ __field(void *, skaddr)
+ __field(__u16, sport)
+ __field(__u16, dport)
+ __array(__u8, saddr, 4)
+ __array(__u8, daddr, 4)
+ __array(__u8, saddr_v6, 16)
+ __array(__u8, daddr_v6, 16)
+ ),
+
+ TP_fast_assign(
+ struct inet_sock *inet = inet_sk(sk);
+ struct in6_addr *pin6;
+ __be32 *p32;
+
+ __entry->skbaddr = skb;
+ __entry->skaddr = sk;
+
+ __entry->sport = ntohs(inet->inet_sport);
+ __entry->dport = ntohs(inet->inet_dport);
+
+ p32 = (__be32 *) __entry->saddr;
+ *p32 = inet->inet_saddr;
+
+ p32 = (__be32 *) __entry->daddr;
+ *p32 = inet->inet_daddr;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ pin6 = (struct in6_addr *)__entry->saddr_v6;
+ *pin6 = sk->sk_v6_rcv_saddr;
+ pin6 = (struct in6_addr *)__entry->daddr_v6;
+ *pin6 = sk->sk_v6_daddr;
+ } else
+#endif
+ {
+ pin6 = (struct in6_addr *)__entry->saddr_v6;
+ ipv6_addr_set_v4mapped(inet->inet_saddr, pin6);
+ pin6 = (struct in6_addr *)__entry->daddr_v6;
+ ipv6_addr_set_v4mapped(inet->inet_daddr, pin6);
+ }
+ ),
+
+ TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c",
+ __entry->sport, __entry->dport, __entry->saddr, __entry->daddr,
+ __entry->saddr_v6, __entry->daddr_v6)
+);
+
+#endif /* _TRACE_TCP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 4e16c43fba10..0c8dec61987e 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -136,14 +136,90 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
__entry->map_id, __entry->map_index)
);
+#define devmap_ifindex(fwd, map) \
+ (!fwd ? 0 : \
+ (!map ? 0 : \
+ ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
+ ((struct net_device *)fwd)->ifindex : 0)))
+
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
- trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \
+ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
0, map, idx)
#define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \
- trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \
+ trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \
err, map, idx)
+TRACE_EVENT(xdp_cpumap_kthread,
+
+ TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
+ int sched),
+
+ TP_ARGS(map_id, processed, drops, sched),
+
+ TP_STRUCT__entry(
+ __field(int, map_id)
+ __field(u32, act)
+ __field(int, cpu)
+ __field(unsigned int, drops)
+ __field(unsigned int, processed)
+ __field(int, sched)
+ ),
+
+ TP_fast_assign(
+ __entry->map_id = map_id;
+ __entry->act = XDP_REDIRECT;
+ __entry->cpu = smp_processor_id();
+ __entry->drops = drops;
+ __entry->processed = processed;
+ __entry->sched = sched;
+ ),
+
+ TP_printk("kthread"
+ " cpu=%d map_id=%d action=%s"
+ " processed=%u drops=%u"
+ " sched=%d",
+ __entry->cpu, __entry->map_id,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->processed, __entry->drops,
+ __entry->sched)
+);
+
+TRACE_EVENT(xdp_cpumap_enqueue,
+
+ TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
+ int to_cpu),
+
+ TP_ARGS(map_id, processed, drops, to_cpu),
+
+ TP_STRUCT__entry(
+ __field(int, map_id)
+ __field(u32, act)
+ __field(int, cpu)
+ __field(unsigned int, drops)
+ __field(unsigned int, processed)
+ __field(int, to_cpu)
+ ),
+
+ TP_fast_assign(
+ __entry->map_id = map_id;
+ __entry->act = XDP_REDIRECT;
+ __entry->cpu = smp_processor_id();
+ __entry->drops = drops;
+ __entry->processed = processed;
+ __entry->to_cpu = to_cpu;
+ ),
+
+ TP_printk("enqueue"
+ " cpu=%d map_id=%d action=%s"
+ " processed=%u drops=%u"
+ " to_cpu=%d",
+ __entry->cpu, __entry->map_id,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->processed, __entry->drops,
+ __entry->to_cpu)
+);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f90860d1f897..f650346aaa1a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -92,6 +92,7 @@ enum bpf_cmd {
BPF_PROG_GET_FD_BY_ID,
BPF_MAP_GET_FD_BY_ID,
BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
};
enum bpf_map_type {
@@ -111,6 +112,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
};
enum bpf_prog_type {
@@ -143,11 +145,47 @@ enum bpf_attach_type {
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
-/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
- * to the given target_fd cgroup the descendent cgroup will be able to
- * override effective bpf program that was inherited from this cgroup
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags has to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first)
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When children program makes decision (like picking TCP CA or sock bind)
+ * parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@@ -175,6 +213,15 @@ enum bpf_attach_type {
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object */
+#define BPF_F_RDONLY (1U << 3)
+#define BPF_F_WRONLY (1U << 4)
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -188,6 +235,7 @@ union bpf_attr {
__u32 numa_node; /* numa node (effective only if
* BPF_F_NUMA_NODE is set).
*/
+ char map_name[BPF_OBJ_NAME_LEN];
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -210,11 +258,13 @@ union bpf_attr {
__aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe */
__u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
__aligned_u64 pathname;
__u32 bpf_fd;
+ __u32 file_flags;
};
struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
@@ -242,6 +292,7 @@ union bpf_attr {
__u32 map_id;
};
__u32 next_id;
+ __u32 open_flags;
};
struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
@@ -249,6 +300,15 @@ union bpf_attr {
__u32 info_len;
__aligned_u64 info;
} info;
+
+ struct { /* anonymous struct used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
@@ -553,12 +613,22 @@ union bpf_attr {
* int bpf_setsockopt(bpf_socket, level, optname, optval, optlen)
* Calls setsockopt. Not all opts are available, only those with
* integer optvals plus TCP_CONGESTION.
- * Supported levels: SOL_SOCKET and IPROTO_TCP
+ * Supported levels: SOL_SOCKET and IPPROTO_TCP
* @bpf_socket: pointer to bpf_socket
- * @level: SOL_SOCKET or IPROTO_TCP
+ * @level: SOL_SOCKET or IPPROTO_TCP
* @optname: option name
* @optval: pointer to option value
- * @optlen: length of optval in byes
+ * @optlen: length of optval in bytes
+ * Return: 0 or negative error
+ *
+ * int bpf_getsockopt(bpf_socket, level, optname, optval, optlen)
+ * Calls getsockopt. Not all opts are available.
+ * Supported levels: IPPROTO_TCP
+ * @bpf_socket: pointer to bpf_socket
+ * @level: IPPROTO_TCP
+ * @optname: option name
+ * @optval: pointer to option value
+ * @optlen: length of optval in bytes
* Return: 0 or negative error
*
* int bpf_skb_adjust_room(skb, len_diff, mode, flags)
@@ -582,6 +652,27 @@ union bpf_attr {
* @map: pointer to sockmap to update
* @key: key to insert/update sock in map
* @flags: same flags as map update elem
+ *
+ * int bpf_xdp_adjust_meta(xdp_md, delta)
+ * Adjust the xdp_md.data_meta by delta
+ * @xdp_md: pointer to xdp_md
+ * @delta: An positive/negative integer to be added to xdp_md.data_meta
+ * Return: 0 on success or negative on error
+ *
+ * int bpf_perf_event_read_value(map, flags, buf, buf_size)
+ * read perf event counter value and perf event enabled/running time
+ * @map: pointer to perf_event_array map
+ * @flags: index of event in the map or bitmask flags
+ * @buf: buf to fill
+ * @buf_size: size of the buf
+ * Return: 0 on success or negative error code
+ *
+ * int bpf_perf_prog_read_value(ctx, buf, buf_size)
+ * read perf prog attached perf event counter and enabled/running time
+ * @ctx: pointer to ctx
+ * @buf: buf to fill
+ * @buf_size: size of the buf
+ * Return : 0 on success or negative error code
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -638,6 +729,10 @@ union bpf_attr {
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value), \
+ FN(getsockopt),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -681,7 +776,9 @@ enum bpf_func_id {
#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
#define BPF_F_DONT_FRAGMENT (1ULL << 2)
-/* BPF_FUNC_perf_event_output and BPF_FUNC_perf_event_read flags. */
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
#define BPF_F_INDEX_MASK 0xffffffffULL
#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
/* BPF_FUNC_perf_event_output for sk_buff input context. */
@@ -715,7 +812,7 @@ struct __sk_buff {
__u32 data_end;
__u32 napi_id;
- /* accessed by BPF_PROG_TYPE_sk_skb types */
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
__u32 local_ip4; /* Stored in network byte order */
@@ -723,6 +820,9 @@ struct __sk_buff {
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
};
struct bpf_tunnel_key {
@@ -783,6 +883,7 @@ enum xdp_action {
struct xdp_md {
__u32 data;
__u32 data_end;
+ __u32 data_meta;
};
enum sk_action {
@@ -801,6 +902,11 @@ struct bpf_prog_info {
__u32 xlated_prog_len;
__aligned_u64 jited_prog_insns;
__aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
} __attribute__((aligned(8)));
struct bpf_map_info {
@@ -810,6 +916,7 @@ struct bpf_map_info {
__u32 value_size;
__u32 max_entries;
__u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
} __attribute__((aligned(8)));
/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -859,9 +966,22 @@ enum {
BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
* needs ECN
*/
+ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is
+ * based on the path and may be
+ * dependent on the congestion control
+ * algorithm. In general it indicates
+ * a congestion threshold. RTTs above
+ * this indicate congestion
+ */
};
#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8d062c58d5cb..b037e0ab1975 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -158,6 +158,7 @@ enum {
IFLA_PAD,
IFLA_XDP,
IFLA_EVENT,
+ IFLA_NEW_NETNSID,
__IFLA_MAX
};
@@ -325,6 +326,8 @@ enum {
IFLA_BRPORT_MCAST_TO_UCAST,
IFLA_BRPORT_VLAN_TUNNEL,
IFLA_BRPORT_BCAST_FLOOD,
+ IFLA_BRPORT_GROUP_FWD_MASK,
+ IFLA_BRPORT_NEIGH_SUPPRESS,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 3cb5e1d85ddd..365ade5685c9 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -60,6 +60,8 @@
/* TUNSETIFF ifr flags */
#define IFF_TUN 0x0001
#define IFF_TAP 0x0002
+#define IFF_NAPI 0x0010
+#define IFF_NAPI_FRAGS 0x0020
#define IFF_NO_PI 0x1000
/* This flag has no real effect */
#define IFF_ONE_QUEUE 0x2000
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 2e520883c054..a2f48c01365e 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -84,6 +84,7 @@ enum tunnel_encap_types {
TUNNEL_ENCAP_NONE,
TUNNEL_ENCAP_FOU,
TUNNEL_ENCAP_GUE,
+ TUNNEL_ENCAP_MPLS,
};
#define TUNNEL_ENCAP_FLAG_CSUM (1<<0)
diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h
index 46444f8fbee4..4f8f3eb0699f 100644
--- a/include/uapi/linux/in6.h
+++ b/include/uapi/linux/in6.h
@@ -284,6 +284,7 @@ struct in6_flowlabel_req {
#define IPV6_TRANSPARENT 75
#define IPV6_UNICAST_IF 76
#define IPV6_RECVFRAGSIZE 77
+#define IPV6_FREEBIND 78
/*
* Multicast Routing:
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 51626b4175c0..f882fe1f9709 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -569,13 +569,14 @@
* authentication/association or not receiving a response from the AP.
* Non-zero %NL80211_ATTR_STATUS_CODE value is indicated in that case as
* well to remain backwards compatible.
- * @NL80211_CMD_ROAM: notifcation indicating the card/driver roamed by itself.
- * When the driver roamed in a network that requires 802.1X authentication,
- * %NL80211_ATTR_PORT_AUTHORIZED should be set if the 802.1X authentication
- * was done by the driver or if roaming was done using Fast Transition
- * protocol (in which case 802.1X authentication is not needed). If
- * %NL80211_ATTR_PORT_AUTHORIZED is not set, user space is responsible for
- * the 802.1X authentication.
+ * When establishing a security association, drivers that support 4 way
+ * handshake offload should send %NL80211_CMD_PORT_AUTHORIZED event when
+ * the 4 way handshake is completed successfully.
+ * @NL80211_CMD_ROAM: Notification indicating the card/driver roamed by itself.
+ * When a security association was established with the new AP (e.g. if
+ * the FT protocol was used for roaming or the driver completed the 4 way
+ * handshake), this event should be followed by an
+ * %NL80211_CMD_PORT_AUTHORIZED event.
* @NL80211_CMD_DISCONNECT: drop a given connection; also used to notify
* userspace that a connection was dropped by the AP or due to other
* reasons, for this the %NL80211_ATTR_DISCONNECTED_BY_AP and
@@ -982,6 +983,14 @@
* @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously
* configured PMK for the authenticator address identified by
* &NL80211_ATTR_MAC.
+ * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way
+ * handshake was completed successfully by the driver. The BSSID is
+ * specified with &NL80211_ATTR_MAC. Drivers that support 4 way handshake
+ * offload should send this event after indicating 802.11 association with
+ * &NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed
+ * &NL80211_CMD_DISCONNECT should be indicated instead.
+ *
+ * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded.
*
* @NL80211_CMD_MAX: highest used command number
* @__NL80211_CMD_AFTER_LAST: internal use
@@ -1185,6 +1194,10 @@ enum nl80211_commands {
NL80211_CMD_SET_PMK,
NL80211_CMD_DEL_PMK,
+ NL80211_CMD_PORT_AUTHORIZED,
+
+ NL80211_CMD_RELOAD_REGDB,
+
/* add new commands above here */
/* used to define NL80211_CMD_MAX below */
@@ -1407,8 +1420,12 @@ enum nl80211_commands {
*
* @NL80211_ATTR_USE_MFP: Whether management frame protection (IEEE 802.11w) is
* used for the association (&enum nl80211_mfp, represented as a u32);
- * this attribute can be used
- * with %NL80211_CMD_ASSOCIATE and %NL80211_CMD_CONNECT requests
+ * this attribute can be used with %NL80211_CMD_ASSOCIATE and
+ * %NL80211_CMD_CONNECT requests. %NL80211_MFP_OPTIONAL is not allowed for
+ * %NL80211_CMD_ASSOCIATE since user space SME is expected and hence, it
+ * must have decided whether to use management frame protection or not.
+ * Setting %NL80211_MFP_OPTIONAL with a %NL80211_CMD_CONNECT request will
+ * let the driver (or the firmware) decide whether to use MFP or not.
*
* @NL80211_ATTR_STA_FLAGS2: Attribute containing a
* &struct nl80211_sta_flag_update.
@@ -2134,10 +2151,7 @@ enum nl80211_commands {
* in %NL80211_CMD_CONNECT to indicate that for 802.1X authentication it
* wants to use the supported offload of the 4-way handshake.
* @NL80211_ATTR_PMKR0_NAME: PMK-R0 Name for offloaded FT.
- * @NL80211_ATTR_PORT_AUTHORIZED: flag attribute used in %NL80211_CMD_ROAMED
- * notification indicating that that 802.1X authentication was done by
- * the driver or is not needed (because roaming used the Fast Transition
- * protocol).
+ * @NL80211_ATTR_PORT_AUTHORIZED: (reserved)
*
* @NUM_NL80211_ATTR: total number of nl80211_attrs available
* @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -3947,10 +3961,12 @@ enum nl80211_key_type {
* enum nl80211_mfp - Management frame protection state
* @NL80211_MFP_NO: Management frame protection not used
* @NL80211_MFP_REQUIRED: Management frame protection required
+ * @NL80211_MFP_OPTIONAL: Management frame protection is optional
*/
enum nl80211_mfp {
NL80211_MFP_NO,
NL80211_MFP_REQUIRED,
+ NL80211_MFP_OPTIONAL,
};
enum nl80211_wpa_versions {
@@ -4914,6 +4930,17 @@ enum nl80211_feature_flags {
* handshake with 802.1X in station mode (will pass EAP frames to the host
* and accept the set_pmk/del_pmk commands), doing it in the host might not
* be supported.
+ * @NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME: Driver is capable of overriding
+ * the max channel attribute in the FILS request params IE with the
+ * actual dwell time.
+ * @NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP: Driver accepts broadcast probe
+ * response
+ * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE: Driver supports sending
+ * the first probe request in each channel at rate of at least 5.5Mbps.
+ * @NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: Driver supports
+ * probe request tx deferral and suppression
+ * @NL80211_EXT_FEATURE_MFP_OPTIONAL: Driver supports the %NL80211_MFP_OPTIONAL
+ * value in %NL80211_ATTR_USE_MFP.
*
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
@@ -4936,6 +4963,11 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_FILS_SK_OFFLOAD,
NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK,
NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X,
+ NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME,
+ NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION,
+ NL80211_EXT_FEATURE_MFP_OPTIONAL,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
@@ -5012,12 +5044,28 @@ enum nl80211_timeout_reason {
* locally administered 1, multicast 0) is assumed.
* This flag must not be requested when the feature isn't supported, check
* the nl80211 feature flags for the device.
+ * @NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME: fill the dwell time in the FILS
+ * request parameters IE in the probe request
+ * @NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP: accept broadcast probe responses
+ * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE: send probe request frames at
+ * rate of at least 5.5M. In case non OCE AP is dicovered in the channel,
+ * only the first probe req in the channel will be sent in high rate.
+ * @NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION: allow probe request
+ * tx deferral (dot11FILSProbeDelay shall be set to 15ms)
+ * and suppression (if it has received a broadcast Probe Response frame,
+ * Beacon frame or FILS Discovery frame from an AP that the STA considers
+ * a suitable candidate for (re-)association - suitable in terms of
+ * SSID and/or RSSI
*/
enum nl80211_scan_flags {
- NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0,
- NL80211_SCAN_FLAG_FLUSH = 1<<1,
- NL80211_SCAN_FLAG_AP = 1<<2,
- NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3,
+ NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0,
+ NL80211_SCAN_FLAG_FLUSH = 1<<1,
+ NL80211_SCAN_FLAG_AP = 1<<2,
+ NL80211_SCAN_FLAG_RANDOM_ADDR = 1<<3,
+ NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME = 1<<4,
+ NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP = 1<<5,
+ NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE = 1<<6,
+ NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION = 1<<7,
};
/**
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 156ee4cab82e..0cd6f8833147 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -359,6 +359,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_IPV6_SRC, /* struct in6_addr src IPv6 address. */
OVS_TUNNEL_KEY_ATTR_IPV6_DST, /* struct in6_addr dst IPv6 address. */
OVS_TUNNEL_KEY_ATTR_PAD,
+ OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, /* be32 ERSPAN index. */
__OVS_TUNNEL_KEY_ATTR_MAX
};
@@ -806,6 +807,7 @@ struct ovs_action_push_eth {
* packet.
* @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
* packet.
+ * @OVS_ACTION_ATTR_CT_CLEAR: Clear conntrack state from the packet.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -835,6 +837,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_TRUNC, /* u32 struct ovs_action_trunc. */
OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */
OVS_ACTION_ATTR_POP_ETH, /* No argument. */
+ OVS_ACTION_ATTR_CT_CLEAR, /* No argument. */
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 099bf5528fed..e7cc3d3c7421 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -74,6 +74,7 @@ struct tc_estimator {
#define TC_H_INGRESS (0xFFFFFFF1U)
#define TC_H_CLSACT TC_H_INGRESS
+#define TC_H_MIN_PRIORITY 0xFFE0U
#define TC_H_MIN_INGRESS 0xFFF2U
#define TC_H_MIN_EGRESS 0xFFF3U
@@ -625,6 +626,22 @@ enum {
#define TC_MQPRIO_HW_OFFLOAD_MAX (__TC_MQPRIO_HW_OFFLOAD_MAX - 1)
+enum {
+ TC_MQPRIO_MODE_DCB,
+ TC_MQPRIO_MODE_CHANNEL,
+ __TC_MQPRIO_MODE_MAX
+};
+
+#define __TC_MQPRIO_MODE_MAX (__TC_MQPRIO_MODE_MAX - 1)
+
+enum {
+ TC_MQPRIO_SHAPER_DCB,
+ TC_MQPRIO_SHAPER_BW_RATE, /* Add new shapers below */
+ __TC_MQPRIO_SHAPER_MAX
+};
+
+#define __TC_MQPRIO_SHAPER_MAX (__TC_MQPRIO_SHAPER_MAX - 1)
+
struct tc_mqprio_qopt {
__u8 num_tc;
__u8 prio_tc_map[TC_QOPT_BITMASK + 1];
@@ -633,6 +650,22 @@ struct tc_mqprio_qopt {
__u16 offset[TC_QOPT_MAX_QUEUE];
};
+#define TC_MQPRIO_F_MODE 0x1
+#define TC_MQPRIO_F_SHAPER 0x2
+#define TC_MQPRIO_F_MIN_RATE 0x4
+#define TC_MQPRIO_F_MAX_RATE 0x8
+
+enum {
+ TCA_MQPRIO_UNSPEC,
+ TCA_MQPRIO_MODE,
+ TCA_MQPRIO_SHAPER,
+ TCA_MQPRIO_MIN_RATE64,
+ TCA_MQPRIO_MAX_RATE64,
+ __TCA_MQPRIO_MAX,
+};
+
+#define TCA_MQPRIO_MAX (__TCA_MQPRIO_MAX - 1)
+
/* SFB */
enum {
diff --git a/include/uapi/linux/qrtr.h b/include/uapi/linux/qrtr.h
index 9d76c566f66e..179af64846e0 100644
--- a/include/uapi/linux/qrtr.h
+++ b/include/uapi/linux/qrtr.h
@@ -4,10 +4,45 @@
#include <linux/socket.h>
#include <linux/types.h>
+#define QRTR_NODE_BCAST 0xffffffffu
+#define QRTR_PORT_CTRL 0xfffffffeu
+
struct sockaddr_qrtr {
__kernel_sa_family_t sq_family;
__u32 sq_node;
__u32 sq_port;
};
+enum qrtr_pkt_type {
+ QRTR_TYPE_DATA = 1,
+ QRTR_TYPE_HELLO = 2,
+ QRTR_TYPE_BYE = 3,
+ QRTR_TYPE_NEW_SERVER = 4,
+ QRTR_TYPE_DEL_SERVER = 5,
+ QRTR_TYPE_DEL_CLIENT = 6,
+ QRTR_TYPE_RESUME_TX = 7,
+ QRTR_TYPE_EXIT = 8,
+ QRTR_TYPE_PING = 9,
+ QRTR_TYPE_NEW_LOOKUP = 10,
+ QRTR_TYPE_DEL_LOOKUP = 11,
+};
+
+struct qrtr_ctrl_pkt {
+ __le32 cmd;
+
+ union {
+ struct {
+ __le32 service;
+ __le32 instance;
+ __le32 node;
+ __le32 port;
+ } server;
+
+ struct {
+ __le32 node;
+ __le32 port;
+ } client;
+ };
+} __packed;
+
#endif /* _LINUX_QRTR_H */
diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h
index 6217ff8500a1..6cd7d416ca40 100644
--- a/include/uapi/linux/sctp.h
+++ b/include/uapi/linux/sctp.h
@@ -122,6 +122,8 @@ typedef __s32 sctp_assoc_t;
#define SCTP_RESET_ASSOC 120
#define SCTP_ADD_STREAMS 121
#define SCTP_SOCKOPT_PEELOFF_FLAGS 122
+#define SCTP_STREAM_SCHEDULER 123
+#define SCTP_STREAM_SCHEDULER_VALUE 124
/* PR-SCTP policies */
#define SCTP_PR_SCTP_NONE 0x0000
@@ -814,6 +816,12 @@ struct sctp_assoc_value {
uint32_t assoc_value;
};
+struct sctp_stream_value {
+ sctp_assoc_t assoc_id;
+ uint16_t stream_id;
+ uint16_t stream_value;
+};
+
/*
* 7.2.2 Peer Address Information
*
@@ -1088,4 +1096,12 @@ struct sctp_add_streams {
uint16_t sas_outstrms;
};
+/* SCTP Stream schedulers */
+enum sctp_sched_type {
+ SCTP_SS_FCFS,
+ SCTP_SS_PRIO,
+ SCTP_SS_RR,
+ SCTP_SS_MAX = SCTP_SS_RR
+};
+
#endif /* _UAPI_SCTP_H */
diff --git a/include/uapi/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h
index 3d7a2b352a62..69038c29e8a9 100644
--- a/include/uapi/linux/tc_act/tc_mirred.h
+++ b/include/uapi/linux/tc_act/tc_mirred.h
@@ -9,13 +9,13 @@
#define TCA_EGRESS_MIRROR 2 /* mirror packet to EGRESS */
#define TCA_INGRESS_REDIR 3 /* packet redirect to INGRESS*/
#define TCA_INGRESS_MIRROR 4 /* mirror packet to INGRESS */
-
+
struct tc_mirred {
tc_gen;
int eaction; /* one of IN/EGRESS_MIRROR/REDIR */
__u32 ifindex; /* ifindex of egress port */
};
-
+
enum {
TCA_MIRRED_UNSPEC,
TCA_MIRRED_TM,
@@ -24,5 +24,5 @@ enum {
__TCA_MIRRED_MAX
};
#define TCA_MIRRED_MAX (__TCA_MIRRED_MAX - 1)
-
+
#endif
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 15c25eccab2b..69c7493e42f8 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -119,6 +119,7 @@ enum {
#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */
#define TCP_ULP 31 /* Attach a ULP to a TCP connection */
#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */
+#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */
struct tcp_repair_opt {
__u32 opt_code;
diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h
index 5351b08c897a..ef41c11a7f38 100644
--- a/include/uapi/linux/tipc.h
+++ b/include/uapi/linux/tipc.h
@@ -231,6 +231,21 @@ struct sockaddr_tipc {
#define TIPC_SOCK_RECVQ_DEPTH 132 /* Default: none (read only) */
#define TIPC_MCAST_BROADCAST 133 /* Default: TIPC selects. No arg */
#define TIPC_MCAST_REPLICAST 134 /* Default: TIPC selects. No arg */
+#define TIPC_GROUP_JOIN 135 /* Takes struct tipc_group_req* */
+#define TIPC_GROUP_LEAVE 136 /* No argument */
+
+/*
+ * Flag values
+ */
+#define TIPC_GROUP_LOOPBACK 0x1 /* Receive copy of sent msg when match */
+#define TIPC_GROUP_MEMBER_EVTS 0x2 /* Receive membership events in socket */
+
+struct tipc_group_req {
+ __u32 type; /* group id */
+ __u32 instance; /* member id */
+ __u32 scope; /* zone/cluster/node */
+ __u32 flags;
+};
/*
* Maximum sizes of TIPC bearer-related names (including terminating NULL)
diff --git a/include/uapi/linux/vm_sockets_diag.h b/include/uapi/linux/vm_sockets_diag.h
new file mode 100644
index 000000000000..14cd7dc5a187
--- /dev/null
+++ b/include/uapi/linux/vm_sockets_diag.h
@@ -0,0 +1,33 @@
+/* AF_VSOCK sock_diag(7) interface for querying open sockets */
+
+#ifndef _UAPI__VM_SOCKETS_DIAG_H__
+#define _UAPI__VM_SOCKETS_DIAG_H__
+
+#include <linux/types.h>
+
+/* Request */
+struct vsock_diag_req {
+ __u8 sdiag_family; /* must be AF_VSOCK */
+ __u8 sdiag_protocol; /* must be 0 */
+ __u16 pad; /* must be 0 */
+ __u32 vdiag_states; /* query bitmap (e.g. 1 << TCP_LISTEN) */
+ __u32 vdiag_ino; /* must be 0 (reserved) */
+ __u32 vdiag_show; /* must be 0 (reserved) */
+ __u32 vdiag_cookie[2];
+};
+
+/* Response */
+struct vsock_diag_msg {
+ __u8 vdiag_family; /* AF_VSOCK */
+ __u8 vdiag_type; /* SOCK_STREAM or SOCK_DGRAM */
+ __u8 vdiag_state; /* sk_state (e.g. TCP_LISTEN) */
+ __u8 vdiag_shutdown; /* local RCV_SHUTDOWN | SEND_SHUTDOWN */
+ __u32 vdiag_src_cid;
+ __u32 vdiag_src_port;
+ __u32 vdiag_dst_cid;
+ __u32 vdiag_dst_port;
+ __u32 vdiag_ino;
+ __u32 vdiag_cookie[2];
+};
+
+#endif /* _UAPI__VM_SOCKETS_DIAG_H__ */
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 897daa005b23..e597daae6120 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,8 +2,10 @@ obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
+obj-$(CONFIG_BPF_SYSCALL) += disasm.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
+obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
ifeq ($(CONFIG_STREAM_PARSER),y)
obj-$(CONFIG_BPF_SYSCALL) += sockmap.o
endif
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index e2636737b69b..7c25426d3cf5 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -19,6 +19,9 @@
#include "map_in_map.h"
+#define ARRAY_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
static void bpf_array_free_percpu(struct bpf_array *array)
{
int i;
@@ -56,7 +59,8 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size == 0 || attr->map_flags & ~BPF_F_NUMA_NODE ||
+ attr->value_size == 0 ||
+ attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
(percpu && numa_node != NUMA_NO_NODE))
return ERR_PTR(-EINVAL);
@@ -492,7 +496,7 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
ee = ERR_PTR(-EOPNOTSUPP);
event = perf_file->private_data;
- if (perf_event_read_local(event, &value) == -EOPNOTSUPP)
+ if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
goto err_out;
ee = bpf_event_entry_gen(perf_file, map_file);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 546113430049..3db5a17fcfe8 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -27,129 +27,405 @@ void cgroup_bpf_put(struct cgroup *cgrp)
{
unsigned int type;
- for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) {
- struct bpf_prog *prog = cgrp->bpf.prog[type];
-
- if (prog) {
- bpf_prog_put(prog);
+ for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
+ struct list_head *progs = &cgrp->bpf.progs[type];
+ struct bpf_prog_list *pl, *tmp;
+
+ list_for_each_entry_safe(pl, tmp, progs, node) {
+ list_del(&pl->node);
+ bpf_prog_put(pl->prog);
+ kfree(pl);
static_branch_dec(&cgroup_bpf_enabled_key);
}
+ bpf_prog_array_free(cgrp->bpf.effective[type]);
+ }
+}
+
+/* count number of elements in the list.
+ * it's slow but the list cannot be long
+ */
+static u32 prog_list_length(struct list_head *head)
+{
+ struct bpf_prog_list *pl;
+ u32 cnt = 0;
+
+ list_for_each_entry(pl, head, node) {
+ if (!pl->prog)
+ continue;
+ cnt++;
}
+ return cnt;
+}
+
+/* if parent has non-overridable prog attached,
+ * disallow attaching new programs to the descendent cgroup.
+ * if parent has overridable or multi-prog, allow attaching
+ */
+static bool hierarchy_allows_attach(struct cgroup *cgrp,
+ enum bpf_attach_type type,
+ u32 new_flags)
+{
+ struct cgroup *p;
+
+ p = cgroup_parent(cgrp);
+ if (!p)
+ return true;
+ do {
+ u32 flags = p->bpf.flags[type];
+ u32 cnt;
+
+ if (flags & BPF_F_ALLOW_MULTI)
+ return true;
+ cnt = prog_list_length(&p->bpf.progs[type]);
+ WARN_ON_ONCE(cnt > 1);
+ if (cnt == 1)
+ return !!(flags & BPF_F_ALLOW_OVERRIDE);
+ p = cgroup_parent(p);
+ } while (p);
+ return true;
+}
+
+/* compute a chain of effective programs for a given cgroup:
+ * start from the list of programs in this cgroup and add
+ * all parent programs.
+ * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
+ * to programs in this cgroup
+ */
+static int compute_effective_progs(struct cgroup *cgrp,
+ enum bpf_attach_type type,
+ struct bpf_prog_array __rcu **array)
+{
+ struct bpf_prog_array __rcu *progs;
+ struct bpf_prog_list *pl;
+ struct cgroup *p = cgrp;
+ int cnt = 0;
+
+ /* count number of effective programs by walking parents */
+ do {
+ if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ cnt += prog_list_length(&p->bpf.progs[type]);
+ p = cgroup_parent(p);
+ } while (p);
+
+ progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
+ if (!progs)
+ return -ENOMEM;
+
+ /* populate the array with effective progs */
+ cnt = 0;
+ p = cgrp;
+ do {
+ if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
+ list_for_each_entry(pl,
+ &p->bpf.progs[type], node) {
+ if (!pl->prog)
+ continue;
+ rcu_dereference_protected(progs, 1)->
+ progs[cnt++] = pl->prog;
+ }
+ p = cgroup_parent(p);
+ } while (p);
+
+ *array = progs;
+ return 0;
+}
+
+static void activate_effective_progs(struct cgroup *cgrp,
+ enum bpf_attach_type type,
+ struct bpf_prog_array __rcu *array)
+{
+ struct bpf_prog_array __rcu *old_array;
+
+ old_array = xchg(&cgrp->bpf.effective[type], array);
+ /* free prog array after grace period, since __cgroup_bpf_run_*()
+ * might be still walking the array
+ */
+ bpf_prog_array_free(old_array);
}
/**
* cgroup_bpf_inherit() - inherit effective programs from parent
* @cgrp: the cgroup to modify
- * @parent: the parent to inherit from
*/
-void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
+int cgroup_bpf_inherit(struct cgroup *cgrp)
{
- unsigned int type;
+/* has to use marco instead of const int, since compiler thinks
+ * that array below is variable length
+ */
+#define NR ARRAY_SIZE(cgrp->bpf.effective)
+ struct bpf_prog_array __rcu *arrays[NR] = {};
+ int i;
- for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) {
- struct bpf_prog *e;
+ for (i = 0; i < NR; i++)
+ INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
- e = rcu_dereference_protected(parent->bpf.effective[type],
- lockdep_is_held(&cgroup_mutex));
- rcu_assign_pointer(cgrp->bpf.effective[type], e);
- cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type];
- }
+ for (i = 0; i < NR; i++)
+ if (compute_effective_progs(cgrp, i, &arrays[i]))
+ goto cleanup;
+
+ for (i = 0; i < NR; i++)
+ activate_effective_progs(cgrp, i, arrays[i]);
+
+ return 0;
+cleanup:
+ for (i = 0; i < NR; i++)
+ bpf_prog_array_free(arrays[i]);
+ return -ENOMEM;
}
+#define BPF_CGROUP_MAX_PROGS 64
+
/**
- * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
+ * __cgroup_bpf_attach() - Attach the program to a cgroup, and
* propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse
- * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
- * @prog: A new program to pin
- * @type: Type of pinning operation (ingress/egress)
- *
- * Each cgroup has a set of two pointers for bpf programs; one for eBPF
- * programs it owns, and which is effective for execution.
- *
- * If @prog is not %NULL, this function attaches a new program to the cgroup
- * and releases the one that is currently attached, if any. @prog is then made
- * the effective program of type @type in that cgroup.
- *
- * If @prog is %NULL, the currently attached program of type @type is released,
- * and the effective program of the parent cgroup (if any) is inherited to
- * @cgrp.
- *
- * Then, the descendants of @cgrp are walked and the effective program for
- * each of them is set to the effective program of @cgrp unless the
- * descendant has its own program attached, in which case the subbranch is
- * skipped. This ensures that delegated subcgroups with own programs are left
- * untouched.
+ * @prog: A program to attach
+ * @type: Type of attach operation
*
* Must be called with cgroup_mutex held.
*/
-int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent,
- struct bpf_prog *prog, enum bpf_attach_type type,
- bool new_overridable)
+int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, u32 flags)
{
- struct bpf_prog *old_prog, *effective = NULL;
- struct cgroup_subsys_state *pos;
- bool overridable = true;
-
- if (parent) {
- overridable = !parent->bpf.disallow_override[type];
- effective = rcu_dereference_protected(parent->bpf.effective[type],
- lockdep_is_held(&cgroup_mutex));
- }
-
- if (prog && effective && !overridable)
- /* if parent has non-overridable prog attached, disallow
- * attaching new programs to descendent cgroup
- */
+ struct list_head *progs = &cgrp->bpf.progs[type];
+ struct bpf_prog *old_prog = NULL;
+ struct cgroup_subsys_state *css;
+ struct bpf_prog_list *pl;
+ bool pl_was_allocated;
+ int err;
+
+ if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
+ /* invalid combination */
+ return -EINVAL;
+
+ if (!hierarchy_allows_attach(cgrp, type, flags))
return -EPERM;
- if (prog && effective && overridable != new_overridable)
- /* if parent has overridable prog attached, only
- * allow overridable programs in descendent cgroup
+ if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
+ /* Disallow attaching non-overridable on top
+ * of existing overridable in this cgroup.
+ * Disallow attaching multi-prog if overridable or none
*/
return -EPERM;
- old_prog = cgrp->bpf.prog[type];
-
- if (prog) {
- overridable = new_overridable;
- effective = prog;
- if (old_prog &&
- cgrp->bpf.disallow_override[type] == new_overridable)
- /* disallow attaching non-overridable on top
- * of existing overridable in this cgroup
- * and vice versa
- */
- return -EPERM;
+ if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
+ return -E2BIG;
+
+ if (flags & BPF_F_ALLOW_MULTI) {
+ list_for_each_entry(pl, progs, node)
+ if (pl->prog == prog)
+ /* disallow attaching the same prog twice */
+ return -EINVAL;
+
+ pl = kmalloc(sizeof(*pl), GFP_KERNEL);
+ if (!pl)
+ return -ENOMEM;
+ pl_was_allocated = true;
+ pl->prog = prog;
+ list_add_tail(&pl->node, progs);
+ } else {
+ if (list_empty(progs)) {
+ pl = kmalloc(sizeof(*pl), GFP_KERNEL);
+ if (!pl)
+ return -ENOMEM;
+ pl_was_allocated = true;
+ list_add_tail(&pl->node, progs);
+ } else {
+ pl = list_first_entry(progs, typeof(*pl), node);
+ old_prog = pl->prog;
+ pl_was_allocated = false;
+ }
+ pl->prog = prog;
}
- if (!prog && !old_prog)
- /* report error when trying to detach and nothing is attached */
- return -ENOENT;
-
- cgrp->bpf.prog[type] = prog;
+ cgrp->bpf.flags[type] = flags;
- css_for_each_descendant_pre(pos, &cgrp->self) {
- struct cgroup *desc = container_of(pos, struct cgroup, self);
+ /* allocate and recompute effective prog arrays */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
- /* skip the subtree if the descendant has its own program */
- if (desc->bpf.prog[type] && desc != cgrp) {
- pos = css_rightmost_descendant(pos);
- } else {
- rcu_assign_pointer(desc->bpf.effective[type],
- effective);
- desc->bpf.disallow_override[type] = !overridable;
- }
+ err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+ if (err)
+ goto cleanup;
}
- if (prog)
- static_branch_inc(&cgroup_bpf_enabled_key);
+ /* all allocations were successful. Activate all prog arrays */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ activate_effective_progs(desc, type, desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+ static_branch_inc(&cgroup_bpf_enabled_key);
if (old_prog) {
bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key);
}
return 0;
+
+cleanup:
+ /* oom while computing effective. Free all computed effective arrays
+ * since they were not activated
+ */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ bpf_prog_array_free(desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+
+ /* and cleanup the prog list */
+ pl->prog = old_prog;
+ if (pl_was_allocated) {
+ list_del(&pl->node);
+ kfree(pl);
+ }
+ return err;
+}
+
+/**
+ * __cgroup_bpf_detach() - Detach the program from a cgroup, and
+ * propagate the change to descendants
+ * @cgrp: The cgroup which descendants to traverse
+ * @prog: A program to detach or NULL
+ * @type: Type of detach operation
+ *
+ * Must be called with cgroup_mutex held.
+ */
+int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, u32 unused_flags)
+{
+ struct list_head *progs = &cgrp->bpf.progs[type];
+ u32 flags = cgrp->bpf.flags[type];
+ struct bpf_prog *old_prog = NULL;
+ struct cgroup_subsys_state *css;
+ struct bpf_prog_list *pl;
+ int err;
+
+ if (flags & BPF_F_ALLOW_MULTI) {
+ if (!prog)
+ /* to detach MULTI prog the user has to specify valid FD
+ * of the program to be detached
+ */
+ return -EINVAL;
+ } else {
+ if (list_empty(progs))
+ /* report error when trying to detach and nothing is attached */
+ return -ENOENT;
+ }
+
+ if (flags & BPF_F_ALLOW_MULTI) {
+ /* find the prog and detach it */
+ list_for_each_entry(pl, progs, node) {
+ if (pl->prog != prog)
+ continue;
+ old_prog = prog;
+ /* mark it deleted, so it's ignored while
+ * recomputing effective
+ */
+ pl->prog = NULL;
+ break;
+ }
+ if (!old_prog)
+ return -ENOENT;
+ } else {
+ /* to maintain backward compatibility NONE and OVERRIDE cgroups
+ * allow detaching with invalid FD (prog==NULL)
+ */
+ pl = list_first_entry(progs, typeof(*pl), node);
+ old_prog = pl->prog;
+ pl->prog = NULL;
+ }
+
+ /* allocate and recompute effective prog arrays */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ err = compute_effective_progs(desc, type, &desc->bpf.inactive);
+ if (err)
+ goto cleanup;
+ }
+
+ /* all allocations were successful. Activate all prog arrays */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ activate_effective_progs(desc, type, desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+
+ /* now can actually delete it from this cgroup list */
+ list_del(&pl->node);
+ kfree(pl);
+ if (list_empty(progs))
+ /* last program was detached, reset flags to zero */
+ cgrp->bpf.flags[type] = 0;
+
+ bpf_prog_put(old_prog);
+ static_branch_dec(&cgroup_bpf_enabled_key);
+ return 0;
+
+cleanup:
+ /* oom while computing effective. Free all computed effective arrays
+ * since they were not activated
+ */
+ css_for_each_descendant_pre(css, &cgrp->self) {
+ struct cgroup *desc = container_of(css, struct cgroup, self);
+
+ bpf_prog_array_free(desc->bpf.inactive);
+ desc->bpf.inactive = NULL;
+ }
+
+ /* and restore back old_prog */
+ pl->prog = old_prog;
+ return err;
+}
+
+/* Must be called with cgroup_mutex held to avoid races. */
+int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+ enum bpf_attach_type type = attr->query.attach_type;
+ struct list_head *progs = &cgrp->bpf.progs[type];
+ u32 flags = cgrp->bpf.flags[type];
+ int cnt, ret = 0, i;
+
+ if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
+ cnt = bpf_prog_array_length(cgrp->bpf.effective[type]);
+ else
+ cnt = prog_list_length(progs);
+
+ if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
+ return -EFAULT;
+ if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
+ return -EFAULT;
+ if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
+ /* return early if user requested only program count + flags */
+ return 0;
+ if (attr->query.prog_cnt < cnt) {
+ cnt = attr->query.prog_cnt;
+ ret = -ENOSPC;
+ }
+
+ if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+ return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type],
+ prog_ids, cnt);
+ } else {
+ struct bpf_prog_list *pl;
+ u32 id;
+
+ i = 0;
+ list_for_each_entry(pl, progs, node) {
+ id = pl->prog->aux->id;
+ if (copy_to_user(prog_ids + i, &id, sizeof(id)))
+ return -EFAULT;
+ if (++i == cnt)
+ break;
+ }
+ }
+ return ret;
}
/**
@@ -171,36 +447,26 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
struct sk_buff *skb,
enum bpf_attach_type type)
{
- struct bpf_prog *prog;
+ unsigned int offset = skb->data - skb_network_header(skb);
+ struct sock *save_sk;
struct cgroup *cgrp;
- int ret = 0;
+ int ret;
if (!sk || !sk_fullsock(sk))
return 0;
- if (sk->sk_family != AF_INET &&
- sk->sk_family != AF_INET6)
+ if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
return 0;
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-
- rcu_read_lock();
-
- prog = rcu_dereference(cgrp->bpf.effective[type]);
- if (prog) {
- unsigned int offset = skb->data - skb_network_header(skb);
- struct sock *save_sk = skb->sk;
-
- skb->sk = sk;
- __skb_push(skb, offset);
- ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM;
- __skb_pull(skb, offset);
- skb->sk = save_sk;
- }
-
- rcu_read_unlock();
-
- return ret;
+ save_sk = skb->sk;
+ skb->sk = sk;
+ __skb_push(skb, offset);
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
+ bpf_prog_run_save_cb);
+ __skb_pull(skb, offset);
+ skb->sk = save_sk;
+ return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
@@ -221,19 +487,10 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
enum bpf_attach_type type)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- struct bpf_prog *prog;
- int ret = 0;
-
-
- rcu_read_lock();
-
- prog = rcu_dereference(cgrp->bpf.effective[type]);
- if (prog)
- ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM;
-
- rcu_read_unlock();
+ int ret;
- return ret;
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
+ return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
@@ -258,18 +515,10 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
enum bpf_attach_type type)
{
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
- struct bpf_prog *prog;
- int ret = 0;
-
-
- rcu_read_lock();
+ int ret;
- prog = rcu_dereference(cgrp->bpf.effective[type]);
- if (prog)
- ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM;
-
- rcu_read_unlock();
-
- return ret;
+ ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
+ BPF_PROG_RUN);
+ return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7b62df86be1d..8e7c8bf2b687 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -309,12 +309,25 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym)
{
+ const char *end = sym + KSYM_NAME_LEN;
+
BUILD_BUG_ON(sizeof("bpf_prog_") +
- sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN);
+ sizeof(prog->tag) * 2 +
+ /* name has been null terminated.
+ * We should need +1 for the '_' preceding
+ * the name. However, the null character
+ * is double counted between the name and the
+ * sizeof("bpf_prog_") above, so we omit
+ * the +1 here.
+ */
+ sizeof(prog->aux->name) > KSYM_NAME_LEN);
sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
sym = bin2hex(sym, prog->tag, sizeof(prog->tag));
- *sym = 0;
+ if (prog->aux->name[0])
+ snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
+ else
+ *sym = 0;
}
static __always_inline unsigned long
@@ -1381,6 +1394,75 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
}
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
+/* to avoid allocating empty bpf_prog_array for cgroups that
+ * don't have bpf program attached use one global 'empty_prog_array'
+ * It will not be modified the caller of bpf_prog_array_alloc()
+ * (since caller requested prog_cnt == 0)
+ * that pointer should be 'freed' by bpf_prog_array_free()
+ */
+static struct {
+ struct bpf_prog_array hdr;
+ struct bpf_prog *null_prog;
+} empty_prog_array = {
+ .null_prog = NULL,
+};
+
+struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags)
+{
+ if (prog_cnt)
+ return kzalloc(sizeof(struct bpf_prog_array) +
+ sizeof(struct bpf_prog *) * (prog_cnt + 1),
+ flags);
+
+ return &empty_prog_array.hdr;
+}
+
+void bpf_prog_array_free(struct bpf_prog_array __rcu *progs)
+{
+ if (!progs ||
+ progs == (struct bpf_prog_array __rcu *)&empty_prog_array.hdr)
+ return;
+ kfree_rcu(progs, rcu);
+}
+
+int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)
+{
+ struct bpf_prog **prog;
+ u32 cnt = 0;
+
+ rcu_read_lock();
+ prog = rcu_dereference(progs)->progs;
+ for (; *prog; prog++)
+ cnt++;
+ rcu_read_unlock();
+ return cnt;
+}
+
+int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,
+ __u32 __user *prog_ids, u32 cnt)
+{
+ struct bpf_prog **prog;
+ u32 i = 0, id;
+
+ rcu_read_lock();
+ prog = rcu_dereference(progs)->progs;
+ for (; *prog; prog++) {
+ id = (*prog)->aux->id;
+ if (copy_to_user(prog_ids + i, &id, sizeof(id))) {
+ rcu_read_unlock();
+ return -EFAULT;
+ }
+ if (++i == cnt) {
+ prog++;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ if (*prog)
+ return -ENOSPC;
+ return 0;
+}
+
static void bpf_prog_free_deferred(struct work_struct *work)
{
struct bpf_prog_aux *aux;
@@ -1498,5 +1580,8 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
+/* These are only used within the BPF_SYSCALL code */
+#ifdef CONFIG_BPF_SYSCALL
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_get_type);
EXPORT_TRACEPOINT_SYMBOL_GPL(bpf_prog_put_rcu);
+#endif
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
new file mode 100644
index 000000000000..b4358d84ddf1
--- /dev/null
+++ b/kernel/bpf/cpumap.c
@@ -0,0 +1,702 @@
+/* bpf/cpumap.c
+ *
+ * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
+ * Released under terms in GPL version 2. See COPYING.
+ */
+
+/* The 'cpumap' is primarily used as a backend map for XDP BPF helper
+ * call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'.
+ *
+ * Unlike devmap which redirects XDP frames out another NIC device,
+ * this map type redirects raw XDP frames to another CPU. The remote
+ * CPU will do SKB-allocation and call the normal network stack.
+ *
+ * This is a scalability and isolation mechanism, that allow
+ * separating the early driver network XDP layer, from the rest of the
+ * netstack, and assigning dedicated CPUs for this stage. This
+ * basically allows for 10G wirespeed pre-filtering via bpf.
+ */
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/ptr_ring.h>
+
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+#include <linux/capability.h>
+#include <trace/events/xdp.h>
+
+#include <linux/netdevice.h> /* netif_receive_skb_core */
+#include <linux/etherdevice.h> /* eth_type_trans */
+
+/* General idea: XDP packets getting XDP redirected to another CPU,
+ * will maximum be stored/queued for one driver ->poll() call. It is
+ * guaranteed that setting flush bit and flush operation happen on
+ * same CPU. Thus, cpu_map_flush operation can deduct via this_cpu_ptr()
+ * which queue in bpf_cpu_map_entry contains packets.
+ */
+
+#define CPU_MAP_BULK_SIZE 8 /* 8 == one cacheline on 64-bit archs */
+struct xdp_bulk_queue {
+ void *q[CPU_MAP_BULK_SIZE];
+ unsigned int count;
+};
+
+/* Struct for every remote "destination" CPU in map */
+struct bpf_cpu_map_entry {
+ u32 cpu; /* kthread CPU and map index */
+ int map_id; /* Back reference to map */
+ u32 qsize; /* Queue size placeholder for map lookup */
+
+ /* XDP can run multiple RX-ring queues, need __percpu enqueue store */
+ struct xdp_bulk_queue __percpu *bulkq;
+
+ /* Queue with potential multi-producers, and single-consumer kthread */
+ struct ptr_ring *queue;
+ struct task_struct *kthread;
+ struct work_struct kthread_stop_wq;
+
+ atomic_t refcnt; /* Control when this struct can be free'ed */
+ struct rcu_head rcu;
+};
+
+struct bpf_cpu_map {
+ struct bpf_map map;
+ /* Below members specific for map type */
+ struct bpf_cpu_map_entry **cpu_map;
+ unsigned long __percpu *flush_needed;
+};
+
+static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
+ struct xdp_bulk_queue *bq);
+
+static u64 cpu_map_bitmap_size(const union bpf_attr *attr)
+{
+ return BITS_TO_LONGS(attr->max_entries) * sizeof(unsigned long);
+}
+
+static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
+{
+ struct bpf_cpu_map *cmap;
+ int err = -ENOMEM;
+ u64 cost;
+ int ret;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ /* check sanity of attributes */
+ if (attr->max_entries == 0 || attr->key_size != 4 ||
+ attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+ return ERR_PTR(-EINVAL);
+
+ cmap = kzalloc(sizeof(*cmap), GFP_USER);
+ if (!cmap)
+ return ERR_PTR(-ENOMEM);
+
+ /* mandatory map attributes */
+ cmap->map.map_type = attr->map_type;
+ cmap->map.key_size = attr->key_size;
+ cmap->map.value_size = attr->value_size;
+ cmap->map.max_entries = attr->max_entries;
+ cmap->map.map_flags = attr->map_flags;
+ cmap->map.numa_node = bpf_map_attr_numa_node(attr);
+
+ /* Pre-limit array size based on NR_CPUS, not final CPU check */
+ if (cmap->map.max_entries > NR_CPUS) {
+ err = -E2BIG;
+ goto free_cmap;
+ }
+
+ /* make sure page count doesn't overflow */
+ cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
+ cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
+ if (cost >= U32_MAX - PAGE_SIZE)
+ goto free_cmap;
+ cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
+
+ /* Notice returns -EPERM on if map size is larger than memlock limit */
+ ret = bpf_map_precharge_memlock(cmap->map.pages);
+ if (ret) {
+ err = ret;
+ goto free_cmap;
+ }
+
+ /* A per cpu bitfield with a bit per possible CPU in map */
+ cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
+ __alignof__(unsigned long));
+ if (!cmap->flush_needed)
+ goto free_cmap;
+
+ /* Alloc array for possible remote "destination" CPUs */
+ cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
+ sizeof(struct bpf_cpu_map_entry *),
+ cmap->map.numa_node);
+ if (!cmap->cpu_map)
+ goto free_percpu;
+
+ return &cmap->map;
+free_percpu:
+ free_percpu(cmap->flush_needed);
+free_cmap:
+ kfree(cmap);
+ return ERR_PTR(err);
+}
+
+void __cpu_map_queue_destructor(void *ptr)
+{
+ /* The tear-down procedure should have made sure that queue is
+ * empty. See __cpu_map_entry_replace() and work-queue
+ * invoked cpu_map_kthread_stop(). Catch any broken behaviour
+ * gracefully and warn once.
+ */
+ if (WARN_ON_ONCE(ptr))
+ page_frag_free(ptr);
+}
+
+static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+{
+ if (atomic_dec_and_test(&rcpu->refcnt)) {
+ /* The queue should be empty at this point */
+ ptr_ring_cleanup(rcpu->queue, __cpu_map_queue_destructor);
+ kfree(rcpu->queue);
+ kfree(rcpu);
+ }
+}
+
+static void get_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+{
+ atomic_inc(&rcpu->refcnt);
+}
+
+/* called from workqueue, to workaround syscall using preempt_disable */
+static void cpu_map_kthread_stop(struct work_struct *work)
+{
+ struct bpf_cpu_map_entry *rcpu;
+
+ rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
+
+ /* Wait for flush in __cpu_map_entry_free(), via full RCU barrier,
+ * as it waits until all in-flight call_rcu() callbacks complete.
+ */
+ rcu_barrier();
+
+ /* kthread_stop will wake_up_process and wait for it to complete */
+ kthread_stop(rcpu->kthread);
+}
+
+/* For now, xdp_pkt is a cpumap internal data structure, with info
+ * carried between enqueue to dequeue. It is mapped into the top
+ * headroom of the packet, to avoid allocating separate mem.
+ */
+struct xdp_pkt {
+ void *data;
+ u16 len;
+ u16 headroom;
+ u16 metasize;
+ struct net_device *dev_rx;
+};
+
+/* Convert xdp_buff to xdp_pkt */
+static struct xdp_pkt *convert_to_xdp_pkt(struct xdp_buff *xdp)
+{
+ struct xdp_pkt *xdp_pkt;
+ int metasize;
+ int headroom;
+
+ /* Assure headroom is available for storing info */
+ headroom = xdp->data - xdp->data_hard_start;
+ metasize = xdp->data - xdp->data_meta;
+ metasize = metasize > 0 ? metasize : 0;
+ if ((headroom - metasize) < sizeof(*xdp_pkt))
+ return NULL;
+
+ /* Store info in top of packet */
+ xdp_pkt = xdp->data_hard_start;
+
+ xdp_pkt->data = xdp->data;
+ xdp_pkt->len = xdp->data_end - xdp->data;
+ xdp_pkt->headroom = headroom - sizeof(*xdp_pkt);
+ xdp_pkt->metasize = metasize;
+
+ return xdp_pkt;
+}
+
+struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
+ struct xdp_pkt *xdp_pkt)
+{
+ unsigned int frame_size;
+ void *pkt_data_start;
+ struct sk_buff *skb;
+
+ /* build_skb need to place skb_shared_info after SKB end, and
+ * also want to know the memory "truesize". Thus, need to
+ * know the memory frame size backing xdp_buff.
+ *
+ * XDP was designed to have PAGE_SIZE frames, but this
+ * assumption is not longer true with ixgbe and i40e. It
+ * would be preferred to set frame_size to 2048 or 4096
+ * depending on the driver.
+ * frame_size = 2048;
+ * frame_len = frame_size - sizeof(*xdp_pkt);
+ *
+ * Instead, with info avail, skb_shared_info in placed after
+ * packet len. This, unfortunately fakes the truesize.
+ * Another disadvantage of this approach, the skb_shared_info
+ * is not at a fixed memory location, with mixed length
+ * packets, which is bad for cache-line hotness.
+ */
+ frame_size = SKB_DATA_ALIGN(xdp_pkt->len) + xdp_pkt->headroom +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+ pkt_data_start = xdp_pkt->data - xdp_pkt->headroom;
+ skb = build_skb(pkt_data_start, frame_size);
+ if (!skb)
+ return NULL;
+
+ skb_reserve(skb, xdp_pkt->headroom);
+ __skb_put(skb, xdp_pkt->len);
+ if (xdp_pkt->metasize)
+ skb_metadata_set(skb, xdp_pkt->metasize);
+
+ /* Essential SKB info: protocol and skb->dev */
+ skb->protocol = eth_type_trans(skb, xdp_pkt->dev_rx);
+
+ /* Optional SKB info, currently missing:
+ * - HW checksum info (skb->ip_summed)
+ * - HW RX hash (skb_set_hash)
+ * - RX ring dev queue index (skb_record_rx_queue)
+ */
+
+ return skb;
+}
+
+static int cpu_map_kthread_run(void *data)
+{
+ struct bpf_cpu_map_entry *rcpu = data;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ /* When kthread gives stop order, then rcpu have been disconnected
+ * from map, thus no new packets can enter. Remaining in-flight
+ * per CPU stored packets are flushed to this queue. Wait honoring
+ * kthread_stop signal until queue is empty.
+ */
+ while (!kthread_should_stop() || !__ptr_ring_empty(rcpu->queue)) {
+ unsigned int processed = 0, drops = 0, sched = 0;
+ struct xdp_pkt *xdp_pkt;
+
+ /* Release CPU reschedule checks */
+ if (__ptr_ring_empty(rcpu->queue)) {
+ __set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ sched = 1;
+ } else {
+ sched = cond_resched();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ /* Process packets in rcpu->queue */
+ local_bh_disable();
+ /*
+ * The bpf_cpu_map_entry is single consumer, with this
+ * kthread CPU pinned. Lockless access to ptr_ring
+ * consume side valid as no-resize allowed of queue.
+ */
+ while ((xdp_pkt = __ptr_ring_consume(rcpu->queue))) {
+ struct sk_buff *skb;
+ int ret;
+
+ skb = cpu_map_build_skb(rcpu, xdp_pkt);
+ if (!skb) {
+ page_frag_free(xdp_pkt);
+ continue;
+ }
+
+ /* Inject into network stack */
+ ret = netif_receive_skb_core(skb);
+ if (ret == NET_RX_DROP)
+ drops++;
+
+ /* Limit BH-disable period */
+ if (++processed == 8)
+ break;
+ }
+ /* Feedback loop via tracepoint */
+ trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+
+ local_bh_enable(); /* resched point, may call do_softirq() */
+ }
+ __set_current_state(TASK_RUNNING);
+
+ put_cpu_map_entry(rcpu);
+ return 0;
+}
+
+struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id)
+{
+ gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN;
+ struct bpf_cpu_map_entry *rcpu;
+ int numa, err;
+
+ /* Have map->numa_node, but choose node of redirect target CPU */
+ numa = cpu_to_node(cpu);
+
+ rcpu = kzalloc_node(sizeof(*rcpu), gfp, numa);
+ if (!rcpu)
+ return NULL;
+
+ /* Alloc percpu bulkq */
+ rcpu->bulkq = __alloc_percpu_gfp(sizeof(*rcpu->bulkq),
+ sizeof(void *), gfp);
+ if (!rcpu->bulkq)
+ goto free_rcu;
+
+ /* Alloc queue */
+ rcpu->queue = kzalloc_node(sizeof(*rcpu->queue), gfp, numa);
+ if (!rcpu->queue)
+ goto free_bulkq;
+
+ err = ptr_ring_init(rcpu->queue, qsize, gfp);
+ if (err)
+ goto free_queue;
+
+ rcpu->cpu = cpu;
+ rcpu->map_id = map_id;
+ rcpu->qsize = qsize;
+
+ /* Setup kthread */
+ rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
+ "cpumap/%d/map:%d", cpu, map_id);
+ if (IS_ERR(rcpu->kthread))
+ goto free_ptr_ring;
+
+ get_cpu_map_entry(rcpu); /* 1-refcnt for being in cmap->cpu_map[] */
+ get_cpu_map_entry(rcpu); /* 1-refcnt for kthread */
+
+ /* Make sure kthread runs on a single CPU */
+ kthread_bind(rcpu->kthread, cpu);
+ wake_up_process(rcpu->kthread);
+
+ return rcpu;
+
+free_ptr_ring:
+ ptr_ring_cleanup(rcpu->queue, NULL);
+free_queue:
+ kfree(rcpu->queue);
+free_bulkq:
+ free_percpu(rcpu->bulkq);
+free_rcu:
+ kfree(rcpu);
+ return NULL;
+}
+
+void __cpu_map_entry_free(struct rcu_head *rcu)
+{
+ struct bpf_cpu_map_entry *rcpu;
+ int cpu;
+
+ /* This cpu_map_entry have been disconnected from map and one
+ * RCU graze-period have elapsed. Thus, XDP cannot queue any
+ * new packets and cannot change/set flush_needed that can
+ * find this entry.
+ */
+ rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);
+
+ /* Flush remaining packets in percpu bulkq */
+ for_each_online_cpu(cpu) {
+ struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
+
+ /* No concurrent bq_enqueue can run at this point */
+ bq_flush_to_queue(rcpu, bq);
+ }
+ free_percpu(rcpu->bulkq);
+ /* Cannot kthread_stop() here, last put free rcpu resources */
+ put_cpu_map_entry(rcpu);
+}
+
+/* After xchg pointer to bpf_cpu_map_entry, use the call_rcu() to
+ * ensure any driver rcu critical sections have completed, but this
+ * does not guarantee a flush has happened yet. Because driver side
+ * rcu_read_lock/unlock only protects the running XDP program. The
+ * atomic xchg and NULL-ptr check in __cpu_map_flush() makes sure a
+ * pending flush op doesn't fail.
+ *
+ * The bpf_cpu_map_entry is still used by the kthread, and there can
+ * still be pending packets (in queue and percpu bulkq). A refcnt
+ * makes sure to last user (kthread_stop vs. call_rcu) free memory
+ * resources.
+ *
+ * The rcu callback __cpu_map_entry_free flush remaining packets in
+ * percpu bulkq to queue. Due to caller map_delete_elem() disable
+ * preemption, cannot call kthread_stop() to make sure queue is empty.
+ * Instead a work_queue is started for stopping kthread,
+ * cpu_map_kthread_stop, which waits for an RCU graze period before
+ * stopping kthread, emptying the queue.
+ */
+void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
+ u32 key_cpu, struct bpf_cpu_map_entry *rcpu)
+{
+ struct bpf_cpu_map_entry *old_rcpu;
+
+ old_rcpu = xchg(&cmap->cpu_map[key_cpu], rcpu);
+ if (old_rcpu) {
+ call_rcu(&old_rcpu->rcu, __cpu_map_entry_free);
+ INIT_WORK(&old_rcpu->kthread_stop_wq, cpu_map_kthread_stop);
+ schedule_work(&old_rcpu->kthread_stop_wq);
+ }
+}
+
+int cpu_map_delete_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ u32 key_cpu = *(u32 *)key;
+
+ if (key_cpu >= map->max_entries)
+ return -EINVAL;
+
+ /* notice caller map_delete_elem() use preempt_disable() */
+ __cpu_map_entry_replace(cmap, key_cpu, NULL);
+ return 0;
+}
+
+int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
+ u64 map_flags)
+{
+ struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ struct bpf_cpu_map_entry *rcpu;
+
+ /* Array index key correspond to CPU number */
+ u32 key_cpu = *(u32 *)key;
+ /* Value is the queue size */
+ u32 qsize = *(u32 *)value;
+
+ if (unlikely(map_flags > BPF_EXIST))
+ return -EINVAL;
+ if (unlikely(key_cpu >= cmap->map.max_entries))
+ return -E2BIG;
+ if (unlikely(map_flags == BPF_NOEXIST))
+ return -EEXIST;
+ if (unlikely(qsize > 16384)) /* sanity limit on qsize */
+ return -EOVERFLOW;
+
+ /* Make sure CPU is a valid possible cpu */
+ if (!cpu_possible(key_cpu))
+ return -ENODEV;
+
+ if (qsize == 0) {
+ rcpu = NULL; /* Same as deleting */
+ } else {
+ /* Updating qsize cause re-allocation of bpf_cpu_map_entry */
+ rcpu = __cpu_map_entry_alloc(qsize, key_cpu, map->id);
+ if (!rcpu)
+ return -ENOMEM;
+ }
+ rcu_read_lock();
+ __cpu_map_entry_replace(cmap, key_cpu, rcpu);
+ rcu_read_unlock();
+ return 0;
+}
+
+void cpu_map_free(struct bpf_map *map)
+{
+ struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ int cpu;
+ u32 i;
+
+ /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+ * so the bpf programs (can be more than one that used this map) were
+ * disconnected from events. Wait for outstanding critical sections in
+ * these programs to complete. The rcu critical section only guarantees
+ * no further "XDP/bpf-side" reads against bpf_cpu_map->cpu_map.
+ * It does __not__ ensure pending flush operations (if any) are
+ * complete.
+ */
+ synchronize_rcu();
+
+ /* To ensure all pending flush operations have completed wait for flush
+ * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
+ * Because the above synchronize_rcu() ensures the map is disconnected
+ * from the program we can assume no new bits will be set.
+ */
+ for_each_online_cpu(cpu) {
+ unsigned long *bitmap = per_cpu_ptr(cmap->flush_needed, cpu);
+
+ while (!bitmap_empty(bitmap, cmap->map.max_entries))
+ cond_resched();
+ }
+
+ /* For cpu_map the remote CPUs can still be using the entries
+ * (struct bpf_cpu_map_entry).
+ */
+ for (i = 0; i < cmap->map.max_entries; i++) {
+ struct bpf_cpu_map_entry *rcpu;
+
+ rcpu = READ_ONCE(cmap->cpu_map[i]);
+ if (!rcpu)
+ continue;
+
+ /* bq flush and cleanup happens after RCU graze-period */
+ __cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
+ }
+ free_percpu(cmap->flush_needed);
+ bpf_map_area_free(cmap->cpu_map);
+ kfree(cmap);
+}
+
+struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
+{
+ struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ struct bpf_cpu_map_entry *rcpu;
+
+ if (key >= map->max_entries)
+ return NULL;
+
+ rcpu = READ_ONCE(cmap->cpu_map[key]);
+ return rcpu;
+}
+
+static void *cpu_map_lookup_elem(struct bpf_map *map, void *key)
+{
+ struct bpf_cpu_map_entry *rcpu =
+ __cpu_map_lookup_elem(map, *(u32 *)key);
+
+ return rcpu ? &rcpu->qsize : NULL;
+}
+
+static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+ struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ u32 index = key ? *(u32 *)key : U32_MAX;
+ u32 *next = next_key;
+
+ if (index >= cmap->map.max_entries) {
+ *next = 0;
+ return 0;
+ }
+
+ if (index == cmap->map.max_entries - 1)
+ return -ENOENT;
+ *next = index + 1;
+ return 0;
+}
+
+const struct bpf_map_ops cpu_map_ops = {
+ .map_alloc = cpu_map_alloc,
+ .map_free = cpu_map_free,
+ .map_delete_elem = cpu_map_delete_elem,
+ .map_update_elem = cpu_map_update_elem,
+ .map_lookup_elem = cpu_map_lookup_elem,
+ .map_get_next_key = cpu_map_get_next_key,
+};
+
+static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
+ struct xdp_bulk_queue *bq)
+{
+ unsigned int processed = 0, drops = 0;
+ const int to_cpu = rcpu->cpu;
+ struct ptr_ring *q;
+ int i;
+
+ if (unlikely(!bq->count))
+ return 0;
+
+ q = rcpu->queue;
+ spin_lock(&q->producer_lock);
+
+ for (i = 0; i < bq->count; i++) {
+ void *xdp_pkt = bq->q[i];
+ int err;
+
+ err = __ptr_ring_produce(q, xdp_pkt);
+ if (err) {
+ drops++;
+ page_frag_free(xdp_pkt); /* Free xdp_pkt */
+ }
+ processed++;
+ }
+ bq->count = 0;
+ spin_unlock(&q->producer_lock);
+
+ /* Feedback loop via tracepoints */
+ trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu);
+ return 0;
+}
+
+/* Runs under RCU-read-side, plus in softirq under NAPI protection.
+ * Thus, safe percpu variable access.
+ */
+static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_pkt *xdp_pkt)
+{
+ struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
+
+ if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
+ bq_flush_to_queue(rcpu, bq);
+
+ /* Notice, xdp_buff/page MUST be queued here, long enough for
+ * driver to code invoking us to finished, due to driver
+ * (e.g. ixgbe) recycle tricks based on page-refcnt.
+ *
+ * Thus, incoming xdp_pkt is always queued here (else we race
+ * with another CPU on page-refcnt and remaining driver code).
+ * Queue time is very short, as driver will invoke flush
+ * operation, when completing napi->poll call.
+ */
+ bq->q[bq->count++] = xdp_pkt;
+ return 0;
+}
+
+int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
+ struct net_device *dev_rx)
+{
+ struct xdp_pkt *xdp_pkt;
+
+ xdp_pkt = convert_to_xdp_pkt(xdp);
+ if (!xdp_pkt)
+ return -EOVERFLOW;
+
+ /* Info needed when constructing SKB on remote CPU */
+ xdp_pkt->dev_rx = dev_rx;
+
+ bq_enqueue(rcpu, xdp_pkt);
+ return 0;
+}
+
+void __cpu_map_insert_ctx(struct bpf_map *map, u32 bit)
+{
+ struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed);
+
+ __set_bit(bit, bitmap);
+}
+
+void __cpu_map_flush(struct bpf_map *map)
+{
+ struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
+ unsigned long *bitmap = this_cpu_ptr(cmap->flush_needed);
+ u32 bit;
+
+ /* The napi->poll softirq makes sure __cpu_map_insert_ctx()
+ * and __cpu_map_flush() happen on same CPU. Thus, the percpu
+ * bitmap indicate which percpu bulkq have packets.
+ */
+ for_each_set_bit(bit, bitmap, map->max_entries) {
+ struct bpf_cpu_map_entry *rcpu = READ_ONCE(cmap->cpu_map[bit]);
+ struct xdp_bulk_queue *bq;
+
+ /* This is possible if entry is removed by user space
+ * between xdp redirect and flush op.
+ */
+ if (unlikely(!rcpu))
+ continue;
+
+ __clear_bit(bit, bitmap);
+
+ /* Flush all frames in bulkq to real queue */
+ bq = this_cpu_ptr(rcpu->bulkq);
+ bq_flush_to_queue(rcpu, bq);
+
+ /* If already running, costs spin_lock_irqsave + smb_mb */
+ wake_up_process(rcpu->kthread);
+ }
+}
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index e745d6a88224..ebdef54bf7df 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -50,6 +50,9 @@
#include <linux/bpf.h>
#include <linux/filter.h>
+#define DEV_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
struct bpf_dtab_netdev {
struct net_device *dev;
struct bpf_dtab *dtab;
@@ -83,7 +86,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+ attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
dtab = kzalloc(sizeof(*dtab), GFP_USER);
diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c
new file mode 100644
index 000000000000..e682850c9715
--- /dev/null
+++ b/kernel/bpf/disasm.c
@@ -0,0 +1,214 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/bpf.h>
+
+#include "disasm.h"
+
+#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x)
+static const char * const func_id_str[] = {
+ __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN)
+};
+#undef __BPF_FUNC_STR_FN
+
+const char *func_id_name(int id)
+{
+ BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
+
+ if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id])
+ return func_id_str[id];
+ else
+ return "unknown";
+}
+
+const char *const bpf_class_string[8] = {
+ [BPF_LD] = "ld",
+ [BPF_LDX] = "ldx",
+ [BPF_ST] = "st",
+ [BPF_STX] = "stx",
+ [BPF_ALU] = "alu",
+ [BPF_JMP] = "jmp",
+ [BPF_RET] = "BUG",
+ [BPF_ALU64] = "alu64",
+};
+
+const char *const bpf_alu_string[16] = {
+ [BPF_ADD >> 4] = "+=",
+ [BPF_SUB >> 4] = "-=",
+ [BPF_MUL >> 4] = "*=",
+ [BPF_DIV >> 4] = "/=",
+ [BPF_OR >> 4] = "|=",
+ [BPF_AND >> 4] = "&=",
+ [BPF_LSH >> 4] = "<<=",
+ [BPF_RSH >> 4] = ">>=",
+ [BPF_NEG >> 4] = "neg",
+ [BPF_MOD >> 4] = "%=",
+ [BPF_XOR >> 4] = "^=",
+ [BPF_MOV >> 4] = "=",
+ [BPF_ARSH >> 4] = "s>>=",
+ [BPF_END >> 4] = "endian",
+};
+
+static const char *const bpf_ldst_string[] = {
+ [BPF_W >> 3] = "u32",
+ [BPF_H >> 3] = "u16",
+ [BPF_B >> 3] = "u8",
+ [BPF_DW >> 3] = "u64",
+};
+
+static const char *const bpf_jmp_string[16] = {
+ [BPF_JA >> 4] = "jmp",
+ [BPF_JEQ >> 4] = "==",
+ [BPF_JGT >> 4] = ">",
+ [BPF_JLT >> 4] = "<",
+ [BPF_JGE >> 4] = ">=",
+ [BPF_JLE >> 4] = "<=",
+ [BPF_JSET >> 4] = "&",
+ [BPF_JNE >> 4] = "!=",
+ [BPF_JSGT >> 4] = "s>",
+ [BPF_JSLT >> 4] = "s<",
+ [BPF_JSGE >> 4] = "s>=",
+ [BPF_JSLE >> 4] = "s<=",
+ [BPF_CALL >> 4] = "call",
+ [BPF_EXIT >> 4] = "exit",
+};
+
+static void print_bpf_end_insn(bpf_insn_print_cb verbose,
+ struct bpf_verifier_env *env,
+ const struct bpf_insn *insn)
+{
+ verbose(env, "(%02x) r%d = %s%d r%d\n", insn->code, insn->dst_reg,
+ BPF_SRC(insn->code) == BPF_TO_BE ? "be" : "le",
+ insn->imm, insn->dst_reg);
+}
+
+void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, bool allow_ptr_leaks)
+{
+ u8 class = BPF_CLASS(insn->code);
+
+ if (class == BPF_ALU || class == BPF_ALU64) {
+ if (BPF_OP(insn->code) == BPF_END) {
+ if (class == BPF_ALU64)
+ verbose(env, "BUG_alu64_%02x\n", insn->code);
+ else
+ print_bpf_end_insn(verbose, env, insn);
+ } else if (BPF_OP(insn->code) == BPF_NEG) {
+ verbose(env, "(%02x) r%d = %s-r%d\n",
+ insn->code, insn->dst_reg,
+ class == BPF_ALU ? "(u32) " : "",
+ insn->dst_reg);
+ } else if (BPF_SRC(insn->code) == BPF_X) {
+ verbose(env, "(%02x) %sr%d %s %sr%d\n",
+ insn->code, class == BPF_ALU ? "(u32) " : "",
+ insn->dst_reg,
+ bpf_alu_string[BPF_OP(insn->code) >> 4],
+ class == BPF_ALU ? "(u32) " : "",
+ insn->src_reg);
+ } else {
+ verbose(env, "(%02x) %sr%d %s %s%d\n",
+ insn->code, class == BPF_ALU ? "(u32) " : "",
+ insn->dst_reg,
+ bpf_alu_string[BPF_OP(insn->code) >> 4],
+ class == BPF_ALU ? "(u32) " : "",
+ insn->imm);
+ }
+ } else if (class == BPF_STX) {
+ if (BPF_MODE(insn->code) == BPF_MEM)
+ verbose(env, "(%02x) *(%s *)(r%d %+d) = r%d\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg,
+ insn->off, insn->src_reg);
+ else if (BPF_MODE(insn->code) == BPF_XADD)
+ verbose(env, "(%02x) lock *(%s *)(r%d %+d) += r%d\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg, insn->off,
+ insn->src_reg);
+ else
+ verbose(env, "BUG_%02x\n", insn->code);
+ } else if (class == BPF_ST) {
+ if (BPF_MODE(insn->code) != BPF_MEM) {
+ verbose(env, "BUG_st_%02x\n", insn->code);
+ return;
+ }
+ verbose(env, "(%02x) *(%s *)(r%d %+d) = %d\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->dst_reg,
+ insn->off, insn->imm);
+ } else if (class == BPF_LDX) {
+ if (BPF_MODE(insn->code) != BPF_MEM) {
+ verbose(env, "BUG_ldx_%02x\n", insn->code);
+ return;
+ }
+ verbose(env, "(%02x) r%d = *(%s *)(r%d %+d)\n",
+ insn->code, insn->dst_reg,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->src_reg, insn->off);
+ } else if (class == BPF_LD) {
+ if (BPF_MODE(insn->code) == BPF_ABS) {
+ verbose(env, "(%02x) r0 = *(%s *)skb[%d]\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->imm);
+ } else if (BPF_MODE(insn->code) == BPF_IND) {
+ verbose(env, "(%02x) r0 = *(%s *)skb[r%d + %d]\n",
+ insn->code,
+ bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
+ insn->src_reg, insn->imm);
+ } else if (BPF_MODE(insn->code) == BPF_IMM &&
+ BPF_SIZE(insn->code) == BPF_DW) {
+ /* At this point, we already made sure that the second
+ * part of the ldimm64 insn is accessible.
+ */
+ u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
+ bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
+
+ if (map_ptr && !allow_ptr_leaks)
+ imm = 0;
+
+ verbose(env, "(%02x) r%d = 0x%llx\n", insn->code,
+ insn->dst_reg, (unsigned long long)imm);
+ } else {
+ verbose(env, "BUG_ld_%02x\n", insn->code);
+ return;
+ }
+ } else if (class == BPF_JMP) {
+ u8 opcode = BPF_OP(insn->code);
+
+ if (opcode == BPF_CALL) {
+ verbose(env, "(%02x) call %s#%d\n", insn->code,
+ func_id_name(insn->imm), insn->imm);
+ } else if (insn->code == (BPF_JMP | BPF_JA)) {
+ verbose(env, "(%02x) goto pc%+d\n",
+ insn->code, insn->off);
+ } else if (insn->code == (BPF_JMP | BPF_EXIT)) {
+ verbose(env, "(%02x) exit\n", insn->code);
+ } else if (BPF_SRC(insn->code) == BPF_X) {
+ verbose(env, "(%02x) if r%d %s r%d goto pc%+d\n",
+ insn->code, insn->dst_reg,
+ bpf_jmp_string[BPF_OP(insn->code) >> 4],
+ insn->src_reg, insn->off);
+ } else {
+ verbose(env, "(%02x) if r%d %s 0x%x goto pc%+d\n",
+ insn->code, insn->dst_reg,
+ bpf_jmp_string[BPF_OP(insn->code) >> 4],
+ insn->imm, insn->off);
+ }
+ } else {
+ verbose(env, "(%02x) %s\n",
+ insn->code, bpf_class_string[class]);
+ }
+}
diff --git a/kernel/bpf/disasm.h b/kernel/bpf/disasm.h
new file mode 100644
index 000000000000..8de977e420b6
--- /dev/null
+++ b/kernel/bpf/disasm.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#ifndef __BPF_DISASM_H__
+#define __BPF_DISASM_H__
+
+#include <linux/bpf.h>
+#include <linux/kernel.h>
+#include <linux/stringify.h>
+
+extern const char *const bpf_alu_string[16];
+extern const char *const bpf_class_string[8];
+
+const char *func_id_name(int id);
+
+struct bpf_verifier_env;
+typedef void (*bpf_insn_print_cb)(struct bpf_verifier_env *env,
+ const char *, ...);
+void print_bpf_insn(bpf_insn_print_cb verbose, struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, bool allow_ptr_leaks);
+
+#endif
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 6533f08d1238..e469e05c8e83 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -18,8 +18,9 @@
#include "bpf_lru_list.h"
#include "map_in_map.h"
-#define HTAB_CREATE_FLAG_MASK \
- (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE)
+#define HTAB_CREATE_FLAG_MASK \
+ (BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
+ BPF_F_RDONLY | BPF_F_WRONLY)
struct bucket {
struct hlist_nulls_head head;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index be1dde967208..01aaef1a77c5 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -295,7 +295,7 @@ out:
}
static void *bpf_obj_do_get(const struct filename *pathname,
- enum bpf_type *type)
+ enum bpf_type *type, int flags)
{
struct inode *inode;
struct path path;
@@ -307,7 +307,7 @@ static void *bpf_obj_do_get(const struct filename *pathname,
return ERR_PTR(ret);
inode = d_backing_inode(path.dentry);
- ret = inode_permission(inode, MAY_WRITE);
+ ret = inode_permission(inode, ACC_MODE(flags));
if (ret)
goto out;
@@ -326,18 +326,23 @@ out:
return ERR_PTR(ret);
}
-int bpf_obj_get_user(const char __user *pathname)
+int bpf_obj_get_user(const char __user *pathname, int flags)
{
enum bpf_type type = BPF_TYPE_UNSPEC;
struct filename *pname;
int ret = -ENOENT;
+ int f_flags;
void *raw;
+ f_flags = bpf_get_file_flag(flags);
+ if (f_flags < 0)
+ return f_flags;
+
pname = getname(pathname);
if (IS_ERR(pname))
return PTR_ERR(pname);
- raw = bpf_obj_do_get(pname, &type);
+ raw = bpf_obj_do_get(pname, &type, f_flags);
if (IS_ERR(raw)) {
ret = PTR_ERR(raw);
goto out;
@@ -346,7 +351,7 @@ int bpf_obj_get_user(const char __user *pathname)
if (type == BPF_TYPE_PROG)
ret = bpf_prog_new_fd(raw);
else if (type == BPF_TYPE_MAP)
- ret = bpf_map_new_fd(raw);
+ ret = bpf_map_new_fd(raw, f_flags);
else
goto out;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1b767844a76f..885e45479680 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -389,10 +389,99 @@ out:
return ret;
}
-static int trie_delete_elem(struct bpf_map *map, void *key)
+/* Called from syscall or from eBPF program */
+static int trie_delete_elem(struct bpf_map *map, void *_key)
{
- /* TODO */
- return -ENOSYS;
+ struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+ struct bpf_lpm_trie_key *key = _key;
+ struct lpm_trie_node __rcu **trim, **trim2;
+ struct lpm_trie_node *node, *parent;
+ unsigned long irq_flags;
+ unsigned int next_bit;
+ size_t matchlen = 0;
+ int ret = 0;
+
+ if (key->prefixlen > trie->max_prefixlen)
+ return -EINVAL;
+
+ raw_spin_lock_irqsave(&trie->lock, irq_flags);
+
+ /* Walk the tree looking for an exact key/length match and keeping
+ * track of the path we traverse. We will need to know the node
+ * we wish to delete, and the slot that points to the node we want
+ * to delete. We may also need to know the nodes parent and the
+ * slot that contains it.
+ */
+ trim = &trie->root;
+ trim2 = trim;
+ parent = NULL;
+ while ((node = rcu_dereference_protected(
+ *trim, lockdep_is_held(&trie->lock)))) {
+ matchlen = longest_prefix_match(trie, node, key);
+
+ if (node->prefixlen != matchlen ||
+ node->prefixlen == key->prefixlen)
+ break;
+
+ parent = node;
+ trim2 = trim;
+ next_bit = extract_bit(key->data, node->prefixlen);
+ trim = &node->child[next_bit];
+ }
+
+ if (!node || node->prefixlen != key->prefixlen ||
+ (node->flags & LPM_TREE_NODE_FLAG_IM)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ trie->n_entries--;
+
+ /* If the node we are removing has two children, simply mark it
+ * as intermediate and we are done.
+ */
+ if (rcu_access_pointer(node->child[0]) &&
+ rcu_access_pointer(node->child[1])) {
+ node->flags |= LPM_TREE_NODE_FLAG_IM;
+ goto out;
+ }
+
+ /* If the parent of the node we are about to delete is an intermediate
+ * node, and the deleted node doesn't have any children, we can delete
+ * the intermediate parent as well and promote its other child
+ * up the tree. Doing this maintains the invariant that all
+ * intermediate nodes have exactly 2 children and that there are no
+ * unnecessary intermediate nodes in the tree.
+ */
+ if (parent && (parent->flags & LPM_TREE_NODE_FLAG_IM) &&
+ !node->child[0] && !node->child[1]) {
+ if (node == rcu_access_pointer(parent->child[0]))
+ rcu_assign_pointer(
+ *trim2, rcu_access_pointer(parent->child[1]));
+ else
+ rcu_assign_pointer(
+ *trim2, rcu_access_pointer(parent->child[0]));
+ kfree_rcu(parent, rcu);
+ kfree_rcu(node, rcu);
+ goto out;
+ }
+
+ /* The node we are removing has either zero or one child. If there
+ * is a child, move it into the removed node's slot then delete
+ * the node. Otherwise just clear the slot and delete the node.
+ */
+ if (node->child[0])
+ rcu_assign_pointer(*trim, rcu_access_pointer(node->child[0]));
+ else if (node->child[1])
+ rcu_assign_pointer(*trim, rcu_access_pointer(node->child[1]));
+ else
+ RCU_INIT_POINTER(*trim, NULL);
+ kfree_rcu(node, rcu);
+
+out:
+ raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+
+ return ret;
}
#define LPM_DATA_SIZE_MAX 256
@@ -406,7 +495,8 @@ static int trie_delete_elem(struct bpf_map *map, void *key)
#define LPM_KEY_SIZE_MAX LPM_KEY_SIZE(LPM_DATA_SIZE_MAX)
#define LPM_KEY_SIZE_MIN LPM_KEY_SIZE(LPM_DATA_SIZE_MIN)
-#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE)
+#define LPM_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_NUMA_NODE | \
+ BPF_F_RDONLY | BPF_F_WRONLY)
static struct bpf_map *trie_alloc(union bpf_attr *attr)
{
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 2b6eb35ae5d3..eef843c3b419 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -41,6 +41,9 @@
#include <net/strparser.h>
#include <net/tcp.h>
+#define SOCK_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
struct bpf_stab {
struct bpf_map map;
struct sock **sock_map;
@@ -108,7 +111,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
*/
TCP_SKB_CB(skb)->bpf.map = NULL;
skb->sk = psock->sock;
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi);
preempt_enable();
@@ -368,7 +371,7 @@ static int smap_parse_func_strparser(struct strparser *strp,
* any socket yet.
*/
skb->sk = psock->sock;
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
rc = (*prog->bpf_func)(skb, prog->insnsi);
skb->sk = NULL;
rcu_read_unlock();
@@ -491,7 +494,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
- attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
+ attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
if (attr->value_size > KMALLOC_MAX_SIZE)
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 135be433e9a0..a15bc636cc98 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -11,6 +11,9 @@
#include <linux/perf_event.h>
#include "percpu_freelist.h"
+#define STACK_CREATE_FLAG_MASK \
+ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
+
struct stack_map_bucket {
struct pcpu_freelist_node fnode;
u32 hash;
@@ -60,7 +63,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
- if (attr->map_flags & ~BPF_F_NUMA_NODE)
+ if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL);
/* check sanity of attributes */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 25d074920a00..323be2473c4b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -23,6 +23,9 @@
#include <linux/version.h>
#include <linux/kernel.h>
#include <linux/idr.h>
+#include <linux/cred.h>
+#include <linux/timekeeping.h>
+#include <linux/ctype.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \
(map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -31,6 +34,8 @@
#define IS_FD_HASH(map) ((map)->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
#define IS_FD_MAP(map) (IS_FD_ARRAY(map) || IS_FD_HASH(map))
+#define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY)
+
DEFINE_PER_CPU(int, bpf_prog_active);
static DEFINE_IDR(prog_idr);
static DEFINE_SPINLOCK(prog_idr_lock);
@@ -207,6 +212,7 @@ static void bpf_map_free_deferred(struct work_struct *work)
struct bpf_map *map = container_of(work, struct bpf_map, work);
bpf_map_uncharge_memlock(map);
+ security_bpf_map_free(map);
/* implementation dependent freeing */
map->ops->map_free(map);
}
@@ -291,17 +297,54 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
}
#endif
-static const struct file_operations bpf_map_fops = {
+static ssize_t bpf_dummy_read(struct file *filp, char __user *buf, size_t siz,
+ loff_t *ppos)
+{
+ /* We need this handler such that alloc_file() enables
+ * f_mode with FMODE_CAN_READ.
+ */
+ return -EINVAL;
+}
+
+static ssize_t bpf_dummy_write(struct file *filp, const char __user *buf,
+ size_t siz, loff_t *ppos)
+{
+ /* We need this handler such that alloc_file() enables
+ * f_mode with FMODE_CAN_WRITE.
+ */
+ return -EINVAL;
+}
+
+const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_map_show_fdinfo,
#endif
.release = bpf_map_release,
+ .read = bpf_dummy_read,
+ .write = bpf_dummy_write,
};
-int bpf_map_new_fd(struct bpf_map *map)
+int bpf_map_new_fd(struct bpf_map *map, int flags)
{
+ int ret;
+
+ ret = security_bpf_map(map, OPEN_FMODE(flags));
+ if (ret < 0)
+ return ret;
+
return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
- O_RDWR | O_CLOEXEC);
+ flags | O_CLOEXEC);
+}
+
+int bpf_get_file_flag(int flags)
+{
+ if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY))
+ return -EINVAL;
+ if (flags & BPF_F_RDONLY)
+ return O_RDONLY;
+ if (flags & BPF_F_WRONLY)
+ return O_WRONLY;
+ return O_RDWR;
}
/* helper macro to check that unused fields 'union bpf_attr' are zero */
@@ -312,18 +355,46 @@ int bpf_map_new_fd(struct bpf_map *map)
offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
sizeof(attr->CMD##_LAST_FIELD)) != NULL
-#define BPF_MAP_CREATE_LAST_FIELD numa_node
+/* dst and src must have at least BPF_OBJ_NAME_LEN number of bytes.
+ * Return 0 on success and < 0 on error.
+ */
+static int bpf_obj_name_cpy(char *dst, const char *src)
+{
+ const char *end = src + BPF_OBJ_NAME_LEN;
+
+ memset(dst, 0, BPF_OBJ_NAME_LEN);
+
+ /* Copy all isalnum() and '_' char */
+ while (src < end && *src) {
+ if (!isalnum(*src) && *src != '_')
+ return -EINVAL;
+ *dst++ = *src++;
+ }
+
+ /* No '\0' found in BPF_OBJ_NAME_LEN number of bytes */
+ if (src == end)
+ return -EINVAL;
+
+ return 0;
+}
+
+#define BPF_MAP_CREATE_LAST_FIELD map_name
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_map *map;
+ int f_flags;
int err;
err = CHECK_ATTR(BPF_MAP_CREATE);
if (err)
return -EINVAL;
+ f_flags = bpf_get_file_flag(attr->map_flags);
+ if (f_flags < 0)
+ return f_flags;
+
if (numa_node != NUMA_NO_NODE &&
((unsigned int)numa_node >= nr_node_ids ||
!node_online(numa_node)))
@@ -334,18 +405,26 @@ static int map_create(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
+ err = bpf_obj_name_cpy(map->name, attr->map_name);
+ if (err)
+ goto free_map_nouncharge;
+
atomic_set(&map->refcnt, 1);
atomic_set(&map->usercnt, 1);
- err = bpf_map_charge_memlock(map);
+ err = security_bpf_map_alloc(map);
if (err)
goto free_map_nouncharge;
+ err = bpf_map_charge_memlock(map);
+ if (err)
+ goto free_map_sec;
+
err = bpf_map_alloc_id(map);
if (err)
goto free_map;
- err = bpf_map_new_fd(map);
+ err = bpf_map_new_fd(map, f_flags);
if (err < 0) {
/* failed to allocate fd.
* bpf_map_put() is needed because the above
@@ -362,6 +441,8 @@ static int map_create(union bpf_attr *attr)
free_map:
bpf_map_uncharge_memlock(map);
+free_map_sec:
+ security_bpf_map_free(map);
free_map_nouncharge:
map->ops->map_free(map);
return err;
@@ -460,6 +541,11 @@ static int map_lookup_elem(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
+ if (!(f.file->f_mode & FMODE_CAN_READ)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
key = memdup_user(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
@@ -540,6 +626,11 @@ static int map_update_elem(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
+ if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
key = memdup_user(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
@@ -562,6 +653,12 @@ static int map_update_elem(union bpf_attr *attr)
if (copy_from_user(value, uvalue, value_size) != 0)
goto free_value;
+ /* Need to create a kthread, thus must support schedule */
+ if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+ err = map->ops->map_update_elem(map, key, value, attr->flags);
+ goto out;
+ }
+
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
* inside bpf map update or delete otherwise deadlocks are possible
*/
@@ -592,7 +689,7 @@ static int map_update_elem(union bpf_attr *attr)
}
__this_cpu_dec(bpf_prog_active);
preempt_enable();
-
+out:
if (!err)
trace_bpf_map_update_elem(map, ufd, key, value);
free_value:
@@ -623,6 +720,11 @@ static int map_delete_elem(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
+ if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
key = memdup_user(ukey, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
@@ -666,6 +768,11 @@ static int map_get_next_key(union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
+ if (!(f.file->f_mode & FMODE_CAN_READ)) {
+ err = -EPERM;
+ goto err_put;
+ }
+
if (ukey) {
key = memdup_user(ukey, map->key_size);
if (IS_ERR(key)) {
@@ -703,9 +810,9 @@ err_put:
return err;
}
-static const struct bpf_verifier_ops * const bpf_prog_types[] = {
-#define BPF_PROG_TYPE(_id, _ops) \
- [_id] = &_ops,
+static const struct bpf_prog_ops * const bpf_prog_types[] = {
+#define BPF_PROG_TYPE(_id, _name) \
+ [_id] = & _name ## _prog_ops,
#define BPF_MAP_TYPE(_id, _ops)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
@@ -820,6 +927,7 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
free_used_maps(aux);
bpf_prog_uncharge_memlock(aux->prog);
+ security_bpf_prog_free(aux);
bpf_prog_free(aux->prog);
}
@@ -867,15 +975,23 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
}
#endif
-static const struct file_operations bpf_prog_fops = {
+const struct file_operations bpf_prog_fops = {
#ifdef CONFIG_PROC_FS
.show_fdinfo = bpf_prog_show_fdinfo,
#endif
.release = bpf_prog_release,
+ .read = bpf_dummy_read,
+ .write = bpf_dummy_write,
};
int bpf_prog_new_fd(struct bpf_prog *prog)
{
+ int ret;
+
+ ret = security_bpf_prog(prog);
+ if (ret < 0)
+ return ret;
+
return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
O_RDWR | O_CLOEXEC);
}
@@ -973,7 +1089,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type)
EXPORT_SYMBOL_GPL(bpf_prog_get_type);
/* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD prog_flags
+#define BPF_PROG_LOAD_LAST_FIELD prog_name
static int bpf_prog_load(union bpf_attr *attr)
{
@@ -1015,10 +1131,14 @@ static int bpf_prog_load(union bpf_attr *attr)
if (!prog)
return -ENOMEM;
- err = bpf_prog_charge_memlock(prog);
+ err = security_bpf_prog_alloc(prog->aux);
if (err)
goto free_prog_nouncharge;
+ err = bpf_prog_charge_memlock(prog);
+ if (err)
+ goto free_prog_sec;
+
prog->len = attr->insn_cnt;
err = -EFAULT;
@@ -1037,6 +1157,11 @@ static int bpf_prog_load(union bpf_attr *attr)
if (err < 0)
goto free_prog;
+ prog->aux->load_time = ktime_get_boot_ns();
+ err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
+ if (err)
+ goto free_prog;
+
/* run eBPF verifier */
err = bpf_check(&prog, attr);
if (err < 0)
@@ -1071,16 +1196,18 @@ free_used_maps:
free_used_maps(prog->aux);
free_prog:
bpf_prog_uncharge_memlock(prog);
+free_prog_sec:
+ security_bpf_prog_free(prog->aux);
free_prog_nouncharge:
bpf_prog_free(prog);
return err;
}
-#define BPF_OBJ_LAST_FIELD bpf_fd
+#define BPF_OBJ_LAST_FIELD file_flags
static int bpf_obj_pin(const union bpf_attr *attr)
{
- if (CHECK_ATTR(BPF_OBJ))
+ if (CHECK_ATTR(BPF_OBJ) || attr->file_flags != 0)
return -EINVAL;
return bpf_obj_pin_user(attr->bpf_fd, u64_to_user_ptr(attr->pathname));
@@ -1088,10 +1215,12 @@ static int bpf_obj_pin(const union bpf_attr *attr)
static int bpf_obj_get(const union bpf_attr *attr)
{
- if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
+ if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0 ||
+ attr->file_flags & ~BPF_OBJ_FLAG_MASK)
return -EINVAL;
- return bpf_obj_get_user(u64_to_user_ptr(attr->pathname));
+ return bpf_obj_get_user(u64_to_user_ptr(attr->pathname),
+ attr->file_flags);
}
#ifdef CONFIG_CGROUP_BPF
@@ -1132,6 +1261,9 @@ static int sockmap_get_from_fd(const union bpf_attr *attr, bool attach)
return 0;
}
+#define BPF_F_ATTACH_MASK \
+ (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
+
static int bpf_prog_attach(const union bpf_attr *attr)
{
enum bpf_prog_type ptype;
@@ -1145,7 +1277,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
if (CHECK_ATTR(BPF_PROG_ATTACH))
return -EINVAL;
- if (attr->attach_flags & ~BPF_F_ALLOW_OVERRIDE)
+ if (attr->attach_flags & ~BPF_F_ATTACH_MASK)
return -EINVAL;
switch (attr->attach_type) {
@@ -1176,8 +1308,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
return PTR_ERR(cgrp);
}
- ret = cgroup_bpf_update(cgrp, prog, attr->attach_type,
- attr->attach_flags & BPF_F_ALLOW_OVERRIDE);
+ ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+ attr->attach_flags);
if (ret)
bpf_prog_put(prog);
cgroup_put(cgrp);
@@ -1189,6 +1321,8 @@ static int bpf_prog_attach(const union bpf_attr *attr)
static int bpf_prog_detach(const union bpf_attr *attr)
{
+ enum bpf_prog_type ptype;
+ struct bpf_prog *prog;
struct cgroup *cgrp;
int ret;
@@ -1201,26 +1335,67 @@ static int bpf_prog_detach(const union bpf_attr *attr)
switch (attr->attach_type) {
case BPF_CGROUP_INET_INGRESS:
case BPF_CGROUP_INET_EGRESS:
+ ptype = BPF_PROG_TYPE_CGROUP_SKB;
+ break;
case BPF_CGROUP_INET_SOCK_CREATE:
+ ptype = BPF_PROG_TYPE_CGROUP_SOCK;
+ break;
case BPF_CGROUP_SOCK_OPS:
- cgrp = cgroup_get_from_fd(attr->target_fd);
- if (IS_ERR(cgrp))
- return PTR_ERR(cgrp);
-
- ret = cgroup_bpf_update(cgrp, NULL, attr->attach_type, false);
- cgroup_put(cgrp);
+ ptype = BPF_PROG_TYPE_SOCK_OPS;
break;
case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT:
- ret = sockmap_get_from_fd(attr, false);
- break;
+ return sockmap_get_from_fd(attr, false);
default:
return -EINVAL;
}
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
+ if (IS_ERR(prog))
+ prog = NULL;
+
+ ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
+ if (prog)
+ bpf_prog_put(prog);
+ cgroup_put(cgrp);
return ret;
}
+#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
+
+static int bpf_prog_query(const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
+{
+ struct cgroup *cgrp;
+ int ret;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+ if (CHECK_ATTR(BPF_PROG_QUERY))
+ return -EINVAL;
+ if (attr->query.query_flags & ~BPF_F_QUERY_EFFECTIVE)
+ return -EINVAL;
+
+ switch (attr->query.attach_type) {
+ case BPF_CGROUP_INET_INGRESS:
+ case BPF_CGROUP_INET_EGRESS:
+ case BPF_CGROUP_INET_SOCK_CREATE:
+ case BPF_CGROUP_SOCK_OPS:
+ break;
+ default:
+ return -EINVAL;
+ }
+ cgrp = cgroup_get_from_fd(attr->query.target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+ ret = cgroup_bpf_query(cgrp, attr, uattr);
+ cgroup_put(cgrp);
+ return ret;
+}
#endif /* CONFIG_CGROUP_BPF */
#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
@@ -1305,20 +1480,26 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
return fd;
}
-#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD map_id
+#define BPF_MAP_GET_FD_BY_ID_LAST_FIELD open_flags
static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
{
struct bpf_map *map;
u32 id = attr->map_id;
+ int f_flags;
int fd;
- if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID))
+ if (CHECK_ATTR(BPF_MAP_GET_FD_BY_ID) ||
+ attr->open_flags & ~BPF_OBJ_FLAG_MASK)
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ f_flags = bpf_get_file_flag(attr->open_flags);
+ if (f_flags < 0)
+ return f_flags;
+
spin_lock_bh(&map_idr_lock);
map = idr_find(&map_idr, id);
if (map)
@@ -1330,7 +1511,7 @@ static int bpf_map_get_fd_by_id(const union bpf_attr *attr)
if (IS_ERR(map))
return PTR_ERR(map);
- fd = bpf_map_new_fd(map);
+ fd = bpf_map_new_fd(map, f_flags);
if (fd < 0)
bpf_map_put(map);
@@ -1358,8 +1539,25 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
info.type = prog->type;
info.id = prog->aux->id;
+ info.load_time = prog->aux->load_time;
+ info.created_by_uid = from_kuid_munged(current_user_ns(),
+ prog->aux->user->uid);
memcpy(info.tag, prog->tag, sizeof(prog->tag));
+ memcpy(info.name, prog->aux->name, sizeof(prog->aux->name));
+
+ ulen = info.nr_map_ids;
+ info.nr_map_ids = prog->aux->used_map_cnt;
+ ulen = min_t(u32, info.nr_map_ids, ulen);
+ if (ulen) {
+ u32 __user *user_map_ids = u64_to_user_ptr(info.map_ids);
+ u32 i;
+
+ for (i = 0; i < ulen; i++)
+ if (put_user(prog->aux->used_maps[i]->id,
+ &user_map_ids[i]))
+ return -EFAULT;
+ }
if (!capable(CAP_SYS_ADMIN)) {
info.jited_prog_len = 0;
@@ -1413,6 +1611,7 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map,
info.value_size = map->value_size;
info.max_entries = map->max_entries;
info.map_flags = map->map_flags;
+ memcpy(info.name, map->name, sizeof(map->name));
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
@@ -1467,6 +1666,10 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
if (copy_from_user(&attr, uattr, size) != 0)
return -EFAULT;
+ err = security_bpf(cmd, &attr, size);
+ if (err < 0)
+ return err;
+
switch (cmd) {
case BPF_MAP_CREATE:
err = map_create(&attr);
@@ -1499,6 +1702,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_PROG_DETACH:
err = bpf_prog_detach(&attr);
break;
+ case BPF_PROG_QUERY:
+ err = bpf_prog_query(&attr, uattr);
+ break;
#endif
case BPF_PROG_TEST_RUN:
err = bpf_prog_test_run(&attr, uattr);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c48ca2a34b5e..d906775e12c1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -21,6 +21,17 @@
#include <linux/vmalloc.h>
#include <linux/stringify.h>
+#include "disasm.h"
+
+static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
+#define BPF_PROG_TYPE(_id, _name) \
+ [_id] = & _name ## _verifier_ops,
+#define BPF_MAP_TYPE(_id, _ops)
+#include <linux/bpf_types.h>
+#undef BPF_PROG_TYPE
+#undef BPF_MAP_TYPE
+};
+
/* bpf_check() is a static code analyzer that walks eBPF program
* instruction by instruction and updates register/stack state.
* All paths of conditional branches are analyzed until 'bpf_exit' insn.
@@ -153,28 +164,42 @@ struct bpf_call_arg_meta {
int access_size;
};
-/* verbose verifier prints what it's seeing
- * bpf_check() is called under lock, so no race to access these global vars
- */
-static u32 log_level, log_size, log_len;
-static char *log_buf;
-
static DEFINE_MUTEX(bpf_verifier_lock);
/* log_level controls verbosity level of eBPF verifier.
* verbose() is used to dump the verification trace to the log, so the user
* can figure out what's wrong with the program
*/
-static __printf(1, 2) void verbose(const char *fmt, ...)
+static __printf(2, 3) void verbose(struct bpf_verifier_env *env,
+ const char *fmt, ...)
{
+ struct bpf_verifer_log *log = &env->log;
+ unsigned int n;
va_list args;
- if (log_level == 0 || log_len >= log_size - 1)
+ if (!log->level || !log->ubuf || bpf_verifier_log_full(log))
return;
va_start(args, fmt);
- log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args);
+ n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
va_end(args);
+
+ WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
+ "verifier log line truncated - local buffer too short\n");
+
+ n = min(log->len_total - log->len_used - 1, n);
+ log->kbuf[n] = '\0';
+
+ if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
+ log->len_used += n;
+ else
+ log->ubuf = NULL;
+}
+
+static bool type_is_pkt_pointer(enum bpf_reg_type type)
+{
+ return type == PTR_TO_PACKET ||
+ type == PTR_TO_PACKET_META;
}
/* string representation of 'enum bpf_reg_type' */
@@ -187,26 +212,12 @@ static const char * const reg_type_str[] = {
[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
[PTR_TO_STACK] = "fp",
[PTR_TO_PACKET] = "pkt",
+ [PTR_TO_PACKET_META] = "pkt_meta",
[PTR_TO_PACKET_END] = "pkt_end",
};
-#define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x)
-static const char * const func_id_str[] = {
- __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN)
-};
-#undef __BPF_FUNC_STR_FN
-
-static const char *func_id_name(int id)
-{
- BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
-
- if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id])
- return func_id_str[id];
- else
- return "unknown";
-}
-
-static void print_verifier_state(struct bpf_verifier_state *state)
+static void print_verifier_state(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *state)
{
struct bpf_reg_state *reg;
enum bpf_reg_type t;
@@ -217,21 +228,21 @@ static void print_verifier_state(struct bpf_verifier_state *state)
t = reg->type;
if (t == NOT_INIT)
continue;
- verbose(" R%d=%s", i, reg_type_str[t]);
+ verbose(env, " R%d=%s", i, reg_type_str[t]);
if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
tnum_is_const(reg->var_off)) {
/* reg->off should be 0 for SCALAR_VALUE */
- verbose("%lld", reg->var_off.value + reg->off);
+ verbose(env, "%lld", reg->var_off.value + reg->off);
} else {
- verbose("(id=%d", reg->id);
+ verbose(env, "(id=%d", reg->id);
if (t != SCALAR_VALUE)
- verbose(",off=%d", reg->off);
- if (t == PTR_TO_PACKET)
- verbose(",r=%d", reg->range);
+ verbose(env, ",off=%d", reg->off);
+ if (type_is_pkt_pointer(t))
+ verbose(env, ",r=%d", reg->range);
else if (t == CONST_PTR_TO_MAP ||
t == PTR_TO_MAP_VALUE ||
t == PTR_TO_MAP_VALUE_OR_NULL)
- verbose(",ks=%d,vs=%d",
+ verbose(env, ",ks=%d,vs=%d",
reg->map_ptr->key_size,
reg->map_ptr->value_size);
if (tnum_is_const(reg->var_off)) {
@@ -239,199 +250,38 @@ static void print_verifier_state(struct bpf_verifier_state *state)
* could be a pointer whose offset is too big
* for reg->off
*/
- verbose(",imm=%llx", reg->var_off.value);
+ verbose(env, ",imm=%llx", reg->var_off.value);
} else {
if (reg->smin_value != reg->umin_value &&
reg->smin_value != S64_MIN)
- verbose(",smin_value=%lld",
+ verbose(env, ",smin_value=%lld",
(long long)reg->smin_value);
if (reg->smax_value != reg->umax_value &&
reg->smax_value != S64_MAX)
- verbose(",smax_value=%lld",
+ verbose(env, ",smax_value=%lld",
(long long)reg->smax_value);
if (reg->umin_value != 0)
- verbose(",umin_value=%llu",
+ verbose(env, ",umin_value=%llu",
(unsigned long long)reg->umin_value);
if (reg->umax_value != U64_MAX)
- verbose(",umax_value=%llu",
+ verbose(env, ",umax_value=%llu",
(unsigned long long)reg->umax_value);
if (!tnum_is_unknown(reg->var_off)) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose(",var_off=%s", tn_buf);
+ verbose(env, ",var_off=%s", tn_buf);
}
}
- verbose(")");
+ verbose(env, ")");
}
}
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] == STACK_SPILL)
- verbose(" fp%d=%s", -MAX_BPF_STACK + i,
+ verbose(env, " fp%d=%s", -MAX_BPF_STACK + i,
reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]);
}
- verbose("\n");
-}
-
-static const char *const bpf_class_string[] = {
- [BPF_LD] = "ld",
- [BPF_LDX] = "ldx",
- [BPF_ST] = "st",
- [BPF_STX] = "stx",
- [BPF_ALU] = "alu",
- [BPF_JMP] = "jmp",
- [BPF_RET] = "BUG",
- [BPF_ALU64] = "alu64",
-};
-
-static const char *const bpf_alu_string[16] = {
- [BPF_ADD >> 4] = "+=",
- [BPF_SUB >> 4] = "-=",
- [BPF_MUL >> 4] = "*=",
- [BPF_DIV >> 4] = "/=",
- [BPF_OR >> 4] = "|=",
- [BPF_AND >> 4] = "&=",
- [BPF_LSH >> 4] = "<<=",
- [BPF_RSH >> 4] = ">>=",
- [BPF_NEG >> 4] = "neg",
- [BPF_MOD >> 4] = "%=",
- [BPF_XOR >> 4] = "^=",
- [BPF_MOV >> 4] = "=",
- [BPF_ARSH >> 4] = "s>>=",
- [BPF_END >> 4] = "endian",
-};
-
-static const char *const bpf_ldst_string[] = {
- [BPF_W >> 3] = "u32",
- [BPF_H >> 3] = "u16",
- [BPF_B >> 3] = "u8",
- [BPF_DW >> 3] = "u64",
-};
-
-static const char *const bpf_jmp_string[16] = {
- [BPF_JA >> 4] = "jmp",
- [BPF_JEQ >> 4] = "==",
- [BPF_JGT >> 4] = ">",
- [BPF_JLT >> 4] = "<",
- [BPF_JGE >> 4] = ">=",
- [BPF_JLE >> 4] = "<=",
- [BPF_JSET >> 4] = "&",
- [BPF_JNE >> 4] = "!=",
- [BPF_JSGT >> 4] = "s>",
- [BPF_JSLT >> 4] = "s<",
- [BPF_JSGE >> 4] = "s>=",
- [BPF_JSLE >> 4] = "s<=",
- [BPF_CALL >> 4] = "call",
- [BPF_EXIT >> 4] = "exit",
-};
-
-static void print_bpf_insn(const struct bpf_verifier_env *env,
- const struct bpf_insn *insn)
-{
- u8 class = BPF_CLASS(insn->code);
-
- if (class == BPF_ALU || class == BPF_ALU64) {
- if (BPF_SRC(insn->code) == BPF_X)
- verbose("(%02x) %sr%d %s %sr%d\n",
- insn->code, class == BPF_ALU ? "(u32) " : "",
- insn->dst_reg,
- bpf_alu_string[BPF_OP(insn->code) >> 4],
- class == BPF_ALU ? "(u32) " : "",
- insn->src_reg);
- else
- verbose("(%02x) %sr%d %s %s%d\n",
- insn->code, class == BPF_ALU ? "(u32) " : "",
- insn->dst_reg,
- bpf_alu_string[BPF_OP(insn->code) >> 4],
- class == BPF_ALU ? "(u32) " : "",
- insn->imm);
- } else if (class == BPF_STX) {
- if (BPF_MODE(insn->code) == BPF_MEM)
- verbose("(%02x) *(%s *)(r%d %+d) = r%d\n",
- insn->code,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->dst_reg,
- insn->off, insn->src_reg);
- else if (BPF_MODE(insn->code) == BPF_XADD)
- verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n",
- insn->code,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->dst_reg, insn->off,
- insn->src_reg);
- else
- verbose("BUG_%02x\n", insn->code);
- } else if (class == BPF_ST) {
- if (BPF_MODE(insn->code) != BPF_MEM) {
- verbose("BUG_st_%02x\n", insn->code);
- return;
- }
- verbose("(%02x) *(%s *)(r%d %+d) = %d\n",
- insn->code,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->dst_reg,
- insn->off, insn->imm);
- } else if (class == BPF_LDX) {
- if (BPF_MODE(insn->code) != BPF_MEM) {
- verbose("BUG_ldx_%02x\n", insn->code);
- return;
- }
- verbose("(%02x) r%d = *(%s *)(r%d %+d)\n",
- insn->code, insn->dst_reg,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->src_reg, insn->off);
- } else if (class == BPF_LD) {
- if (BPF_MODE(insn->code) == BPF_ABS) {
- verbose("(%02x) r0 = *(%s *)skb[%d]\n",
- insn->code,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->imm);
- } else if (BPF_MODE(insn->code) == BPF_IND) {
- verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n",
- insn->code,
- bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
- insn->src_reg, insn->imm);
- } else if (BPF_MODE(insn->code) == BPF_IMM &&
- BPF_SIZE(insn->code) == BPF_DW) {
- /* At this point, we already made sure that the second
- * part of the ldimm64 insn is accessible.
- */
- u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
- bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
-
- if (map_ptr && !env->allow_ptr_leaks)
- imm = 0;
-
- verbose("(%02x) r%d = 0x%llx\n", insn->code,
- insn->dst_reg, (unsigned long long)imm);
- } else {
- verbose("BUG_ld_%02x\n", insn->code);
- return;
- }
- } else if (class == BPF_JMP) {
- u8 opcode = BPF_OP(insn->code);
-
- if (opcode == BPF_CALL) {
- verbose("(%02x) call %s#%d\n", insn->code,
- func_id_name(insn->imm), insn->imm);
- } else if (insn->code == (BPF_JMP | BPF_JA)) {
- verbose("(%02x) goto pc%+d\n",
- insn->code, insn->off);
- } else if (insn->code == (BPF_JMP | BPF_EXIT)) {
- verbose("(%02x) exit\n", insn->code);
- } else if (BPF_SRC(insn->code) == BPF_X) {
- verbose("(%02x) if r%d %s r%d goto pc%+d\n",
- insn->code, insn->dst_reg,
- bpf_jmp_string[BPF_OP(insn->code) >> 4],
- insn->src_reg, insn->off);
- } else {
- verbose("(%02x) if r%d %s 0x%x goto pc%+d\n",
- insn->code, insn->dst_reg,
- bpf_jmp_string[BPF_OP(insn->code) >> 4],
- insn->imm, insn->off);
- }
- } else {
- verbose("(%02x) %s\n", insn->code, bpf_class_string[class]);
- }
+ verbose(env, "\n");
}
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
@@ -469,7 +319,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
env->head = elem;
env->stack_size++;
if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
- verbose("BPF program is too complex\n");
+ verbose(env, "BPF program is too complex\n");
goto err;
}
return &elem->st;
@@ -507,10 +357,11 @@ static void __mark_reg_known_zero(struct bpf_reg_state *reg)
__mark_reg_known(reg, 0);
}
-static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno)
+static void mark_reg_known_zero(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs, u32 regno)
{
if (WARN_ON(regno >= MAX_BPF_REG)) {
- verbose("mark_reg_known_zero(regs, %u)\n", regno);
+ verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs */
for (regno = 0; regno < MAX_BPF_REG; regno++)
__mark_reg_not_init(regs + regno);
@@ -519,6 +370,31 @@ static void mark_reg_known_zero(struct bpf_reg_state *regs, u32 regno)
__mark_reg_known_zero(regs + regno);
}
+static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
+{
+ return type_is_pkt_pointer(reg->type);
+}
+
+static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
+{
+ return reg_is_pkt_pointer(reg) ||
+ reg->type == PTR_TO_PACKET_END;
+}
+
+/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
+static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
+ enum bpf_reg_type which)
+{
+ /* The register can already have a range from prior markings.
+ * This is fine as long as it hasn't been advanced from its
+ * origin.
+ */
+ return reg->type == which &&
+ reg->id == 0 &&
+ reg->off == 0 &&
+ tnum_equals_const(reg->var_off, 0);
+}
+
/* Attempts to improve min/max values based on var_off information */
static void __update_reg_bounds(struct bpf_reg_state *reg)
{
@@ -595,10 +471,11 @@ static void __mark_reg_unknown(struct bpf_reg_state *reg)
__mark_reg_unbounded(reg);
}
-static void mark_reg_unknown(struct bpf_reg_state *regs, u32 regno)
+static void mark_reg_unknown(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs, u32 regno)
{
if (WARN_ON(regno >= MAX_BPF_REG)) {
- verbose("mark_reg_unknown(regs, %u)\n", regno);
+ verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs */
for (regno = 0; regno < MAX_BPF_REG; regno++)
__mark_reg_not_init(regs + regno);
@@ -613,10 +490,11 @@ static void __mark_reg_not_init(struct bpf_reg_state *reg)
reg->type = NOT_INIT;
}
-static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
+static void mark_reg_not_init(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs, u32 regno)
{
if (WARN_ON(regno >= MAX_BPF_REG)) {
- verbose("mark_reg_not_init(regs, %u)\n", regno);
+ verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs */
for (regno = 0; regno < MAX_BPF_REG; regno++)
__mark_reg_not_init(regs + regno);
@@ -625,22 +503,23 @@ static void mark_reg_not_init(struct bpf_reg_state *regs, u32 regno)
__mark_reg_not_init(regs + regno);
}
-static void init_reg_state(struct bpf_reg_state *regs)
+static void init_reg_state(struct bpf_verifier_env *env,
+ struct bpf_reg_state *regs)
{
int i;
for (i = 0; i < MAX_BPF_REG; i++) {
- mark_reg_not_init(regs, i);
+ mark_reg_not_init(env, regs, i);
regs[i].live = REG_LIVE_NONE;
}
/* frame pointer */
regs[BPF_REG_FP].type = PTR_TO_STACK;
- mark_reg_known_zero(regs, BPF_REG_FP);
+ mark_reg_known_zero(env, regs, BPF_REG_FP);
/* 1st arg to a function */
regs[BPF_REG_1].type = PTR_TO_CTX;
- mark_reg_known_zero(regs, BPF_REG_1);
+ mark_reg_known_zero(env, regs, BPF_REG_1);
}
enum reg_arg_type {
@@ -674,26 +553,26 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
struct bpf_reg_state *regs = env->cur_state.regs;
if (regno >= MAX_BPF_REG) {
- verbose("R%d is invalid\n", regno);
+ verbose(env, "R%d is invalid\n", regno);
return -EINVAL;
}
if (t == SRC_OP) {
/* check whether register used as source operand can be read */
if (regs[regno].type == NOT_INIT) {
- verbose("R%d !read_ok\n", regno);
+ verbose(env, "R%d !read_ok\n", regno);
return -EACCES;
}
mark_reg_read(&env->cur_state, regno);
} else {
/* check whether register used as dest operand can be written to */
if (regno == BPF_REG_FP) {
- verbose("frame pointer is read only\n");
+ verbose(env, "frame pointer is read only\n");
return -EACCES;
}
regs[regno].live |= REG_LIVE_WRITTEN;
if (t == DST_OP)
- mark_reg_unknown(regs, regno);
+ mark_reg_unknown(env, regs, regno);
}
return 0;
}
@@ -706,6 +585,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
case PTR_TO_STACK:
case PTR_TO_CTX:
case PTR_TO_PACKET:
+ case PTR_TO_PACKET_META:
case PTR_TO_PACKET_END:
case CONST_PTR_TO_MAP:
return true;
@@ -717,7 +597,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
-static int check_stack_write(struct bpf_verifier_state *state, int off,
+static int check_stack_write(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *state, int off,
int size, int value_regno)
{
int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
@@ -730,7 +611,7 @@ static int check_stack_write(struct bpf_verifier_state *state, int off,
/* register containing pointer is being spilled into stack */
if (size != BPF_REG_SIZE) {
- verbose("invalid size of register spill\n");
+ verbose(env, "invalid size of register spill\n");
return -EACCES;
}
@@ -765,7 +646,8 @@ static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slo
}
}
-static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
+static int check_stack_read(struct bpf_verifier_env *env,
+ struct bpf_verifier_state *state, int off, int size,
int value_regno)
{
u8 *slot_type;
@@ -775,12 +657,12 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
if (slot_type[0] == STACK_SPILL) {
if (size != BPF_REG_SIZE) {
- verbose("invalid size of register spill\n");
+ verbose(env, "invalid size of register spill\n");
return -EACCES;
}
for (i = 1; i < BPF_REG_SIZE; i++) {
if (slot_type[i] != STACK_SPILL) {
- verbose("corrupted spill memory\n");
+ verbose(env, "corrupted spill memory\n");
return -EACCES;
}
}
@@ -796,14 +678,14 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
} else {
for (i = 0; i < size; i++) {
if (slot_type[i] != STACK_MISC) {
- verbose("invalid read from stack off %d+%d size %d\n",
+ verbose(env, "invalid read from stack off %d+%d size %d\n",
off, i, size);
return -EACCES;
}
}
if (value_regno >= 0)
/* have read misc data from the stack */
- mark_reg_unknown(state->regs, value_regno);
+ mark_reg_unknown(env, state->regs, value_regno);
return 0;
}
}
@@ -815,7 +697,7 @@ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
if (off < 0 || size <= 0 || off + size > map->value_size) {
- verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
+ verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
map->value_size, off, size);
return -EACCES;
}
@@ -834,8 +716,8 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
* need to try adding each of min_value and max_value to off
* to make sure our theoretical access will be safe.
*/
- if (log_level)
- print_verifier_state(state);
+ if (env->log.level)
+ print_verifier_state(env, state);
/* The minimum value is only important with signed
* comparisons where we can't assume the floor of a
* value is 0. If we are using signed variables for our
@@ -843,13 +725,14 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
* will have a set floor within our range.
*/
if (reg->smin_value < 0) {
- verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
regno);
return -EACCES;
}
err = __check_map_access(env, regno, reg->smin_value + off, size);
if (err) {
- verbose("R%d min value is outside of the array range\n", regno);
+ verbose(env, "R%d min value is outside of the array range\n",
+ regno);
return err;
}
@@ -858,13 +741,14 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
* If reg->umax_value + off could overflow, treat that as unbounded too.
*/
if (reg->umax_value >= BPF_MAX_VAR_OFF) {
- verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n",
+ verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
regno);
return -EACCES;
}
err = __check_map_access(env, regno, reg->umax_value + off, size);
if (err)
- verbose("R%d max value is outside of the array range\n", regno);
+ verbose(env, "R%d max value is outside of the array range\n",
+ regno);
return err;
}
@@ -903,7 +787,7 @@ static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
struct bpf_reg_state *reg = &regs[regno];
if (off < 0 || size <= 0 || (u64)off + size > reg->range) {
- verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
+ verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
off, size, regno, reg->id, reg->off, reg->range);
return -EACCES;
}
@@ -926,13 +810,13 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
* detail to prove they're safe.
*/
if (reg->smin_value < 0) {
- verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
+ verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
regno);
return -EACCES;
}
err = __check_packet_access(env, regno, off, size);
if (err) {
- verbose("R%d offset is outside of the packet\n", regno);
+ verbose(env, "R%d offset is outside of the packet\n", regno);
return err;
}
return err;
@@ -946,12 +830,8 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
.reg_type = *reg_type,
};
- /* for analyzer ctx accesses are already validated and converted */
- if (env->analyzer_ops)
- return 0;
-
- if (env->prog->aux->ops->is_valid_access &&
- env->prog->aux->ops->is_valid_access(off, size, t, &info)) {
+ if (env->ops->is_valid_access &&
+ env->ops->is_valid_access(off, size, t, &info)) {
/* A non zero info.ctx_field_size indicates that this field is a
* candidate for later verifier transformation to load the whole
* field and then apply a mask when accessed with a narrower
@@ -959,16 +839,19 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
* will only allow for whole field access and rejects any other
* type of narrower access.
*/
- env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
*reg_type = info.reg_type;
+ if (env->analyzer_ops)
+ return 0;
+
+ env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
/* remember the offset of last byte accessed in ctx */
if (env->prog->aux->max_ctx_offset < off + size)
env->prog->aux->max_ctx_offset = off + size;
return 0;
}
- verbose("invalid bpf_context access off=%d size=%d\n", off, size);
+ verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
return -EACCES;
}
@@ -986,7 +869,8 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]);
}
-static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
+static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
int off, int size, bool strict)
{
struct tnum reg_off;
@@ -1011,7 +895,8 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose("misaligned packet access off %d+%s+%d+%d size %d\n",
+ verbose(env,
+ "misaligned packet access off %d+%s+%d+%d size %d\n",
ip_align, tn_buf, reg->off, off, size);
return -EACCES;
}
@@ -1019,7 +904,8 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
return 0;
}
-static int check_generic_ptr_alignment(const struct bpf_reg_state *reg,
+static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
const char *pointer_desc,
int off, int size, bool strict)
{
@@ -1034,7 +920,7 @@ static int check_generic_ptr_alignment(const struct bpf_reg_state *reg,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose("misaligned %saccess off %s+%d+%d size %d\n",
+ verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
pointer_desc, tn_buf, reg->off, off, size);
return -EACCES;
}
@@ -1051,8 +937,11 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
switch (reg->type) {
case PTR_TO_PACKET:
- /* special case, because of NET_IP_ALIGN */
- return check_pkt_ptr_alignment(reg, off, size, strict);
+ case PTR_TO_PACKET_META:
+ /* Special case, because of NET_IP_ALIGN. Given metadata sits
+ * right in front, treat it the very same way.
+ */
+ return check_pkt_ptr_alignment(env, reg, off, size, strict);
case PTR_TO_MAP_VALUE:
pointer_desc = "value ";
break;
@@ -1065,7 +954,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
default:
break;
}
- return check_generic_ptr_alignment(reg, pointer_desc, off, size, strict);
+ return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
+ strict);
}
/* check whether memory at (regno + off) is accessible for t = (read | write)
@@ -1097,27 +987,28 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (reg->type == PTR_TO_MAP_VALUE) {
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
- verbose("R%d leaks addr into map\n", value_regno);
+ verbose(env, "R%d leaks addr into map\n", value_regno);
return -EACCES;
}
err = check_map_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown(state->regs, value_regno);
+ mark_reg_unknown(env, state->regs, value_regno);
} else if (reg->type == PTR_TO_CTX) {
enum bpf_reg_type reg_type = SCALAR_VALUE;
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
- verbose("R%d leaks addr into ctx\n", value_regno);
+ verbose(env, "R%d leaks addr into ctx\n", value_regno);
return -EACCES;
}
/* ctx accesses must be at a fixed offset, so that we can
* determine what type of data were returned.
*/
if (reg->off) {
- verbose("dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
+ verbose(env,
+ "dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
regno, reg->off, off - reg->off);
return -EACCES;
}
@@ -1125,20 +1016,22 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose("variable ctx access var_off=%s off=%d size=%d",
+ verbose(env,
+ "variable ctx access var_off=%s off=%d size=%d",
tn_buf, off, size);
return -EACCES;
}
err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
if (!err && t == BPF_READ && value_regno >= 0) {
/* ctx access returns either a scalar, or a
- * PTR_TO_PACKET[_END]. In the latter case, we know
- * the offset is zero.
+ * PTR_TO_PACKET[_META,_END]. In the latter
+ * case, we know the offset is zero.
*/
if (reg_type == SCALAR_VALUE)
- mark_reg_unknown(state->regs, value_regno);
+ mark_reg_unknown(env, state->regs, value_regno);
else
- mark_reg_known_zero(state->regs, value_regno);
+ mark_reg_known_zero(env, state->regs,
+ value_regno);
state->regs[value_regno].id = 0;
state->regs[value_regno].off = 0;
state->regs[value_regno].range = 0;
@@ -1154,13 +1047,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
- verbose("variable stack access var_off=%s off=%d size=%d",
+ verbose(env, "variable stack access var_off=%s off=%d size=%d",
tn_buf, off, size);
return -EACCES;
}
off += reg->var_off.value;
if (off >= 0 || off < -MAX_BPF_STACK) {
- verbose("invalid stack off=%d size=%d\n", off, size);
+ verbose(env, "invalid stack off=%d size=%d\n", off,
+ size);
return -EACCES;
}
@@ -1171,29 +1065,32 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (!env->allow_ptr_leaks &&
state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
size != BPF_REG_SIZE) {
- verbose("attempt to corrupt spilled pointer on stack\n");
+ verbose(env, "attempt to corrupt spilled pointer on stack\n");
return -EACCES;
}
- err = check_stack_write(state, off, size, value_regno);
+ err = check_stack_write(env, state, off, size,
+ value_regno);
} else {
- err = check_stack_read(state, off, size, value_regno);
+ err = check_stack_read(env, state, off, size,
+ value_regno);
}
- } else if (reg->type == PTR_TO_PACKET) {
+ } else if (reg_is_pkt_pointer(reg)) {
if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
- verbose("cannot write into packet\n");
+ verbose(env, "cannot write into packet\n");
return -EACCES;
}
if (t == BPF_WRITE && value_regno >= 0 &&
is_pointer_value(env, value_regno)) {
- verbose("R%d leaks addr into packet\n", value_regno);
+ verbose(env, "R%d leaks addr into packet\n",
+ value_regno);
return -EACCES;
}
err = check_packet_access(env, regno, off, size);
if (!err && t == BPF_READ && value_regno >= 0)
- mark_reg_unknown(state->regs, value_regno);
+ mark_reg_unknown(env, state->regs, value_regno);
} else {
- verbose("R%d invalid mem access '%s'\n",
- regno, reg_type_str[reg->type]);
+ verbose(env, "R%d invalid mem access '%s'\n", regno,
+ reg_type_str[reg->type]);
return -EACCES;
}
@@ -1213,7 +1110,7 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
insn->imm != 0) {
- verbose("BPF_XADD uses reserved fields\n");
+ verbose(env, "BPF_XADD uses reserved fields\n");
return -EINVAL;
}
@@ -1228,7 +1125,7 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
return err;
if (is_pointer_value(env, insn->src_reg)) {
- verbose("R%d leaks addr into mem\n", insn->src_reg);
+ verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
return -EACCES;
}
@@ -1269,7 +1166,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
register_is_null(regs[regno]))
return 0;
- verbose("R%d type=%s expected=%s\n", regno,
+ verbose(env, "R%d type=%s expected=%s\n", regno,
reg_type_str[regs[regno].type],
reg_type_str[PTR_TO_STACK]);
return -EACCES;
@@ -1280,13 +1177,13 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
- verbose("invalid variable stack read R%d var_off=%s\n",
+ verbose(env, "invalid variable stack read R%d var_off=%s\n",
regno, tn_buf);
}
off = regs[regno].off + regs[regno].var_off.value;
if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
access_size <= 0) {
- verbose("invalid stack type R%d off=%d access_size=%d\n",
+ verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
regno, off, access_size);
return -EACCES;
}
@@ -1302,7 +1199,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
for (i = 0; i < access_size; i++) {
if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
- verbose("invalid indirect read from stack off %d+%d size %d\n",
+ verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
off, i, access_size);
return -EACCES;
}
@@ -1318,6 +1215,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
switch (reg->type) {
case PTR_TO_PACKET:
+ case PTR_TO_PACKET_META:
return check_packet_access(env, regno, reg->off, access_size);
case PTR_TO_MAP_VALUE:
return check_map_access(env, regno, reg->off, access_size);
@@ -1344,22 +1242,24 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
if (arg_type == ARG_ANYTHING) {
if (is_pointer_value(env, regno)) {
- verbose("R%d leaks addr into helper function\n", regno);
+ verbose(env, "R%d leaks addr into helper function\n",
+ regno);
return -EACCES;
}
return 0;
}
- if (type == PTR_TO_PACKET &&
+ if (type_is_pkt_pointer(type) &&
!may_access_direct_pkt_data(env, meta, BPF_READ)) {
- verbose("helper access to the packet is not allowed\n");
+ verbose(env, "helper access to the packet is not allowed\n");
return -EACCES;
}
if (arg_type == ARG_PTR_TO_MAP_KEY ||
arg_type == ARG_PTR_TO_MAP_VALUE) {
expected_type = PTR_TO_STACK;
- if (type != PTR_TO_PACKET && type != expected_type)
+ if (!type_is_pkt_pointer(type) &&
+ type != expected_type)
goto err_type;
} else if (arg_type == ARG_CONST_SIZE ||
arg_type == ARG_CONST_SIZE_OR_ZERO) {
@@ -1383,12 +1283,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
*/
if (register_is_null(*reg))
/* final test in check_stack_boundary() */;
- else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE &&
+ else if (!type_is_pkt_pointer(type) &&
+ type != PTR_TO_MAP_VALUE &&
type != expected_type)
goto err_type;
meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
} else {
- verbose("unsupported arg_type %d\n", arg_type);
+ verbose(env, "unsupported arg_type %d\n", arg_type);
return -EFAULT;
}
@@ -1406,10 +1307,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
* we have to check map_key here. Otherwise it means
* that kernel subsystem misconfigured verifier
*/
- verbose("invalid map_ptr to access map->key\n");
+ verbose(env, "invalid map_ptr to access map->key\n");
return -EACCES;
}
- if (type == PTR_TO_PACKET)
+ if (type_is_pkt_pointer(type))
err = check_packet_access(env, regno, reg->off,
meta->map_ptr->key_size);
else
@@ -1422,10 +1323,10 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
*/
if (!meta->map_ptr) {
/* kernel subsystem misconfigured verifier */
- verbose("invalid map_ptr to access map->value\n");
+ verbose(env, "invalid map_ptr to access map->value\n");
return -EACCES;
}
- if (type == PTR_TO_PACKET)
+ if (type_is_pkt_pointer(type))
err = check_packet_access(env, regno, reg->off,
meta->map_ptr->value_size);
else
@@ -1442,7 +1343,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
*/
if (regno == 0) {
/* kernel subsystem misconfigured verifier */
- verbose("ARG_CONST_SIZE cannot be first argument\n");
+ verbose(env,
+ "ARG_CONST_SIZE cannot be first argument\n");
return -EACCES;
}
@@ -1459,7 +1361,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
meta = NULL;
if (reg->smin_value < 0) {
- verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
+ verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
regno);
return -EACCES;
}
@@ -1473,7 +1375,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
}
if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
- verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
+ verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
regno);
return -EACCES;
}
@@ -1484,12 +1386,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
return err;
err_type:
- verbose("R%d type=%s expected=%s\n", regno,
+ verbose(env, "R%d type=%s expected=%s\n", regno,
reg_type_str[type], reg_type_str[expected_type]);
return -EACCES;
}
-static int check_map_func_compatibility(struct bpf_map *map, int func_id)
+static int check_map_func_compatibility(struct bpf_verifier_env *env,
+ struct bpf_map *map, int func_id)
{
if (!map)
return 0;
@@ -1502,7 +1405,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
break;
case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
if (func_id != BPF_FUNC_perf_event_read &&
- func_id != BPF_FUNC_perf_event_output)
+ func_id != BPF_FUNC_perf_event_output &&
+ func_id != BPF_FUNC_perf_event_read_value)
goto error;
break;
case BPF_MAP_TYPE_STACK_TRACE:
@@ -1522,6 +1426,11 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (func_id != BPF_FUNC_redirect_map)
goto error;
break;
+ /* Restrict bpf side of cpumap, open when use-cases appear */
+ case BPF_MAP_TYPE_CPUMAP:
+ if (func_id != BPF_FUNC_redirect_map)
+ goto error;
+ break;
case BPF_MAP_TYPE_ARRAY_OF_MAPS:
case BPF_MAP_TYPE_HASH_OF_MAPS:
if (func_id != BPF_FUNC_map_lookup_elem)
@@ -1545,6 +1454,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
break;
case BPF_FUNC_perf_event_read:
case BPF_FUNC_perf_event_output:
+ case BPF_FUNC_perf_event_read_value:
if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
goto error;
break;
@@ -1558,7 +1468,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
goto error;
break;
case BPF_FUNC_redirect_map:
- if (map->map_type != BPF_MAP_TYPE_DEVMAP)
+ if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
+ map->map_type != BPF_MAP_TYPE_CPUMAP)
goto error;
break;
case BPF_FUNC_sk_redirect_map:
@@ -1575,7 +1486,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
return 0;
error:
- verbose("cannot pass map_type %d into func %s#%d\n",
+ verbose(env, "cannot pass map_type %d into func %s#%d\n",
map->map_type, func_id_name(func_id), func_id);
return -EINVAL;
}
@@ -1598,8 +1509,8 @@ static int check_raw_mode(const struct bpf_func_proto *fn)
return count > 1 ? -EINVAL : 0;
}
-/* Packet data might have moved, any old PTR_TO_PACKET[_END] are now invalid,
- * so turn them into unknown SCALAR_VALUE.
+/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
+ * are now invalid, so turn them into unknown SCALAR_VALUE.
*/
static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
{
@@ -1608,18 +1519,15 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
int i;
for (i = 0; i < MAX_BPF_REG; i++)
- if (regs[i].type == PTR_TO_PACKET ||
- regs[i].type == PTR_TO_PACKET_END)
- mark_reg_unknown(regs, i);
+ if (reg_is_pkt_pointer_any(&regs[i]))
+ mark_reg_unknown(env, regs, i);
for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
reg = &state->spilled_regs[i / BPF_REG_SIZE];
- if (reg->type != PTR_TO_PACKET &&
- reg->type != PTR_TO_PACKET_END)
- continue;
- __mark_reg_unknown(reg);
+ if (reg_is_pkt_pointer_any(reg))
+ __mark_reg_unknown(reg);
}
}
@@ -1634,21 +1542,23 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
/* find function prototype */
if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
- verbose("invalid func %s#%d\n", func_id_name(func_id), func_id);
+ verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
+ func_id);
return -EINVAL;
}
- if (env->prog->aux->ops->get_func_proto)
- fn = env->prog->aux->ops->get_func_proto(func_id);
+ if (env->ops->get_func_proto)
+ fn = env->ops->get_func_proto(func_id);
if (!fn) {
- verbose("unknown func %s#%d\n", func_id_name(func_id), func_id);
+ verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
+ func_id);
return -EINVAL;
}
/* eBPF programs must be GPL compatible to use GPL-ed functions */
if (!env->prog->gpl_compatible && fn->gpl_only) {
- verbose("cannot call GPL only function from proprietary program\n");
+ verbose(env, "cannot call GPL only function from proprietary program\n");
return -EINVAL;
}
@@ -1662,7 +1572,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
*/
err = check_raw_mode(fn);
if (err) {
- verbose("kernel subsystem misconfigured func %s#%d\n",
+ verbose(env, "kernel subsystem misconfigured func %s#%d\n",
func_id_name(func_id), func_id);
return err;
}
@@ -1695,14 +1605,14 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
- mark_reg_not_init(regs, caller_saved[i]);
+ mark_reg_not_init(env, regs, caller_saved[i]);
check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
}
/* update return register (already marked as written above) */
if (fn->ret_type == RET_INTEGER) {
/* sets type to SCALAR_VALUE */
- mark_reg_unknown(regs, BPF_REG_0);
+ mark_reg_unknown(env, regs, BPF_REG_0);
} else if (fn->ret_type == RET_VOID) {
regs[BPF_REG_0].type = NOT_INIT;
} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
@@ -1710,14 +1620,15 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
/* There is no offset yet applied, variable or fixed */
- mark_reg_known_zero(regs, BPF_REG_0);
+ mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].off = 0;
/* remember map_ptr, so that check_map_access()
* can check 'value_size' boundary of memory access
* to map element returned from bpf_map_lookup_elem()
*/
if (meta.map_ptr == NULL) {
- verbose("kernel subsystem misconfigured verifier\n");
+ verbose(env,
+ "kernel subsystem misconfigured verifier\n");
return -EINVAL;
}
regs[BPF_REG_0].map_ptr = meta.map_ptr;
@@ -1728,12 +1639,12 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
else if (insn_aux->map_ptr != meta.map_ptr)
insn_aux->map_ptr = BPF_MAP_PTR_POISON;
} else {
- verbose("unknown return type %d of func %s#%d\n",
+ verbose(env, "unknown return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
return -EINVAL;
}
- err = check_map_func_compatibility(meta.map_ptr, func_id);
+ err = check_map_func_compatibility(env, meta.map_ptr, func_id);
if (err)
return err;
@@ -1792,39 +1703,42 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst_reg = &regs[dst];
if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
- print_verifier_state(&env->cur_state);
- verbose("verifier internal error: known but bad sbounds\n");
+ print_verifier_state(env, &env->cur_state);
+ verbose(env,
+ "verifier internal error: known but bad sbounds\n");
return -EINVAL;
}
if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
- print_verifier_state(&env->cur_state);
- verbose("verifier internal error: known but bad ubounds\n");
+ print_verifier_state(env, &env->cur_state);
+ verbose(env,
+ "verifier internal error: known but bad ubounds\n");
return -EINVAL;
}
if (BPF_CLASS(insn->code) != BPF_ALU64) {
/* 32-bit ALU ops on pointers produce (meaningless) scalars */
if (!env->allow_ptr_leaks)
- verbose("R%d 32-bit pointer arithmetic prohibited\n",
+ verbose(env,
+ "R%d 32-bit pointer arithmetic prohibited\n",
dst);
return -EACCES;
}
if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
if (!env->allow_ptr_leaks)
- verbose("R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
+ verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
dst);
return -EACCES;
}
if (ptr_reg->type == CONST_PTR_TO_MAP) {
if (!env->allow_ptr_leaks)
- verbose("R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
+ verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
dst);
return -EACCES;
}
if (ptr_reg->type == PTR_TO_PACKET_END) {
if (!env->allow_ptr_leaks)
- verbose("R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
+ verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
dst);
return -EACCES;
}
@@ -1879,7 +1793,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
dst_reg->off = ptr_reg->off;
- if (ptr_reg->type == PTR_TO_PACKET) {
+ if (reg_is_pkt_pointer(ptr_reg)) {
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range to zero */
dst_reg->range = 0;
@@ -1889,7 +1803,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (dst_reg == off_reg) {
/* scalar -= pointer. Creates an unknown scalar */
if (!env->allow_ptr_leaks)
- verbose("R%d tried to subtract pointer from scalar\n",
+ verbose(env, "R%d tried to subtract pointer from scalar\n",
dst);
return -EACCES;
}
@@ -1899,7 +1813,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
*/
if (ptr_reg->type == PTR_TO_STACK) {
if (!env->allow_ptr_leaks)
- verbose("R%d subtraction from stack pointer prohibited\n",
+ verbose(env, "R%d subtraction from stack pointer prohibited\n",
dst);
return -EACCES;
}
@@ -1939,7 +1853,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
}
dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
dst_reg->off = ptr_reg->off;
- if (ptr_reg->type == PTR_TO_PACKET) {
+ if (reg_is_pkt_pointer(ptr_reg)) {
dst_reg->id = ++env->id_gen;
/* something was added to pkt_ptr, set range to zero */
if (smin_val < 0)
@@ -1954,13 +1868,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
* ptr &= ~3 which would reduce min_value by 3.)
*/
if (!env->allow_ptr_leaks)
- verbose("R%d bitwise operator %s on pointer prohibited\n",
+ verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
dst, bpf_alu_string[opcode >> 4]);
return -EACCES;
default:
/* other operators (e.g. MUL,LSH) produce non-pointer results */
if (!env->allow_ptr_leaks)
- verbose("R%d pointer arithmetic with %s operator prohibited\n",
+ verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
dst, bpf_alu_string[opcode >> 4]);
return -EACCES;
}
@@ -2126,7 +2040,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
/* Shifts greater than 63 are undefined. This includes
* shifts by a negative number.
*/
- mark_reg_unknown(regs, insn->dst_reg);
+ mark_reg_unknown(env, regs, insn->dst_reg);
break;
}
/* We lose all sign bit information (except what we can pick
@@ -2154,7 +2068,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
/* Shifts greater than 63 are undefined. This includes
* shifts by a negative number.
*/
- mark_reg_unknown(regs, insn->dst_reg);
+ mark_reg_unknown(env, regs, insn->dst_reg);
break;
}
/* BPF_RSH is an unsigned shift, so make the appropriate casts */
@@ -2182,7 +2096,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
__update_reg_bounds(dst_reg);
break;
default:
- mark_reg_unknown(regs, insn->dst_reg);
+ mark_reg_unknown(env, regs, insn->dst_reg);
break;
}
@@ -2214,12 +2128,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
* an arbitrary scalar.
*/
if (!env->allow_ptr_leaks) {
- verbose("R%d pointer %s pointer prohibited\n",
+ verbose(env, "R%d pointer %s pointer prohibited\n",
insn->dst_reg,
bpf_alu_string[opcode >> 4]);
return -EACCES;
}
- mark_reg_unknown(regs, insn->dst_reg);
+ mark_reg_unknown(env, regs, insn->dst_reg);
return 0;
} else {
/* scalar += pointer
@@ -2271,13 +2185,13 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
/* Got here implies adding two SCALAR_VALUEs */
if (WARN_ON_ONCE(ptr_reg)) {
- print_verifier_state(&env->cur_state);
- verbose("verifier internal error: unexpected ptr_reg\n");
+ print_verifier_state(env, &env->cur_state);
+ verbose(env, "verifier internal error: unexpected ptr_reg\n");
return -EINVAL;
}
if (WARN_ON(!src_reg)) {
- print_verifier_state(&env->cur_state);
- verbose("verifier internal error: no src_reg\n");
+ print_verifier_state(env, &env->cur_state);
+ verbose(env, "verifier internal error: no src_reg\n");
return -EINVAL;
}
return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
@@ -2295,14 +2209,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (BPF_SRC(insn->code) != 0 ||
insn->src_reg != BPF_REG_0 ||
insn->off != 0 || insn->imm != 0) {
- verbose("BPF_NEG uses reserved fields\n");
+ verbose(env, "BPF_NEG uses reserved fields\n");
return -EINVAL;
}
} else {
if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
(insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
BPF_CLASS(insn->code) == BPF_ALU64) {
- verbose("BPF_END uses reserved fields\n");
+ verbose(env, "BPF_END uses reserved fields\n");
return -EINVAL;
}
}
@@ -2313,7 +2227,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
if (is_pointer_value(env, insn->dst_reg)) {
- verbose("R%d pointer arithmetic prohibited\n",
+ verbose(env, "R%d pointer arithmetic prohibited\n",
insn->dst_reg);
return -EACCES;
}
@@ -2327,7 +2241,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if (BPF_SRC(insn->code) == BPF_X) {
if (insn->imm != 0 || insn->off != 0) {
- verbose("BPF_MOV uses reserved fields\n");
+ verbose(env, "BPF_MOV uses reserved fields\n");
return -EINVAL;
}
@@ -2337,7 +2251,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
} else {
if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
- verbose("BPF_MOV uses reserved fields\n");
+ verbose(env, "BPF_MOV uses reserved fields\n");
return -EINVAL;
}
}
@@ -2357,11 +2271,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
} else {
/* R1 = (u32) R2 */
if (is_pointer_value(env, insn->src_reg)) {
- verbose("R%d partial copy of pointer\n",
+ verbose(env,
+ "R%d partial copy of pointer\n",
insn->src_reg);
return -EACCES;
}
- mark_reg_unknown(regs, insn->dst_reg);
+ mark_reg_unknown(env, regs, insn->dst_reg);
/* high 32 bits are known zero. */
regs[insn->dst_reg].var_off = tnum_cast(
regs[insn->dst_reg].var_off, 4);
@@ -2376,14 +2291,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
} else if (opcode > BPF_END) {
- verbose("invalid BPF_ALU opcode %x\n", opcode);
+ verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
return -EINVAL;
} else { /* all other ALU ops: and, sub, xor, add, ... */
if (BPF_SRC(insn->code) == BPF_X) {
if (insn->imm != 0 || insn->off != 0) {
- verbose("BPF_ALU uses reserved fields\n");
+ verbose(env, "BPF_ALU uses reserved fields\n");
return -EINVAL;
}
/* check src1 operand */
@@ -2392,7 +2307,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
} else {
if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
- verbose("BPF_ALU uses reserved fields\n");
+ verbose(env, "BPF_ALU uses reserved fields\n");
return -EINVAL;
}
}
@@ -2404,7 +2319,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
- verbose("div by zero\n");
+ verbose(env, "div by zero\n");
return -EINVAL;
}
@@ -2413,7 +2328,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
if (insn->imm < 0 || insn->imm >= size) {
- verbose("invalid shift %d\n", insn->imm);
+ verbose(env, "invalid shift %d\n", insn->imm);
return -EINVAL;
}
}
@@ -2431,6 +2346,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
static void find_good_pkt_pointers(struct bpf_verifier_state *state,
struct bpf_reg_state *dst_reg,
+ enum bpf_reg_type type,
bool range_right_open)
{
struct bpf_reg_state *regs = state->regs, *reg;
@@ -2501,7 +2417,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
* dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
*/
for (i = 0; i < MAX_BPF_REG; i++)
- if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
+ if (regs[i].type == type && regs[i].id == dst_reg->id)
/* keep the maximum range already checked */
regs[i].range = max(regs[i].range, new_range);
@@ -2509,8 +2425,8 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state,
if (state->stack_slot_type[i] != STACK_SPILL)
continue;
reg = &state->spilled_regs[i / BPF_REG_SIZE];
- if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
- reg->range = max(reg->range, new_range);
+ if (reg->type == type && reg->id == dst_reg->id)
+ reg->range = max_t(u16, reg->range, new_range);
}
}
@@ -2774,13 +2690,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
int err;
if (opcode > BPF_JSLE) {
- verbose("invalid BPF_JMP opcode %x\n", opcode);
+ verbose(env, "invalid BPF_JMP opcode %x\n", opcode);
return -EINVAL;
}
if (BPF_SRC(insn->code) == BPF_X) {
if (insn->imm != 0) {
- verbose("BPF_JMP uses reserved fields\n");
+ verbose(env, "BPF_JMP uses reserved fields\n");
return -EINVAL;
}
@@ -2790,13 +2706,13 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
return err;
if (is_pointer_value(env, insn->src_reg)) {
- verbose("R%d pointer comparison prohibited\n",
+ verbose(env, "R%d pointer comparison prohibited\n",
insn->src_reg);
return -EACCES;
}
} else {
if (insn->src_reg != BPF_REG_0) {
- verbose("BPF_JMP uses reserved fields\n");
+ verbose(env, "BPF_JMP uses reserved fields\n");
return -EINVAL;
}
}
@@ -2875,48 +2791,77 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
/* pkt_data' > pkt_end */
- find_good_pkt_pointers(this_branch, dst_reg, false);
+ find_good_pkt_pointers(this_branch, dst_reg,
+ PTR_TO_PACKET, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
/* pkt_end > pkt_data' */
- find_good_pkt_pointers(other_branch, &regs[insn->src_reg], true);
+ find_good_pkt_pointers(other_branch, &regs[insn->src_reg],
+ PTR_TO_PACKET, true);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
/* pkt_data' < pkt_end */
- find_good_pkt_pointers(other_branch, dst_reg, true);
+ find_good_pkt_pointers(other_branch, dst_reg, PTR_TO_PACKET,
+ true);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
/* pkt_end < pkt_data' */
- find_good_pkt_pointers(this_branch, &regs[insn->src_reg], false);
+ find_good_pkt_pointers(this_branch, &regs[insn->src_reg],
+ PTR_TO_PACKET, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
/* pkt_data' >= pkt_end */
- find_good_pkt_pointers(this_branch, dst_reg, true);
+ find_good_pkt_pointers(this_branch, dst_reg,
+ PTR_TO_PACKET, true);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
/* pkt_end >= pkt_data' */
- find_good_pkt_pointers(other_branch, &regs[insn->src_reg], false);
+ find_good_pkt_pointers(other_branch, &regs[insn->src_reg],
+ PTR_TO_PACKET, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
dst_reg->type == PTR_TO_PACKET &&
regs[insn->src_reg].type == PTR_TO_PACKET_END) {
/* pkt_data' <= pkt_end */
- find_good_pkt_pointers(other_branch, dst_reg, false);
+ find_good_pkt_pointers(other_branch, dst_reg,
+ PTR_TO_PACKET, false);
} else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
dst_reg->type == PTR_TO_PACKET_END &&
regs[insn->src_reg].type == PTR_TO_PACKET) {
/* pkt_end <= pkt_data' */
- find_good_pkt_pointers(this_branch, &regs[insn->src_reg], true);
+ find_good_pkt_pointers(this_branch, &regs[insn->src_reg],
+ PTR_TO_PACKET, true);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
+ dst_reg->type == PTR_TO_PACKET_META &&
+ reg_is_init_pkt_pointer(&regs[insn->src_reg], PTR_TO_PACKET)) {
+ find_good_pkt_pointers(this_branch, dst_reg,
+ PTR_TO_PACKET_META, false);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLT &&
+ dst_reg->type == PTR_TO_PACKET_META &&
+ reg_is_init_pkt_pointer(&regs[insn->src_reg], PTR_TO_PACKET)) {
+ find_good_pkt_pointers(other_branch, dst_reg,
+ PTR_TO_PACKET_META, false);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
+ reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
+ regs[insn->src_reg].type == PTR_TO_PACKET_META) {
+ find_good_pkt_pointers(other_branch, &regs[insn->src_reg],
+ PTR_TO_PACKET_META, false);
+ } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JLE &&
+ reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
+ regs[insn->src_reg].type == PTR_TO_PACKET_META) {
+ find_good_pkt_pointers(this_branch, &regs[insn->src_reg],
+ PTR_TO_PACKET_META, false);
} else if (is_pointer_value(env, insn->dst_reg)) {
- verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
+ verbose(env, "R%d pointer comparison prohibited\n",
+ insn->dst_reg);
return -EACCES;
}
- if (log_level)
- print_verifier_state(this_branch);
+ if (env->log.level)
+ print_verifier_state(env, this_branch);
return 0;
}
@@ -2935,11 +2880,11 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
int err;
if (BPF_SIZE(insn->code) != BPF_DW) {
- verbose("invalid BPF_LD_IMM insn\n");
+ verbose(env, "invalid BPF_LD_IMM insn\n");
return -EINVAL;
}
if (insn->off != 0) {
- verbose("BPF_LD_IMM64 uses reserved fields\n");
+ verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
return -EINVAL;
}
@@ -2997,14 +2942,14 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
int i, err;
if (!may_access_skb(env->prog->type)) {
- verbose("BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
+ verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
return -EINVAL;
}
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
BPF_SIZE(insn->code) == BPF_DW ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
- verbose("BPF_LD_[ABS|IND] uses reserved fields\n");
+ verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
return -EINVAL;
}
@@ -3014,7 +2959,8 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
if (regs[BPF_REG_6].type != PTR_TO_CTX) {
- verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
+ verbose(env,
+ "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
return -EINVAL;
}
@@ -3027,7 +2973,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
/* reset caller saved regs to unreadable */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
- mark_reg_not_init(regs, caller_saved[i]);
+ mark_reg_not_init(env, regs, caller_saved[i]);
check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
}
@@ -3035,7 +2981,44 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
* the value fetched from the packet.
* Already marked as written above.
*/
- mark_reg_unknown(regs, BPF_REG_0);
+ mark_reg_unknown(env, regs, BPF_REG_0);
+ return 0;
+}
+
+static int check_return_code(struct bpf_verifier_env *env)
+{
+ struct bpf_reg_state *reg;
+ struct tnum range = tnum_range(0, 1);
+
+ switch (env->prog->type) {
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_SOCK_OPS:
+ break;
+ default:
+ return 0;
+ }
+
+ reg = &env->cur_state.regs[BPF_REG_0];
+ if (reg->type != SCALAR_VALUE) {
+ verbose(env, "At program exit the register R0 is not a known value (%s)\n",
+ reg_type_str[reg->type]);
+ return -EINVAL;
+ }
+
+ if (!tnum_in(range, reg->var_off)) {
+ verbose(env, "At program exit the register R0 ");
+ if (!tnum_is_unknown(reg->var_off)) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env, "has value %s", tn_buf);
+ } else {
+ verbose(env, "has unknown scalar value");
+ }
+ verbose(env, " should have been 0 or 1\n");
+ return -EINVAL;
+ }
return 0;
}
@@ -3099,7 +3082,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
return 0;
if (w < 0 || w >= env->prog->len) {
- verbose("jump out of range from insn %d to %d\n", t, w);
+ verbose(env, "jump out of range from insn %d to %d\n", t, w);
return -EINVAL;
}
@@ -3116,13 +3099,13 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
insn_stack[cur_stack++] = w;
return 1;
} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
- verbose("back-edge from insn %d to %d\n", t, w);
+ verbose(env, "back-edge from insn %d to %d\n", t, w);
return -EINVAL;
} else if (insn_state[w] == EXPLORED) {
/* forward- or cross-edge */
insn_state[t] = DISCOVERED | e;
} else {
- verbose("insn state internal bug\n");
+ verbose(env, "insn state internal bug\n");
return -EFAULT;
}
return 0;
@@ -3216,7 +3199,7 @@ peek_stack:
mark_explored:
insn_state[t] = EXPLORED;
if (cur_stack-- <= 0) {
- verbose("pop stack internal bug\n");
+ verbose(env, "pop stack internal bug\n");
ret = -EFAULT;
goto err_free;
}
@@ -3225,7 +3208,7 @@ mark_explored:
check_state:
for (i = 0; i < insn_cnt; i++) {
if (insn_state[i] != EXPLORED) {
- verbose("unreachable insn %d\n", i);
+ verbose(env, "unreachable insn %d\n", i);
ret = -EINVAL;
goto err_free;
}
@@ -3340,8 +3323,9 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
return false;
/* Check our ids match any regs they're supposed to */
return check_ids(rold->id, rcur->id, idmap);
+ case PTR_TO_PACKET_META:
case PTR_TO_PACKET:
- if (rcur->type != PTR_TO_PACKET)
+ if (rcur->type != rold->type)
return false;
/* We must have at least as much range as the old ptr
* did, so that any accesses which were safe before are
@@ -3605,7 +3589,7 @@ static int do_check(struct bpf_verifier_env *env)
int insn_processed = 0;
bool do_print_state = false;
- init_reg_state(regs);
+ init_reg_state(env, regs);
state->parent = NULL;
insn_idx = 0;
for (;;) {
@@ -3614,7 +3598,7 @@ static int do_check(struct bpf_verifier_env *env)
int err;
if (insn_idx >= insn_cnt) {
- verbose("invalid insn idx %d insn_cnt %d\n",
+ verbose(env, "invalid insn idx %d insn_cnt %d\n",
insn_idx, insn_cnt);
return -EFAULT;
}
@@ -3623,7 +3607,8 @@ static int do_check(struct bpf_verifier_env *env)
class = BPF_CLASS(insn->code);
if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
- verbose("BPF program is too large. Processed %d insn\n",
+ verbose(env,
+ "BPF program is too large. Processed %d insn\n",
insn_processed);
return -E2BIG;
}
@@ -3633,12 +3618,12 @@ static int do_check(struct bpf_verifier_env *env)
return err;
if (err == 1) {
/* found equivalent state, can prune the search */
- if (log_level) {
+ if (env->log.level) {
if (do_print_state)
- verbose("\nfrom %d to %d: safe\n",
+ verbose(env, "\nfrom %d to %d: safe\n",
prev_insn_idx, insn_idx);
else
- verbose("%d: safe\n", insn_idx);
+ verbose(env, "%d: safe\n", insn_idx);
}
goto process_bpf_exit;
}
@@ -3646,19 +3631,20 @@ static int do_check(struct bpf_verifier_env *env)
if (need_resched())
cond_resched();
- if (log_level > 1 || (log_level && do_print_state)) {
- if (log_level > 1)
- verbose("%d:", insn_idx);
+ if (env->log.level > 1 || (env->log.level && do_print_state)) {
+ if (env->log.level > 1)
+ verbose(env, "%d:", insn_idx);
else
- verbose("\nfrom %d to %d:",
+ verbose(env, "\nfrom %d to %d:",
prev_insn_idx, insn_idx);
- print_verifier_state(&env->cur_state);
+ print_verifier_state(env, &env->cur_state);
do_print_state = false;
}
- if (log_level) {
- verbose("%d: ", insn_idx);
- print_bpf_insn(env, insn);
+ if (env->log.level) {
+ verbose(env, "%d: ", insn_idx);
+ print_bpf_insn(verbose, env, insn,
+ env->allow_ptr_leaks);
}
err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
@@ -3714,7 +3700,7 @@ static int do_check(struct bpf_verifier_env *env)
* src_reg == stack|map in some other branch.
* Reject it.
*/
- verbose("same insn cannot be used with different pointers\n");
+ verbose(env, "same insn cannot be used with different pointers\n");
return -EINVAL;
}
@@ -3754,14 +3740,14 @@ static int do_check(struct bpf_verifier_env *env)
} else if (dst_reg_type != *prev_dst_type &&
(dst_reg_type == PTR_TO_CTX ||
*prev_dst_type == PTR_TO_CTX)) {
- verbose("same insn cannot be used with different pointers\n");
+ verbose(env, "same insn cannot be used with different pointers\n");
return -EINVAL;
}
} else if (class == BPF_ST) {
if (BPF_MODE(insn->code) != BPF_MEM ||
insn->src_reg != BPF_REG_0) {
- verbose("BPF_ST uses reserved fields\n");
+ verbose(env, "BPF_ST uses reserved fields\n");
return -EINVAL;
}
/* check src operand */
@@ -3784,7 +3770,7 @@ static int do_check(struct bpf_verifier_env *env)
insn->off != 0 ||
insn->src_reg != BPF_REG_0 ||
insn->dst_reg != BPF_REG_0) {
- verbose("BPF_CALL uses reserved fields\n");
+ verbose(env, "BPF_CALL uses reserved fields\n");
return -EINVAL;
}
@@ -3797,7 +3783,7 @@ static int do_check(struct bpf_verifier_env *env)
insn->imm != 0 ||
insn->src_reg != BPF_REG_0 ||
insn->dst_reg != BPF_REG_0) {
- verbose("BPF_JA uses reserved fields\n");
+ verbose(env, "BPF_JA uses reserved fields\n");
return -EINVAL;
}
@@ -3809,7 +3795,7 @@ static int do_check(struct bpf_verifier_env *env)
insn->imm != 0 ||
insn->src_reg != BPF_REG_0 ||
insn->dst_reg != BPF_REG_0) {
- verbose("BPF_EXIT uses reserved fields\n");
+ verbose(env, "BPF_EXIT uses reserved fields\n");
return -EINVAL;
}
@@ -3824,10 +3810,13 @@ static int do_check(struct bpf_verifier_env *env)
return err;
if (is_pointer_value(env, BPF_REG_0)) {
- verbose("R0 leaks addr as return value\n");
+ verbose(env, "R0 leaks addr as return value\n");
return -EACCES;
}
+ err = check_return_code(env);
+ if (err)
+ return err;
process_bpf_exit:
insn_idx = pop_stack(env, &prev_insn_idx);
if (insn_idx < 0) {
@@ -3856,19 +3845,19 @@ process_bpf_exit:
insn_idx++;
} else {
- verbose("invalid BPF_LD mode\n");
+ verbose(env, "invalid BPF_LD mode\n");
return -EINVAL;
}
} else {
- verbose("unknown insn class %d\n", class);
+ verbose(env, "unknown insn class %d\n", class);
return -EINVAL;
}
insn_idx++;
}
- verbose("processed %d insns, stack depth %d\n",
- insn_processed, env->prog->aux->stack_depth);
+ verbose(env, "processed %d insns, stack depth %d\n", insn_processed,
+ env->prog->aux->stack_depth);
return 0;
}
@@ -3880,7 +3869,8 @@ static int check_map_prealloc(struct bpf_map *map)
!(map->map_flags & BPF_F_NO_PREALLOC);
}
-static int check_map_prog_compatibility(struct bpf_map *map,
+static int check_map_prog_compatibility(struct bpf_verifier_env *env,
+ struct bpf_map *map,
struct bpf_prog *prog)
{
@@ -3891,12 +3881,12 @@ static int check_map_prog_compatibility(struct bpf_map *map,
*/
if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
if (!check_map_prealloc(map)) {
- verbose("perf_event programs can only use preallocated hash map\n");
+ verbose(env, "perf_event programs can only use preallocated hash map\n");
return -EINVAL;
}
if (map->inner_map_meta &&
!check_map_prealloc(map->inner_map_meta)) {
- verbose("perf_event programs can only use preallocated inner hash map\n");
+ verbose(env, "perf_event programs can only use preallocated inner hash map\n");
return -EINVAL;
}
}
@@ -3919,14 +3909,14 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++, insn++) {
if (BPF_CLASS(insn->code) == BPF_LDX &&
(BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
- verbose("BPF_LDX uses reserved fields\n");
+ verbose(env, "BPF_LDX uses reserved fields\n");
return -EINVAL;
}
if (BPF_CLASS(insn->code) == BPF_STX &&
((BPF_MODE(insn->code) != BPF_MEM &&
BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
- verbose("BPF_STX uses reserved fields\n");
+ verbose(env, "BPF_STX uses reserved fields\n");
return -EINVAL;
}
@@ -3937,7 +3927,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
if (i == insn_cnt - 1 || insn[1].code != 0 ||
insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
insn[1].off != 0) {
- verbose("invalid bpf_ld_imm64 insn\n");
+ verbose(env, "invalid bpf_ld_imm64 insn\n");
return -EINVAL;
}
@@ -3946,19 +3936,20 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
goto next_insn;
if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
- verbose("unrecognized bpf_ld_imm64 insn\n");
+ verbose(env,
+ "unrecognized bpf_ld_imm64 insn\n");
return -EINVAL;
}
f = fdget(insn->imm);
map = __bpf_map_get(f);
if (IS_ERR(map)) {
- verbose("fd %d is not pointing to valid bpf_map\n",
+ verbose(env, "fd %d is not pointing to valid bpf_map\n",
insn->imm);
return PTR_ERR(map);
}
- err = check_map_prog_compatibility(map, env->prog);
+ err = check_map_prog_compatibility(env, map, env->prog);
if (err) {
fdput(f);
return err;
@@ -4067,7 +4058,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
*/
static int convert_ctx_accesses(struct bpf_verifier_env *env)
{
- const struct bpf_verifier_ops *ops = env->prog->aux->ops;
+ const struct bpf_verifier_ops *ops = env->ops;
int i, cnt, size, ctx_field_size, delta = 0;
const int insn_cnt = env->prog->len;
struct bpf_insn insn_buf[16], *insn;
@@ -4080,7 +4071,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
env->prog);
if (cnt >= ARRAY_SIZE(insn_buf)) {
- verbose("bpf verifier is misconfigured\n");
+ verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
} else if (cnt) {
new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
@@ -4128,7 +4119,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
u8 size_code;
if (type == BPF_WRITE) {
- verbose("bpf verifier narrow ctx access misconfigured\n");
+ verbose(env, "bpf verifier narrow ctx access misconfigured\n");
return -EINVAL;
}
@@ -4147,7 +4138,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
&target_size);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
(ctx_field_size && !target_size)) {
- verbose("bpf verifier is misconfigured\n");
+ verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
@@ -4229,7 +4220,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
- verbose("bpf verifier is misconfigured\n");
+ verbose(env, "bpf verifier is misconfigured\n");
return -EINVAL;
}
@@ -4268,12 +4259,13 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn = new_prog->insnsi + i + delta;
}
patch_call_imm:
- fn = prog->aux->ops->get_func_proto(insn->imm);
+ fn = env->ops->get_func_proto(insn->imm);
/* all functions that have prototype and verifier allowed
* programs to call them, must be real in-kernel functions
*/
if (!fn->func) {
- verbose("kernel subsystem misconfigured func %s#%d\n",
+ verbose(env,
+ "kernel subsystem misconfigured func %s#%d\n",
func_id_name(insn->imm), insn->imm);
return -EFAULT;
}
@@ -4307,8 +4299,8 @@ static void free_states(struct bpf_verifier_env *env)
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
{
- char __user *log_ubuf = NULL;
struct bpf_verifier_env *env;
+ struct bpf_verifer_log *log;
int ret = -EINVAL;
/* 'struct bpf_verifier_env' can be global, but since it's not small,
@@ -4317,6 +4309,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
if (!env)
return -ENOMEM;
+ log = &env->log;
env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
(*prog)->len);
@@ -4324,6 +4317,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
if (!env->insn_aux_data)
goto err_free_env;
env->prog = *prog;
+ env->ops = bpf_verifier_ops[env->prog->type];
/* grab the mutex to protect few globals used by verifier */
mutex_lock(&bpf_verifier_lock);
@@ -4332,23 +4326,15 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
/* user requested verbose verifier output
* and supplied buffer to store the verification trace
*/
- log_level = attr->log_level;
- log_ubuf = (char __user *) (unsigned long) attr->log_buf;
- log_size = attr->log_size;
- log_len = 0;
+ log->level = attr->log_level;
+ log->ubuf = (char __user *) (unsigned long) attr->log_buf;
+ log->len_total = attr->log_size;
ret = -EINVAL;
- /* log_* values have to be sane */
- if (log_size < 128 || log_size > UINT_MAX >> 8 ||
- log_level == 0 || log_ubuf == NULL)
+ /* log attributes have to be sane */
+ if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
+ !log->level || !log->ubuf)
goto err_unlock;
-
- ret = -ENOMEM;
- log_buf = vmalloc(log_size);
- if (!log_buf)
- goto err_unlock;
- } else {
- log_level = 0;
}
env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
@@ -4385,17 +4371,11 @@ skip_full_check:
if (ret == 0)
ret = fixup_bpf_calls(env);
- if (log_level && log_len >= log_size - 1) {
- BUG_ON(log_len >= log_size);
- /* verifier log exceeded user supplied buffer */
+ if (log->level && bpf_verifier_log_full(log))
ret = -ENOSPC;
- /* fall through to return what was recorded */
- }
-
- /* copy verifier log back to user space including trailing zero */
- if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) {
+ if (log->level && !log->ubuf) {
ret = -EFAULT;
- goto free_log_buf;
+ goto err_release_maps;
}
if (ret == 0 && env->used_map_cnt) {
@@ -4406,7 +4386,7 @@ skip_full_check:
if (!env->prog->aux->used_maps) {
ret = -ENOMEM;
- goto free_log_buf;
+ goto err_release_maps;
}
memcpy(env->prog->aux->used_maps, env->used_maps,
@@ -4419,9 +4399,7 @@ skip_full_check:
convert_pseudo_ld_imm64(env);
}
-free_log_buf:
- if (log_level)
- vfree(log_buf);
+err_release_maps:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
* them now. Otherwise free_bpf_prog_info() will release them.
@@ -4436,12 +4414,21 @@ err_free_env:
return ret;
}
+static const struct bpf_verifier_ops * const bpf_analyzer_ops[] = {
+ [BPF_PROG_TYPE_XDP] = &xdp_analyzer_ops,
+ [BPF_PROG_TYPE_SCHED_CLS] = &tc_cls_act_analyzer_ops,
+};
+
int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
void *priv)
{
struct bpf_verifier_env *env;
int ret;
+ if (prog->type >= ARRAY_SIZE(bpf_analyzer_ops) ||
+ !bpf_analyzer_ops[prog->type])
+ return -EOPNOTSUPP;
+
env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
if (!env)
return -ENOMEM;
@@ -4452,14 +4439,13 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
if (!env->insn_aux_data)
goto err_free_env;
env->prog = prog;
+ env->ops = bpf_analyzer_ops[env->prog->type];
env->analyzer_ops = ops;
env->analyzer_priv = priv;
/* grab the mutex to protect few globals used by verifier */
mutex_lock(&bpf_verifier_lock);
- log_level = 0;
-
env->strict_alignment = false;
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
env->strict_alignment = true;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 44857278eb8a..00f5b358aeac 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -1896,6 +1896,9 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
if (ret)
goto destroy_root;
+ ret = cgroup_bpf_inherit(root_cgrp);
+ WARN_ON_ONCE(ret);
+
trace_cgroup_setup_root(root);
/*
@@ -4721,6 +4724,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
cgrp->self.parent = &parent->self;
cgrp->root = root;
cgrp->level = level;
+ ret = cgroup_bpf_inherit(cgrp);
+ if (ret)
+ goto out_idr_free;
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
@@ -4755,13 +4761,12 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
if (!cgroup_on_dfl(cgrp))
cgrp->subtree_control = cgroup_control(cgrp);
- if (parent)
- cgroup_bpf_inherit(cgrp, parent);
-
cgroup_propagate_control(cgrp);
return cgrp;
+out_idr_free:
+ cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
out_cancel_ref:
percpu_ref_exit(&cgrp->self.refcnt);
out_free_cgrp:
@@ -5744,14 +5749,33 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
#endif /* CONFIG_SOCK_CGROUP_DATA */
#ifdef CONFIG_CGROUP_BPF
-int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type, bool overridable)
+int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, u32 flags)
+{
+ int ret;
+
+ mutex_lock(&cgroup_mutex);
+ ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
+ mutex_unlock(&cgroup_mutex);
+ return ret;
+}
+int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+ enum bpf_attach_type type, u32 flags)
+{
+ int ret;
+
+ mutex_lock(&cgroup_mutex);
+ ret = __cgroup_bpf_detach(cgrp, prog, type, flags);
+ mutex_unlock(&cgroup_mutex);
+ return ret;
+}
+int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
+ union bpf_attr __user *uattr)
{
- struct cgroup *parent = cgroup_parent(cgrp);
int ret;
mutex_lock(&cgroup_mutex);
- ret = __cgroup_bpf_update(cgrp, parent, prog, type, overridable);
+ ret = __cgroup_bpf_query(cgrp, attr, uattr);
mutex_unlock(&cgroup_mutex);
return ret;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9d93db81fa36..31ee304a5844 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3684,10 +3684,12 @@ static inline u64 perf_event_count(struct perf_event *event)
* will not be local and we cannot read them atomically
* - must not have a pmu::count method
*/
-int perf_event_read_local(struct perf_event *event, u64 *value)
+int perf_event_read_local(struct perf_event *event, u64 *value,
+ u64 *enabled, u64 *running)
{
unsigned long flags;
int ret = 0;
+ u64 now;
/*
* Disabling interrupts avoids all counter scheduling (context
@@ -3718,13 +3720,21 @@ int perf_event_read_local(struct perf_event *event, u64 *value)
goto out;
}
+ now = event->shadow_ctx_time + perf_clock();
+ if (enabled)
+ *enabled = now - event->tstamp_enabled;
/*
* If the event is currently on this CPU, its either a per-task event,
* or local to this CPU. Furthermore it means its ACTIVE (otherwise
* oncpu == -1).
*/
- if (event->oncpu == smp_processor_id())
+ if (event->oncpu == smp_processor_id()) {
event->pmu->read(event);
+ if (running)
+ *running = now - event->tstamp_running;
+ } else if (running) {
+ *running = event->total_time_running;
+ }
*value = local64_read(&event->count);
out:
@@ -8072,6 +8082,7 @@ static void bpf_overflow_handler(struct perf_event *event,
struct bpf_perf_event_data_kern ctx = {
.data = data,
.regs = regs,
+ .event = event,
};
int ret = 0;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index dc498b605d5d..3126da2f468a 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -255,14 +255,14 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
return &bpf_trace_printk_proto;
}
-BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
+static __always_inline int
+get_map_perf_counter(struct bpf_map *map, u64 flags,
+ u64 *value, u64 *enabled, u64 *running)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
unsigned int cpu = smp_processor_id();
u64 index = flags & BPF_F_INDEX_MASK;
struct bpf_event_entry *ee;
- u64 value = 0;
- int err;
if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
return -EINVAL;
@@ -275,7 +275,15 @@ BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
if (!ee)
return -ENOENT;
- err = perf_event_read_local(ee->event, &value);
+ return perf_event_read_local(ee->event, value, enabled, running);
+}
+
+BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
+{
+ u64 value = 0;
+ int err;
+
+ err = get_map_perf_counter(map, flags, &value, NULL, NULL);
/*
* this api is ugly since we miss [-22..-2] range of valid
* counter values, but that's uapi
@@ -293,6 +301,33 @@ static const struct bpf_func_proto bpf_perf_event_read_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
+ struct bpf_perf_event_value *, buf, u32, size)
+{
+ int err = -EINVAL;
+
+ if (unlikely(size != sizeof(struct bpf_perf_event_value)))
+ goto clear;
+ err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
+ &buf->running);
+ if (unlikely(err))
+ goto clear;
+ return 0;
+clear:
+ memset(buf, 0, size);
+ return err;
+}
+
+static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
+ .func = bpf_perf_event_read_value,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_CONST_MAP_PTR,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg4_type = ARG_CONST_SIZE,
+};
+
static DEFINE_PER_CPU(struct perf_sample_data, bpf_sd);
static __always_inline u64
@@ -499,6 +534,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
return &bpf_perf_event_output_proto;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto;
+ case BPF_FUNC_perf_event_read_value:
+ return &bpf_perf_event_read_value_proto;
default:
return tracing_func_proto(func_id);
}
@@ -524,11 +561,14 @@ static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type
return true;
}
-const struct bpf_verifier_ops kprobe_prog_ops = {
+const struct bpf_verifier_ops kprobe_verifier_ops = {
.get_func_proto = kprobe_prog_func_proto,
.is_valid_access = kprobe_prog_is_valid_access,
};
+const struct bpf_prog_ops kprobe_prog_ops = {
+};
+
BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
@@ -576,6 +616,32 @@ static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(bpf_perf_prog_read_value_tp, struct bpf_perf_event_data_kern *, ctx,
+ struct bpf_perf_event_value *, buf, u32, size)
+{
+ int err = -EINVAL;
+
+ if (unlikely(size != sizeof(struct bpf_perf_event_value)))
+ goto clear;
+ err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
+ &buf->running);
+ if (unlikely(err))
+ goto clear;
+ return 0;
+clear:
+ memset(buf, 0, size);
+ return err;
+}
+
+static const struct bpf_func_proto bpf_perf_prog_read_value_proto_tp = {
+ .func = bpf_perf_prog_read_value_tp,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg3_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
@@ -583,6 +649,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id)
return &bpf_perf_event_output_proto_tp;
case BPF_FUNC_get_stackid:
return &bpf_get_stackid_proto_tp;
+ case BPF_FUNC_perf_prog_read_value:
+ return &bpf_perf_prog_read_value_proto_tp;
default:
return tracing_func_proto(func_id);
}
@@ -602,11 +670,14 @@ static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type
return true;
}
-const struct bpf_verifier_ops tracepoint_prog_ops = {
+const struct bpf_verifier_ops tracepoint_verifier_ops = {
.get_func_proto = tp_prog_func_proto,
.is_valid_access = tp_prog_is_valid_access,
};
+const struct bpf_prog_ops tracepoint_prog_ops = {
+};
+
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
@@ -662,8 +733,11 @@ static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-const struct bpf_verifier_ops perf_event_prog_ops = {
+const struct bpf_verifier_ops perf_event_verifier_ops = {
.get_func_proto = tp_prog_func_proto,
.is_valid_access = pe_prog_is_valid_access,
.convert_ctx_access = pe_prog_convert_ctx_access,
};
+
+const struct bpf_prog_ops perf_event_prog_ops = {
+};
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index f346715e2255..dbe61c4c2a97 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -127,12 +127,11 @@ void dql_reset(struct dql *dql)
}
EXPORT_SYMBOL(dql_reset);
-int dql_init(struct dql *dql, unsigned hold_time)
+void dql_init(struct dql *dql, unsigned int hold_time)
{
dql->max_limit = DQL_MAX_LIMIT;
dql->min_limit = 0;
dql->slack_hold_time = hold_time;
dql_reset(dql);
- return 0;
}
EXPORT_SYMBOL(dql_init);
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index f237a09a5862..c3e84edc47c9 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -294,6 +294,55 @@ static void cleanup_uevent_env(struct subprocess_info *info)
}
#endif
+static int kobject_uevent_net_broadcast(struct kobject *kobj,
+ struct kobj_uevent_env *env,
+ const char *action_string,
+ const char *devpath)
+{
+ int retval = 0;
+#if defined(CONFIG_NET)
+ struct sk_buff *skb = NULL;
+ struct uevent_sock *ue_sk;
+
+ /* send netlink message */
+ list_for_each_entry(ue_sk, &uevent_sock_list, list) {
+ struct sock *uevent_sock = ue_sk->sk;
+
+ if (!netlink_has_listeners(uevent_sock, 1))
+ continue;
+
+ if (!skb) {
+ /* allocate message with the maximum possible size */
+ size_t len = strlen(action_string) + strlen(devpath) + 2;
+ char *scratch;
+
+ retval = -ENOMEM;
+ skb = alloc_skb(len + env->buflen, GFP_KERNEL);
+ if (!skb)
+ continue;
+
+ /* add header */
+ scratch = skb_put(skb, len);
+ sprintf(scratch, "%s@%s", action_string, devpath);
+
+ skb_put_data(skb, env->buf, env->buflen);
+
+ NETLINK_CB(skb).dst_group = 1;
+ }
+
+ retval = netlink_broadcast_filtered(uevent_sock, skb_get(skb),
+ 0, 1, GFP_KERNEL,
+ kobj_bcast_filter,
+ kobj);
+ /* ENOBUFS should be handled in userspace */
+ if (retval == -ENOBUFS || retval == -ESRCH)
+ retval = 0;
+ }
+ consume_skb(skb);
+#endif
+ return retval;
+}
+
static void zap_modalias_env(struct kobj_uevent_env *env)
{
static const char modalias_prefix[] = "MODALIAS=";
@@ -336,9 +385,6 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
const struct kset_uevent_ops *uevent_ops;
int i = 0;
int retval = 0;
-#ifdef CONFIG_NET
- struct uevent_sock *ue_sk;
-#endif
pr_debug("kobject: '%s' (%p): %s\n",
kobject_name(kobj), kobj, __func__);
@@ -460,46 +506,8 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action,
mutex_unlock(&uevent_sock_mutex);
goto exit;
}
-
-#if defined(CONFIG_NET)
- /* send netlink message */
- list_for_each_entry(ue_sk, &uevent_sock_list, list) {
- struct sock *uevent_sock = ue_sk->sk;
- struct sk_buff *skb;
- size_t len;
-
- if (!netlink_has_listeners(uevent_sock, 1))
- continue;
-
- /* allocate message with the maximum possible size */
- len = strlen(action_string) + strlen(devpath) + 2;
- skb = alloc_skb(len + env->buflen, GFP_KERNEL);
- if (skb) {
- char *scratch;
-
- /* add header */
- scratch = skb_put(skb, len);
- sprintf(scratch, "%s@%s", action_string, devpath);
-
- /* copy keys to our continuous event payload buffer */
- for (i = 0; i < env->envp_idx; i++) {
- len = strlen(env->envp[i]) + 1;
- scratch = skb_put(skb, len);
- strcpy(scratch, env->envp[i]);
- }
-
- NETLINK_CB(skb).dst_group = 1;
- retval = netlink_broadcast_filtered(uevent_sock, skb,
- 0, 1, GFP_KERNEL,
- kobj_bcast_filter,
- kobj);
- /* ENOBUFS should be handled in userspace */
- if (retval == -ENOBUFS || retval == -ESRCH)
- retval = 0;
- } else
- retval = -ENOMEM;
- }
-#endif
+ retval = kobject_uevent_net_broadcast(kobj, env, action_string,
+ devpath);
mutex_unlock(&uevent_sock_mutex);
#ifdef CONFIG_UEVENT_HELPER
diff --git a/lib/once.c b/lib/once.c
index 05c8604627eb..831c5a6b0bb2 100644
--- a/lib/once.c
+++ b/lib/once.c
@@ -5,7 +5,7 @@
struct once_work {
struct work_struct work;
- struct static_key *key;
+ struct static_key_true *key;
};
static void once_deferred(struct work_struct *w)
@@ -14,11 +14,11 @@ static void once_deferred(struct work_struct *w)
work = container_of(w, struct once_work, work);
BUG_ON(!static_key_enabled(work->key));
- static_key_slow_dec(work->key);
+ static_branch_disable(work->key);
kfree(work);
}
-static void once_disable_jump(struct static_key *key)
+static void once_disable_jump(struct static_key_true *key)
{
struct once_work *w;
@@ -51,7 +51,7 @@ bool __do_once_start(bool *done, unsigned long *flags)
}
EXPORT_SYMBOL(__do_once_start);
-void __do_once_done(bool *done, struct static_key *once_key,
+void __do_once_done(bool *done, struct static_key_true *once_key,
unsigned long *flags)
__releases(once_lock)
{
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 0ffca990a833..8e83cbdc049c 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -23,14 +23,15 @@
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/random.h>
#include <linux/vmalloc.h>
#define MAX_ENTRIES 1000000
#define TEST_INSERT_FAIL INT_MAX
-static int entries = 50000;
-module_param(entries, int, 0);
-MODULE_PARM_DESC(entries, "Number of entries to add (default: 50000)");
+static int parm_entries = 50000;
+module_param(parm_entries, int, 0);
+MODULE_PARM_DESC(parm_entries, "Number of entries to add (default: 50000)");
static int runs = 4;
module_param(runs, int, 0);
@@ -66,14 +67,18 @@ struct test_obj {
struct rhash_head node;
};
+struct test_obj_rhl {
+ struct test_obj_val value;
+ struct rhlist_head list_node;
+};
+
struct thread_data {
+ unsigned int entries;
int id;
struct task_struct *task;
struct test_obj *objs;
};
-static struct test_obj array[MAX_ENTRIES];
-
static struct rhashtable_params test_rht_params = {
.head_offset = offsetof(struct test_obj, node),
.key_offset = offsetof(struct test_obj, value),
@@ -85,7 +90,7 @@ static struct rhashtable_params test_rht_params = {
static struct semaphore prestart_sem;
static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0);
-static int insert_retry(struct rhashtable *ht, struct rhash_head *obj,
+static int insert_retry(struct rhashtable *ht, struct test_obj *obj,
const struct rhashtable_params params)
{
int err, retries = -1, enomem_retries = 0;
@@ -93,7 +98,7 @@ static int insert_retry(struct rhashtable *ht, struct rhash_head *obj,
do {
retries++;
cond_resched();
- err = rhashtable_insert_fast(ht, obj, params);
+ err = rhashtable_insert_fast(ht, &obj->node, params);
if (err == -ENOMEM && enomem_retry) {
enomem_retries++;
err = -EBUSY;
@@ -107,11 +112,12 @@ static int insert_retry(struct rhashtable *ht, struct rhash_head *obj,
return err ? : retries;
}
-static int __init test_rht_lookup(struct rhashtable *ht)
+static int __init test_rht_lookup(struct rhashtable *ht, struct test_obj *array,
+ unsigned int entries)
{
unsigned int i;
- for (i = 0; i < entries * 2; i++) {
+ for (i = 0; i < entries; i++) {
struct test_obj *obj;
bool expected = !(i % 2);
struct test_obj_val key = {
@@ -144,7 +150,7 @@ static int __init test_rht_lookup(struct rhashtable *ht)
return 0;
}
-static void test_bucket_stats(struct rhashtable *ht)
+static void test_bucket_stats(struct rhashtable *ht, unsigned int entries)
{
unsigned int err, total = 0, chain_len = 0;
struct rhashtable_iter hti;
@@ -186,7 +192,8 @@ static void test_bucket_stats(struct rhashtable *ht)
pr_warn("Test failed: Total count mismatch ^^^");
}
-static s64 __init test_rhashtable(struct rhashtable *ht)
+static s64 __init test_rhashtable(struct rhashtable *ht, struct test_obj *array,
+ unsigned int entries)
{
struct test_obj *obj;
int err;
@@ -203,7 +210,7 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
struct test_obj *obj = &array[i];
obj->value.id = i * 2;
- err = insert_retry(ht, &obj->node, test_rht_params);
+ err = insert_retry(ht, obj, test_rht_params);
if (err > 0)
insert_retries += err;
else if (err)
@@ -214,12 +221,12 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
pr_info(" %u insertions retried due to memory pressure\n",
insert_retries);
- test_bucket_stats(ht);
+ test_bucket_stats(ht, entries);
rcu_read_lock();
- test_rht_lookup(ht);
+ test_rht_lookup(ht, array, entries);
rcu_read_unlock();
- test_bucket_stats(ht);
+ test_bucket_stats(ht, entries);
pr_info(" Deleting %d keys\n", entries);
for (i = 0; i < entries; i++) {
@@ -244,9 +251,227 @@ static s64 __init test_rhashtable(struct rhashtable *ht)
}
static struct rhashtable ht;
+static struct rhltable rhlt;
+
+static int __init test_rhltable(unsigned int entries)
+{
+ struct test_obj_rhl *rhl_test_objects;
+ unsigned long *obj_in_table;
+ unsigned int i, j, k;
+ int ret, err;
+
+ if (entries == 0)
+ entries = 1;
+
+ rhl_test_objects = vzalloc(sizeof(*rhl_test_objects) * entries);
+ if (!rhl_test_objects)
+ return -ENOMEM;
+
+ ret = -ENOMEM;
+ obj_in_table = vzalloc(BITS_TO_LONGS(entries) * sizeof(unsigned long));
+ if (!obj_in_table)
+ goto out_free;
+
+ /* nulls_base not supported in rhlist interface */
+ test_rht_params.nulls_base = 0;
+ err = rhltable_init(&rhlt, &test_rht_params);
+ if (WARN_ON(err))
+ goto out_free;
+
+ k = prandom_u32();
+ ret = 0;
+ for (i = 0; i < entries; i++) {
+ rhl_test_objects[i].value.id = k;
+ err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node,
+ test_rht_params);
+ if (WARN(err, "error %d on element %d\n", err, i))
+ break;
+ if (err == 0)
+ set_bit(i, obj_in_table);
+ }
+
+ if (err)
+ ret = err;
+
+ pr_info("test %d add/delete pairs into rhlist\n", entries);
+ for (i = 0; i < entries; i++) {
+ struct rhlist_head *h, *pos;
+ struct test_obj_rhl *obj;
+ struct test_obj_val key = {
+ .id = k,
+ };
+ bool found;
+
+ rcu_read_lock();
+ h = rhltable_lookup(&rhlt, &key, test_rht_params);
+ if (WARN(!h, "key not found during iteration %d of %d", i, entries)) {
+ rcu_read_unlock();
+ break;
+ }
+
+ if (i) {
+ j = i - 1;
+ rhl_for_each_entry_rcu(obj, pos, h, list_node) {
+ if (WARN(pos == &rhl_test_objects[j].list_node, "old element found, should be gone"))
+ break;
+ }
+ }
+
+ cond_resched_rcu();
+
+ found = false;
+
+ rhl_for_each_entry_rcu(obj, pos, h, list_node) {
+ if (pos == &rhl_test_objects[i].list_node) {
+ found = true;
+ break;
+ }
+ }
+
+ rcu_read_unlock();
+
+ if (WARN(!found, "element %d not found", i))
+ break;
+
+ err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ WARN(err, "rhltable_remove: err %d for iteration %d\n", err, i);
+ if (err == 0)
+ clear_bit(i, obj_in_table);
+ }
+
+ if (ret == 0 && err)
+ ret = err;
+
+ for (i = 0; i < entries; i++) {
+ WARN(test_bit(i, obj_in_table), "elem %d allegedly still present", i);
+
+ err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node,
+ test_rht_params);
+ if (WARN(err, "error %d on element %d\n", err, i))
+ break;
+ if (err == 0)
+ set_bit(i, obj_in_table);
+ }
+
+ pr_info("test %d random rhlist add/delete operations\n", entries);
+ for (j = 0; j < entries; j++) {
+ u32 i = prandom_u32_max(entries);
+ u32 prand = prandom_u32();
+
+ cond_resched();
+
+ if (prand == 0)
+ prand = prandom_u32();
+
+ if (prand & 1) {
+ prand >>= 1;
+ continue;
+ }
+
+ err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ if (test_bit(i, obj_in_table)) {
+ clear_bit(i, obj_in_table);
+ if (WARN(err, "cannot remove element at slot %d", i))
+ continue;
+ } else {
+ if (WARN(err != -ENOENT, "removed non-existant element %d, error %d not %d",
+ i, err, -ENOENT))
+ continue;
+ }
+
+ if (prand & 1) {
+ prand >>= 1;
+ continue;
+ }
+
+ err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ if (err == 0) {
+ if (WARN(test_and_set_bit(i, obj_in_table), "succeeded to insert same object %d", i))
+ continue;
+ } else {
+ if (WARN(!test_bit(i, obj_in_table), "failed to insert object %d", i))
+ continue;
+ }
+
+ if (prand & 1) {
+ prand >>= 1;
+ continue;
+ }
+
+ i = prandom_u32_max(entries);
+ if (test_bit(i, obj_in_table)) {
+ err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ WARN(err, "cannot remove element at slot %d", i);
+ if (err == 0)
+ clear_bit(i, obj_in_table);
+ } else {
+ err = rhltable_insert(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ WARN(err, "failed to insert object %d", i);
+ if (err == 0)
+ set_bit(i, obj_in_table);
+ }
+ }
+
+ for (i = 0; i < entries; i++) {
+ cond_resched();
+ err = rhltable_remove(&rhlt, &rhl_test_objects[i].list_node, test_rht_params);
+ if (test_bit(i, obj_in_table)) {
+ if (WARN(err, "cannot remove element at slot %d", i))
+ continue;
+ } else {
+ if (WARN(err != -ENOENT, "removed non-existant element, error %d not %d",
+ err, -ENOENT))
+ continue;
+ }
+ }
+
+ rhltable_destroy(&rhlt);
+out_free:
+ vfree(rhl_test_objects);
+ vfree(obj_in_table);
+ return ret;
+}
+
+static int __init test_rhashtable_max(struct test_obj *array,
+ unsigned int entries)
+{
+ unsigned int i, insert_retries = 0;
+ int err;
+
+ test_rht_params.max_size = roundup_pow_of_two(entries / 8);
+ err = rhashtable_init(&ht, &test_rht_params);
+ if (err)
+ return err;
+
+ for (i = 0; i < ht.max_elems; i++) {
+ struct test_obj *obj = &array[i];
+
+ obj->value.id = i * 2;
+ err = insert_retry(&ht, obj, test_rht_params);
+ if (err > 0)
+ insert_retries += err;
+ else if (err)
+ return err;
+ }
+
+ err = insert_retry(&ht, &array[ht.max_elems], test_rht_params);
+ if (err == -E2BIG) {
+ err = 0;
+ } else {
+ pr_info("insert element %u should have failed with %d, got %d\n",
+ ht.max_elems, -E2BIG, err);
+ if (err == 0)
+ err = -1;
+ }
+
+ rhashtable_destroy(&ht);
+
+ return err;
+}
static int thread_lookup_test(struct thread_data *tdata)
{
+ unsigned int entries = tdata->entries;
int i, err = 0;
for (i = 0; i < entries; i++) {
@@ -283,10 +508,10 @@ static int threadfunc(void *data)
if (down_interruptible(&startup_sem))
pr_err(" thread[%d]: down_interruptible failed\n", tdata->id);
- for (i = 0; i < entries; i++) {
+ for (i = 0; i < tdata->entries; i++) {
tdata->objs[i].value.id = i;
tdata->objs[i].value.tid = tdata->id;
- err = insert_retry(&ht, &tdata->objs[i].node, test_rht_params);
+ err = insert_retry(&ht, &tdata->objs[i], test_rht_params);
if (err > 0) {
insert_retries += err;
} else if (err) {
@@ -307,7 +532,7 @@ static int threadfunc(void *data)
}
for (step = 10; step > 0; step--) {
- for (i = 0; i < entries; i += step) {
+ for (i = 0; i < tdata->entries; i += step) {
if (tdata->objs[i].value.id == TEST_INSERT_FAIL)
continue;
err = rhashtable_remove_fast(&ht, &tdata->objs[i].node,
@@ -338,17 +563,25 @@ out:
static int __init test_rht_init(void)
{
+ unsigned int entries;
int i, err, started_threads = 0, failed_threads = 0;
u64 total_time = 0;
struct thread_data *tdata;
struct test_obj *objs;
- entries = min(entries, MAX_ENTRIES);
+ if (parm_entries < 0)
+ parm_entries = 1;
+
+ entries = min(parm_entries, MAX_ENTRIES);
test_rht_params.automatic_shrinking = shrinking;
test_rht_params.max_size = max_size ? : roundup_pow_of_two(entries);
test_rht_params.nelem_hint = size;
+ objs = vzalloc((test_rht_params.max_size + 1) * sizeof(struct test_obj));
+ if (!objs)
+ return -ENOMEM;
+
pr_info("Running rhashtable test nelem=%d, max_size=%d, shrinking=%d\n",
size, max_size, shrinking);
@@ -356,7 +589,8 @@ static int __init test_rht_init(void)
s64 time;
pr_info("Test %02d:\n", i);
- memset(&array, 0, sizeof(array));
+ memset(objs, 0, test_rht_params.max_size * sizeof(struct test_obj));
+
err = rhashtable_init(&ht, &test_rht_params);
if (err < 0) {
pr_warn("Test failed: Unable to initialize hashtable: %d\n",
@@ -364,9 +598,10 @@ static int __init test_rht_init(void)
continue;
}
- time = test_rhashtable(&ht);
+ time = test_rhashtable(&ht, objs, entries);
rhashtable_destroy(&ht);
if (time < 0) {
+ vfree(objs);
pr_warn("Test failed: return code %lld\n", time);
return -EINVAL;
}
@@ -374,6 +609,11 @@ static int __init test_rht_init(void)
total_time += time;
}
+ pr_info("test if its possible to exceed max_size %d: %s\n",
+ test_rht_params.max_size, test_rhashtable_max(objs, entries) == 0 ?
+ "no, ok" : "YES, failed");
+ vfree(objs);
+
do_div(total_time, runs);
pr_info("Average test time: %llu\n", total_time);
@@ -404,6 +644,7 @@ static int __init test_rht_init(void)
}
for (i = 0; i < tcount; i++) {
tdata[i].id = i;
+ tdata[i].entries = entries;
tdata[i].objs = objs + i * entries;
tdata[i].task = kthread_run(threadfunc, &tdata[i],
"rhashtable_thrad[%d]", i);
@@ -425,11 +666,17 @@ static int __init test_rht_init(void)
failed_threads++;
}
}
- pr_info("Started %d threads, %d failed\n",
- started_threads, failed_threads);
rhashtable_destroy(&ht);
vfree(tdata);
vfree(objs);
+
+ /*
+ * rhltable_remove is very expensive, default values can cause test
+ * to run for 2 minutes or more, use a smaller number instead.
+ */
+ err = test_rhltable(entries / 16);
+ pr_info("Started %d threads, %d failed, rhltable test returns %d\n",
+ started_threads, failed_threads, err);
return 0;
}
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9649579b5b9f..71c3e045505b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
return 0;
}
-int register_vlan_dev(struct net_device *dev)
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack)
{
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
struct net_device *real_dev = vlan->real_dev;
@@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev)
if (err < 0)
goto out_uninit_mvrp;
- err = netdev_upper_dev_link(real_dev, dev);
+ err = netdev_upper_dev_link(real_dev, dev, extack);
if (err)
goto out_unregister_netdev;
@@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
vlan->flags = VLAN_FLAG_REORDER_HDR;
new_dev->rtnl_link_ops = &vlan_link_ops;
- err = register_vlan_dev(new_dev);
+ err = register_vlan_dev(new_dev, NULL);
if (err < 0)
goto out_free_newdev;
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index df8bd65dd370..94f8eed9f9b3 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -107,7 +107,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
int vlan_check_real_dev(struct net_device *real_dev,
__be16 protocol, u16 vlan_id);
void vlan_setup(struct net_device *dev);
-int register_vlan_dev(struct net_device *dev);
+int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 5e831de3103e..6e7c5a6a7930 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -160,7 +160,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
return err;
- return register_vlan_dev(dev);
+ return register_vlan_dev(dev, extack);
}
static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 65f706e4344c..d4f6029d5109 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -153,7 +153,7 @@ static int neigh_check_cb(struct neighbour *n)
return 1;
}
-static void idle_timer_check(unsigned long dummy)
+static void idle_timer_check(struct timer_list *unused)
{
write_lock(&arp_tbl.lock);
__neigh_for_each_release(&arp_tbl, neigh_check_cb);
@@ -887,7 +887,7 @@ static int __init atm_clip_init(void)
register_netdevice_notifier(&clip_dev_notifier);
register_inetaddr_notifier(&clip_inet_notifier);
- setup_timer(&idle_timer, idle_timer_check, 0);
+ timer_setup(&idle_timer, idle_timer_check, 0);
#ifdef CONFIG_PROC_FS
{
diff --git a/net/atm/lec.c b/net/atm/lec.c
index a3d93a1bb133..c976196da3ea 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1232,7 +1232,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
#define LEC_ARP_REFRESH_INTERVAL (3*HZ)
static void lec_arp_check_expire(struct work_struct *work);
-static void lec_arp_expire_arp(unsigned long data);
+static void lec_arp_expire_arp(struct timer_list *t);
/*
* Arp table funcs
@@ -1559,8 +1559,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
}
ether_addr_copy(to_return->mac_addr, mac_addr);
INIT_HLIST_NODE(&to_return->next);
- setup_timer(&to_return->timer, lec_arp_expire_arp,
- (unsigned long)to_return);
+ timer_setup(&to_return->timer, lec_arp_expire_arp, 0);
to_return->last_used = jiffies;
to_return->priv = priv;
skb_queue_head_init(&to_return->tx_wait);
@@ -1569,11 +1568,11 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv,
}
/* Arp sent timer expired */
-static void lec_arp_expire_arp(unsigned long data)
+static void lec_arp_expire_arp(struct timer_list *t)
{
struct lec_arp_table *entry;
- entry = (struct lec_arp_table *)data;
+ entry = from_timer(entry, t, timer);
pr_debug("\n");
if (entry->status == ESI_ARP_PENDING) {
@@ -1591,10 +1590,10 @@ static void lec_arp_expire_arp(unsigned long data)
}
/* Unknown/unused vcc expire, remove associated entry */
-static void lec_arp_expire_vcc(unsigned long data)
+static void lec_arp_expire_vcc(struct timer_list *t)
{
unsigned long flags;
- struct lec_arp_table *to_remove = (struct lec_arp_table *)data;
+ struct lec_arp_table *to_remove = from_timer(to_remove, t, timer);
struct lec_priv *priv = to_remove->priv;
del_timer(&to_remove->timer);
@@ -1799,7 +1798,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv,
else
send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL);
entry->timer.expires = jiffies + (1 * HZ);
- entry->timer.function = lec_arp_expire_arp;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_arp;
add_timer(&entry->timer);
found = priv->mcast_vcc;
}
@@ -1999,7 +1998,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
entry->old_recv_push = old_push;
entry->status = ESI_UNKNOWN;
entry->timer.expires = jiffies + priv->vcc_timeout_period;
- entry->timer.function = lec_arp_expire_vcc;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc;
hlist_add_head(&entry->next, &priv->lec_no_forward);
add_timer(&entry->timer);
dump_arp_table(priv);
@@ -2083,7 +2082,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data,
entry->status = ESI_UNKNOWN;
hlist_add_head(&entry->next, &priv->lec_arp_empty_ones);
entry->timer.expires = jiffies + priv->vcc_timeout_period;
- entry->timer.function = lec_arp_expire_vcc;
+ entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc;
add_timer(&entry->timer);
pr_debug("After vcc was added\n");
dump_arp_table(priv);
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 5677147209e8..b43d99430eb6 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -799,7 +799,6 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg)
int err;
if (mpcs == NULL) {
- init_timer(&mpc_timer);
mpc_timer_refresh();
/* This lets us now how our LECs are doing */
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 83ba5483455a..1b659ab652fb 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -916,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface)
u16 tvlv_len = 0;
unsigned long send_time;
- if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
- (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+ hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
return;
/* the interface gets activated here to avoid race conditions between
@@ -1264,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node,
* drops as they can't send and receive at the same time.
*/
tq_iface_penalty = BATADV_TQ_MAX_VALUE;
- if (if_outgoing && (if_incoming == if_outgoing) &&
+ if (if_outgoing && if_incoming == if_outgoing &&
batadv_is_wifi_hardif(if_outgoing))
tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE,
bat_priv);
@@ -1369,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr,
ret = BATADV_NEIGH_DUP;
} else {
set_mark = 0;
- if (is_dup && (ret != BATADV_NEIGH_DUP))
+ if (is_dup && ret != BATADV_NEIGH_DUP)
ret = BATADV_ORIG_DUP;
}
@@ -1515,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
/* drop packet if sender is not a direct neighbor and if we
* don't route towards it
*/
- if (!is_single_hop_neigh && (!orig_neigh_router)) {
+ if (!is_single_hop_neigh && !orig_neigh_router) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: OGM via unknown neighbor!\n");
goto out_neigh;
@@ -1535,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno);
similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl;
- if (is_bidirect && ((dup_status == BATADV_NO_DUP) ||
+ if (is_bidirect && (dup_status == BATADV_NO_DUP ||
(sameseq && similar_ttl))) {
batadv_iv_ogm_orig_update(bat_priv, orig_node,
orig_ifinfo, ethhdr,
@@ -1553,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
/* OGMs from secondary interfaces should only scheduled once
* per interface where it has been received, not multiple times
*/
- if ((ogm_packet->ttl <= 2) &&
- (if_incoming != if_outgoing)) {
+ if (ogm_packet->ttl <= 2 &&
+ if_incoming != if_outgoing) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Drop packet: OGM from secondary interface and wrong outgoing interface\n");
goto out_neigh;
@@ -1590,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset,
if_incoming, if_outgoing);
out_neigh:
- if ((orig_neigh_node) && (!is_single_hop_neigh))
+ if (orig_neigh_node && !is_single_hop_neigh)
batadv_orig_node_put(orig_neigh_node);
out:
if (router_ifinfo)
@@ -2523,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
tmp_gw_factor *= 100 * 100;
tmp_gw_factor >>= 18;
- if ((tmp_gw_factor > max_gw_factor) ||
- ((tmp_gw_factor == max_gw_factor) &&
- (tq_avg > max_tq))) {
+ if (tmp_gw_factor > max_gw_factor ||
+ (tmp_gw_factor == max_gw_factor &&
+ tq_avg > max_tq)) {
if (curr_gw)
batadv_gw_node_put(curr_gw);
curr_gw = gw_node;
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index 4e2724c5b33d..93ef1c06227e 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -767,7 +767,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv)
if (batadv_v_gw_throughput_get(gw_node, &bw) < 0)
goto next;
- if (curr_gw && (bw <= max_bw))
+ if (curr_gw && bw <= max_bw)
goto next;
if (curr_gw)
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index bd1064d98e16..1de992c58b35 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
throughput = link_settings.base.speed;
- if (throughput && (throughput != SPEED_UNKNOWN))
+ if (throughput && throughput != SPEED_UNKNOWN)
return throughput * 10;
}
@@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work)
goto out;
/* we are in the process of shutting this interface down */
- if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) ||
- (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (hard_iface->if_status == BATADV_IF_NOT_IN_USE ||
+ hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)
goto out;
/* the interface was enabled but may not be ready yet */
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 8be61734fc43..c251445a42a0 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv,
* due to the store & forward characteristics of WIFI.
* Very low throughput values are the exception.
*/
- if ((throughput > 10) &&
- (if_incoming == if_outgoing) &&
+ if (throughput > 10 &&
+ if_incoming == if_outgoing &&
!(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX))
return throughput / 2;
@@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv,
/* drop packets with old seqnos, however accept the first packet after
* a host has been rebooted.
*/
- if ((seq_diff < 0) && !protection_started)
+ if (seq_diff < 0 && !protection_started)
goto out;
neigh_node->last_seen = jiffies;
@@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv,
router_throughput = router_ifinfo->bat_v.throughput;
neigh_throughput = neigh_ifinfo->bat_v.throughput;
- if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) &&
- (router_throughput >= neigh_throughput))
+ if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF &&
+ router_throughput >= neigh_throughput)
goto out;
}
@@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
return;
/* only unknown & newer OGMs contain TVLVs we are interested in */
- if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT))
+ if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
batadv_tvlv_containers_process(bat_priv, true, orig_node,
NULL, NULL,
(unsigned char *)(ogm2 + 1),
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b6cfa78e9381..760c0de72582 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
/* this is an hash collision with the temporary selected node. Choose
* the one with the lowest address
*/
- if ((tmp_max == max) && max_orig_node &&
- (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0))
+ if (tmp_max == max && max_orig_node &&
+ batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)
goto out;
ret = true;
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index de9955d5224d..10d521f0b17f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv)
}
}
- if ((curr_gw) && (!next_gw)) {
+ if (curr_gw && !next_gw) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Removing selected gateway - no gateway in range\n");
batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL,
NULL);
- } else if ((!curr_gw) && (next_gw)) {
+ } else if (!curr_gw && next_gw) {
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n",
next_gw->orig_node->orig,
@@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv,
goto out;
}
- if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) &&
- (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)))
+ if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) &&
+ gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))
goto out;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 33940c5c74a8..2c26039c23fc 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff,
if (strncasecmp(tmp_ptr, "mbit", 4) == 0)
bw_unit_type = BATADV_BW_UNIT_MBIT;
- if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) ||
- (bw_unit_type == BATADV_BW_UNIT_MBIT))
+ if (strncasecmp(tmp_ptr, "kbit", 4) == 0 ||
+ bw_unit_type == BATADV_BW_UNIT_MBIT)
*tmp_ptr = '\0';
}
@@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff,
if (!up_new)
up_new = 1;
- if ((down_curr == down_new) && (up_curr == up_new))
+ if (down_curr == down_new && up_curr == up_new)
return count;
batadv_gw_reselect(bat_priv);
@@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
/* only fetch the tvlv value if the handler wasn't called via the
* CIFNOTFND flag and if there is data to fetch
*/
- if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) ||
- (tvlv_value_len < sizeof(gateway))) {
+ if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND ||
+ tvlv_value_len < sizeof(gateway)) {
gateway.bandwidth_down = 0;
gateway.bandwidth_up = 0;
} else {
gateway_ptr = tvlv_value;
gateway.bandwidth_down = gateway_ptr->bandwidth_down;
gateway.bandwidth_up = gateway_ptr->bandwidth_up;
- if ((gateway.bandwidth_down == 0) ||
- (gateway.bandwidth_up == 0)) {
+ if (gateway.bandwidth_down == 0 ||
+ gateway.bandwidth_up == 0) {
gateway.bandwidth_down = 0;
gateway.bandwidth_up = 0;
}
@@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
batadv_gw_node_update(bat_priv, orig, &gateway);
/* restart gateway selection */
- if ((gateway.bandwidth_down != 0) &&
- (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT))
+ if (gateway.bandwidth_down != 0 &&
+ atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)
batadv_gw_check_election(bat_priv, orig);
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index e348f76ea8c1..4e3d5340ad96 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev)
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
continue;
if (hard_iface->net_dev == net_dev)
@@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
continue;
if (hard_iface->soft_iface != soft_iface)
@@ -654,8 +654,8 @@ out:
static void
batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface)
{
- if ((hard_iface->if_status != BATADV_IF_ACTIVE) &&
- (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED))
+ if (hard_iface->if_status != BATADV_IF_ACTIVE &&
+ hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)
return;
hard_iface->if_status = BATADV_IF_INACTIVE;
@@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface,
bat_priv = netdev_priv(hard_iface->soft_iface);
ret = netdev_master_upper_dev_link(hard_iface->net_dev,
- soft_iface, NULL, NULL);
+ soft_iface, NULL, NULL, NULL);
if (ret)
goto err_dev;
diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index 8ead292886d1..bded31121d12 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
size_t packet_len;
int error;
- if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0))
+ if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0)
return -EAGAIN;
- if ((!buf) || (count < sizeof(struct batadv_icmp_packet)))
+ if (!buf || count < sizeof(struct batadv_icmp_packet))
return -EINVAL;
if (!access_ok(VERIFY_WRITE, buf, count))
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index fb381fb26a66..4daed7ad46f2 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -73,8 +73,8 @@
* list traversals just rcu-locked
*/
struct list_head batadv_hardif_list;
-static int (*batadv_rx_handler[256])(struct sk_buff *,
- struct batadv_hard_iface *);
+static int (*batadv_rx_handler[256])(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
@@ -540,12 +540,12 @@ batadv_recv_handler_register(u8 packet_type,
int (*recv_handler)(struct sk_buff *,
struct batadv_hard_iface *))
{
- int (*curr)(struct sk_buff *,
- struct batadv_hard_iface *);
+ int (*curr)(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
curr = batadv_rx_handler[packet_type];
- if ((curr != batadv_recv_unhandled_packet) &&
- (curr != batadv_recv_unhandled_unicast_packet))
+ if (curr != batadv_recv_unhandled_packet &&
+ curr != batadv_recv_unhandled_unicast_packet)
return -EBUSY;
batadv_rx_handler[packet_type] = recv_handler;
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 05cc7637c064..edb2f239d04d 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -24,7 +24,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2017.3"
+#define BATADV_SOURCE_VERSION "2017.4"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d327670641ac..e553a8770a89 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
bool orig_initialized;
if (orig_mcast_enabled && tvlv_value &&
- (tvlv_value_len >= sizeof(mcast_flags)))
+ tvlv_value_len >= sizeof(mcast_flags))
mcast_flags = *(u8 *)tvlv_value;
spin_lock_bh(&orig->mcast_handler_lock);
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 8e2a4b205257..2967b86c13da 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv,
continue;
/* don't purge if the interface is not (going) down */
- if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
- (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
- (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+ if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+ if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+ if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
continue;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv,
continue;
/* don't purge if the interface is not (going) down */
- if ((if_outgoing->if_status != BATADV_IF_INACTIVE) &&
- (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) &&
- (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED))
+ if (if_outgoing->if_status != BATADV_IF_INACTIVE &&
+ if_outgoing->if_status != BATADV_IF_NOT_IN_USE &&
+ if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)
continue;
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
@@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv,
last_seen = neigh_node->last_seen;
if_incoming = neigh_node->if_incoming;
- if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) ||
- (if_incoming->if_status == BATADV_IF_INACTIVE) ||
- (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
- (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) {
- if ((if_incoming->if_status == BATADV_IF_INACTIVE) ||
- (if_incoming->if_status == BATADV_IF_NOT_IN_USE) ||
- (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED))
+ if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) ||
+ if_incoming->if_status == BATADV_IF_INACTIVE ||
+ if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+ if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) {
+ if (if_incoming->if_status == BATADV_IF_INACTIVE ||
+ if_incoming->if_status == BATADV_IF_NOT_IN_USE ||
+ if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)
batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
"neighbor purge: originator %pM, neighbor: %pM, iface: %s\n",
orig_node->orig, neigh_node->addr,
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index f10e3ff26f9d..40d9bf3e5bfe 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
batadv_orig_ifinfo_put(orig_ifinfo);
/* route deleted */
- if ((curr_router) && (!neigh_node)) {
+ if (curr_router && !neigh_node) {
batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
"Deleting route towards: %pM\n", orig_node->orig);
batadv_tt_global_del_orig(bat_priv, orig_node, -1,
"Deleted route towards originator");
/* route added */
- } else if ((!curr_router) && (neigh_node)) {
+ } else if (!curr_router && neigh_node) {
batadv_dbg(BATADV_DBG_ROUTES, bat_priv,
"Adding route towards: %pM (via %pM)\n",
orig_node->orig, neigh_node->addr);
@@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb,
/* add record route information if not full */
if ((icmph->msg_type == BATADV_ECHO_REPLY ||
icmph->msg_type == BATADV_ECHO_REQUEST) &&
- (skb->len >= sizeof(struct batadv_icmp_packet_rr))) {
+ skb->len >= sizeof(struct batadv_icmp_packet_rr)) {
if (skb_linearize(skb) < 0)
goto free_skb;
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 054a65e6eb68..7895323fd2a7 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb,
#ifdef CONFIG_BATMAN_ADV_BATMAN_V
hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr);
- if ((hardif_neigh) && (ret != NET_XMIT_DROP))
+ if (hardif_neigh && ret != NET_XMIT_DROP)
hardif_neigh->bat_v.last_unicast_tx = jiffies;
if (hardif_neigh)
@@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list,
* we delete only packets belonging to the given interface
*/
if (hard_iface &&
- (forw_packet->if_incoming != hard_iface) &&
- (forw_packet->if_outgoing != hard_iface))
+ forw_packet->if_incoming != hard_iface &&
+ forw_packet->if_outgoing != hard_iface)
continue;
hlist_del(&forw_packet->list);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 10f7edfb176e..543d2c3e0f0d 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len)
int result;
/* TODO: We must check if we can release all references to non-payload
- * data using skb_header_release in our skbs to allow skb_cow_header to
- * work optimally. This means that those skbs are not allowed to read
+ * data using __skb_header_release in our skbs to allow skb_cow_header
+ * to work optimally. This means that those skbs are not allowed to read
* or write any data which is before the current position of skb->data
* after that call and thus allow other skbs with the same data buffer
* to write freely in that area.
@@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
{
/* check ranges */
- if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
+ if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
dev->mtu = new_mtu;
@@ -867,7 +867,8 @@ free_bat_counters:
* Return: 0 if successful or error otherwise.
*/
static int batadv_softif_slave_add(struct net_device *dev,
- struct net_device *slave_dev)
+ struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct batadv_hard_iface *hard_iface;
struct net *net = dev_net(dev);
diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c
index 0ae8b30e4eaa..aa187fd42475 100644
--- a/net/batman-adv/sysfs.c
+++ b/net/batman-adv/sysfs.c
@@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev,
if (hard_iface->if_status == status_tmp)
goto out;
- if ((hard_iface->soft_iface) &&
- (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0))
+ if (hard_iface->soft_iface &&
+ strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)
goto out;
if (status_tmp == BATADV_IF_NOT_IN_USE) {
diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c
index bfe8effe9238..4b90033f35a8 100644
--- a/net/batman-adv/tp_meter.c
+++ b/net/batman-adv/tp_meter.c
@@ -1206,7 +1206,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst,
/* send the ack */
r = batadv_send_skb_to_orig(skb, orig_node, NULL);
- if (unlikely(r < 0) || (r == NET_XMIT_DROP)) {
+ if (unlikely(r < 0) || r == NET_XMIT_DROP) {
ret = BATADV_TP_REASON_DST_UNREACHABLE;
goto out;
}
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index c7b1a9aee579..2155ce802877 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -23,7 +23,6 @@
#include "ecdh_helper.h"
#include <linux/scatterlist.h>
-#include <crypto/kpp.h>
#include <crypto/ecdh.h>
struct ecdh_completion {
@@ -50,55 +49,35 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
out[i] = __swab64(in[ndigits - 1 - i]);
}
-bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
- u8 secret[32])
+/* compute_ecdh_secret() - function assumes that the private key was
+ * already set.
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: pair's ecc public key.
+ * secret: memory where the ecdh computed shared secret will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
+ u8 secret[32])
{
- struct crypto_kpp *tfm;
struct kpp_request *req;
- struct ecdh p;
+ u8 *tmp;
struct ecdh_completion result;
struct scatterlist src, dst;
- u8 *tmp, *buf;
- unsigned int buf_len;
- int err = -ENOMEM;
+ int err;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
- return false;
+ return -ENOMEM;
- tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
- if (IS_ERR(tfm)) {
- pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
- PTR_ERR(tfm));
+ req = kpp_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
goto free_tmp;
}
- req = kpp_request_alloc(tfm, GFP_KERNEL);
- if (!req)
- goto free_kpp;
-
init_completion(&result.completion);
- /* Security Manager Protocol holds digits in litte-endian order
- * while ECC API expect big-endian data
- */
- swap_digits((u64 *)private_key, (u64 *)tmp, 4);
- p.key = (char *)tmp;
- p.key_size = 32;
- /* Set curve_id */
- p.curve_id = ECC_CURVE_NIST_P256;
- buf_len = crypto_ecdh_key_len(&p);
- buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf)
- goto free_req;
-
- crypto_ecdh_encode_key(buf, buf_len, &p);
-
- /* Set A private Key */
- err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len);
- if (err)
- goto free_all;
-
swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
@@ -123,104 +102,129 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32],
memcpy(secret, tmp, 32);
free_all:
- kzfree(buf);
-free_req:
kpp_request_free(req);
-free_kpp:
- crypto_free_kpp(tfm);
free_tmp:
- kfree(tmp);
- return (err == 0);
+ kzfree(tmp);
+ return err;
}
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32])
+/* set_ecdh_privkey() - set or generate ecc private key.
+ *
+ * Function generates an ecc private key in the crypto subsystem when receiving
+ * a NULL private key or sets the received key when not NULL.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @private_key: user's ecc private key. When not NULL, the key is expected
+ * in little endian format.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32])
+{
+ u8 *buf, *tmp = NULL;
+ unsigned int buf_len;
+ int err;
+ struct ecdh p = {0};
+
+ p.curve_id = ECC_CURVE_NIST_P256;
+
+ if (private_key) {
+ tmp = kmalloc(32, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+ swap_digits((u64 *)private_key, (u64 *)tmp, 4);
+ p.key = tmp;
+ p.key_size = 32;
+ }
+
+ buf_len = crypto_ecdh_key_len(&p);
+ buf = kmalloc(buf_len, GFP_KERNEL);
+ if (!buf) {
+ err = -ENOMEM;
+ goto free_tmp;
+ }
+
+ err = crypto_ecdh_encode_key(buf, buf_len, &p);
+ if (err)
+ goto free_all;
+
+ err = crypto_kpp_set_secret(tfm, buf, buf_len);
+ /* fall through */
+free_all:
+ kzfree(buf);
+free_tmp:
+ kzfree(tmp);
+ return err;
+}
+
+/* generate_ecdh_public_key() - function assumes that the private key was
+ * already set.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
{
- struct crypto_kpp *tfm;
struct kpp_request *req;
- struct ecdh p;
+ u8 *tmp;
struct ecdh_completion result;
struct scatterlist dst;
- u8 *tmp, *buf;
- unsigned int buf_len;
- int err = -ENOMEM;
- const unsigned short max_tries = 16;
- unsigned short tries = 0;
+ int err;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
- return false;
+ return -ENOMEM;
- tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
- if (IS_ERR(tfm)) {
- pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n",
- PTR_ERR(tfm));
+ req = kpp_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ err = -ENOMEM;
goto free_tmp;
}
- req = kpp_request_alloc(tfm, GFP_KERNEL);
- if (!req)
- goto free_kpp;
-
init_completion(&result.completion);
+ sg_init_one(&dst, tmp, 64);
+ kpp_request_set_input(req, NULL, 0);
+ kpp_request_set_output(req, &dst, 64);
+ kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ ecdh_complete, &result);
- /* Set curve_id */
- p.curve_id = ECC_CURVE_NIST_P256;
- p.key_size = 32;
- buf_len = crypto_ecdh_key_len(&p);
- buf = kmalloc(buf_len, GFP_KERNEL);
- if (!buf)
- goto free_req;
-
- do {
- if (tries++ >= max_tries)
- goto free_all;
-
- /* Set private Key */
- p.key = (char *)private_key;
- crypto_ecdh_encode_key(buf, buf_len, &p);
- err = crypto_kpp_set_secret(tfm, buf, buf_len);
- if (err)
- goto free_all;
-
- sg_init_one(&dst, tmp, 64);
- kpp_request_set_input(req, NULL, 0);
- kpp_request_set_output(req, &dst, 64);
- kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- ecdh_complete, &result);
-
- err = crypto_kpp_generate_public_key(req);
-
- if (err == -EINPROGRESS) {
- wait_for_completion(&result.completion);
- err = result.err;
- }
-
- /* Private key is not valid. Regenerate */
- if (err == -EINVAL)
- continue;
-
- if (err < 0)
- goto free_all;
- else
- break;
-
- } while (true);
-
- /* Keys are handed back in little endian as expected by Security
- * Manager Protocol
+ err = crypto_kpp_generate_public_key(req);
+ if (err == -EINPROGRESS) {
+ wait_for_completion(&result.completion);
+ err = result.err;
+ }
+ if (err < 0)
+ goto free_all;
+
+ /* The public key is handed back in little endian as expected by
+ * the Security Manager Protocol.
*/
swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */
swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */
- swap_digits((u64 *)private_key, (u64 *)tmp, 4);
- memcpy(private_key, tmp, 32);
free_all:
- kzfree(buf);
-free_req:
kpp_request_free(req);
-free_kpp:
- crypto_free_kpp(tfm);
free_tmp:
kfree(tmp);
- return (err == 0);
+ return err;
+}
+
+/* generate_ecdh_keys() - generate ecc key pair.
+ *
+ * @tfm: KPP tfm handle allocated with crypto_alloc_kpp().
+ * @public_key: memory where the computed ecc public key will be saved.
+ *
+ * Return: zero on success; error code in case of error.
+ */
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64])
+{
+ int err;
+
+ err = set_ecdh_privkey(tfm, NULL);
+ if (err)
+ return err;
+
+ return generate_ecdh_public_key(tfm, public_key);
}
diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
index 7a423faf76e5..a6f8d03d4aaf 100644
--- a/net/bluetooth/ecdh_helper.h
+++ b/net/bluetooth/ecdh_helper.h
@@ -20,8 +20,11 @@
* COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
* SOFTWARE IS DISCLAIMED.
*/
+#include <crypto/kpp.h>
#include <linux/types.h>
-bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32],
- u8 secret[32]);
-bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]);
+int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64],
+ u8 secret[32]);
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
+int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]);
+int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]);
diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c
index 34a1227f4391..03e3c89c3046 100644
--- a/net/bluetooth/selftest.c
+++ b/net/bluetooth/selftest.c
@@ -138,12 +138,12 @@ static const u8 dhkey_3[32] __initconst = {
0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70,
};
-static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
- const u8 pub_a[64], const u8 pub_b[64],
- const u8 dhkey[32])
+static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32],
+ const u8 priv_b[32], const u8 pub_a[64],
+ const u8 pub_b[64], const u8 dhkey[32])
{
u8 *tmp, *dhkey_a, *dhkey_b;
- int ret = 0;
+ int ret;
tmp = kmalloc(64, GFP_KERNEL);
if (!tmp)
@@ -152,17 +152,30 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32],
dhkey_a = &tmp[0];
dhkey_b = &tmp[32];
- compute_ecdh_secret(pub_b, priv_a, dhkey_a);
- compute_ecdh_secret(pub_a, priv_b, dhkey_b);
+ ret = set_ecdh_privkey(tfm, priv_a);
+ if (ret)
+ goto out;
+
+ ret = compute_ecdh_secret(tfm, pub_b, dhkey_a);
+ if (ret)
+ goto out;
if (memcmp(dhkey_a, dhkey, 32)) {
ret = -EINVAL;
goto out;
}
+ ret = set_ecdh_privkey(tfm, priv_b);
+ if (ret)
+ goto out;
+
+ ret = compute_ecdh_secret(tfm, pub_a, dhkey_b);
+ if (ret)
+ goto out;
+
if (memcmp(dhkey_b, dhkey, 32))
ret = -EINVAL;
-
+ /* fall through*/
out:
kfree(tmp);
return ret;
@@ -185,30 +198,43 @@ static const struct file_operations test_ecdh_fops = {
static int __init test_ecdh(void)
{
+ struct crypto_kpp *tfm;
ktime_t calltime, delta, rettime;
- unsigned long long duration;
+ unsigned long long duration = 0;
int err;
calltime = ktime_get();
- err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1);
+ tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ err = PTR_ERR(tfm);
+ goto done;
+ }
+
+ err = test_ecdh_sample(tfm, priv_a_1, priv_b_1, pub_a_1, pub_b_1,
+ dhkey_1);
if (err) {
BT_ERR("ECDH sample 1 failed");
goto done;
}
- err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2);
+ err = test_ecdh_sample(tfm, priv_a_2, priv_b_2, pub_a_2, pub_b_2,
+ dhkey_2);
if (err) {
BT_ERR("ECDH sample 2 failed");
goto done;
}
- err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3);
+ err = test_ecdh_sample(tfm, priv_a_3, priv_a_3, pub_a_3, pub_a_3,
+ dhkey_3);
if (err) {
BT_ERR("ECDH sample 3 failed");
goto done;
}
+ crypto_free_kpp(tfm);
+
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
duration = (unsigned long long) ktime_to_ns(delta) >> 10;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a0ef89772c36..d41449b9e9d6 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -26,6 +26,7 @@
#include <crypto/algapi.h>
#include <crypto/b128ops.h>
#include <crypto/hash.h>
+#include <crypto/kpp.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -83,7 +84,6 @@ enum {
struct smp_dev {
/* Secure Connections OOB data */
u8 local_pk[64];
- u8 local_sk[32];
u8 local_rand[16];
bool debug_key;
@@ -92,6 +92,7 @@ struct smp_dev {
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
};
struct smp_chan {
@@ -124,13 +125,13 @@ struct smp_chan {
/* Secure Connections variables */
u8 local_pk[64];
- u8 local_sk[32];
u8 remote_pk[64];
u8 dhkey[32];
u8 mackey[16];
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
};
/* These debug key values are defined in the SMP section of the core
@@ -565,22 +566,22 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
BT_DBG("Using debug keys");
+ err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk);
+ if (err)
+ return err;
memcpy(smp->local_pk, debug_pk, 64);
- memcpy(smp->local_sk, debug_sk, 32);
smp->debug_key = true;
} else {
while (true) {
- /* Seed private key with random number */
- get_random_bytes(smp->local_sk, 32);
-
- /* Generate local key pair for Secure Connections */
- if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
- return -EIO;
+ /* Generate key pair for Secure Connections */
+ err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk);
+ if (err)
+ return err;
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (crypto_memneq(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_pk, debug_pk, 64))
break;
}
smp->debug_key = false;
@@ -588,7 +589,6 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
SMP_DBG("OOB Public Key X: %32phN", smp->local_pk);
SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32);
- SMP_DBG("OOB Private Key: %32phN", smp->local_sk);
get_random_bytes(smp->local_rand, 16);
@@ -771,6 +771,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn)
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
/* Ensure that we don't leave any debug key around if debug key
* support hasn't been explicitly enabled.
@@ -1391,16 +1392,19 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(smp->tfm_aes)) {
BT_ERR("Unable to create AES crypto context");
- kzfree(smp);
- return NULL;
+ goto zfree_smp;
}
smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
if (IS_ERR(smp->tfm_cmac)) {
BT_ERR("Unable to create CMAC crypto context");
- crypto_free_cipher(smp->tfm_aes);
- kzfree(smp);
- return NULL;
+ goto free_cipher;
+ }
+
+ smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(smp->tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ goto free_shash;
}
smp->conn = conn;
@@ -1413,6 +1417,14 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn)
hci_conn_hold(conn->hcon);
return smp;
+
+free_shash:
+ crypto_free_shash(smp->tfm_cmac);
+free_cipher:
+ crypto_free_cipher(smp->tfm_aes);
+zfree_smp:
+ kzfree(smp);
+ return NULL;
}
static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16])
@@ -1883,7 +1895,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
smp_dev = chan->data;
memcpy(smp->local_pk, smp_dev->local_pk, 64);
- memcpy(smp->local_sk, smp_dev->local_sk, 32);
memcpy(smp->lr, smp_dev->local_rand, 16);
if (smp_dev->debug_key)
@@ -1894,22 +1905,20 @@ static u8 sc_send_public_key(struct smp_chan *smp)
if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
BT_DBG("Using debug keys");
+ if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk))
+ return SMP_UNSPECIFIED;
memcpy(smp->local_pk, debug_pk, 64);
- memcpy(smp->local_sk, debug_sk, 32);
set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags);
} else {
while (true) {
- /* Seed private key with random number */
- get_random_bytes(smp->local_sk, 32);
-
- /* Generate local key pair for Secure Connections */
- if (!generate_ecdh_keys(smp->local_pk, smp->local_sk))
+ /* Generate key pair for Secure Connections */
+ if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk))
return SMP_UNSPECIFIED;
/* This is unlikely, but we need to check that
* we didn't accidentially generate a debug key.
*/
- if (crypto_memneq(smp->local_sk, debug_sk, 32))
+ if (crypto_memneq(smp->local_pk, debug_pk, 64))
break;
}
}
@@ -1917,7 +1926,6 @@ static u8 sc_send_public_key(struct smp_chan *smp)
done:
SMP_DBG("Local Public Key X: %32phN", smp->local_pk);
SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32);
- SMP_DBG("Local Private Key: %32phN", smp->local_sk);
smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk);
@@ -2645,6 +2653,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
struct l2cap_chan *chan = conn->smp;
struct smp_chan *smp = chan->data;
struct hci_dev *hdev = hcon->hdev;
+ struct crypto_kpp *tfm_ecdh;
struct smp_cmd_pairing_confirm cfm;
int err;
@@ -2677,7 +2686,18 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk);
SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32);
- if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey))
+ /* Compute the shared secret on the same crypto tfm on which the private
+ * key was set/generated.
+ */
+ if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) {
+ struct smp_dev *smp_dev = chan->data;
+
+ tfm_ecdh = smp_dev->tfm_ecdh;
+ } else {
+ tfm_ecdh = smp->tfm_ecdh;
+ }
+
+ if (compute_ecdh_secret(tfm_ecdh, smp->remote_pk, smp->dhkey))
return SMP_UNSPECIFIED;
SMP_DBG("DHKey %32phN", smp->dhkey);
@@ -3169,6 +3189,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
struct smp_dev *smp;
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
if (cid == L2CAP_CID_SMP_BREDR) {
smp = NULL;
@@ -3194,8 +3215,18 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid)
return ERR_CAST(tfm_cmac);
}
+ tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ crypto_free_shash(tfm_cmac);
+ crypto_free_cipher(tfm_aes);
+ kzfree(smp);
+ return ERR_CAST(tfm_ecdh);
+ }
+
smp->tfm_aes = tfm_aes;
smp->tfm_cmac = tfm_cmac;
+ smp->tfm_ecdh = tfm_ecdh;
smp->min_key_size = SMP_MIN_ENC_KEY_SIZE;
smp->max_key_size = SMP_MAX_ENC_KEY_SIZE;
@@ -3205,6 +3236,7 @@ create_chan:
if (smp) {
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
kzfree(smp);
}
return ERR_PTR(-ENOMEM);
@@ -3252,6 +3284,7 @@ static void smp_del_chan(struct l2cap_chan *chan)
chan->data = NULL;
crypto_free_cipher(smp->tfm_aes);
crypto_free_shash(smp->tfm_cmac);
+ crypto_free_kpp(smp->tfm_ecdh);
kzfree(smp);
}
@@ -3490,25 +3523,18 @@ void smp_unregister(struct hci_dev *hdev)
#if IS_ENABLED(CONFIG_BT_SELFTEST_SMP)
-static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
+static int __init test_debug_key(struct crypto_kpp *tfm_ecdh)
{
- int i;
-
- for (i = 0; i < ndigits; i++)
- out[i] = __swab64(in[ndigits - 1 - i]);
-}
-
-static int __init test_debug_key(void)
-{
- u8 pk[64], sk[32];
-
- swap_digits((u64 *)debug_sk, (u64 *)sk, 4);
+ u8 pk[64];
+ int err;
- if (!generate_ecdh_keys(pk, sk))
- return -EINVAL;
+ err = set_ecdh_privkey(tfm_ecdh, debug_sk);
+ if (err)
+ return err;
- if (crypto_memneq(sk, debug_sk, 32))
- return -EINVAL;
+ err = generate_ecdh_public_key(tfm_ecdh, pk);
+ if (err)
+ return err;
if (crypto_memneq(pk, debug_pk, 64))
return -EINVAL;
@@ -3763,7 +3789,8 @@ static const struct file_operations test_smp_fops = {
};
static int __init run_selftests(struct crypto_cipher *tfm_aes,
- struct crypto_shash *tfm_cmac)
+ struct crypto_shash *tfm_cmac,
+ struct crypto_kpp *tfm_ecdh)
{
ktime_t calltime, delta, rettime;
unsigned long long duration;
@@ -3771,7 +3798,7 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes,
calltime = ktime_get();
- err = test_debug_key();
+ err = test_debug_key(tfm_ecdh);
if (err) {
BT_ERR("debug_key test failed");
goto done;
@@ -3848,6 +3875,7 @@ int __init bt_selftest_smp(void)
{
struct crypto_cipher *tfm_aes;
struct crypto_shash *tfm_cmac;
+ struct crypto_kpp *tfm_ecdh;
int err;
tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
@@ -3863,10 +3891,19 @@ int __init bt_selftest_smp(void)
return PTR_ERR(tfm_cmac);
}
- err = run_selftests(tfm_aes, tfm_cmac);
+ tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
+ if (IS_ERR(tfm_ecdh)) {
+ BT_ERR("Unable to create ECDH crypto context");
+ crypto_free_shash(tfm_cmac);
+ crypto_free_cipher(tfm_aes);
+ return PTR_ERR(tfm_ecdh);
+ }
+
+ err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh);
crypto_free_shash(tfm_cmac);
crypto_free_cipher(tfm_aes);
+ crypto_free_kpp(tfm_ecdh);
return err;
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6be41a44d688..a86e6687026e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
if (is_l2)
__skb_push(skb, ETH_HLEN);
if (is_direct_pkt_access)
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
retval = bpf_test_run(prog, skb, repeat, &duration);
if (!is_l2)
__skb_push(skb, ETH_HLEN);
@@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
xdp.data_hard_start = data;
xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
+ xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + size;
retval = bpf_test_run(prog, &xdp, repeat, &duration);
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 40b1ede527ca..4aee55fdcc92 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o
bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \
br_ioctl.o br_stp.o br_stp_bpdu.o \
br_stp_if.o br_stp_timer.o br_netlink.o \
- br_netlink_tunnel.o
+ br_netlink_tunnel.o br_arp_nd_proxy.o
bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
new file mode 100644
index 000000000000..2cf7716254be
--- /dev/null
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -0,0 +1,469 @@
+/*
+ * Handle bridge arp/nd proxy/suppress
+ *
+ * Copyright (C) 2017 Cumulus Networks
+ * Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * Authors:
+ * Roopa Prabhu <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/neighbour.h>
+#include <net/arp.h>
+#include <linux/if_vlan.h>
+#include <linux/inetdevice.h>
+#include <net/addrconf.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_checksum.h>
+#endif
+
+#include "br_private.h"
+
+void br_recalculate_neigh_suppress_enabled(struct net_bridge *br)
+{
+ struct net_bridge_port *p;
+ bool neigh_suppress = false;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->flags & BR_NEIGH_SUPPRESS) {
+ neigh_suppress = true;
+ break;
+ }
+ }
+
+ br->neigh_suppress_enabled = neigh_suppress;
+}
+
+#if IS_ENABLED(CONFIG_INET)
+static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p,
+ struct net_device *dev, __be32 dest_ip, __be32 src_ip,
+ const unsigned char *dest_hw,
+ const unsigned char *src_hw,
+ const unsigned char *target_hw,
+ __be16 vlan_proto, u16 vlan_tci)
+{
+ struct net_bridge_vlan_group *vg;
+ struct sk_buff *skb;
+ u16 pvid;
+
+ netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n",
+ dev->name, &dest_ip, dest_hw, &src_ip, src_hw);
+
+ if (!vlan_tci) {
+ arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
+ dest_hw, src_hw, target_hw);
+ return;
+ }
+
+ skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip,
+ dest_hw, src_hw, target_hw);
+ if (!skb)
+ return;
+
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else
+ vg = br_vlan_group_rcu(br);
+ pvid = br_get_pvid(vg);
+ if (pvid == (vlan_tci & VLAN_VID_MASK))
+ vlan_tci = 0;
+
+ if (vlan_tci)
+ __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci);
+
+ if (p) {
+ arp_xmit(skb);
+ } else {
+ skb_reset_mac_header(skb);
+ __skb_pull(skb, skb_network_offset(skb));
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->pkt_type = PACKET_HOST;
+
+ netif_rx_ni(skb);
+ }
+}
+
+static int br_chk_addr_ip(struct net_device *dev, void *data)
+{
+ __be32 ip = *(__be32 *)data;
+ struct in_device *in_dev;
+ __be32 addr = 0;
+
+ in_dev = __in_dev_get_rcu(dev);
+ if (in_dev)
+ addr = inet_confirm_addr(dev_net(dev), in_dev, 0, ip,
+ RT_SCOPE_HOST);
+
+ if (addr == ip)
+ return 1;
+
+ return 0;
+}
+
+static bool br_is_local_ip(struct net_device *dev, __be32 ip)
+{
+ if (br_chk_addr_ip(dev, &ip))
+ return true;
+
+ /* check if ip is configured on upper dev */
+ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip))
+ return true;
+
+ return false;
+}
+
+void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p)
+{
+ struct net_device *dev = br->dev;
+ struct net_device *vlandev = dev;
+ struct neighbour *n;
+ struct arphdr *parp;
+ u8 *arpptr, *sha;
+ __be32 sip, tip;
+
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
+ if ((dev->flags & IFF_NOARP) ||
+ !pskb_may_pull(skb, arp_hdr_len(dev)))
+ return;
+
+ parp = arp_hdr(skb);
+
+ if (parp->ar_pro != htons(ETH_P_IP) ||
+ parp->ar_hln != dev->addr_len ||
+ parp->ar_pln != 4)
+ return;
+
+ arpptr = (u8 *)parp + sizeof(struct arphdr);
+ sha = arpptr;
+ arpptr += dev->addr_len; /* sha */
+ memcpy(&sip, arpptr, sizeof(sip));
+ arpptr += sizeof(sip);
+ arpptr += dev->addr_len; /* tha */
+ memcpy(&tip, arpptr, sizeof(tip));
+
+ if (ipv4_is_loopback(tip) ||
+ ipv4_is_multicast(tip))
+ return;
+
+ if (br->neigh_suppress_enabled) {
+ if (p && (p->flags & BR_NEIGH_SUPPRESS))
+ return;
+ if (ipv4_is_zeronet(sip) || sip == tip) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+ }
+
+ if (parp->ar_op != htons(ARPOP_REQUEST))
+ return;
+
+ if (vid != 0) {
+ vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
+ vid);
+ if (!vlandev)
+ return;
+ }
+
+ if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) {
+ /* its our local ip, so don't proxy reply
+ * and don't forward to neigh suppress ports
+ */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ n = neigh_lookup(&arp_tbl, &tip, vlandev);
+ if (n) {
+ struct net_bridge_fdb_entry *f;
+
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_release(n);
+ return;
+ }
+
+ f = br_fdb_find_rcu(br, n->ha, vid);
+ if (f) {
+ bool replied = false;
+
+ if ((p && (p->flags & BR_PROXYARP)) ||
+ (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI |
+ BR_NEIGH_SUPPRESS)))) {
+ if (!vid)
+ br_arp_send(br, p, skb->dev, sip, tip,
+ sha, n->ha, sha, 0, 0);
+ else
+ br_arp_send(br, p, skb->dev, sip, tip,
+ sha, n->ha, sha,
+ skb->vlan_proto,
+ skb_vlan_tag_get(skb));
+ replied = true;
+ }
+
+ /* If we have replied or as long as we know the
+ * mac, indicate to arp replied
+ */
+ if (replied || br->neigh_suppress_enabled)
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ }
+
+ neigh_release(n);
+ }
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg)
+{
+ struct nd_msg *m;
+
+ m = skb_header_pointer(skb, skb_network_offset(skb) +
+ sizeof(struct ipv6hdr), sizeof(*msg), msg);
+ if (!m)
+ return NULL;
+
+ if (m->icmph.icmp6_code != 0 ||
+ (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION &&
+ m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT))
+ return NULL;
+
+ return m;
+}
+
+static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
+ struct sk_buff *request, struct neighbour *n,
+ __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns)
+{
+ struct net_device *dev = request->dev;
+ struct net_bridge_vlan_group *vg;
+ struct sk_buff *reply;
+ struct nd_msg *na;
+ struct ipv6hdr *pip6;
+ int na_olen = 8; /* opt hdr + ETH_ALEN for target */
+ int ns_olen;
+ int i, len;
+ u8 *daddr;
+ u16 pvid;
+
+ if (!dev)
+ return;
+
+ len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) +
+ sizeof(*na) + na_olen + dev->needed_tailroom;
+
+ reply = alloc_skb(len, GFP_ATOMIC);
+ if (!reply)
+ return;
+
+ reply->protocol = htons(ETH_P_IPV6);
+ reply->dev = dev;
+ skb_reserve(reply, LL_RESERVED_SPACE(dev));
+ skb_push(reply, sizeof(struct ethhdr));
+ skb_set_mac_header(reply, 0);
+
+ daddr = eth_hdr(request)->h_source;
+
+ /* Do we need option processing ? */
+ ns_olen = request->len - (skb_network_offset(request) +
+ sizeof(struct ipv6hdr)) - sizeof(*ns);
+ for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) {
+ if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
+ daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
+ break;
+ }
+ }
+
+ /* Ethernet header */
+ ether_addr_copy(eth_hdr(reply)->h_dest, daddr);
+ ether_addr_copy(eth_hdr(reply)->h_source, n->ha);
+ eth_hdr(reply)->h_proto = htons(ETH_P_IPV6);
+ reply->protocol = htons(ETH_P_IPV6);
+
+ skb_pull(reply, sizeof(struct ethhdr));
+ skb_set_network_header(reply, 0);
+ skb_put(reply, sizeof(struct ipv6hdr));
+
+ /* IPv6 header */
+ pip6 = ipv6_hdr(reply);
+ memset(pip6, 0, sizeof(struct ipv6hdr));
+ pip6->version = 6;
+ pip6->priority = ipv6_hdr(request)->priority;
+ pip6->nexthdr = IPPROTO_ICMPV6;
+ pip6->hop_limit = 255;
+ pip6->daddr = ipv6_hdr(request)->saddr;
+ pip6->saddr = *(struct in6_addr *)n->primary_key;
+
+ skb_pull(reply, sizeof(struct ipv6hdr));
+ skb_set_transport_header(reply, 0);
+
+ na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
+
+ /* Neighbor Advertisement */
+ memset(na, 0, sizeof(*na) + na_olen);
+ na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+ na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */
+ na->icmph.icmp6_override = 1;
+ na->icmph.icmp6_solicited = 1;
+ na->target = ns->target;
+ ether_addr_copy(&na->opt[2], n->ha);
+ na->opt[0] = ND_OPT_TARGET_LL_ADDR;
+ na->opt[1] = na_olen >> 3;
+
+ na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr,
+ &pip6->daddr,
+ sizeof(*na) + na_olen,
+ IPPROTO_ICMPV6,
+ csum_partial(na, sizeof(*na) + na_olen, 0));
+
+ pip6->payload_len = htons(sizeof(*na) + na_olen);
+
+ skb_push(reply, sizeof(struct ipv6hdr));
+ skb_push(reply, sizeof(struct ethhdr));
+
+ reply->ip_summed = CHECKSUM_UNNECESSARY;
+
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
+ else
+ vg = br_vlan_group_rcu(br);
+ pvid = br_get_pvid(vg);
+ if (pvid == (vlan_tci & VLAN_VID_MASK))
+ vlan_tci = 0;
+
+ if (vlan_tci)
+ __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci);
+
+ netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n",
+ dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha);
+
+ if (p) {
+ dev_queue_xmit(reply);
+ } else {
+ skb_reset_mac_header(reply);
+ __skb_pull(reply, skb_network_offset(reply));
+ reply->ip_summed = CHECKSUM_UNNECESSARY;
+ reply->pkt_type = PACKET_HOST;
+
+ netif_rx_ni(reply);
+ }
+}
+
+static int br_chk_addr_ip6(struct net_device *dev, void *data)
+{
+ struct in6_addr *addr = (struct in6_addr *)data;
+
+ if (ipv6_chk_addr(dev_net(dev), addr, dev, 0))
+ return 1;
+
+ return 0;
+}
+
+static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr)
+
+{
+ if (br_chk_addr_ip6(dev, addr))
+ return true;
+
+ /* check if ip is configured on upper dev */
+ if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr))
+ return true;
+
+ return false;
+}
+
+void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p, struct nd_msg *msg)
+{
+ struct net_device *dev = br->dev;
+ struct net_device *vlandev = NULL;
+ struct in6_addr *saddr, *daddr;
+ struct ipv6hdr *iphdr;
+ struct neighbour *n;
+
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
+ if (p && (p->flags & BR_NEIGH_SUPPRESS))
+ return;
+
+ if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT &&
+ !msg->icmph.icmp6_solicited) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
+ return;
+
+ iphdr = ipv6_hdr(skb);
+ saddr = &iphdr->saddr;
+ daddr = &iphdr->daddr;
+
+ if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) {
+ /* prevent flooding to neigh suppress ports */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ if (vid != 0) {
+ /* build neigh table lookup on the vlan device */
+ vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto,
+ vid);
+ if (!vlandev)
+ return;
+ } else {
+ vlandev = dev;
+ }
+
+ if (br_is_local_ip6(vlandev, &msg->target)) {
+ /* its our own ip, so don't proxy reply
+ * and don't forward to arp suppress ports
+ */
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ return;
+ }
+
+ n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev);
+ if (n) {
+ struct net_bridge_fdb_entry *f;
+
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_release(n);
+ return;
+ }
+
+ f = br_fdb_find_rcu(br, n->ha, vid);
+ if (f) {
+ bool replied = false;
+
+ if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) {
+ if (vid != 0)
+ br_nd_send(br, p, skb, n,
+ skb->vlan_proto,
+ skb_vlan_tag_get(skb), msg);
+ else
+ br_nd_send(br, p, skb, n, 0, 0, msg);
+ replied = true;
+ }
+
+ /* If we have replied or as long as we know the
+ * mac, indicate to NEIGH_SUPPRESS ports that we
+ * have replied
+ */
+ if (replied || br->neigh_suppress_enabled)
+ BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+ }
+ neigh_release(n);
+ }
+}
+#endif
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f6b6a92f1c48..28bb22186fa0 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
const struct nf_br_ops *nf_ops;
const unsigned char *dest;
+ struct ethhdr *eth;
u16 vid = 0;
rcu_read_lock();
@@ -57,11 +58,30 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
+ eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
goto out;
+ if (IS_ENABLED(CONFIG_INET) &&
+ (eth->h_proto == htons(ETH_P_ARP) ||
+ eth->h_proto == htons(ETH_P_RARP)) &&
+ br->neigh_suppress_enabled) {
+ br_do_proxy_suppress_arp(skb, br, vid, NULL);
+ } else if (IS_ENABLED(CONFIG_IPV6) &&
+ skb->protocol == htons(ETH_P_IPV6) &&
+ br->neigh_suppress_enabled &&
+ pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+ sizeof(struct nd_msg)) &&
+ ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+ struct nd_msg *msg, _msg;
+
+ msg = br_is_nd_neigh_msg(skb, &_msg);
+ if (msg)
+ br_do_suppress_nd(skb, br, vid, NULL, msg);
+ }
+
dest = eth_hdr(skb)->h_dest;
if (is_broadcast_ether_addr(dest)) {
br_flood(br, skb, BR_PKT_BROADCAST, false, true);
@@ -320,12 +340,13 @@ void br_netpoll_disable(struct net_bridge_port *p)
#endif
-static int br_add_slave(struct net_device *dev, struct net_device *slave_dev)
+static int br_add_slave(struct net_device *dev, struct net_device *slave_dev,
+ struct netlink_ext_ack *extack)
{
struct net_bridge *br = netdev_priv(dev);
- return br_add_if(br, slave_dev);
+ return br_add_if(br, slave_dev, extack);
}
static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 48fb17417fac..b4eed113d2ec 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -204,7 +204,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
/* Do not flood to ports that enable proxy ARP */
if (p->flags & BR_PROXYARP)
continue;
- if ((p->flags & BR_PROXYARP_WIFI) &&
+ if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) &&
BR_INPUT_SKB_CB(skb)->proxyarp_replied)
continue;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f3aef22931ab..ae38547bbf91 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -310,6 +310,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
del_nbp(p);
}
+ br_recalculate_neigh_suppress_enabled(br);
+
br_fdb_delete_by_port(br, NULL, 0, 1);
cancel_delayed_work_sync(&br->gc_work);
@@ -480,7 +482,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br,
}
/* called with RTNL */
-int br_add_if(struct net_bridge *br, struct net_device *dev)
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_port *p;
int err = 0;
@@ -500,16 +503,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
return -EINVAL;
/* No bridging of bridges */
- if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit)
+ if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
+ NL_SET_ERR_MSG(extack,
+ "Can not enslave a bridge to a bridge");
return -ELOOP;
+ }
/* Device is already being bridged */
if (br_port_exists(dev))
return -EBUSY;
/* No bridging devices that dislike that (e.g. wireless) */
- if (dev->priv_flags & IFF_DONT_BRIDGE)
+ if (dev->priv_flags & IFF_DONT_BRIDGE) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not allow enslaving to a bridge");
return -EOPNOTSUPP;
+ }
p = new_nbp(br, dev);
if (IS_ERR(p))
@@ -540,7 +549,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev->priv_flags |= IFF_BRIDGE_PORT;
- err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL);
+ err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
if (err)
goto err5;
@@ -653,4 +662,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
if (mask & BR_AUTO_MASK)
nbp_update_port_count(br);
+
+ if (mask & BR_NEIGH_SUPPRESS)
+ br_recalculate_neigh_suppress_enabled(br);
}
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 7637f58c1226..a096d3e189da 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -71,62 +71,6 @@ static int br_pass_frame_up(struct sk_buff *skb)
br_netif_receive_skb);
}
-static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
- u16 vid, struct net_bridge_port *p)
-{
- struct net_device *dev = br->dev;
- struct neighbour *n;
- struct arphdr *parp;
- u8 *arpptr, *sha;
- __be32 sip, tip;
-
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
-
- if ((dev->flags & IFF_NOARP) ||
- !pskb_may_pull(skb, arp_hdr_len(dev)))
- return;
-
- parp = arp_hdr(skb);
-
- if (parp->ar_pro != htons(ETH_P_IP) ||
- parp->ar_op != htons(ARPOP_REQUEST) ||
- parp->ar_hln != dev->addr_len ||
- parp->ar_pln != 4)
- return;
-
- arpptr = (u8 *)parp + sizeof(struct arphdr);
- sha = arpptr;
- arpptr += dev->addr_len; /* sha */
- memcpy(&sip, arpptr, sizeof(sip));
- arpptr += sizeof(sip);
- arpptr += dev->addr_len; /* tha */
- memcpy(&tip, arpptr, sizeof(tip));
-
- if (ipv4_is_loopback(tip) ||
- ipv4_is_multicast(tip))
- return;
-
- n = neigh_lookup(&arp_tbl, &tip, dev);
- if (n) {
- struct net_bridge_fdb_entry *f;
-
- if (!(n->nud_state & NUD_VALID)) {
- neigh_release(n);
- return;
- }
-
- f = br_fdb_find_rcu(br, n->ha, vid);
- if (f && ((p->flags & BR_PROXYARP) ||
- (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
- arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
- sha, n->ha, sha);
- BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
- }
-
- neigh_release(n);
- }
-}
-
/* note: already called with rcu_read_lock */
int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -171,8 +115,22 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
BR_INPUT_SKB_CB(skb)->brdev = br->dev;
- if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
- br_do_proxy_arp(skb, br, vid, p);
+ if (IS_ENABLED(CONFIG_INET) &&
+ (skb->protocol == htons(ETH_P_ARP) ||
+ skb->protocol == htons(ETH_P_RARP))) {
+ br_do_proxy_suppress_arp(skb, br, vid, p);
+ } else if (IS_ENABLED(CONFIG_IPV6) &&
+ skb->protocol == htons(ETH_P_IPV6) &&
+ br->neigh_suppress_enabled &&
+ pskb_may_pull(skb, sizeof(struct ipv6hdr) +
+ sizeof(struct nd_msg)) &&
+ ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) {
+ struct nd_msg *msg, _msg;
+
+ msg = br_is_nd_neigh_msg(skb, &_msg);
+ if (msg)
+ br_do_suppress_nd(skb, br, vid, p, msg);
+ }
switch (pkt_type) {
case BR_PKT_MULTICAST:
@@ -289,6 +247,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
*
* Others reserved for future standardization
*/
+ fwd_mask |= p->group_fwd_mask;
switch (dest[5]) {
case 0x00: /* Bridge Group Address */
/* If STP is turned off,
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 7970f8540cbb..8f29103935a3 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -98,10 +98,13 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd)
return -EINVAL;
if (isadd)
- ret = br_add_if(br, dev);
+ ret = br_add_if(br, dev, NULL);
else
ret = br_del_if(br, dev);
+ if (!ret)
+ rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_MASTER, GFP_KERNEL);
+
return ret;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 8dc5c8d69bcd..7947e0436e18 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -859,8 +859,32 @@ out:
spin_unlock(&br->multicast_lock);
}
+static void br_mc_router_state_change(struct net_bridge *p,
+ bool is_mc_router)
+{
+ struct switchdev_attr attr = {
+ .orig_dev = p->dev,
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
+ .flags = SWITCHDEV_F_DEFER,
+ .u.mrouter = is_mc_router,
+ };
+
+ switchdev_port_attr_set(p->dev, &attr);
+}
+
static void br_multicast_local_router_expired(unsigned long data)
{
+ struct net_bridge *br = (struct net_bridge *)data;
+
+ spin_lock(&br->multicast_lock);
+ if (br->multicast_router == MDB_RTR_TYPE_DISABLED ||
+ br->multicast_router == MDB_RTR_TYPE_PERM ||
+ timer_pending(&br->multicast_router_timer))
+ goto out;
+
+ br_mc_router_state_change(br, false);
+out:
+ spin_unlock(&br->multicast_lock);
}
static void br_multicast_querier_expired(struct net_bridge *br,
@@ -1364,9 +1388,12 @@ static void br_multicast_mark_router(struct net_bridge *br,
unsigned long now = jiffies;
if (!port) {
- if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY)
+ if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) {
+ if (!timer_pending(&br->multicast_router_timer))
+ br_mc_router_state_change(br, true);
mod_timer(&br->multicast_router_timer,
now + br->multicast_querier_interval);
+ }
return;
}
@@ -1952,7 +1979,7 @@ void br_multicast_init(struct net_bridge *br)
spin_lock_init(&br->multicast_lock);
setup_timer(&br->multicast_router_timer,
- br_multicast_local_router_expired, 0);
+ br_multicast_local_router_expired, (unsigned long)br);
setup_timer(&br->ip4_other_query.timer,
br_ip4_multicast_querier_expired, (unsigned long)br);
setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired,
@@ -2042,9 +2069,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val)
switch (val) {
case MDB_RTR_TYPE_DISABLED:
case MDB_RTR_TYPE_PERM:
+ br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM);
del_timer(&br->multicast_router_timer);
- /* fall through */
+ br->multicast_router = val;
+ err = 0;
+ break;
case MDB_RTR_TYPE_TEMP_QUERY:
+ if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY)
+ br_mc_router_state_change(br, false);
br->multicast_router = val;
err = 0;
break;
@@ -2184,6 +2216,18 @@ bool br_multicast_enabled(const struct net_device *dev)
}
EXPORT_SYMBOL_GPL(br_multicast_enabled);
+bool br_multicast_router(const struct net_device *dev)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ bool is_router;
+
+ spin_lock_bh(&br->multicast_lock);
+ is_router = br_multicast_is_router(br);
+ spin_unlock_bh(&br->multicast_lock);
+ return is_router;
+}
+EXPORT_SYMBOL_GPL(br_multicast_router);
+
int br_multicast_set_querier(struct net_bridge *br, unsigned long val)
{
unsigned long max_delay;
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index de2152730809..fb61b6c79235 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
+ nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
+ + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
@@ -152,6 +153,7 @@ static inline size_t br_port_info_size(void)
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
#endif
+ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ 0;
}
@@ -208,7 +210,10 @@ static int br_port_fill_attrs(struct sk_buff *skb,
p->topology_change_ack) ||
nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
- BR_VLAN_TUNNEL)))
+ BR_VLAN_TUNNEL)) ||
+ nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
+ nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
+ !!(p->flags & BR_NEIGH_SUPPRESS)))
return -EMSGSIZE;
timerval = br_timer_value(&p->message_age_timer);
@@ -637,6 +642,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
[IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
[IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 },
};
/* Change the state of the port and notify spanning tree */
@@ -773,6 +779,20 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
return err;
}
#endif
+
+ if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
+ u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]);
+
+ if (fwd_mask & BR_GROUPFWD_MACPAUSE)
+ return -EINVAL;
+ p->group_fwd_mask = fwd_mask;
+ }
+
+ err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS,
+ BR_NEIGH_SUPPRESS);
+ if (err)
+ return err;
+
br_port_flags_change(p, old_flags ^ p->flags);
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index e870cfc85b14..fa0039f44818 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -36,7 +36,14 @@
/* Control of forwarding link local multicast */
#define BR_GROUPFWD_DEFAULT 0
/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
-#define BR_GROUPFWD_RESTRICTED 0x0007u
+enum {
+ BR_GROUPFWD_STP = BIT(0),
+ BR_GROUPFWD_MACPAUSE = BIT(1),
+ BR_GROUPFWD_LACP = BIT(2),
+};
+
+#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \
+ BR_GROUPFWD_LACP)
/* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */
#define BR_GROUPFWD_8021AD 0xB801u
@@ -268,6 +275,7 @@ struct net_bridge_port {
#ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark;
#endif
+ u16 group_fwd_mask;
};
#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -396,6 +404,7 @@ struct net_bridge {
#ifdef CONFIG_NET_SWITCHDEV
int offload_fwd_mark;
#endif
+ bool neigh_suppress_enabled;
};
struct br_input_skb_cb {
@@ -558,7 +567,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
void br_port_carrier_check(struct net_bridge_port *p);
int br_add_bridge(struct net *net, const char *name);
int br_del_bridge(struct net *net, const char *name);
-int br_add_if(struct net_bridge *br, struct net_device *dev);
+int br_add_if(struct net_bridge *br, struct net_device *dev,
+ struct netlink_ext_ack *extack);
int br_del_if(struct net_bridge *br, struct net_device *dev);
int br_min_mtu(const struct net_bridge *br);
netdev_features_t br_features_recompute(struct net_bridge *br,
@@ -1130,4 +1140,11 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
}
#endif /* CONFIG_NET_SWITCHDEV */
+/* br_arp_nd_proxy.c */
+void br_recalculate_neigh_suppress_enabled(struct net_bridge *br);
+void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p);
+void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br,
+ u16 vid, struct net_bridge_port *p, struct nd_msg *msg);
+struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m);
#endif
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 5d5d413a6cf8..0a1fa9ccd8b7 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v)
}
static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush);
+static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf)
+{
+ return sprintf(buf, "%#x\n", p->group_fwd_mask);
+}
+
+static int store_group_fwd_mask(struct net_bridge_port *p,
+ unsigned long v)
+{
+ if (v & BR_GROUPFWD_MACPAUSE)
+ return -EINVAL;
+ p->group_fwd_mask = v;
+
+ return 0;
+}
+static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask,
+ store_group_fwd_mask);
+
BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE);
BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD);
BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
@@ -174,6 +191,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
+BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS);
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -223,6 +241,8 @@ static const struct brport_attribute *brport_attrs[] = {
&brport_attr_proxyarp_wifi,
&brport_attr_multicast_flood,
&brport_attr_broadcast_flood,
+ &brport_attr_group_fwd_mask,
+ &brport_attr_neigh_suppress,
NULL
};
diff --git a/net/can/af_can.c b/net/can/af_can.c
index ecd5c703d11e..003b2d6d655f 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -887,8 +887,8 @@ static int can_pernet_init(struct net *net)
if (IS_ENABLED(CONFIG_PROC_FS)) {
/* the statistics are updated every second (timer triggered) */
if (stats_timer) {
- setup_timer(&net->can.can_stattimer, can_stat_update,
- (unsigned long)net);
+ timer_setup(&net->can.can_stattimer, can_stat_update,
+ 0);
mod_timer(&net->can.can_stattimer,
round_jiffies(jiffies + HZ));
}
diff --git a/net/can/af_can.h b/net/can/af_can.h
index d0ef45bb2a72..eca6463c6213 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -113,6 +113,6 @@ struct s_pstats {
/* function prototypes for the CAN networklayer procfs (proc.c) */
void can_init_proc(struct net *net);
void can_remove_proc(struct net *net);
-void can_stat_update(unsigned long data);
+void can_stat_update(struct timer_list *t);
#endif /* AF_CAN_H */
diff --git a/net/can/proc.c b/net/can/proc.c
index 83045f00c63c..d979b3dc49a6 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -115,9 +115,9 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif,
return rate;
}
-void can_stat_update(unsigned long data)
+void can_stat_update(struct timer_list *t)
{
- struct net *net = (struct net *)data;
+ struct net *net = from_timer(net, t, can.can_stattimer);
struct s_stats *can_stats = net->can.can_stats;
unsigned long j = jiffies; /* snapshot */
@@ -221,7 +221,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) {
seq_printf(m, " %8ld %% total match ratio (RXMR)\n",
can_stats->total_rx_match_ratio);
@@ -291,7 +291,7 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v)
user_reset = 1;
- if (net->can.can_stattimer.function == can_stat_update) {
+ if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) {
seq_printf(m, "Scheduled statistic reset #%ld.\n",
can_pstats->stats_reset + 1);
} else {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index f7fb7e3f2acf..0b7b4c22719e 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -188,7 +188,7 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
}
if (!skb->len) {
skb = skb_set_peeked(skb);
- if (unlikely(IS_ERR(skb))) {
+ if (IS_ERR(skb)) {
*err = PTR_ERR(skb);
return NULL;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 11596a302a26..24ac9083bc13 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/crash_dump.h>
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
+#include <linux/net_namespace.h>
#include "net-sysfs.h"
@@ -162,7 +163,6 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info);
static struct napi_struct *napi_by_id(unsigned int napi_id);
@@ -188,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id);
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+static DEFINE_MUTEX(ifalias_mutex);
+
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
@@ -1265,29 +1267,53 @@ rollback:
*/
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
- char *new_ifalias;
-
- ASSERT_RTNL();
+ struct dev_ifalias *new_alias = NULL;
if (len >= IFALIASZ)
return -EINVAL;
- if (!len) {
- kfree(dev->ifalias);
- dev->ifalias = NULL;
- return 0;
+ if (len) {
+ new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
+ if (!new_alias)
+ return -ENOMEM;
+
+ memcpy(new_alias->ifalias, alias, len);
+ new_alias->ifalias[len] = 0;
}
- new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL);
- if (!new_ifalias)
- return -ENOMEM;
- dev->ifalias = new_ifalias;
- memcpy(dev->ifalias, alias, len);
- dev->ifalias[len] = 0;
+ mutex_lock(&ifalias_mutex);
+ rcu_swap_protected(dev->ifalias, new_alias,
+ mutex_is_locked(&ifalias_mutex));
+ mutex_unlock(&ifalias_mutex);
+
+ if (new_alias)
+ kfree_rcu(new_alias, rcuhead);
return len;
}
+/**
+ * dev_get_alias - get ifalias of a device
+ * @dev: device
+ * @name: buffer to store name of ifalias
+ * @len: size of buffer
+ *
+ * get ifalias for a device. Caller must make sure dev cannot go
+ * away, e.g. rcu read lock or own a reference count to device.
+ */
+int dev_get_alias(const struct net_device *dev, char *name, size_t len)
+{
+ const struct dev_ifalias *alias;
+ int ret = 0;
+
+ rcu_read_lock();
+ alias = rcu_dereference(dev->ifalias);
+ if (alias)
+ ret = snprintf(name, len, "%s", alias->ifalias);
+ rcu_read_unlock();
+
+ return ret;
+}
/**
* netdev_features_change - device changes features
@@ -1312,10 +1338,11 @@ EXPORT_SYMBOL(netdev_features_change);
void netdev_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info.dev = dev,
+ };
- change_info.flags_changed = 0;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGE,
&change_info.info);
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
}
@@ -1536,9 +1563,10 @@ EXPORT_SYMBOL(dev_disable_lro);
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- netdev_notifier_info_init(&info, dev);
return nb->notifier_call(nb, val, &info);
}
@@ -1663,11 +1691,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
*/
static int call_netdevice_notifiers_info(unsigned long val,
- struct net_device *dev,
struct netdev_notifier_info *info)
{
ASSERT_RTNL();
- netdev_notifier_info_init(info, dev);
return raw_notifier_call_chain(&netdev_chain, val, info);
}
@@ -1682,9 +1708,11 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info;
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ };
- return call_netdevice_notifiers_info(val, dev, &info);
+ return call_netdevice_notifiers_info(val, &info);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -2012,6 +2040,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
return 0;
}
+EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
static DEFINE_MUTEX(xps_map_mutex);
@@ -3864,8 +3893,8 @@ drop:
static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct bpf_prog *xdp_prog)
{
+ u32 metalen, act = XDP_DROP;
struct xdp_buff xdp;
- u32 act = XDP_DROP;
void *orig_data;
int hlen, off;
u32 mac_len;
@@ -3876,8 +3905,25 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_cloned(skb))
return XDP_PASS;
- if (skb_linearize(skb))
- goto do_drop;
+ /* XDP packets must be linear and must have sufficient headroom
+ * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+ * native XDP provides, thus we need to do it here as well.
+ */
+ if (skb_is_nonlinear(skb) ||
+ skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+ int troom = skb->tail + skb->data_len - skb->end;
+
+ /* In case we have to go down the path and also linearize,
+ * then lets do the pskb_expand_head() work just once here.
+ */
+ if (pskb_expand_head(skb,
+ hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+ troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+ goto do_drop;
+ if (troom > 0 && __skb_linearize(skb))
+ goto do_drop;
+ }
/* The XDP program wants to see the packet starting at the MAC
* header.
@@ -3885,6 +3931,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
mac_len = skb->data - skb_mac_header(skb);
hlen = skb_headlen(skb) + mac_len;
xdp.data = skb->data - mac_len;
+ xdp.data_meta = xdp.data;
xdp.data_end = xdp.data + hlen;
xdp.data_hard_start = skb->data - skb_headroom(skb);
orig_data = xdp.data;
@@ -3902,10 +3949,12 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
case XDP_REDIRECT:
case XDP_TX:
__skb_push(skb, mac_len);
- /* fall through */
+ break;
case XDP_PASS:
+ metalen = xdp.data - xdp.data_meta;
+ if (metalen)
+ skb_metadata_set(skb, metalen);
break;
-
default:
bpf_warn_invalid_xdp_action(act);
/* fall through */
@@ -4443,6 +4492,33 @@ out:
return ret;
}
+/**
+ * netif_receive_skb_core - special purpose version of netif_receive_skb
+ * @skb: buffer to process
+ *
+ * More direct receive version of netif_receive_skb(). It should
+ * only be used by callers that have a need to skip RPS and Generic XDP.
+ * Caller must also take care of handling if (page_is_)pfmemalloc.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb_core(struct sk_buff *skb)
+{
+ int ret;
+
+ rcu_read_lock();
+ ret = __netif_receive_skb_core(skb, false);
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(netif_receive_skb_core);
+
static int __netif_receive_skb(struct sk_buff *skb)
{
int ret;
@@ -4695,6 +4771,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb)
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
+ diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
diffs |= compare_ether_header(skb_mac_header(p),
skb_mac_header(skb));
@@ -6228,9 +6305,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *upper_priv, void *upper_info)
-{
- struct netdev_notifier_changeupper_info changeupper_info;
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ .extack = extack,
+ },
+ .upper_dev = upper_dev,
+ .master = master,
+ .linking = true,
+ .upper_info = upper_info,
+ };
int ret = 0;
ASSERT_RTNL();
@@ -6248,12 +6335,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (master && netdev_master_upper_dev_get(dev))
return -EBUSY;
- changeupper_info.upper_dev = upper_dev;
- changeupper_info.master = master;
- changeupper_info.linking = true;
- changeupper_info.upper_info = upper_info;
-
- ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6264,7 +6346,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
ret = notifier_to_errno(ret);
if (ret)
@@ -6289,9 +6371,11 @@ rollback:
* returns zero.
*/
int netdev_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ struct netlink_ext_ack *extack)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false,
+ NULL, NULL, extack);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -6310,10 +6394,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
*/
int netdev_master_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev,
- void *upper_priv, void *upper_info)
+ void *upper_priv, void *upper_info,
+ struct netlink_ext_ack *extack)
{
return __netdev_upper_dev_link(dev, upper_dev, true,
- upper_priv, upper_info);
+ upper_priv, upper_info, extack);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
@@ -6328,20 +6413,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link);
void netdev_upper_dev_unlink(struct net_device *dev,
struct net_device *upper_dev)
{
- struct netdev_notifier_changeupper_info changeupper_info;
+ struct netdev_notifier_changeupper_info changeupper_info = {
+ .info = {
+ .dev = dev,
+ },
+ .upper_dev = upper_dev,
+ .linking = false,
+ };
ASSERT_RTNL();
- changeupper_info.upper_dev = upper_dev;
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
- changeupper_info.linking = false;
- call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER,
&changeupper_info.info);
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGEUPPER,
&changeupper_info.info);
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
@@ -6357,11 +6446,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink);
void netdev_bonding_info_change(struct net_device *dev,
struct netdev_bonding_info *bonding_info)
{
- struct netdev_notifier_bonding_info info;
+ struct netdev_notifier_bonding_info info = {
+ .info.dev = dev,
+ };
memcpy(&info.bonding_info, bonding_info,
sizeof(struct netdev_bonding_info));
- call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev,
+ call_netdevice_notifiers_info(NETDEV_BONDING_INFO,
&info.info);
}
EXPORT_SYMBOL(netdev_bonding_info_change);
@@ -6487,11 +6578,13 @@ EXPORT_SYMBOL(dev_get_nest_level);
void netdev_lower_state_changed(struct net_device *lower_dev,
void *lower_state_info)
{
- struct netdev_notifier_changelowerstate_info changelowerstate_info;
+ struct netdev_notifier_changelowerstate_info changelowerstate_info = {
+ .info.dev = lower_dev,
+ };
ASSERT_RTNL();
changelowerstate_info.lower_state_info = lower_state_info;
- call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE,
&changelowerstate_info.info);
}
EXPORT_SYMBOL(netdev_lower_state_changed);
@@ -6782,11 +6875,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
if (dev->flags & IFF_UP &&
(changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) {
- struct netdev_notifier_change_info change_info;
+ struct netdev_notifier_change_info change_info = {
+ .info = {
+ .dev = dev,
+ },
+ .flags_changed = changes,
+ };
- change_info.flags_changed = changes;
- call_netdevice_notifiers_info(NETDEV_CHANGE, dev,
- &change_info.info);
+ call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info);
}
}
@@ -7157,7 +7253,7 @@ static void rollback_registered_many(struct list_head *head)
if (!dev->rtnl_link_ops ||
dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0,
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
/*
* Flush the unicast and multicast chains
@@ -7994,7 +8090,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
unsigned int txqs, unsigned int rxqs)
{
struct net_device *dev;
- size_t alloc_size;
+ unsigned int alloc_size;
struct net_device *p;
BUG_ON(strlen(name) >= sizeof(dev->name));
@@ -8244,7 +8340,7 @@ EXPORT_SYMBOL(unregister_netdev);
int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
- int err;
+ int err, new_nsid;
ASSERT_RTNL();
@@ -8300,7 +8396,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
rcu_barrier();
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
- rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL);
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net)
+ new_nsid = peernet2id_alloc(dev_net(dev), net);
+ else
+ new_nsid = peernet2id(dev_net(dev), net);
+ rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid);
/*
* Flush the unicast and multicast chains
diff --git a/net/core/dst.c b/net/core/dst.c
index a6c47da7d0f8..662a2d4a3d19 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -322,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
return md_dst;
}
EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
+
+void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
+{
+#ifdef CONFIG_DST_CACHE
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
+
+ if (one_md_dst->type == METADATA_IP_TUNNEL)
+ dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
+ }
+#endif
+ free_percpu(md_dst);
+}
+EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9a9a3d77e327..f8fcf450a36e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -403,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data)
return 0;
}
+/* Given two link masks, AND them together and save the result in dst. */
+void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst,
+ struct ethtool_link_ksettings *src)
+{
+ unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS);
+ unsigned int idx = 0;
+
+ for (; idx < size; idx++) {
+ dst->link_modes.supported[idx] &=
+ src->link_modes.supported[idx];
+ dst->link_modes.advertising[idx] &=
+ src->link_modes.advertising[idx];
+ }
+}
+EXPORT_SYMBOL(ethtool_intersect_link_masks);
+
void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst,
u32 legacy_u32)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index aa0265997f93..b79c44cc8145 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -43,6 +43,7 @@
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
+#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
@@ -1406,7 +1407,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb,
{
int err = __bpf_try_make_writable(skb, write_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return err;
}
@@ -1967,7 +1968,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
bpf_pull_mac_rcsum(skb);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -1989,7 +1990,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
ret = skb_vlan_pop(skb);
bpf_pull_mac_rcsum(skb);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2183,7 +2184,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
* need to be verified first.
*/
ret = bpf_skb_proto_xlat(skb, proto);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2308,7 +2309,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
bpf_skb_net_grow(skb, len_diff_abs);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2399,7 +2400,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
skb_gso_reset(skb);
}
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return ret;
}
@@ -2439,7 +2440,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
skb_reset_mac_header(skb);
}
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
return 0;
}
@@ -2452,14 +2453,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
+static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
+{
+ return xdp_data_meta_unsupported(xdp) ? 0 :
+ xdp->data - xdp->data_meta;
+}
+
BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
+ unsigned long metalen = xdp_get_metalen(xdp);
+ void *data_start = xdp->data_hard_start + metalen;
void *data = xdp->data + offset;
- if (unlikely(data < xdp->data_hard_start ||
+ if (unlikely(data < data_start ||
data > xdp->data_end - ETH_HLEN))
return -EINVAL;
+ if (metalen)
+ memmove(xdp->data_meta + offset,
+ xdp->data_meta, metalen);
+ xdp->data_meta += offset;
xdp->data = data;
return 0;
@@ -2473,6 +2486,33 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
.arg2_type = ARG_ANYTHING,
};
+BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
+{
+ void *meta = xdp->data_meta + offset;
+ unsigned long metalen = xdp->data - meta;
+
+ if (xdp_data_meta_unsupported(xdp))
+ return -ENOTSUPP;
+ if (unlikely(meta < xdp->data_hard_start ||
+ meta > xdp->data))
+ return -EINVAL;
+ if (unlikely((metalen & (sizeof(__u32) - 1)) ||
+ (metalen > 32)))
+ return -EACCES;
+
+ xdp->data_meta = meta;
+
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
+ .func = bpf_xdp_adjust_meta,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
static int __bpf_tx_xdp(struct net_device *dev,
struct bpf_map *map,
struct xdp_buff *xdp,
@@ -2487,10 +2527,36 @@ static int __bpf_tx_xdp(struct net_device *dev,
err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
if (err)
return err;
- if (map)
+ dev->netdev_ops->ndo_xdp_flush(dev);
+ return 0;
+}
+
+static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
+ struct bpf_map *map,
+ struct xdp_buff *xdp,
+ u32 index)
+{
+ int err;
+
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ struct net_device *dev = fwd;
+
+ if (!dev->netdev_ops->ndo_xdp_xmit)
+ return -EOPNOTSUPP;
+
+ err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
+ if (err)
+ return err;
__dev_map_insert_ctx(map, index);
- else
- dev->netdev_ops->ndo_xdp_flush(dev);
+
+ } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
+ struct bpf_cpu_map_entry *rcpu = fwd;
+
+ err = cpu_map_enqueue(rcpu, xdp, dev_rx);
+ if (err)
+ return err;
+ __cpu_map_insert_ctx(map, index);
+ }
return 0;
}
@@ -2500,11 +2566,33 @@ void xdp_do_flush_map(void)
struct bpf_map *map = ri->map_to_flush;
ri->map_to_flush = NULL;
- if (map)
- __dev_map_flush(map);
+ if (map) {
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP:
+ __dev_map_flush(map);
+ break;
+ case BPF_MAP_TYPE_CPUMAP:
+ __cpu_map_flush(map);
+ break;
+ default:
+ break;
+ }
+ }
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
+static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
+{
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP:
+ return __dev_map_lookup_elem(map, index);
+ case BPF_MAP_TYPE_CPUMAP:
+ return __cpu_map_lookup_elem(map, index);
+ default:
+ return NULL;
+ }
+}
+
static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
unsigned long aux)
{
@@ -2517,8 +2605,8 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
- struct net_device *fwd = NULL;
u32 index = ri->ifindex;
+ void *fwd = NULL;
int err;
ri->ifindex = 0;
@@ -2531,7 +2619,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
goto err;
}
- fwd = __dev_map_lookup_elem(map, index);
+ fwd = __xdp_map_lookup_elem(map, index);
if (!fwd) {
err = -EINVAL;
goto err;
@@ -2539,7 +2627,7 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
if (ri->map_to_flush && ri->map_to_flush != map)
xdp_do_flush_map();
- err = __bpf_tx_xdp(fwd, map, xdp, index);
+ err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
if (unlikely(err))
goto err;
@@ -2581,54 +2669,88 @@ err:
}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
-int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
- struct bpf_prog *xdp_prog)
+static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
+{
+ unsigned int len;
+
+ if (unlikely(!(fwd->flags & IFF_UP)))
+ return -ENETDOWN;
+
+ len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+ if (skb->len > len)
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
{
struct redirect_info *ri = this_cpu_ptr(&redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
struct net_device *fwd = NULL;
u32 index = ri->ifindex;
- unsigned int len;
int err = 0;
ri->ifindex = 0;
ri->map = NULL;
ri->map_owner = 0;
- if (map) {
- if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
- err = -EFAULT;
- map = NULL;
- goto err;
- }
- fwd = __dev_map_lookup_elem(map, index);
- } else {
- fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
+ err = -EFAULT;
+ map = NULL;
+ goto err;
}
+ fwd = __xdp_map_lookup_elem(map, index);
if (unlikely(!fwd)) {
err = -EINVAL;
goto err;
}
- if (unlikely(!(fwd->flags & IFF_UP))) {
- err = -ENETDOWN;
+ if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+ if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ goto err;
+ skb->dev = fwd;
+ } else {
+ /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
+ err = -EBADRQC;
goto err;
}
- len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
- if (skb->len > len) {
- err = -EMSGSIZE;
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
+ return err;
+}
+
+int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
+ struct bpf_prog *xdp_prog)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ u32 index = ri->ifindex;
+ struct net_device *fwd;
+ int err = 0;
+
+ if (ri->map)
+ return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
+
+ ri->ifindex = 0;
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
goto err;
}
+ if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ goto err;
+
skb->dev = fwd;
- map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index)
- : _trace_xdp_redirect(dev, xdp_prog, index);
+ _trace_xdp_redirect(dev, xdp_prog, index);
return 0;
err:
- map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err)
- : _trace_xdp_redirect_err(dev, xdp_prog, index, err);
+ _trace_xdp_redirect_err(dev, xdp_prog, index, err);
return err;
}
EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
@@ -2697,7 +2819,8 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
- func == bpf_xdp_adjust_head)
+ func == bpf_xdp_adjust_head ||
+ func == bpf_xdp_adjust_meta)
return true;
return false;
@@ -2948,14 +3071,15 @@ static const struct bpf_func_proto *
bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
{
if (!md_dst) {
- /* Race is not possible, since it's called from verifier
- * that is holding verifier mutex.
- */
- md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
- METADATA_IP_TUNNEL,
- GFP_KERNEL);
- if (!md_dst)
+ struct metadata_dst __percpu *tmp;
+
+ tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
+ METADATA_IP_TUNNEL,
+ GFP_KERNEL);
+ if (!tmp)
return NULL;
+ if (cmpxchg(&md_dst, NULL, tmp))
+ metadata_dst_free_percpu(tmp);
}
switch (which) {
@@ -3150,7 +3274,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
static const struct bpf_func_proto bpf_setsockopt_proto = {
.func = bpf_setsockopt,
- .gpl_only = true,
+ .gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING,
@@ -3159,6 +3283,48 @@ static const struct bpf_func_proto bpf_setsockopt_proto = {
.arg5_type = ARG_CONST_SIZE,
};
+BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+ int, level, int, optname, char *, optval, int, optlen)
+{
+ struct sock *sk = bpf_sock->sk;
+ int ret = 0;
+
+ if (!sk_fullsock(sk))
+ goto err_clear;
+
+#ifdef CONFIG_INET
+ if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
+ if (optname == TCP_CONGESTION) {
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (!icsk->icsk_ca_ops || optlen <= 1)
+ goto err_clear;
+ strncpy(optval, icsk->icsk_ca_ops->name, optlen);
+ optval[optlen - 1] = 0;
+ } else {
+ goto err_clear;
+ }
+ } else {
+ goto err_clear;
+ }
+ return ret;
+#endif
+err_clear:
+ memset(optval, 0, optlen);
+ return -EINVAL;
+}
+
+static const struct bpf_func_proto bpf_getsockopt_proto = {
+ .func = bpf_getsockopt,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+ .arg4_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg5_type = ARG_CONST_SIZE,
+};
+
static const struct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id)
{
@@ -3293,6 +3459,8 @@ xdp_func_proto(enum bpf_func_id func_id)
return &bpf_get_smp_processor_id_proto;
case BPF_FUNC_xdp_adjust_head:
return &bpf_xdp_adjust_head_proto;
+ case BPF_FUNC_xdp_adjust_meta:
+ return &bpf_xdp_adjust_meta_proto;
case BPF_FUNC_redirect:
return &bpf_xdp_redirect_proto;
case BPF_FUNC_redirect_map:
@@ -3335,6 +3503,8 @@ static const struct bpf_func_proto *
switch (func_id) {
case BPF_FUNC_setsockopt:
return &bpf_setsockopt_proto;
+ case BPF_FUNC_getsockopt:
+ return &bpf_getsockopt_proto;
case BPF_FUNC_sock_map_update:
return &bpf_sock_map_update_proto;
default:
@@ -3423,6 +3593,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
if (size != size_default)
return false;
@@ -3449,6 +3620,7 @@ static bool sk_filter_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, data_end):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
return false;
@@ -3473,6 +3645,7 @@ static bool lwt_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_classid):
case bpf_ctx_range_till(struct __sk_buff, family, local_port):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
return false;
}
@@ -3591,6 +3764,9 @@ static bool tc_cls_act_is_valid_access(int off, int size,
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
break;
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ info->reg_type = PTR_TO_PACKET_META;
+ break;
case bpf_ctx_range(struct __sk_buff, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
@@ -3601,6 +3777,25 @@ static bool tc_cls_act_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, info);
}
+static bool
+tc_cls_act_is_valid_access_analyzer(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ switch (off) {
+ case offsetof(struct sk_buff, len):
+ return true;
+ case offsetof(struct sk_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ return true;
+ case offsetof(struct sk_buff, cb) +
+ offsetof(struct bpf_skb_data_end, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ return true;
+ }
+ return false;
+}
+
static bool __is_valid_xdp_access(int off, int size)
{
if (off < 0 || off >= sizeof(struct xdp_md))
@@ -3624,6 +3819,9 @@ static bool xdp_is_valid_access(int off, int size,
case offsetof(struct xdp_md, data):
info->reg_type = PTR_TO_PACKET;
break;
+ case offsetof(struct xdp_md, data_meta):
+ info->reg_type = PTR_TO_PACKET_META;
+ break;
case offsetof(struct xdp_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
break;
@@ -3632,6 +3830,21 @@ static bool xdp_is_valid_access(int off, int size,
return __is_valid_xdp_access(off, size);
}
+static bool xdp_is_valid_access_analyzer(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ switch (off) {
+ case offsetof(struct xdp_buff, data):
+ info->reg_type = PTR_TO_PACKET;
+ return true;
+ case offsetof(struct xdp_buff, data_end):
+ info->reg_type = PTR_TO_PACKET_END;
+ return true;
+ }
+ return false;
+}
+
void bpf_warn_invalid_xdp_action(u32 act)
{
const u32 act_max = XDP_REDIRECT;
@@ -3682,6 +3895,12 @@ static bool sk_skb_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
{
+ switch (off) {
+ case bpf_ctx_range(struct __sk_buff, tc_classid):
+ case bpf_ctx_range(struct __sk_buff, data_meta):
+ return false;
+ }
+
if (type == BPF_WRITE) {
switch (off) {
case bpf_ctx_range(struct __sk_buff, tc_index):
@@ -3694,7 +3913,6 @@ static bool sk_skb_is_valid_access(int off, int size,
switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
- case bpf_ctx_range(struct __sk_buff, tc_classid):
return false;
case bpf_ctx_range(struct __sk_buff, data):
info->reg_type = PTR_TO_PACKET;
@@ -3852,6 +4070,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sk_buff, data));
break;
+ case offsetof(struct __sk_buff, data_meta):
+ off = si->off;
+ off -= offsetof(struct __sk_buff, data_meta);
+ off += offsetof(struct sk_buff, cb);
+ off += offsetof(struct bpf_skb_data_end, data_meta);
+ *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
+ si->src_reg, off);
+ break;
+
case offsetof(struct __sk_buff, data_end):
off = si->off;
off -= offsetof(struct __sk_buff, data_end);
@@ -4100,6 +4327,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, data));
break;
+ case offsetof(struct xdp_md, data_meta):
+ *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
+ si->dst_reg, si->src_reg,
+ offsetof(struct xdp_buff, data_meta));
+ break;
case offsetof(struct xdp_md, data_end):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
si->dst_reg, si->src_reg,
@@ -4243,68 +4475,103 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
return insn - insn_buf;
}
-const struct bpf_verifier_ops sk_filter_prog_ops = {
+const struct bpf_verifier_ops sk_filter_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
};
-const struct bpf_verifier_ops tc_cls_act_prog_ops = {
+const struct bpf_prog_ops sk_filter_prog_ops = {
+};
+
+const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
.get_func_proto = tc_cls_act_func_proto,
.is_valid_access = tc_cls_act_is_valid_access,
.convert_ctx_access = tc_cls_act_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+};
+
+const struct bpf_verifier_ops tc_cls_act_analyzer_ops = {
+ .is_valid_access = tc_cls_act_is_valid_access_analyzer,
+};
+
+const struct bpf_prog_ops tc_cls_act_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops xdp_prog_ops = {
+const struct bpf_verifier_ops xdp_verifier_ops = {
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+};
+
+const struct bpf_verifier_ops xdp_analyzer_ops = {
+ .is_valid_access = xdp_is_valid_access_analyzer,
+};
+
+const struct bpf_prog_ops xdp_prog_ops = {
.test_run = bpf_prog_test_run_xdp,
};
-const struct bpf_verifier_ops cg_skb_prog_ops = {
+const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = sk_filter_func_proto,
.is_valid_access = sk_filter_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops cg_skb_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_inout_prog_ops = {
+const struct bpf_verifier_ops lwt_inout_verifier_ops = {
.get_func_proto = lwt_inout_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
+};
+
+const struct bpf_prog_ops lwt_inout_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops lwt_xmit_prog_ops = {
+const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
.get_func_proto = lwt_xmit_func_proto,
.is_valid_access = lwt_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = tc_cls_act_prologue,
+};
+
+const struct bpf_prog_ops lwt_xmit_prog_ops = {
.test_run = bpf_prog_test_run_skb,
};
-const struct bpf_verifier_ops cg_sock_prog_ops = {
+const struct bpf_verifier_ops cg_sock_verifier_ops = {
.get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
.convert_ctx_access = sock_filter_convert_ctx_access,
};
-const struct bpf_verifier_ops sock_ops_prog_ops = {
+const struct bpf_prog_ops cg_sock_prog_ops = {
+};
+
+const struct bpf_verifier_ops sock_ops_verifier_ops = {
.get_func_proto = sock_ops_func_proto,
.is_valid_access = sock_ops_is_valid_access,
.convert_ctx_access = sock_ops_convert_ctx_access,
};
-const struct bpf_verifier_ops sk_skb_prog_ops = {
+const struct bpf_prog_ops sock_ops_prog_ops = {
+};
+
+const struct bpf_verifier_ops sk_skb_verifier_ops = {
.get_func_proto = sk_skb_func_proto,
.is_valid_access = sk_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
.gen_prologue = sk_skb_prologue,
};
+const struct bpf_prog_ops sk_skb_prog_ops = {
+};
+
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0a977373d003..1f5caafb4492 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -5,6 +5,7 @@
#include <linux/ipv6.h>
#include <linux/if_vlan.h>
#include <net/dsa.h>
+#include <net/dst_metadata.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/gre.h>
@@ -115,6 +116,102 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
}
EXPORT_SYMBOL(__skb_flow_get_ports);
+static void
+skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct flow_dissector_key_control *ctrl;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL))
+ return;
+
+ ctrl = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL,
+ target_container);
+ ctrl->addr_type = type;
+}
+
+static void
+__skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+{
+ struct ip_tunnel_info *info;
+ struct ip_tunnel_key *key;
+
+ /* A quick check to see if there might be something to do. */
+ if (!dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_CONTROL) &&
+ !dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS))
+ return;
+
+ info = skb_tunnel_info(skb);
+ if (!info)
+ return;
+
+ key = &info->key;
+
+ switch (ip_tunnel_info_af(info)) {
+ case AF_INET:
+ skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ flow_dissector,
+ target_container);
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+ struct flow_dissector_key_ipv4_addrs *ipv4;
+
+ ipv4 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+ target_container);
+ ipv4->src = key->u.ipv4.src;
+ ipv4->dst = key->u.ipv4.dst;
+ }
+ break;
+ case AF_INET6:
+ skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ flow_dissector,
+ target_container);
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+ struct flow_dissector_key_ipv6_addrs *ipv6;
+
+ ipv6 = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
+ target_container);
+ ipv6->src = key->u.ipv6.src;
+ ipv6->dst = key->u.ipv6.dst;
+ }
+ break;
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_dissector_key_keyid *keyid;
+
+ keyid = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_KEYID,
+ target_container);
+ keyid->keyid = tunnel_id_to_key32(key->tun_id);
+ }
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+ struct flow_dissector_key_ports *tp;
+
+ tp = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_ENC_PORTS,
+ target_container);
+ tp->src = key->tp_src;
+ tp->dst = key->tp_dst;
+ }
+}
+
static enum flow_dissect_ret
__skb_flow_dissect_mpls(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
@@ -478,6 +575,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_BASIC,
target_container);
+ __skb_flow_dissect_tunnel_info(skb, flow_dissector,
+ target_container);
+
if (dissector_uses_key(flow_dissector,
FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
struct ethhdr *eth = eth_hdr(skb);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 1307731ddfe4..e7e626fb87bb 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
*/
preempt_disable();
rcu_read_lock();
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
rcu_read_unlock();
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 16a1a4c4eb57..6ea3a1a7f36a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
const void *pkey)
{
struct neighbour *n;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val;
struct neigh_hash_table *nht;
@@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
u32 hash_val;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
int error;
struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
struct neigh_hash_table *nht;
@@ -572,7 +572,7 @@ out_neigh_release:
}
EXPORT_SYMBOL(__neigh_create);
-static u32 pneigh_hash(const void *pkey, int key_len)
+static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
u32 hash_val = *(u32 *)(pkey + key_len - 4);
hash_val ^= (hash_val >> 16);
@@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len)
static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
struct net *net,
const void *pkey,
- int key_len,
+ unsigned int key_len,
struct net_device *dev)
{
while (n) {
@@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
struct net *net, const void *pkey, struct net_device *dev)
{
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
@@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
struct net_device *dev, int creat)
{
struct pneigh_entry *n;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
read_lock_bh(&tbl->lock);
@@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
struct net_device *dev)
{
struct pneigh_entry *n, **np;
- int key_len = tbl->key_len;
+ unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
write_lock_bh(&tbl->lock);
@@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(dst_attr) < tbl->key_len)
+ if (nla_len(dst_attr) < (int)tbl->key_len)
goto out;
if (ndm->ndm_flags & NTF_PROXY) {
@@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tbl == NULL)
return -EAFNOSUPPORT;
- if (nla_len(tb[NDA_DST]) < tbl->key_len)
+ if (nla_len(tb[NDA_DST]) < (int)tbl->key_len)
goto out;
dst = nla_data(tb[NDA_DST]);
lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 927a6dcbad96..51d5836d8fb9 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -391,10 +391,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
if (len > 0 && buf[len - 1] == '\n')
--count;
- if (!rtnl_trylock())
- return restart_syscall();
ret = dev_set_alias(netdev, buf, count);
- rtnl_unlock();
return ret < 0 ? ret : len;
}
@@ -403,13 +400,12 @@ static ssize_t ifalias_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
const struct net_device *netdev = to_net_dev(dev);
+ char tmp[IFALIASZ];
ssize_t ret = 0;
- if (!rtnl_trylock())
- return restart_syscall();
- if (netdev->ifalias)
- ret = sprintf(buf, "%s\n", netdev->ifalias);
- rtnl_unlock();
+ ret = dev_get_alias(netdev, tmp, sizeof(tmp));
+ if (ret > 0)
+ ret = sprintf(buf, "%s\n", tmp);
return ret;
}
static DEVICE_ATTR_RW(ifalias);
@@ -1488,7 +1484,10 @@ static void netdev_release(struct device *d)
BUG_ON(dev->reg_state != NETREG_RELEASED);
- kfree(dev->ifalias);
+ /* no need to wait for rcu grace period:
+ * device is dead and about to be freed.
+ */
+ kfree(rcu_access_pointer(dev->ifalias));
netdev_freemem(dev);
}
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 1132820c8e62..f4e4fa2db505 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -31,6 +31,7 @@
#include <trace/events/napi.h>
#include <trace/events/sock.h>
#include <trace/events/udp.h>
+#include <trace/events/tcp.h>
#include <trace/events/fib.h>
#include <trace/events/qdisc.h>
#if IS_ENABLED(CONFIG_IPV6)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ace48926b19..04680a53c8dd 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -453,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
{
const struct rtnl_af_ops *ops;
- list_for_each_entry(ops, &rtnl_af_ops, list) {
+ list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
if (ops->family == family)
return ops;
}
@@ -470,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family)
void rtnl_af_register(struct rtnl_af_ops *ops)
{
rtnl_lock();
- list_add_tail(&ops->list, &rtnl_af_ops);
+ list_add_tail_rcu(&ops->list, &rtnl_af_ops);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(rtnl_af_register);
/**
- * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
- * @ops: struct rtnl_af_ops * to unregister
- *
- * The caller must hold the rtnl_mutex.
- */
-void __rtnl_af_unregister(struct rtnl_af_ops *ops)
-{
- list_del(&ops->list);
-}
-EXPORT_SYMBOL_GPL(__rtnl_af_unregister);
-
-/**
* rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
* @ops: struct rtnl_af_ops * to unregister
*/
void rtnl_af_unregister(struct rtnl_af_ops *ops)
{
rtnl_lock();
- __rtnl_af_unregister(ops);
+ list_del_rcu(&ops->list);
rtnl_unlock();
+
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);
@@ -508,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
/* IFLA_AF_SPEC */
size = nla_total_size(sizeof(struct nlattr));
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_link_af_size) {
/* AF_* + nested data */
size += nla_total_size(sizeof(struct nlattr)) +
af_ops->get_link_af_size(dev, ext_filter_mask);
}
}
+ rcu_read_unlock();
return size;
}
@@ -522,11 +514,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev,
static bool rtnl_have_link_slave_info(const struct net_device *dev)
{
struct net_device *master_dev;
+ bool ret = false;
- master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
+ rcu_read_lock();
+
+ master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
if (master_dev && master_dev->rtnl_link_ops)
- return true;
- return false;
+ ret = true;
+ rcu_read_unlock();
+ return ret;
}
static int rtnl_link_slave_info_fill(struct sk_buff *skb,
@@ -923,6 +919,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
+ rtnl_xdp_size() /* IFLA_XDP */
+ nla_total_size(4) /* IFLA_EVENT */
+ + nla_total_size(4) /* IFLA_NEW_NETNSID */
+ nla_total_size(1); /* IFLA_PROTO_DOWN */
}
@@ -1211,6 +1208,36 @@ nla_put_vfinfo_failure:
return -EMSGSIZE;
}
+static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
+ struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ struct nlattr *vfinfo;
+ int i, num_vfs;
+
+ if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0))
+ return 0;
+
+ num_vfs = dev_num_vf(dev->dev.parent);
+ if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs))
+ return -EMSGSIZE;
+
+ if (!dev->netdev_ops->ndo_get_vf_config)
+ return 0;
+
+ vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+ if (!vfinfo)
+ return -EMSGSIZE;
+
+ for (i = 0; i < num_vfs; i++) {
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, vfinfo);
+ return 0;
+}
+
static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev)
{
struct rtnl_link_ifmap map;
@@ -1307,16 +1334,106 @@ static u32 rtnl_get_event(unsigned long event)
return rtnl_event_type;
}
+static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
+{
+ const struct net_device *upper_dev;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ upper_dev = netdev_master_upper_dev_get_rcu(dev);
+ if (upper_dev)
+ ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex);
+
+ rcu_read_unlock();
+ return ret;
+}
+
+static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev)
+{
+ int ifindex = dev_get_iflink(dev);
+
+ if (dev->ifindex == ifindex)
+ return 0;
+
+ return nla_put_u32(skb, IFLA_LINK, ifindex);
+}
+
+static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ char buf[IFALIASZ];
+ int ret;
+
+ ret = dev_get_alias(dev, buf, sizeof(buf));
+ return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0;
+}
+
+static int rtnl_fill_link_netnsid(struct sk_buff *skb,
+ const struct net_device *dev)
+{
+ if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) {
+ struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
+
+ if (!net_eq(dev_net(dev), link_net)) {
+ int id = peernet2id_alloc(dev_net(dev), link_net);
+
+ if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
+ return -EMSGSIZE;
+ }
+ }
+
+ return 0;
+}
+
+static int rtnl_fill_link_af(struct sk_buff *skb,
+ const struct net_device *dev,
+ u32 ext_filter_mask)
+{
+ const struct rtnl_af_ops *af_ops;
+ struct nlattr *af_spec;
+
+ af_spec = nla_nest_start(skb, IFLA_AF_SPEC);
+ if (!af_spec)
+ return -EMSGSIZE;
+
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
+ struct nlattr *af;
+ int err;
+
+ if (!af_ops->fill_link_af)
+ continue;
+
+ af = nla_nest_start(skb, af_ops->family);
+ if (!af)
+ return -EMSGSIZE;
+
+ err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
+ /*
+ * Caller may return ENODATA to indicate that there
+ * was no data to be dumped. This is not an error, it
+ * means we should trim the attribute header and
+ * continue.
+ */
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, af);
+ else if (err < 0)
+ return -EMSGSIZE;
+
+ nla_nest_end(skb, af);
+ }
+
+ nla_nest_end(skb, af_spec);
+ return 0;
+}
+
static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask,
- u32 event)
+ u32 event, int *new_nsid)
{
struct ifinfomsg *ifm;
struct nlmsghdr *nlh;
- struct nlattr *af_spec;
- struct rtnl_af_ops *af_ops;
- struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
ASSERT_RTNL();
nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
@@ -1345,15 +1462,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
- (dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
- (upper_dev &&
- nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) ||
+ nla_put_iflink(skb, dev) ||
+ put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(dev->qdisc &&
nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) ||
- (dev->ifalias &&
- nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
+ nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_changes)) ||
nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
@@ -1385,27 +1499,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (rtnl_fill_stats(skb, dev))
goto nla_put_failure;
- if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) &&
- nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent)))
+ if (rtnl_fill_vf(skb, dev, ext_filter_mask))
goto nla_put_failure;
- if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent &&
- ext_filter_mask & RTEXT_FILTER_VF) {
- int i;
- struct nlattr *vfinfo;
- int num_vfs = dev_num_vf(dev->dev.parent);
-
- vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
- if (!vfinfo)
- goto nla_put_failure;
- for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
- goto nla_put_failure;
- }
-
- nla_nest_end(skb, vfinfo);
- }
-
if (rtnl_port_fill(skb, dev, ext_filter_mask))
goto nla_put_failure;
@@ -1417,51 +1513,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
}
- if (dev->rtnl_link_ops &&
- dev->rtnl_link_ops->get_link_net) {
- struct net *link_net = dev->rtnl_link_ops->get_link_net(dev);
-
- if (!net_eq(dev_net(dev), link_net)) {
- int id = peernet2id_alloc(dev_net(dev), link_net);
-
- if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
- goto nla_put_failure;
- }
- }
-
- if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC)))
+ if (rtnl_fill_link_netnsid(skb, dev))
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
- if (af_ops->fill_link_af) {
- struct nlattr *af;
- int err;
-
- if (!(af = nla_nest_start(skb, af_ops->family)))
- goto nla_put_failure;
-
- err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
-
- /*
- * Caller may return ENODATA to indicate that there
- * was no data to be dumped. This is not an error, it
- * means we should trim the attribute header and
- * continue.
- */
- if (err == -ENODATA)
- nla_nest_cancel(skb, af);
- else if (err < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, af);
- }
- }
+ if (new_nsid &&
+ nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
+ goto nla_put_failure;
- nla_nest_end(skb, af_spec);
+ rcu_read_lock();
+ if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
+ goto nla_put_failure_rcu;
+ rcu_read_unlock();
nlmsg_end(skb, nlh);
return 0;
+nla_put_failure_rcu:
+ rcu_read_unlock();
nla_put_failure:
nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
@@ -1661,7 +1729,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0,
flags,
- ext_filter_mask, 0);
+ ext_filter_mask, 0, NULL);
if (err < 0) {
if (likely(skb->len))
@@ -1726,17 +1794,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
- if (!(af_ops = rtnl_af_lookup(nla_type(af))))
+ rcu_read_lock();
+ af_ops = rtnl_af_lookup(nla_type(af));
+ if (!af_ops) {
+ rcu_read_unlock();
return -EAFNOSUPPORT;
+ }
- if (!af_ops->set_link_af)
+ if (!af_ops->set_link_af) {
+ rcu_read_unlock();
return -EOPNOTSUPP;
+ }
if (af_ops->validate_link_af) {
err = af_ops->validate_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
return err;
+ }
}
+
+ rcu_read_unlock();
}
}
@@ -1912,7 +1990,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
return err;
}
-static int do_set_master(struct net_device *dev, int ifindex)
+static int do_set_master(struct net_device *dev, int ifindex,
+ struct netlink_ext_ack *extack)
{
struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
const struct net_device_ops *ops;
@@ -1937,7 +2016,7 @@ static int do_set_master(struct net_device *dev, int ifindex)
return -EINVAL;
ops = upper_dev->netdev_ops;
if (ops->ndo_add_slave) {
- err = ops->ndo_add_slave(upper_dev, dev);
+ err = ops->ndo_add_slave(upper_dev, dev, extack);
if (err)
return err;
} else {
@@ -2070,7 +2149,7 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
if (err)
goto errout;
status |= DO_SETLINK_MODIFIED;
@@ -2193,13 +2272,18 @@ static int do_setlink(const struct sk_buff *skb,
nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
const struct rtnl_af_ops *af_ops;
+ rcu_read_lock();
+
if (!(af_ops = rtnl_af_lookup(nla_type(af))))
BUG();
err = af_ops->set_link_af(dev, af);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
goto errout;
+ }
+ rcu_read_unlock();
status |= DO_SETLINK_NOTIFY;
}
}
@@ -2579,12 +2663,6 @@ replay:
return err;
slave_data = slave_attr;
}
- if (m_ops->slave_validate) {
- err = m_ops->slave_validate(tb, slave_data,
- extack);
- if (err < 0)
- return err;
- }
}
if (dev) {
@@ -2714,7 +2792,8 @@ replay:
goto out_unregister;
}
if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]));
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]),
+ extack);
if (err)
goto out_unregister;
}
@@ -2774,7 +2853,7 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
return -ENOBUFS;
err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid,
- nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0);
+ nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0, NULL);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size */
WARN_ON(err == -EMSGSIZE);
@@ -2859,7 +2938,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
unsigned int change,
- u32 event, gfp_t flags)
+ u32 event, gfp_t flags, int *new_nsid)
{
struct net *net = dev_net(dev);
struct sk_buff *skb;
@@ -2870,7 +2949,8 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
if (skb == NULL)
goto errout;
- err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event);
+ err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event,
+ new_nsid);
if (err < 0) {
/* -EMSGSIZE implies BUG in if_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -2893,14 +2973,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags)
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event,
- gfp_t flags)
+ gfp_t flags, int *new_nsid)
{
struct sk_buff *skb;
if (dev->reg_state != NETREG_REGISTERED)
return;
- skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags);
+ skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid);
if (skb)
rtmsg_ifinfo_send(skb, dev, flags);
}
@@ -2908,10 +2988,17 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev,
void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
gfp_t flags)
{
- rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags);
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL);
}
EXPORT_SYMBOL(rtmsg_ifinfo);
+void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
+ gfp_t flags, int *new_nsid)
+{
+ rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
+ new_nsid);
+}
+
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
u8 *addr, u16 vid, u32 pid, u32 seq,
@@ -3017,21 +3104,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm,
}
EXPORT_SYMBOL(ndo_dflt_fdb_add);
-static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid)
+static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid,
+ struct netlink_ext_ack *extack)
{
u16 vid = 0;
if (vlan_attr) {
if (nla_len(vlan_attr) != sizeof(u16)) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n");
+ NL_SET_ERR_MSG(extack, "invalid vlan attribute size");
return -EINVAL;
}
vid = nla_get_u16(vlan_attr);
if (!vid || vid >= VLAN_VID_MASK) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n",
- vid);
+ NL_SET_ERR_MSG(extack, "invalid vlan id");
return -EINVAL;
}
}
@@ -3056,24 +3143,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3160,24 +3247,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
ndm = nlmsg_data(nlh);
if (ndm->ndm_ifindex == 0) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n");
+ NL_SET_ERR_MSG(extack, "invalid ifindex");
return -EINVAL;
}
dev = __dev_get_by_index(net, ndm->ndm_ifindex);
if (dev == NULL) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) {
- pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n");
+ NL_SET_ERR_MSG(extack, "invalid address");
return -EINVAL;
}
addr = nla_data(tb[NDA_LLADDR]);
- err = fdb_vid_parse(tb[NDA_VLAN], &vid);
+ err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack);
if (err)
return err;
@@ -3617,7 +3704,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3692,7 +3779,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
dev = __dev_get_by_index(net, ifm->ifi_index);
if (!dev) {
- pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n");
+ NL_SET_ERR_MSG(extack, "unknown ifindex");
return -ENODEV;
}
@@ -3943,25 +4030,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
if (!attr)
goto nla_put_failure;
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->fill_stats_af) {
struct nlattr *af;
int err;
af = nla_nest_start(skb, af_ops->family);
- if (!af)
+ if (!af) {
+ rcu_read_unlock();
goto nla_put_failure;
-
+ }
err = af_ops->fill_stats_af(skb, dev);
- if (err == -ENODATA)
+ if (err == -ENODATA) {
nla_nest_cancel(skb, af);
- else if (err < 0)
+ } else if (err < 0) {
+ rcu_read_unlock();
goto nla_put_failure;
+ }
nla_nest_end(skb, af);
}
}
+ rcu_read_unlock();
nla_nest_end(skb, attr);
@@ -4030,7 +4122,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
/* for IFLA_STATS_AF_SPEC */
size += nla_total_size(0);
- list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
if (af_ops->get_stats_af_size) {
size += nla_total_size(
af_ops->get_stats_af_size(dev));
@@ -4039,6 +4132,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
size += nla_total_size(0);
}
}
+ rcu_read_unlock();
}
return size;
@@ -4294,7 +4388,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_CHANGEINFODATA:
case NETDEV_CHANGE_TX_QUEUE_LEN:
rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event),
- GFP_KERNEL);
+ GFP_KERNEL, NULL);
break;
default:
break;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 24656076906d..97e604d55d55 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1354,8 +1354,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
/* Set the tail pointer and length */
skb_put(n, skb->len);
- if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
copy_skb_header(n, skb);
return n;
@@ -1453,8 +1452,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
BUG_ON(nhead < 0);
- if (skb_shared(skb))
- BUG();
+ BUG_ON(skb_shared(skb));
size = SKB_DATA_ALIGN(size);
@@ -1513,6 +1511,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);
+ skb_metadata_clear(skb);
+
/* It is not generally safe to change skb->truesize.
* For the moment, we really care of rx path, or
* when skb is orphaned (not attached to a socket).
@@ -1597,9 +1597,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
head_copy_off = newheadroom - head_copy_len;
/* Copy the linear header and data. */
- if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
- skb->len + head_copy_len))
- BUG();
+ BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off,
+ skb->len + head_copy_len));
copy_skb_header(n, skb);
@@ -1880,8 +1879,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
return NULL;
}
- if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta))
- BUG();
+ BUG_ON(skb_copy_bits(skb, skb_headlen(skb),
+ skb_tail_pointer(skb), delta));
/* Optimization: no fragments, no reasons to preestimate
* size of pulled pages. Superb.
@@ -2852,12 +2851,15 @@ EXPORT_SYMBOL(skb_queue_purge);
*/
void skb_rbtree_purge(struct rb_root *root)
{
- struct sk_buff *skb, *next;
+ struct rb_node *p = rb_first(root);
- rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode)
- kfree_skb(skb);
+ while (p) {
+ struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode);
- *root = RB_ROOT;
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, root);
+ kfree_skb(skb);
+ }
}
/**
@@ -4766,6 +4768,7 @@ EXPORT_SYMBOL(kfree_skb_partial);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
bool *fragstolen, int *delta_truesize)
{
+ struct skb_shared_info *to_shinfo, *from_shinfo;
int i, delta, len = from->len;
*fragstolen = false;
@@ -4780,7 +4783,9 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return true;
}
- if (skb_has_frag_list(to) || skb_has_frag_list(from))
+ to_shinfo = skb_shinfo(to);
+ from_shinfo = skb_shinfo(from);
+ if (to_shinfo->frag_list || from_shinfo->frag_list)
return false;
if (skb_zcopy(to) || skb_zcopy(from))
return false;
@@ -4789,8 +4794,8 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
struct page *page;
unsigned int offset;
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags >= MAX_SKB_FRAGS)
return false;
if (skb_head_is_locked(from))
@@ -4801,12 +4806,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page);
- skb_fill_page_desc(to, skb_shinfo(to)->nr_frags,
+ skb_fill_page_desc(to, to_shinfo->nr_frags,
page, offset, skb_headlen(from));
*fragstolen = true;
} else {
- if (skb_shinfo(to)->nr_frags +
- skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS)
+ if (to_shinfo->nr_frags +
+ from_shinfo->nr_frags > MAX_SKB_FRAGS)
return false;
delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from));
@@ -4814,19 +4819,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
WARN_ON_ONCE(delta < len);
- memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags,
- skb_shinfo(from)->frags,
- skb_shinfo(from)->nr_frags * sizeof(skb_frag_t));
- skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags;
+ memcpy(to_shinfo->frags + to_shinfo->nr_frags,
+ from_shinfo->frags,
+ from_shinfo->nr_frags * sizeof(skb_frag_t));
+ to_shinfo->nr_frags += from_shinfo->nr_frags;
if (!skb_cloned(from))
- skb_shinfo(from)->nr_frags = 0;
+ from_shinfo->nr_frags = 0;
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < skb_shinfo(from)->nr_frags; i++)
- skb_frag_ref(from, i);
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
to->truesize += delta;
to->len += len;
diff --git a/net/core/sock.c b/net/core/sock.c
index 415f441c63b9..759400053110 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2685,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk_init_common(sk);
sk->sk_send_head = NULL;
- init_timer(&sk->sk_timer);
+ timer_setup(&sk->sk_timer, NULL, 0);
sk->sk_allocation = GFP_KERNEL;
sk->sk_rcvbuf = sysctl_rmem_default;
diff --git a/net/dccp/input.c b/net/dccp/input.c
index fa6be9750bb4..d28d46bff6ab 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -534,6 +534,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
case DCCP_PKT_DATA:
if (sk->sk_state == DCCP_RESPOND)
break;
+ /* fall through */
case DCCP_PKT_DATAACK:
case DCCP_PKT_ACK:
/*
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 51cdfc3bd8ca..4e40db017e19 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -227,8 +227,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
* Ack vectors are processed by the TX CCID if it is
* interested. The RX CCID need not parse Ack Vectors,
* since it is only interested in clearing old state.
- * Fall through.
*/
+ /* fall through */
case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
pkt_type, opt, value, len))
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 3a2c34027758..1e35526bf436 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk)
__sk_dst_reset(sk);
}
-static void dccp_write_timer(unsigned long data)
+static void dccp_write_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_retransmit_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
int event = 0;
bh_lock_sock(sk);
@@ -161,19 +162,20 @@ out:
sock_put(sk);
}
-static void dccp_keepalive_timer(unsigned long data)
+static void dccp_keepalive_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
pr_err("dccp should not use a keepalive timer !\n");
sock_put(sk);
}
/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */
-static void dccp_delack_timer(unsigned long data)
+static void dccp_delack_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
- struct inet_connection_sock *icsk = inet_csk(sk);
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_delack_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 73a0399dc7a2..518cea17b811 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -533,10 +533,6 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf
scp->keepalive = 10 * HZ;
scp->keepalive_fxn = dn_keepalive;
- init_timer(&scp->delack_timer);
- scp->delack_pending = 0;
- scp->delack_fxn = dn_nsp_delayed_ack;
-
dn_start_slow_timer(sk);
out:
return sk;
@@ -634,10 +630,12 @@ static void dn_destroy_sock(struct sock *sk)
goto disc_reject;
case DN_RUN:
scp->state = DN_DI;
+ /* fall through */
case DN_DI:
case DN_DR:
disc_reject:
dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
+ /* fall through */
case DN_NC:
case DN_NR:
case DN_RJ:
@@ -651,6 +649,7 @@ disc_reject:
break;
default:
printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
+ /* fall through */
case DN_O:
dn_stop_slow_timer(sk);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 4d339de56862..92dbaa3f1eae 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1038,14 +1038,14 @@ static void dn_eth_down(struct net_device *dev)
static void dn_dev_set_timer(struct net_device *dev);
-static void dn_dev_timer_func(unsigned long arg)
+static void dn_dev_timer_func(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
- struct dn_dev *dn_db;
+ struct dn_dev *dn_db = from_timer(dn_db, t, timer);
+ struct net_device *dev;
struct dn_ifaddr *ifa;
rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
+ dev = dn_db->dev;
if (dn_db->t3 <= dn_db->parms.t2) {
if (dn_db->parms.timer3) {
for (ifa = rcu_dereference(dn_db->ifa_list);
@@ -1070,8 +1070,6 @@ static void dn_dev_set_timer(struct net_device *dev)
if (dn_db->parms.t2 > dn_db->parms.t3)
dn_db->parms.t2 = dn_db->parms.t3;
- dn_db->timer.data = (unsigned long)dev;
- dn_db->timer.function = dn_dev_timer_func;
dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
add_timer(&dn_db->timer);
@@ -1100,7 +1098,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
rcu_assign_pointer(dev->dn_ptr, dn_db);
dn_db->dev = dev;
- init_timer(&dn_db->timer);
+ timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
dn_db->uptime = jiffies;
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 66f035e476ea..e50a4adfcf7e 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -491,17 +491,6 @@ void dn_send_conn_ack (struct sock *sk)
dn_nsp_send(skb);
}
-void dn_nsp_delayed_ack(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->ackxmt_oth != scp->numoth_rcv)
- dn_nsp_send_oth_ack(sk);
-
- if (scp->ackxmt_dat != scp->numdat_rcv)
- dn_nsp_send_data_ack(sk);
-}
-
static int dn_nsp_retrans_conn_conf(struct sock *sk)
{
struct dn_scp *scp = DN_SK(sk);
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 0bd3afd01dd2..bff5ab88cdbb 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -338,7 +338,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
- dst_use(&rth->dst, now);
+ dst_hold_and_use(&rth->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
dst_release_immediate(&rt->dst);
@@ -351,7 +351,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou
rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain);
rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
- dst_use(&rt->dst, now);
+ dst_hold_and_use(&rt->dst, now);
spin_unlock_bh(&dn_rt_hash_table[hash].lock);
*rp = rt;
return 0;
@@ -1258,7 +1258,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *
(flp->flowidn_mark == rt->fld.flowidn_mark) &&
dn_is_output_route(rt) &&
(rt->fld.flowidn_oif == flp->flowidn_oif)) {
- dst_use(&rt->dst, jiffies);
+ dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock_bh();
*pprt = &rt->dst;
return 0;
@@ -1535,7 +1535,7 @@ static int dn_route_input(struct sk_buff *skb)
(rt->fld.flowidn_oif == 0) &&
(rt->fld.flowidn_mark == skb->mark) &&
(rt->fld.flowidn_iif == cb->iif)) {
- dst_use(&rt->dst, jiffies);
+ dst_hold_and_use(&rt->dst, jiffies);
rcu_read_unlock();
skb_dst_set(skb, (struct dst_entry *)rt);
return 0;
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index fcce25da937c..2e7ac8bab19d 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -1,6 +1,6 @@
# the core
obj-$(CONFIG_NET_DSA) += dsa_core.o
-dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o
+dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o
# tagging formats
dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 03c58b0eb082..a3abf7a7b9a2 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -14,6 +14,7 @@
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
+#include <linux/notifier.h>
#include <linux/of.h>
#include <linux/of_mdio.h>
#include <linux/of_platform.h>
@@ -112,34 +113,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol)
return ops;
}
-int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp)
-{
- struct dsa_switch *ds = cpu_dp->ds;
- struct net_device *master;
- struct ethtool_ops *cpu_ops;
-
- master = cpu_dp->netdev;
-
- cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL);
- if (!cpu_ops)
- return -ENOMEM;
-
- memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops,
- sizeof(struct ethtool_ops));
- cpu_dp->orig_ethtool_ops = master->ethtool_ops;
- memcpy(cpu_ops, &cpu_dp->ethtool_ops,
- sizeof(struct ethtool_ops));
- dsa_cpu_port_ethtool_init(cpu_ops);
- master->ethtool_ops = cpu_ops;
-
- return 0;
-}
-
-void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp)
-{
- cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops;
-}
-
void dsa_cpu_dsa_destroy(struct dsa_port *port)
{
struct device_node *port_dn = port->dn;
@@ -188,12 +161,12 @@ EXPORT_SYMBOL_GPL(dsa_dev_to_net_device);
static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *unused)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
struct sk_buff *nskb = NULL;
struct pcpu_sw_netstats *s;
struct dsa_slave_priv *p;
- if (unlikely(dst == NULL)) {
+ if (unlikely(!cpu_dp)) {
kfree_skb(skb);
return 0;
}
@@ -202,7 +175,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
if (!skb)
return 0;
- nskb = dst->rcv(skb, dev, pt);
+ nskb = cpu_dp->rcv(skb, dev, pt);
if (!nskb) {
kfree_skb(skb);
return 0;
@@ -228,7 +201,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
#ifdef CONFIG_PM_SLEEP
static bool dsa_is_port_initialized(struct dsa_switch *ds, int p)
{
- return ds->enabled_port_mask & (1 << p) && ds->ports[p].netdev;
+ return ds->enabled_port_mask & (1 << p) && ds->ports[p].slave;
}
int dsa_switch_suspend(struct dsa_switch *ds)
@@ -240,7 +213,7 @@ int dsa_switch_suspend(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_suspend(ds->ports[i].netdev);
+ ret = dsa_slave_suspend(ds->ports[i].slave);
if (ret)
return ret;
}
@@ -267,7 +240,7 @@ int dsa_switch_resume(struct dsa_switch *ds)
if (!dsa_is_port_initialized(ds, i))
continue;
- ret = dsa_slave_resume(ds->ports[i].netdev);
+ ret = dsa_slave_resume(ds->ports[i].slave);
if (ret)
return ret;
}
@@ -289,6 +262,28 @@ bool dsa_schedule_work(struct work_struct *work)
return queue_work(dsa_owq, work);
}
+static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain);
+
+int register_dsa_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&dsa_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(register_dsa_notifier);
+
+int unregister_dsa_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_unregister(&dsa_notif_chain, nb);
+}
+EXPORT_SYMBOL_GPL(unregister_dsa_notifier);
+
+int call_dsa_notifiers(unsigned long val, struct net_device *dev,
+ struct dsa_notifier_info *info)
+{
+ info->dev = dev;
+ return atomic_notifier_call_chain(&dsa_notif_chain, val, info);
+}
+EXPORT_SYMBOL_GPL(call_dsa_notifiers);
+
static int __init dsa_init_module(void)
{
int rc;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 873af0108e24..9e8b8aab049d 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -279,7 +279,7 @@ static int dsa_user_port_apply(struct dsa_port *port)
if (err) {
dev_warn(ds->dev, "Failed to create slave %d: %d\n",
port->index, err);
- port->netdev = NULL;
+ port->slave = NULL;
return err;
}
@@ -289,7 +289,7 @@ static int dsa_user_port_apply(struct dsa_port *port)
if (err)
return err;
- devlink_port_type_eth_set(&port->devlink_port, port->netdev);
+ devlink_port_type_eth_set(&port->devlink_port, port->slave);
return 0;
}
@@ -297,9 +297,9 @@ static int dsa_user_port_apply(struct dsa_port *port)
static void dsa_user_port_unapply(struct dsa_port *port)
{
devlink_port_unregister(&port->devlink_port);
- if (port->netdev) {
- dsa_slave_destroy(port->netdev);
- port->netdev = NULL;
+ if (port->slave) {
+ dsa_slave_destroy(port->slave);
+ port->slave = NULL;
port->ds->enabled_port_mask &= ~(1 << port->index);
}
}
@@ -336,12 +336,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds)
if (err)
return err;
- if (ds->ops->set_addr) {
- err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr);
- if (err < 0)
- return err;
- }
-
if (!ds->slave_mii_bus && ds->ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus)
@@ -433,18 +427,17 @@ static int dsa_dst_apply(struct dsa_switch_tree *dst)
return err;
}
- if (dst->cpu_dp) {
- err = dsa_cpu_port_ethtool_setup(dst->cpu_dp);
- if (err)
- return err;
- }
-
/* If we use a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point on get
* sent to the tag format's receive function.
*/
wmb();
- dst->cpu_dp->netdev->dsa_ptr = dst;
+ dst->cpu_dp->master->dsa_ptr = dst->cpu_dp;
+
+ err = dsa_master_ethtool_setup(dst->cpu_dp->master);
+ if (err)
+ return err;
+
dst->applied = true;
return 0;
@@ -458,7 +451,9 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
if (!dst->applied)
return;
- dst->cpu_dp->netdev->dsa_ptr = NULL;
+ dsa_master_ethtool_restore(dst->cpu_dp->master);
+
+ dst->cpu_dp->master->dsa_ptr = NULL;
/* If we used a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point get sent
@@ -474,10 +469,7 @@ static void dsa_dst_unapply(struct dsa_switch_tree *dst)
dsa_ds_unapply(dst, ds);
}
- if (dst->cpu_dp) {
- dsa_cpu_port_ethtool_restore(dst->cpu_dp);
- dst->cpu_dp = NULL;
- }
+ dst->cpu_dp = NULL;
pr_info("DSA: tree %d unapplied\n", dst->tree);
dst->applied = false;
@@ -487,6 +479,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
struct dsa_switch_tree *dst,
struct dsa_switch *ds)
{
+ const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
struct net_device *ethernet_dev;
struct device_node *ethernet;
@@ -506,7 +499,7 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
if (!dst->cpu_dp) {
dst->cpu_dp = port;
- dst->cpu_dp->netdev = ethernet_dev;
+ dst->cpu_dp->master = ethernet_dev;
}
/* Initialize cpu_port_mask now for drv->setup()
@@ -516,14 +509,18 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
ds->cpu_port_mask |= BIT(index);
tag_protocol = ds->ops->get_tag_protocol(ds);
- dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
- if (IS_ERR(dst->tag_ops)) {
+ tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+ if (IS_ERR(tag_ops)) {
dev_warn(ds->dev, "No tagger for this switch\n");
ds->cpu_port_mask &= ~BIT(index);
- return PTR_ERR(dst->tag_ops);
+ return PTR_ERR(tag_ops);
}
- dst->rcv = dst->tag_ops->rcv;
+ dst->cpu_dp->tag_ops = tag_ops;
+
+ /* Make a few copies for faster access in master receive hot path */
+ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
+ dst->cpu_dp->dst = dst;
return 0;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 9c3eeb72462d..1e9914062d0b 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -66,7 +66,7 @@ struct dsa_notifier_vlan_info {
};
struct dsa_slave_priv {
- /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */
+ /* Copy of CPU port xmit for faster access in slave transmit hot path */
struct sk_buff * (*xmit)(struct sk_buff *skb,
struct net_device *dev);
@@ -79,7 +79,6 @@ struct dsa_slave_priv {
* The phylib phy_device pointer for the PHY connected
* to this port.
*/
- struct phy_device *phy;
phy_interface_t phy_interface;
int old_link;
int old_pause;
@@ -97,8 +96,6 @@ struct dsa_slave_priv {
int dsa_cpu_dsa_setup(struct dsa_port *port);
void dsa_cpu_dsa_destroy(struct dsa_port *dport);
const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol);
-int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp);
-void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp);
bool dsa_schedule_work(struct work_struct *work);
/* legacy.c */
@@ -112,10 +109,35 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid);
+/* master.c */
+int dsa_master_ethtool_setup(struct net_device *dev);
+void dsa_master_ethtool_restore(struct net_device *dev);
+
+static inline struct net_device *dsa_master_find_slave(struct net_device *dev,
+ int device, int port)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch_tree *dst = cpu_dp->dst;
+ struct dsa_switch *ds;
+
+ if (device < 0 || device >= DSA_MAX_SWITCHES)
+ return NULL;
+
+ ds = dst->ds[device];
+ if (!ds)
+ return NULL;
+
+ if (port < 0 || port >= ds->num_ports)
+ return NULL;
+
+ return ds->ports[port].slave;
+}
+
/* port.c */
int dsa_port_set_state(struct dsa_port *dp, u8 state,
struct switchdev_trans *trans);
-void dsa_port_set_state_now(struct dsa_port *dp, u8 state);
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy);
+void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy);
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
@@ -126,6 +148,7 @@ int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
u16 vid);
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data);
int dsa_port_mdb_add(struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans);
@@ -139,7 +162,6 @@ int dsa_port_vlan_del(struct dsa_port *dp,
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
-void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops);
int dsa_slave_create(struct dsa_port *port, const char *name);
void dsa_slave_destroy(struct net_device *slave_dev);
int dsa_slave_suspend(struct net_device *slave_dev);
@@ -147,6 +169,21 @@ int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
+static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
+ return p->dp;
+}
+
+static inline struct net_device *
+dsa_slave_to_master(const struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ return dp->cpu_dp->master;
+}
+
/* switch.c */
int dsa_switch_register_notifier(struct dsa_switch *ds);
void dsa_switch_unregister_notifier(struct dsa_switch *ds);
@@ -175,14 +212,4 @@ extern const struct dsa_device_ops qca_netdev_ops;
/* tag_trailer.c */
extern const struct dsa_device_ops trailer_netdev_ops;
-static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p)
-{
- return p->dp->cpu_dp->netdev;
-}
-
-static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst)
-{
- return dst->cpu_dp;
-}
-
#endif
diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c
index 91e6f7981d39..b6c88fd33d4f 100644
--- a/net/dsa/legacy.c
+++ b/net/dsa/legacy.c
@@ -120,7 +120,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
return -EINVAL;
}
dst->cpu_dp = &ds->ports[i];
- dst->cpu_dp->netdev = master;
+ dst->cpu_dp->master = master;
ds->cpu_port_mask |= 1 << i;
} else if (!strcmp(name, "dsa")) {
ds->dsa_port_mask |= 1 << i;
@@ -144,14 +144,19 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
* switch.
*/
if (dst->cpu_dp->ds == ds) {
+ const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
tag_protocol = ops->get_tag_protocol(ds);
- dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
- if (IS_ERR(dst->tag_ops))
- return PTR_ERR(dst->tag_ops);
+ tag_ops = dsa_resolve_tag_protocol(tag_protocol);
+ if (IS_ERR(tag_ops))
+ return PTR_ERR(tag_ops);
- dst->rcv = dst->tag_ops->rcv;
+ dst->cpu_dp->tag_ops = tag_ops;
+
+ /* Few copies for faster access in master receive hot path */
+ dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv;
+ dst->cpu_dp->dst = dst;
}
memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable));
@@ -167,12 +172,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
if (ret)
return ret;
- if (ops->set_addr) {
- ret = ops->set_addr(ds, master->dev_addr);
- if (ret < 0)
- return ret;
- }
-
if (!ds->slave_mii_bus && ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
if (!ds->slave_mii_bus)
@@ -206,10 +205,6 @@ static int dsa_switch_setup_one(struct dsa_switch *ds,
netdev_err(master, "[%d] : can't configure CPU and DSA ports\n",
index);
- ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp);
- if (ret)
- return ret;
-
return 0;
}
@@ -266,10 +261,10 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
if (!(ds->enabled_port_mask & (1 << port)))
continue;
- if (!ds->ports[port].netdev)
+ if (!ds->ports[port].slave)
continue;
- dsa_slave_destroy(ds->ports[port].netdev);
+ dsa_slave_destroy(ds->ports[port].slave);
}
/* Disable configuration of the CPU and DSA ports */
@@ -604,9 +599,9 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev,
* sent to the tag format's receive function.
*/
wmb();
- dev->dsa_ptr = dst;
+ dev->dsa_ptr = dst->cpu_dp;
- return 0;
+ return dsa_master_ethtool_setup(dst->cpu_dp->master);
}
static int dsa_probe(struct platform_device *pdev)
@@ -671,7 +666,9 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
{
int i;
- dst->cpu_dp->netdev->dsa_ptr = NULL;
+ dsa_master_ethtool_restore(dst->cpu_dp->master);
+
+ dst->cpu_dp->master->dsa_ptr = NULL;
/* If we used a tagging format that doesn't have an ethertype
* field, make sure that all packets from this point get sent
@@ -686,9 +683,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst)
dsa_switch_destroy(ds);
}
- dsa_cpu_port_ethtool_restore(dst->cpu_dp);
-
- dev_put(dst->cpu_dp->netdev);
+ dev_put(dst->cpu_dp->master);
}
static int dsa_remove(struct platform_device *pdev)
@@ -745,8 +740,7 @@ int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
const unsigned char *addr, u16 vid,
u16 flags)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
return dsa_port_fdb_add(dp, addr, vid);
}
@@ -755,8 +749,7 @@ int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
return dsa_port_fdb_del(dp, addr, vid);
}
diff --git a/net/dsa/master.c b/net/dsa/master.c
new file mode 100644
index 000000000000..5f3f57e372e0
--- /dev/null
+++ b/net/dsa/master.c
@@ -0,0 +1,117 @@
+/*
+ * Handling of a master device, switching frames via its switch fabric CPU port
+ *
+ * Copyright (c) 2017 Savoir-faire Linux Inc.
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include "dsa_priv.h"
+
+static void dsa_master_get_ethtool_stats(struct net_device *dev,
+ struct ethtool_stats *stats,
+ uint64_t *data)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int port = cpu_dp->index;
+ int count = 0;
+
+ if (ops && ops->get_sset_count && ops->get_ethtool_stats) {
+ count = ops->get_sset_count(dev, ETH_SS_STATS);
+ ops->get_ethtool_stats(dev, stats, data);
+ }
+
+ if (ds->ops->get_ethtool_stats)
+ ds->ops->get_ethtool_stats(ds, port, data + count);
+}
+
+static int dsa_master_get_sset_count(struct net_device *dev, int sset)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int count = 0;
+
+ if (ops && ops->get_sset_count)
+ count += ops->get_sset_count(dev, sset);
+
+ if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
+ count += ds->ops->get_sset_count(ds);
+
+ return count;
+}
+
+static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
+ uint8_t *data)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops;
+ struct dsa_switch *ds = cpu_dp->ds;
+ int port = cpu_dp->index;
+ int len = ETH_GSTRING_LEN;
+ int mcount = 0, count;
+ unsigned int i;
+ uint8_t pfx[4];
+ uint8_t *ndata;
+
+ snprintf(pfx, sizeof(pfx), "p%.2d", port);
+ /* We do not want to be NULL-terminated, since this is a prefix */
+ pfx[sizeof(pfx) - 1] = '_';
+
+ if (ops && ops->get_sset_count && ops->get_strings) {
+ mcount = ops->get_sset_count(dev, ETH_SS_STATS);
+ ops->get_strings(dev, stringset, data);
+ }
+
+ if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
+ ndata = data + mcount * len;
+ /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
+ * the output after to prepend our CPU port prefix we
+ * constructed earlier
+ */
+ ds->ops->get_strings(ds, port, ndata);
+ count = ds->ops->get_sset_count(ds);
+ for (i = 0; i < count; i++) {
+ memmove(ndata + (i * len + sizeof(pfx)),
+ ndata + i * len, len - sizeof(pfx));
+ memcpy(ndata + i * len, pfx, sizeof(pfx));
+ }
+ }
+}
+
+int dsa_master_ethtool_setup(struct net_device *dev)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch *ds = cpu_dp->ds;
+ struct ethtool_ops *ops;
+
+ ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL);
+ if (!ops)
+ return -ENOMEM;
+
+ cpu_dp->orig_ethtool_ops = dev->ethtool_ops;
+ if (cpu_dp->orig_ethtool_ops)
+ memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops));
+
+ ops->get_sset_count = dsa_master_get_sset_count;
+ ops->get_ethtool_stats = dsa_master_get_ethtool_stats;
+ ops->get_strings = dsa_master_get_strings;
+
+ dev->ethtool_ops = ops;
+
+ return 0;
+}
+
+void dsa_master_ethtool_restore(struct net_device *dev)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+
+ dev->ethtool_ops = cpu_dp->orig_ethtool_ops;
+ cpu_dp->orig_ethtool_ops = NULL;
+}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 659676ba3f8b..72c8dbd3d3f2 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -56,7 +56,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state,
return 0;
}
-void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
+static void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
{
int err;
@@ -65,6 +65,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state)
pr_err("DSA: failed to set STP state %u (%d)\n", state, err);
}
+int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy)
+{
+ u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+ int err;
+
+ if (ds->ops->port_enable) {
+ err = ds->ops->port_enable(ds, port, phy);
+ if (err)
+ return err;
+ }
+
+ dsa_port_set_state_now(dp, stp_state);
+
+ return 0;
+}
+
+void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ dsa_port_set_state_now(dp, BR_STATE_DISABLED);
+
+ if (ds->ops->port_disable)
+ ds->ops->port_disable(ds, port, phy);
+}
+
int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
{
struct dsa_notifier_bridge_info info = {
@@ -173,6 +202,17 @@ int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr,
return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info);
}
+int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data)
+{
+ struct dsa_switch *ds = dp->ds;
+ int port = dp->index;
+
+ if (!ds->ops->port_fdb_dump)
+ return -EOPNOTSUPP;
+
+ return ds->ops->port_fdb_dump(ds, port, cb, data);
+}
+
int dsa_port_mdb_add(struct dsa_port *dp,
const struct switchdev_obj_port_mdb *mdb,
struct switchdev_trans *trans)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 865e29e62bad..d0ae7010ea45 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -64,18 +64,13 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
/* slave device handling ****************************************************/
static int dsa_slave_get_iflink(const struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- return dsa_master_netdev(p)->ifindex;
+ return dsa_slave_to_master(dev)->ifindex;
}
static int dsa_slave_open(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- struct dsa_switch *ds = dp->ds;
- struct net_device *master = dsa_master_netdev(p);
- u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING;
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
if (!(master->flags & IFF_UP))
@@ -98,16 +93,12 @@ static int dsa_slave_open(struct net_device *dev)
goto clear_allmulti;
}
- if (ds->ops->port_enable) {
- err = ds->ops->port_enable(ds, p->dp->index, p->phy);
- if (err)
- goto clear_promisc;
- }
-
- dsa_port_set_state_now(p->dp, stp_state);
+ err = dsa_port_enable(dp, dev->phydev);
+ if (err)
+ goto clear_promisc;
- if (p->phy)
- phy_start(p->phy);
+ if (dev->phydev)
+ phy_start(dev->phydev);
return 0;
@@ -126,12 +117,13 @@ out:
static int dsa_slave_close(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
- struct dsa_switch *ds = p->dp->ds;
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+
+ if (dev->phydev)
+ phy_stop(dev->phydev);
- if (p->phy)
- phy_stop(p->phy);
+ dsa_port_disable(dp, dev->phydev);
dev_mc_unsync(master, dev);
dev_uc_unsync(master, dev);
@@ -143,18 +135,12 @@ static int dsa_slave_close(struct net_device *dev)
if (!ether_addr_equal(dev->dev_addr, master->dev_addr))
dev_uc_del(master, dev->dev_addr);
- if (ds->ops->port_disable)
- ds->ops->port_disable(ds, p->dp->index, p->phy);
-
- dsa_port_set_state_now(p->dp, BR_STATE_DISABLED);
-
return 0;
}
static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
if (change & IFF_ALLMULTI)
dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -164,8 +150,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
static void dsa_slave_set_rx_mode(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
dev_mc_sync(master, dev);
dev_uc_sync(master, dev);
@@ -173,8 +158,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
+ struct net_device *master = dsa_slave_to_master(dev);
struct sockaddr *addr = a;
int err;
@@ -255,43 +239,34 @@ dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev, struct net_device *filter_dev,
int *idx)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_dump_ctx dump = {
.dev = dev,
.skb = skb,
.cb = cb,
.idx = *idx,
};
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
- struct dsa_switch *ds = dp->ds;
int err;
- if (!ds->ops->port_fdb_dump)
- return -EOPNOTSUPP;
-
- err = ds->ops->port_fdb_dump(ds, dp->index,
- dsa_slave_port_fdb_do_dump,
- &dump);
+ err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump);
*idx = dump.idx;
+
return err;
}
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (p->phy != NULL)
- return phy_mii_ioctl(p->phy, ifr, cmd);
+ if (!dev->phydev)
+ return -ENODEV;
- return -EOPNOTSUPP;
+ return phy_mii_ioctl(dev->phydev, ifr, cmd);
}
static int dsa_slave_port_attr_set(struct net_device *dev,
const struct switchdev_attr *attr,
struct switchdev_trans *trans)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int ret;
switch (attr->id) {
@@ -317,8 +292,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
const struct switchdev_obj *obj,
struct switchdev_trans *trans)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
/* For the prepare phase, ensure the full set of changes is feasable in
@@ -345,8 +319,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
static int dsa_slave_port_obj_del(struct net_device *dev,
const struct switchdev_obj *obj)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
switch (obj->id) {
@@ -367,8 +340,8 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
static int dsa_slave_port_attr_get(struct net_device *dev,
struct switchdev_attr *attr)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
@@ -385,10 +358,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
return 0;
}
-static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
- struct sk_buff *skb)
+static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
+ struct sk_buff *skb)
{
#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct dsa_slave_priv *p = netdev_priv(dev);
+
if (p->netpoll)
netpoll_send_skb(p->netpoll, skb);
#else
@@ -422,43 +397,18 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
* tag to be successfully transmitted
*/
if (unlikely(netpoll_tx_running(dev)))
- return dsa_netpoll_send_skb(p, nskb);
+ return dsa_slave_netpoll_send_skb(dev, nskb);
/* Queue the SKB for transmission on the parent interface, but
* do not modify its EtherType
*/
- nskb->dev = dsa_master_netdev(p);
+ nskb->dev = dsa_slave_to_master(dev);
dev_queue_xmit(nskb);
return NETDEV_TX_OK;
}
/* ethtool operations *******************************************************/
-static int
-dsa_slave_get_link_ksettings(struct net_device *dev,
- struct ethtool_link_ksettings *cmd)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (!p->phy)
- return -EOPNOTSUPP;
-
- phy_ethtool_ksettings_get(p->phy, cmd);
-
- return 0;
-}
-
-static int
-dsa_slave_set_link_ksettings(struct net_device *dev,
- const struct ethtool_link_ksettings *cmd)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (p->phy != NULL)
- return phy_ethtool_ksettings_set(p->phy, cmd);
-
- return -EOPNOTSUPP;
-}
static void dsa_slave_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
@@ -470,11 +420,11 @@ static void dsa_slave_get_drvinfo(struct net_device *dev,
static int dsa_slave_get_regs_len(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_regs_len)
- return ds->ops->get_regs_len(ds, p->dp->index);
+ return ds->ops->get_regs_len(ds, dp->index);
return -EOPNOTSUPP;
}
@@ -482,39 +432,27 @@ static int dsa_slave_get_regs_len(struct net_device *dev)
static void
dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_regs)
- ds->ops->get_regs(ds, p->dp->index, regs, _p);
-}
-
-static int dsa_slave_nway_reset(struct net_device *dev)
-{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- if (p->phy != NULL)
- return genphy_restart_aneg(p->phy);
-
- return -EOPNOTSUPP;
+ ds->ops->get_regs(ds, dp->index, regs, _p);
}
static u32 dsa_slave_get_link(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ if (!dev->phydev)
+ return -ENODEV;
- if (p->phy != NULL) {
- genphy_update_link(p->phy);
- return p->phy->link;
- }
+ genphy_update_link(dev->phydev);
- return -EOPNOTSUPP;
+ return dev->phydev->link;
}
static int dsa_slave_get_eeprom_len(struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->cd && ds->cd->eeprom_len)
return ds->cd->eeprom_len;
@@ -528,8 +466,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev)
static int dsa_slave_get_eeprom(struct net_device *dev,
struct ethtool_eeprom *eeprom, u8 *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_eeprom)
return ds->ops->get_eeprom(ds, eeprom, data);
@@ -540,8 +478,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev,
static int dsa_slave_set_eeprom(struct net_device *dev,
struct ethtool_eeprom *eeprom, u8 *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->set_eeprom)
return ds->ops->set_eeprom(ds, eeprom, data);
@@ -552,8 +490,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev,
static void dsa_slave_get_strings(struct net_device *dev,
uint32_t stringset, uint8_t *data)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (stringset == ETH_SS_STATS) {
int len = ETH_GSTRING_LEN;
@@ -563,80 +501,7 @@ static void dsa_slave_get_strings(struct net_device *dev,
strncpy(data + 2 * len, "rx_packets", len);
strncpy(data + 3 * len, "rx_bytes", len);
if (ds->ops->get_strings)
- ds->ops->get_strings(ds, p->dp->index, data + 4 * len);
- }
-}
-
-static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev,
- struct ethtool_stats *stats,
- uint64_t *data)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- s8 cpu_port = cpu_dp->index;
- int count = 0;
-
- if (cpu_dp->ethtool_ops.get_sset_count) {
- count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
- cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data);
- }
-
- if (ds->ops->get_ethtool_stats)
- ds->ops->get_ethtool_stats(ds, cpu_port, data + count);
-}
-
-static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- int count = 0;
-
- if (cpu_dp->ethtool_ops.get_sset_count)
- count += cpu_dp->ethtool_ops.get_sset_count(dev, sset);
-
- if (sset == ETH_SS_STATS && ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds);
-
- return count;
-}
-
-static void dsa_cpu_port_get_strings(struct net_device *dev,
- uint32_t stringset, uint8_t *data)
-{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
- s8 cpu_port = cpu_dp->index;
- int len = ETH_GSTRING_LEN;
- int mcount = 0, count;
- unsigned int i;
- uint8_t pfx[4];
- uint8_t *ndata;
-
- snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port);
- /* We do not want to be NULL-terminated, since this is a prefix */
- pfx[sizeof(pfx) - 1] = '_';
-
- if (cpu_dp->ethtool_ops.get_sset_count) {
- mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS);
- cpu_dp->ethtool_ops.get_strings(dev, stringset, data);
- }
-
- if (stringset == ETH_SS_STATS && ds->ops->get_strings) {
- ndata = data + mcount * len;
- /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle
- * the output after to prepend our CPU port prefix we
- * constructed earlier
- */
- ds->ops->get_strings(ds, cpu_port, ndata);
- count = ds->ops->get_sset_count(ds);
- for (i = 0; i < count; i++) {
- memmove(ndata + (i * len + sizeof(pfx)),
- ndata + i * len, len - sizeof(pfx));
- memcpy(ndata + i * len, pfx, sizeof(pfx));
- }
+ ds->ops->get_strings(ds, dp->index, data + 4 * len);
}
}
@@ -644,8 +509,9 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats,
uint64_t *data)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
struct pcpu_sw_netstats *s;
unsigned int start;
int i;
@@ -667,13 +533,13 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev,
data[3] += rx_bytes;
}
if (ds->ops->get_ethtool_stats)
- ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4);
+ ds->ops->get_ethtool_stats(ds, dp->index, data + 4);
}
static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (sset == ETH_SS_STATS) {
int count;
@@ -690,77 +556,77 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (ds->ops->get_wol)
- ds->ops->get_wol(ds, p->dp->index, w);
+ ds->ops->get_wol(ds, dp->index, w);
}
static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret = -EOPNOTSUPP;
if (ds->ops->set_wol)
- ret = ds->ops->set_wol(ds, p->dp->index, w);
+ ret = ds->ops->set_wol(ds, dp->index, w);
return ret;
}
static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret;
/* Port's PHY and MAC both need to be EEE capable */
- if (!p->phy)
+ if (!dev->phydev)
return -ENODEV;
if (!ds->ops->set_mac_eee)
return -EOPNOTSUPP;
- ret = ds->ops->set_mac_eee(ds, p->dp->index, e);
+ ret = ds->ops->set_mac_eee(ds, dp->index, e);
if (ret)
return ret;
if (e->eee_enabled) {
- ret = phy_init_eee(p->phy, 0);
+ ret = phy_init_eee(dev->phydev, 0);
if (ret)
return ret;
}
- return phy_ethtool_set_eee(p->phy, e);
+ return phy_ethtool_set_eee(dev->phydev, e);
}
static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
int ret;
/* Port's PHY and MAC both need to be EEE capable */
- if (!p->phy)
+ if (!dev->phydev)
return -ENODEV;
if (!ds->ops->get_mac_eee)
return -EOPNOTSUPP;
- ret = ds->ops->get_mac_eee(ds, p->dp->index, e);
+ ret = ds->ops->get_mac_eee(ds, dp->index, e);
if (ret)
return ret;
- return phy_ethtool_get_eee(p->phy, e);
+ return phy_ethtool_get_eee(dev->phydev, e);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
static int dsa_slave_netpoll_setup(struct net_device *dev,
struct netpoll_info *ni)
{
+ struct net_device *master = dsa_slave_to_master(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct net_device *master = dsa_master_netdev(p);
struct netpoll *netpoll;
int err = 0;
@@ -800,18 +666,18 @@ static void dsa_slave_poll_controller(struct net_device *dev)
static int dsa_slave_get_phys_port_name(struct net_device *dev,
char *name, size_t len)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
- if (snprintf(name, len, "p%d", p->dp->index) >= len)
+ if (snprintf(name, len, "p%d", dp->index) >= len)
return -EINVAL;
return 0;
}
static struct dsa_mall_tc_entry *
-dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p,
- unsigned long cookie)
+dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie)
{
+ struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list)
@@ -825,14 +691,15 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
struct tc_cls_matchall_offload *cls,
bool ingress)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
__be16 protocol = cls->common.protocol;
- struct dsa_switch *ds = p->dp->ds;
struct net *net = dev_net(dev);
- struct dsa_slave_priv *to_p;
+ struct dsa_switch *ds = dp->ds;
struct net_device *to_dev;
const struct tc_action *a;
+ struct dsa_port *to_dp;
int err = -EOPNOTSUPP;
LIST_HEAD(actions);
int ifindex;
@@ -865,13 +732,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
mall_tc_entry->type = DSA_PORT_MALL_MIRROR;
mirror = &mall_tc_entry->mirror;
- to_p = netdev_priv(to_dev);
+ to_dp = dsa_slave_to_port(to_dev);
- mirror->to_local_port = to_p->dp->index;
+ mirror->to_local_port = to_dp->index;
mirror->ingress = ingress;
- err = ds->ops->port_mirror_add(ds, p->dp->index, mirror,
- ingress);
+ err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress);
if (err) {
kfree(mall_tc_entry);
return err;
@@ -886,14 +752,14 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev,
static void dsa_slave_del_cls_matchall(struct net_device *dev,
struct tc_cls_matchall_offload *cls)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_mall_tc_entry *mall_tc_entry;
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->port_mirror_del)
return;
- mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie);
+ mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie);
if (!mall_tc_entry)
return;
@@ -901,8 +767,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
switch (mall_tc_entry->type) {
case DSA_PORT_MALL_MIRROR:
- ds->ops->port_mirror_del(ds, p->dp->index,
- &mall_tc_entry->mirror);
+ ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror);
break;
default:
WARN_ON(1);
@@ -912,17 +777,9 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev,
}
static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
- struct tc_cls_matchall_offload *cls)
+ struct tc_cls_matchall_offload *cls,
+ bool ingress)
{
- bool ingress;
-
- if (is_classid_clsact_ingress(cls->common.classid))
- ingress = true;
- else if (is_classid_clsact_egress(cls->common.classid))
- ingress = false;
- else
- return -EOPNOTSUPP;
-
if (cls->common.chain_index)
return -EOPNOTSUPP;
@@ -937,12 +794,60 @@ static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev,
}
}
+static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+ void *cb_priv, bool ingress)
+{
+ struct net_device *dev = cb_priv;
+
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true);
+}
+
+static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type,
+ void *type_data, void *cb_priv)
+{
+ return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false);
+}
+
+static int dsa_slave_setup_tc_block(struct net_device *dev,
+ struct tc_block_offload *f)
+{
+ tc_setup_cb_t *cb;
+
+ if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+ cb = dsa_slave_setup_tc_block_cb_ig;
+ else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+ cb = dsa_slave_setup_tc_block_cb_eg;
+ else
+ return -EOPNOTSUPP;
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, cb, dev, dev);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, cb, dev);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
void *type_data)
{
switch (type) {
- case TC_SETUP_CLSMATCHALL:
- return dsa_slave_setup_tc_cls_matchall(dev, type_data);
+ case TC_SETUP_BLOCK:
+ return dsa_slave_setup_tc_block(dev, type_data);
default:
return -EOPNOTSUPP;
}
@@ -976,42 +881,35 @@ static void dsa_slave_get_stats64(struct net_device *dev,
}
}
-void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops)
-{
- ops->get_sset_count = dsa_cpu_port_get_sset_count;
- ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats;
- ops->get_strings = dsa_cpu_port_get_strings;
-}
-
static int dsa_slave_get_rxnfc(struct net_device *dev,
struct ethtool_rxnfc *nfc, u32 *rule_locs)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->get_rxnfc)
return -EOPNOTSUPP;
- return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs);
+ return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs);
}
static int dsa_slave_set_rxnfc(struct net_device *dev,
struct ethtool_rxnfc *nfc)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
if (!ds->ops->set_rxnfc)
return -EOPNOTSUPP;
- return ds->ops->set_rxnfc(ds, p->dp->index, nfc);
+ return ds->ops->set_rxnfc(ds, dp->index, nfc);
}
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_drvinfo = dsa_slave_get_drvinfo,
.get_regs_len = dsa_slave_get_regs_len,
.get_regs = dsa_slave_get_regs,
- .nway_reset = dsa_slave_nway_reset,
+ .nway_reset = phy_ethtool_nway_reset,
.get_link = dsa_slave_get_link,
.get_eeprom_len = dsa_slave_get_eeprom_len,
.get_eeprom = dsa_slave_get_eeprom,
@@ -1023,8 +921,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_wol = dsa_slave_get_wol,
.set_eee = dsa_slave_set_eee,
.get_eee = dsa_slave_get_eee,
- .get_link_ksettings = dsa_slave_get_link_ksettings,
- .set_link_ksettings = dsa_slave_set_link_ksettings,
+ .get_link_ksettings = phy_ethtool_get_link_ksettings,
+ .set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_rxnfc = dsa_slave_get_rxnfc,
.set_rxnfc = dsa_slave_set_rxnfc,
};
@@ -1064,78 +962,81 @@ static struct device_type dsa_type = {
static void dsa_slave_adjust_link(struct net_device *dev)
{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_switch *ds = dp->ds;
unsigned int status_changed = 0;
- if (p->old_link != p->phy->link) {
+ if (p->old_link != dev->phydev->link) {
status_changed = 1;
- p->old_link = p->phy->link;
+ p->old_link = dev->phydev->link;
}
- if (p->old_duplex != p->phy->duplex) {
+ if (p->old_duplex != dev->phydev->duplex) {
status_changed = 1;
- p->old_duplex = p->phy->duplex;
+ p->old_duplex = dev->phydev->duplex;
}
- if (p->old_pause != p->phy->pause) {
+ if (p->old_pause != dev->phydev->pause) {
status_changed = 1;
- p->old_pause = p->phy->pause;
+ p->old_pause = dev->phydev->pause;
}
if (ds->ops->adjust_link && status_changed)
- ds->ops->adjust_link(ds, p->dp->index, p->phy);
+ ds->ops->adjust_link(ds, dp->index, dev->phydev);
if (status_changed)
- phy_print_status(p->phy);
+ phy_print_status(dev->phydev);
}
static int dsa_slave_fixed_link_update(struct net_device *dev,
struct fixed_phy_status *status)
{
- struct dsa_slave_priv *p;
struct dsa_switch *ds;
+ struct dsa_port *dp;
if (dev) {
- p = netdev_priv(dev);
- ds = p->dp->ds;
+ dp = dsa_slave_to_port(dev);
+ ds = dp->ds;
if (ds->ops->fixed_link_update)
- ds->ops->fixed_link_update(ds, p->dp->index, status);
+ ds->ops->fixed_link_update(ds, dp->index, status);
}
return 0;
}
/* slave device setup *******************************************************/
-static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
- struct net_device *slave_dev,
- int addr)
+static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr)
{
- struct dsa_switch *ds = p->dp->ds;
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ struct dsa_switch *ds = dp->ds;
- p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr);
- if (!p->phy) {
+ slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr);
+ if (!slave_dev->phydev) {
netdev_err(slave_dev, "no phy at %d\n", addr);
return -ENODEV;
}
/* Use already configured phy mode */
if (p->phy_interface == PHY_INTERFACE_MODE_NA)
- p->phy_interface = p->phy->interface;
- return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link,
- p->phy_interface);
+ p->phy_interface = slave_dev->phydev->interface;
+
+ return phy_connect_direct(slave_dev, slave_dev->phydev,
+ dsa_slave_adjust_link, p->phy_interface);
}
-static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
- struct net_device *slave_dev)
+static int dsa_slave_phy_setup(struct net_device *slave_dev)
{
- struct dsa_switch *ds = p->dp->ds;
- struct device_node *phy_dn, *port_dn;
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
+ struct dsa_slave_priv *p = netdev_priv(slave_dev);
+ struct device_node *port_dn = dp->dn;
+ struct dsa_switch *ds = dp->ds;
+ struct device_node *phy_dn;
bool phy_is_fixed = false;
u32 phy_flags = 0;
int mode, ret;
- port_dn = p->dp->dn;
mode = of_get_phy_mode(port_dn);
if (mode < 0)
mode = PHY_INTERFACE_MODE_NA;
@@ -1156,7 +1057,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
}
if (ds->ops->get_phy_flags)
- phy_flags = ds->ops->get_phy_flags(ds, p->dp->index);
+ phy_flags = ds->ops->get_phy_flags(ds, dp->index);
if (phy_dn) {
int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
@@ -1168,40 +1069,41 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
*/
if (!phy_is_fixed && phy_id >= 0 &&
(ds->phys_mii_mask & (1 << phy_id))) {
- ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
+ ret = dsa_slave_phy_connect(slave_dev, phy_id);
if (ret) {
netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
of_node_put(phy_dn);
return ret;
}
} else {
- p->phy = of_phy_connect(slave_dev, phy_dn,
- dsa_slave_adjust_link,
- phy_flags,
- p->phy_interface);
+ slave_dev->phydev = of_phy_connect(slave_dev, phy_dn,
+ dsa_slave_adjust_link,
+ phy_flags,
+ p->phy_interface);
}
of_node_put(phy_dn);
}
- if (p->phy && phy_is_fixed)
- fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update);
+ if (slave_dev->phydev && phy_is_fixed)
+ fixed_phy_set_link_update(slave_dev->phydev,
+ dsa_slave_fixed_link_update);
/* We could not connect to a designated PHY, so use the switch internal
* MDIO bus instead
*/
- if (!p->phy) {
- ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index);
+ if (!slave_dev->phydev) {
+ ret = dsa_slave_phy_connect(slave_dev, dp->index);
if (ret) {
netdev_err(slave_dev, "failed to connect to port %d: %d\n",
- p->dp->index, ret);
+ dp->index, ret);
if (phy_is_fixed)
of_phy_deregister_fixed_link(port_dn);
return ret;
}
}
- phy_attached_info(p->phy);
+ phy_attached_info(slave_dev->phydev);
return 0;
}
@@ -1221,12 +1123,12 @@ int dsa_slave_suspend(struct net_device *slave_dev)
netif_device_detach(slave_dev);
- if (p->phy) {
- phy_stop(p->phy);
+ if (slave_dev->phydev) {
+ phy_stop(slave_dev->phydev);
p->old_pause = -1;
p->old_link = -1;
p->old_duplex = -1;
- phy_suspend(p->phy);
+ phy_suspend(slave_dev->phydev);
}
return 0;
@@ -1234,31 +1136,39 @@ int dsa_slave_suspend(struct net_device *slave_dev)
int dsa_slave_resume(struct net_device *slave_dev)
{
- struct dsa_slave_priv *p = netdev_priv(slave_dev);
-
netif_device_attach(slave_dev);
- if (p->phy) {
- phy_resume(p->phy);
- phy_start(p->phy);
+ if (slave_dev->phydev) {
+ phy_resume(slave_dev->phydev);
+ phy_start(slave_dev->phydev);
}
return 0;
}
+static void dsa_slave_notify(struct net_device *dev, unsigned long val)
+{
+ struct net_device *master = dsa_slave_to_master(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_notifier_register_info rinfo = {
+ .switch_number = dp->ds->index,
+ .port_number = dp->index,
+ .master = master,
+ .info.dev = dev,
+ };
+
+ call_dsa_notifiers(val, dev, &rinfo.info);
+}
+
int dsa_slave_create(struct dsa_port *port, const char *name)
{
+ struct dsa_port *cpu_dp = port->cpu_dp;
+ struct net_device *master = cpu_dp->master;
struct dsa_switch *ds = port->ds;
- struct dsa_switch_tree *dst = ds->dst;
- struct net_device *master;
struct net_device *slave_dev;
struct dsa_slave_priv *p;
- struct dsa_port *cpu_dp;
int ret;
- cpu_dp = ds->dst->cpu_dp;
- master = cpu_dp->netdev;
-
if (!ds->num_tx_queues)
ds->num_tx_queues = 1;
@@ -1294,22 +1204,24 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
}
p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
- p->xmit = dst->tag_ops->xmit;
+ p->xmit = cpu_dp->tag_ops->xmit;
p->old_pause = -1;
p->old_link = -1;
p->old_duplex = -1;
- port->netdev = slave_dev;
+ port->slave = slave_dev;
netif_carrier_off(slave_dev);
- ret = dsa_slave_phy_setup(p, slave_dev);
+ ret = dsa_slave_phy_setup(slave_dev);
if (ret) {
netdev_err(master, "error %d setting up slave phy\n", ret);
goto out_free;
}
+ dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
+
ret = register_netdev(slave_dev);
if (ret) {
netdev_err(master, "error %d registering interface %s\n",
@@ -1320,30 +1232,30 @@ int dsa_slave_create(struct dsa_port *port, const char *name)
return 0;
out_phy:
- phy_disconnect(p->phy);
- if (of_phy_is_fixed_link(p->dp->dn))
- of_phy_deregister_fixed_link(p->dp->dn);
+ phy_disconnect(slave_dev->phydev);
+ if (of_phy_is_fixed_link(port->dn))
+ of_phy_deregister_fixed_link(port->dn);
out_free:
free_percpu(p->stats64);
free_netdev(slave_dev);
- port->netdev = NULL;
+ port->slave = NULL;
return ret;
}
void dsa_slave_destroy(struct net_device *slave_dev)
{
+ struct dsa_port *dp = dsa_slave_to_port(slave_dev);
struct dsa_slave_priv *p = netdev_priv(slave_dev);
- struct device_node *port_dn;
-
- port_dn = p->dp->dn;
+ struct device_node *port_dn = dp->dn;
netif_carrier_off(slave_dev);
- if (p->phy) {
- phy_disconnect(p->phy);
+ if (slave_dev->phydev) {
+ phy_disconnect(slave_dev->phydev);
if (of_phy_is_fixed_link(port_dn))
of_phy_deregister_fixed_link(port_dn);
}
+ dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
unregister_netdev(slave_dev);
free_percpu(p->stats64);
free_netdev(slave_dev);
@@ -1357,8 +1269,7 @@ static bool dsa_slave_dev_check(struct net_device *dev)
static int dsa_slave_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
- struct dsa_port *dp = p->dp;
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err = NOTIFY_DONE;
if (netif_is_bridge_master(info->upper_dev)) {
@@ -1379,7 +1290,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- if (dev->netdev_ops != &dsa_slave_netdev_ops)
+ if (!dsa_slave_dev_check(dev))
return NOTIFY_DONE;
if (event == NETDEV_CHANGEUPPER)
@@ -1401,14 +1312,14 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
container_of(work, struct dsa_switchdev_event_work, work);
struct net_device *dev = switchdev_work->dev;
struct switchdev_notifier_fdb_info *fdb_info;
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
int err;
rtnl_lock();
switch (switchdev_work->event) {
case SWITCHDEV_FDB_ADD_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
- err = dsa_port_fdb_add(p->dp, fdb_info->addr, fdb_info->vid);
+ err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid);
if (err) {
netdev_dbg(dev, "fdb add failed err=%d\n", err);
break;
@@ -1419,7 +1330,7 @@ static void dsa_slave_switchdev_event_work(struct work_struct *work)
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
- err = dsa_port_fdb_del(p->dp, fdb_info->addr, fdb_info->vid);
+ err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid);
if (err) {
netdev_dbg(dev, "fdb del failed err=%d\n", err);
dev_close(dev);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index dbb016434ace..9e082bae3cb0 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -61,7 +61,7 @@
static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u16 queue = skb_get_queue_mapping(skb);
u8 *brcm_tag;
@@ -82,9 +82,14 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT);
brcm_tag[1] = 0;
brcm_tag[2] = 0;
- if (p->dp->index == 8)
+ if (dp->index == 8)
brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
- brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK;
+ brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK;
+
+ /* Now tell the master network device about the desired output queue
+ * as well
+ */
+ skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue));
return skb;
}
@@ -92,9 +97,6 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev
static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
int source_port;
u8 *brcm_tag;
@@ -117,8 +119,8 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
/* Locate which port this is coming from */
source_port = brcm_tag[3] & BRCM_EG_PID_MASK;
- /* Validate port against switch setup, either the port is totally */
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
/* Remove Broadcom tag and update checksum */
@@ -129,8 +131,6 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->data - ETH_HLEN - BRCM_TAG_LEN,
2 * ETH_ALEN);
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index fbf9ca954773..dbbcdafed8c3 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -18,7 +18,7 @@
static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *dsa_header;
/*
@@ -34,8 +34,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
* Construct tagged FROM_CPU DSA tag from 802.1q tag.
*/
dsa_header = skb->data + 2 * ETH_ALEN;
- dsa_header[0] = 0x60 | p->dp->ds->index;
- dsa_header[1] = p->dp->index << 3;
+ dsa_header[0] = 0x60 | dp->ds->index;
+ dsa_header[1] = dp->index << 3;
/*
* Move CFI field from byte 2 to byte 1.
@@ -55,8 +55,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
* Construct untagged FROM_CPU DSA tag.
*/
dsa_header = skb->data + 2 * ETH_ALEN;
- dsa_header[0] = 0x40 | p->dp->ds->index;
- dsa_header[1] = p->dp->index << 3;
+ dsa_header[0] = 0x40 | dp->ds->index;
+ dsa_header[1] = dp->index << 3;
dsa_header[2] = 0x00;
dsa_header[3] = 0x00;
}
@@ -67,8 +67,6 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
u8 *dsa_header;
int source_device;
int source_port;
@@ -93,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
source_device = dsa_header[0] & 0x1f;
source_port = (dsa_header[1] >> 3) & 0x1f;
- /*
- * Check that the source device exists and that the source
- * port is a registered DSA port.
- */
- if (source_device >= DSA_MAX_SWITCHES)
- return NULL;
-
- ds = dst->ds[source_device];
- if (!ds)
- return NULL;
-
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_find_slave(dev, source_device, source_port);
+ if (!skb->dev)
return NULL;
/*
@@ -153,8 +141,6 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 76367ba1b2e2..f38a626b3a05 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -19,7 +19,7 @@
static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *edsa_header;
/*
@@ -43,8 +43,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[1] = ETH_P_EDSA & 0xff;
edsa_header[2] = 0x00;
edsa_header[3] = 0x00;
- edsa_header[4] = 0x60 | p->dp->ds->index;
- edsa_header[5] = p->dp->index << 3;
+ edsa_header[4] = 0x60 | dp->ds->index;
+ edsa_header[5] = dp->index << 3;
/*
* Move CFI field from byte 6 to byte 5.
@@ -68,8 +68,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[1] = ETH_P_EDSA & 0xff;
edsa_header[2] = 0x00;
edsa_header[3] = 0x00;
- edsa_header[4] = 0x40 | p->dp->ds->index;
- edsa_header[5] = p->dp->index << 3;
+ edsa_header[4] = 0x40 | dp->ds->index;
+ edsa_header[5] = dp->index << 3;
edsa_header[6] = 0x00;
edsa_header[7] = 0x00;
}
@@ -80,8 +80,6 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
u8 *edsa_header;
int source_device;
int source_port;
@@ -106,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
source_device = edsa_header[0] & 0x1f;
source_port = (edsa_header[1] >> 3) & 0x1f;
- /*
- * Check that the source device exists and that the source
- * port is a registered DSA port.
- */
- if (source_device >= DSA_MAX_SWITCHES)
- return NULL;
-
- ds = dst->ds[source_device];
- if (!ds)
- return NULL;
-
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+ skb->dev = dsa_master_find_slave(dev, source_device, source_port);
+ if (!skb->dev)
return NULL;
/*
@@ -172,8 +160,6 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev,
2 * ETH_ALEN);
}
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 010ca0a336c4..0f62effad88f 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -34,7 +34,7 @@
static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
u8 *tag;
@@ -72,7 +72,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
tag[0] = 0;
- tag[1] = 1 << p->dp->index; /* destination port */
+ tag[1] = 1 << dp->index; /* destination port */
return nskb;
}
@@ -80,22 +80,19 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
u8 *tag;
int source_port;
tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN;
source_port = tag[0] & 7;
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN);
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 0b9826105e42..57519597c6fc 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -42,7 +42,7 @@
static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *lan9303_tag;
/* insert a special VLAN tag between the MAC addresses
@@ -62,7 +62,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev)
lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN);
lan9303_tag[0] = htons(ETH_P_8021Q);
- lan9303_tag[1] = htons(p->dp->index | BIT(4));
+ lan9303_tag[1] = htons(dp->index | BIT(4));
return skb;
}
@@ -71,17 +71,8 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
u16 *lan9303_tag;
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
unsigned int source_port;
- ds = dst->ds[0];
-
- if (unlikely(!ds)) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n");
- return NULL;
- }
-
if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) {
dev_warn_ratelimited(&dev->dev,
"Dropping packet, cannot pull\n");
@@ -103,16 +94,12 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
source_port = ntohs(lan9303_tag[1]) & 0x3;
- if (source_port >= ds->num_ports) {
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev) {
dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n");
return NULL;
}
- if (!ds->ports[source_port].netdev) {
- dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n");
- return NULL;
- }
-
/* remove the special VLAN tag between the MAC addresses
* and the current ethertype field.
*/
@@ -120,9 +107,6 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev,
memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN),
2 * ETH_ALEN);
- /* forward the packet to the dedicated interface */
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index ec8ee5f43255..8475434af7d5 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -23,7 +23,7 @@
static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *mtk_tag;
if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
@@ -36,7 +36,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
/* Build the tag after the MAC Source Address */
mtk_tag = skb->data + 2 * ETH_ALEN;
mtk_tag[0] = 0;
- mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
+ mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
mtk_tag[2] = 0;
mtk_tag[3] = 0;
@@ -46,8 +46,6 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_switch *ds;
int port;
__be16 *phdr, hdr;
@@ -68,20 +66,12 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev,
skb->data - ETH_HLEN - MTK_HDR_LEN,
2 * ETH_ALEN);
- /* This protocol doesn't support cascading multiple
- * switches so it's safe to assume the switch is first
- * in the tree.
- */
- ds = dst->ds[0];
- if (!ds)
- return NULL;
-
/* Get source port information */
port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK);
- if (!ds->ports[port].netdev)
- return NULL;
- skb->dev = ds->ports[port].netdev;
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
return skb;
}
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 1d4c70711c0f..613f4ee97771 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -38,7 +38,7 @@
static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
u16 *phdr, hdr;
dev->stats.tx_packets++;
@@ -54,8 +54,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
/* Set the version field, and set destination port information */
hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S |
- QCA_HDR_XMIT_FROM_CPU |
- BIT(p->dp->index);
+ QCA_HDR_XMIT_FROM_CPU | BIT(dp->index);
*phdr = htons(hdr);
@@ -65,9 +64,6 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds;
u8 ver;
int port;
__be16 *phdr, hdr;
@@ -92,20 +88,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev,
memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN,
ETH_HLEN - QCA_HDR_LEN);
- /* This protocol doesn't support cascading multiple switches so it's
- * safe to assume the switch is first in the tree
- */
- ds = cpu_dp->ds;
- if (!ds)
- return NULL;
-
/* Get source port information */
port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK);
- if (!ds->ports[port].netdev)
- return NULL;
- /* Update skb & forward the frame accordingly */
- skb->dev = ds->ports[port].netdev;
+ skb->dev = dsa_master_find_slave(dev, 0, port);
+ if (!skb->dev)
+ return NULL;
return skb;
}
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index d2fd4923aa3e..7d20e1f3de28 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -16,7 +16,7 @@
static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_port *dp = dsa_slave_to_port(dev);
struct sk_buff *nskb;
int padlen;
u8 *trailer;
@@ -48,7 +48,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
trailer = skb_put(nskb, 4);
trailer[0] = 0x80;
- trailer[1] = 1 << p->dp->index;
+ trailer[1] = 1 << dp->index;
trailer[2] = 0x10;
trailer[3] = 0x00;
@@ -58,9 +58,6 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt)
{
- struct dsa_switch_tree *dst = dev->dsa_ptr;
- struct dsa_port *cpu_dp = dsa_get_cpu_port(dst);
- struct dsa_switch *ds = cpu_dp->ds;
u8 *trailer;
int source_port;
@@ -73,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev,
return NULL;
source_port = trailer[1] & 7;
- if (source_port >= ds->num_ports || !ds->ports[source_port].netdev)
+
+ skb->dev = dsa_master_find_slave(dev, 0, source_port);
+ if (!skb->dev)
return NULL;
pskb_trim_rcsum(skb, skb->len - 4);
- skb->dev = ds->ports[source_port].netdev;
-
return skb;
}
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index f85b08baff16..85bf86ad6b18 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a)
fq->daddr = *arg->dst;
}
-static void lowpan_frag_expire(unsigned long data)
+static void lowpan_frag_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags);
spin_lock(&fq->q.lock);
diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c
index 6bde9e5a5503..96636e3b7aa9 100644
--- a/net/ieee802154/netlink.c
+++ b/net/ieee802154/netlink.c
@@ -89,7 +89,7 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info)
return genlmsg_reply(msg, info);
}
-static const struct genl_ops ieee8021154_ops[] = {
+static const struct genl_ops ieee802154_ops[] = {
/* see nl-phy.c */
IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy,
ieee802154_dump_phy),
@@ -137,8 +137,8 @@ struct genl_family nl802154_family __ro_after_init = {
.version = 1,
.maxattr = IEEE802154_ATTR_MAX,
.module = THIS_MODULE,
- .ops = ieee8021154_ops,
- .n_ops = ARRAY_SIZE(ieee8021154_ops),
+ .ops = ieee802154_ops,
+ .n_ops = ARRAY_SIZE(ieee802154_ops),
.mcgrps = ieee802154_mcgrps,
.n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps),
};
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e31108e5ef79..ce4aa827be05 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
unsigned char old_state;
- int err;
+ int err, tcp_fastopen;
lock_sock(sk);
@@ -217,11 +217,12 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
- (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
+ (tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
fastopen_queue_tune(sk, backlog);
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(sock_net(sk));
}
err = inet_csk_listen_start(sk, backlog);
@@ -826,6 +827,7 @@ int inet_shutdown(struct socket *sock, int how)
err = -ENOTCONN;
/* Hack to wake up other listeners, who can poll for
POLLHUP, even on eg. unconnected UDP sockets -- RR */
+ /* fall through */
default:
sk->sk_shutdown |= how;
if (sk->sk_prot->shutdown)
@@ -839,7 +841,7 @@ int inet_shutdown(struct socket *sock, int how)
case TCP_LISTEN:
if (!(how & RCV_SHUTDOWN))
break;
- /* Fall through */
+ /* fall through */
case TCP_SYN_SENT:
err = sk->sk_prot->disconnect(sk, O_NONBLOCK);
sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 7c45b8896709..a8d7c5a9fb05 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1180,6 +1180,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCSARP:
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EPERM;
+ /* fall through */
case SIOCGARP:
err = copy_from_user(&r, arg, sizeof(struct arpreq));
if (err)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index d7adc0616599..a4573bccd6da 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa)
*/
struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
{
- u32 hash = inet_addr_hash(net, addr);
struct net_device *result = NULL;
struct in_ifaddr *ifa;
rcu_read_lock();
- hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) {
- if (ifa->ifa_local == addr) {
- struct net_device *dev = ifa->ifa_dev->dev;
-
- if (!net_eq(dev_net(dev), net))
- continue;
- result = dev;
- break;
- }
- }
- if (!result) {
+ ifa = inet_lookup_ifaddr_rcu(net, addr);
+ if (!ifa) {
struct flowi4 fl4 = { .daddr = addr };
struct fib_result res = { 0 };
struct fib_table *local;
@@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
!fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
res.type == RTN_LOCAL)
result = FIB_RES_DEV(res);
+ } else {
+ result = ifa->ifa_dev->dev;
}
if (result && devref)
dev_hold(result);
@@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
}
EXPORT_SYMBOL(__ip_dev_find);
+/* called under RCU lock */
+struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr)
+{
+ u32 hash = inet_addr_hash(net, addr);
+ struct in_ifaddr *ifa;
+
+ hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash)
+ if (ifa->ifa_local == addr &&
+ net_eq(dev_net(ifa->ifa_dev->dev), net))
+ return ifa;
+
+ return NULL;
+}
+
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -438,7 +444,7 @@ static void check_lifetime(struct work_struct *work);
static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime);
static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
- u32 portid)
+ u32 portid, struct netlink_ext_ack *extack)
{
struct in_device *in_dev = ifa->ifa_dev;
struct in_ifaddr *ifa1, **ifap, **last_primary;
@@ -483,6 +489,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
*/
ivi.ivi_addr = ifa->ifa_address;
ivi.ivi_dev = ifa->ifa_dev;
+ ivi.extack = extack;
ret = blocking_notifier_call_chain(&inetaddr_validator_chain,
NETDEV_UP, &ivi);
ret = notifier_to_errno(ret);
@@ -515,7 +522,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
static int inet_insert_ifa(struct in_ifaddr *ifa)
{
- return __inet_insert_ifa(ifa, NULL, 0);
+ return __inet_insert_ifa(ifa, NULL, 0, NULL);
}
static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
@@ -896,7 +903,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return ret;
}
}
- return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
+ return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
+ extack);
} else {
inet_free_ifa(ifa);
@@ -1516,6 +1524,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
if (inetdev_valid_mtu(dev->mtu))
break;
/* disable IP when MTU is not enough */
+ /* fall through */
case NETDEV_UNREGISTER:
inetdev_destroy(in_dev);
break;
@@ -1751,7 +1760,7 @@ static int inet_validate_link_af(const struct net_device *dev,
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int err, rem;
- if (dev && !__in_dev_get_rtnl(dev))
+ if (dev && !__in_dev_get_rcu(dev))
return -EAFNOSUPPORT;
err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL);
@@ -1775,7 +1784,7 @@ static int inet_validate_link_af(const struct net_device *dev,
static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
{
- struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
struct nlattr *a, *tb[IFLA_INET_MAX+1];
int rem;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 37819ab4cc74..f02819134ba2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -345,9 +345,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (res.type != RTN_UNICAST &&
(res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
goto e_inval;
- if (!rpf && !fib_num_tclassid_users(net) &&
- (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev)))
- goto last_resort;
fib_combine_itag(itag, &res);
dev_match = false;
@@ -402,13 +399,26 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
struct in_device *idev, u32 *itag)
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
+ struct net *net = dev_net(dev);
- if (!r && !fib_num_tclassid_users(dev_net(dev)) &&
- IN_DEV_ACCEPT_LOCAL(idev) &&
+ if (!r && !fib_num_tclassid_users(net) &&
(dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
+ if (IN_DEV_ACCEPT_LOCAL(idev))
+ goto ok;
+ /* if no local routes are added from user space we can check
+ * for local addresses looking-up the ifaddr table
+ */
+ if (net->ipv4.fib_has_custom_local_routes)
+ goto full_check;
+ if (inet_lookup_ifaddr_rcu(net, src))
+ return -EINVAL;
+
+ok:
*itag = 0;
return 0;
}
+
+full_check:
return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag);
}
@@ -759,6 +769,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
}
err = fib_table_insert(net, tb, &cfg, extack);
+ if (!err && cfg.fc_type == RTN_LOCAL)
+ net->ipv4.fib_has_custom_local_routes = true;
errout:
return err;
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 57a5d48acee8..467b3c15395f 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -601,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi)
atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
} endfor_nexthops(fi);
}
-
-static inline void fib_add_weight(struct fib_info *fi,
- const struct fib_nh *nh)
-{
- fi->fib_weight += nh->nh_weight;
-}
-
#else /* CONFIG_IP_ROUTE_MULTIPATH */
#define fib_rebalance(fi) do { } while (0)
-#define fib_add_weight(fi, nh) do { } while (0)
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
@@ -774,8 +766,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
* |
* |-> {local prefix} (terminal node)
*/
-static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
- struct fib_nh *nh, struct netlink_ext_ack *extack)
+static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
+ struct netlink_ext_ack *extack)
{
int err = 0;
struct net *net;
@@ -1258,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int linkdown = 0;
change_nexthops(fi) {
- err = fib_check_nh(cfg, fi, nexthop_nh, extack);
+ err = fib_check_nh(cfg, nexthop_nh, extack);
if (err != 0)
goto failure;
if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
@@ -1275,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
- fib_add_weight(fi, nexthop_nh);
} endfor_nexthops(fi)
fib_rebalance(fi);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c636650a6a70..aaa1ba09afaa 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -87,32 +87,30 @@
static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type, u32 dst,
- int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
+ int dst_len, struct fib_alias *fa)
{
struct fib_entry_notifier_info info = {
.dst = dst,
.dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .tb_id = tb_id,
+ .fi = fa->fa_info,
+ .tos = fa->fa_tos,
+ .type = fa->fa_type,
+ .tb_id = fa->tb_id,
};
return call_fib4_notifier(nb, net, event_type, &info.info);
}
static int call_fib_entry_notifiers(struct net *net,
enum fib_event_type event_type, u32 dst,
- int dst_len, struct fib_info *fi,
- u8 tos, u8 type, u32 tb_id)
+ int dst_len, struct fib_alias *fa)
{
struct fib_entry_notifier_info info = {
.dst = dst,
.dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .tb_id = tb_id,
+ .fi = fa->fa_info,
+ .tos = fa->fa_tos,
+ .type = fa->fa_type,
+ .tb_id = fa->tb_id,
};
return call_fib4_notifiers(net, event_type, &info.info);
}
@@ -1216,9 +1214,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->fa_default = -1;
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
- key, plen, fi,
- new_fa->fa_tos, cfg->fc_type,
- tb->tb_id);
+ key, plen, new_fa);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1273,8 +1269,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
tb->tb_num_default++;
rt_cache_flush(cfg->fc_nlinfo.nl_net);
- call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type,
- tb->tb_id);
+ call_fib_entry_notifiers(net, event, key, plen, new_fa);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
&cfg->fc_nlinfo, nlflags);
succeeded:
@@ -1574,8 +1569,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
return -ESRCH;
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
- fa_to_delete->fa_info, tos,
- fa_to_delete->fa_type, tb->tb_id);
+ fa_to_delete);
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1892,9 +1886,7 @@ int fib_table_flush(struct net *net, struct fib_table *tb)
call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
n->key,
- KEYLENGTH - fa->fa_slen,
- fi, fa->fa_tos, fa->fa_type,
- tb->tb_id);
+ KEYLENGTH - fa->fa_slen, fa);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -1932,8 +1924,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l,
continue;
call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key,
- KEYLENGTH - fa->fa_slen, fi, fa->fa_tos,
- fa->fa_type, fa->tb_id);
+ KEYLENGTH - fa->fa_slen, fa);
}
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 681e33998e03..3c1570d3e22f 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -782,7 +782,7 @@ static bool icmp_tag_validation(int proto)
}
/*
- * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and
+ * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and
* ICMP_PARAMETERPROB.
*/
@@ -810,7 +810,8 @@ static bool icmp_unreach(struct sk_buff *skb)
if (iph->ihl < 5) /* Mangled header, drop. */
goto out_err;
- if (icmph->type == ICMP_DEST_UNREACH) {
+ switch (icmph->type) {
+ case ICMP_DEST_UNREACH:
switch (icmph->code & 15) {
case ICMP_NET_UNREACH:
case ICMP_HOST_UNREACH:
@@ -846,8 +847,16 @@ static bool icmp_unreach(struct sk_buff *skb)
}
if (icmph->code > NR_ICMP_UNREACH)
goto out;
- } else if (icmph->type == ICMP_PARAMETERPROB)
+ break;
+ case ICMP_PARAMETERPROB:
info = ntohl(icmph->un.gateway) >> 24;
+ break;
+ case ICMP_TIME_EXCEEDED:
+ __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS);
+ if (icmph->code == ICMP_EXC_FRAGTIME)
+ goto out;
+ break;
+ }
/*
* Throw it at our lower layers
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5ec9136a7c36..ca03a1dcbc8f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
* IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY,
* and 0.0.0.0 equals to 0.0.0.0 only
*/
-static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
- const struct in6_addr *sk2_rcv_saddr6,
- __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
- bool sk1_ipv6only, bool sk2_ipv6only,
- bool match_wildcard)
+static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
+ const struct in6_addr *sk2_rcv_saddr6,
+ __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk1_ipv6only, bool sk2_ipv6only,
+ bool match_wildcard)
{
int addr_type = ipv6_addr_type(sk1_rcv_saddr6);
int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
@@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) {
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
- return 1;
+ return true;
if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
- return 0;
+ return false;
}
if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY)
- return 1;
+ return true;
if (addr_type2 == IPV6_ADDR_ANY && match_wildcard &&
!(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
- return 1;
+ return true;
if (addr_type == IPV6_ADDR_ANY && match_wildcard &&
!(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
- return 1;
+ return true;
if (sk2_rcv_saddr6 &&
ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6))
- return 1;
+ return true;
- return 0;
+ return false;
}
#endif
@@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6,
* match_wildcard == false: addresses must be exactly the same, i.e.
* 0.0.0.0 only equals to 0.0.0.0
*/
-static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
- bool sk2_ipv6only, bool match_wildcard)
+static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr,
+ bool sk2_ipv6only, bool match_wildcard)
{
if (!sk2_ipv6only) {
if (sk1_rcv_saddr == sk2_rcv_saddr)
- return 1;
+ return true;
if (!sk1_rcv_saddr || !sk2_rcv_saddr)
return match_wildcard;
}
- return 0;
+ return false;
}
-int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
- bool match_wildcard)
+bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2,
+ bool match_wildcard)
{
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
@@ -495,17 +495,15 @@ EXPORT_SYMBOL(inet_csk_accept);
* to optimize.
*/
void inet_csk_init_xmit_timers(struct sock *sk,
- void (*retransmit_handler)(unsigned long),
- void (*delack_handler)(unsigned long),
- void (*keepalive_handler)(unsigned long))
+ void (*retransmit_handler)(struct timer_list *t),
+ void (*delack_handler)(struct timer_list *t),
+ void (*keepalive_handler)(struct timer_list *t))
{
struct inet_connection_sock *icsk = inet_csk(sk);
- setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler,
- (unsigned long)sk);
- setup_timer(&icsk->icsk_delack_timer, delack_handler,
- (unsigned long)sk);
- setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk);
+ timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0);
+ timer_setup(&icsk->icsk_delack_timer, delack_handler, 0);
+ timer_setup(&sk->sk_timer, keepalive_handler, 0);
icsk->icsk_pending = icsk->icsk_ack.pending = 0;
}
EXPORT_SYMBOL(inet_csk_init_xmit_timers);
@@ -675,9 +673,9 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
-static void reqsk_timer_handler(unsigned long data)
+static void reqsk_timer_handler(struct timer_list *t)
{
- struct request_sock *req = (struct request_sock *)data;
+ struct request_sock *req = from_timer(req, t, rsk_timer);
struct sock *sk_listener = req->rsk_listener;
struct net *net = sock_net(sk_listener);
struct inet_connection_sock *icsk = inet_csk(sk_listener);
@@ -748,8 +746,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
req->num_timeout = 0;
req->sk = NULL;
- setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler,
- (unsigned long)req);
+ timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
inet_ehash_insert(req_to_sk(req), NULL);
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index af74d0433453..7f3ef5c287a1 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
spin_unlock(&hb->chain_lock);
hlist_for_each_entry_safe(fq, n, &expired, list_evictor)
- f->frag_expire((unsigned long) fq);
+ f->frag_expire(&fq->timer);
return evicted;
}
@@ -366,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
f->constructor(q, arg);
add_frag_mem_limit(nf, f->qsize);
- setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
+ timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
refcount_set(&q->refcnt, 1);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 5b039159e67a..a4bab81f1462 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -142,9 +142,9 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-static void tw_timer_handler(unsigned long data)
+static void tw_timer_handler(struct timer_list *t)
{
- struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
+ struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
if (tw->tw_kill)
__NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
@@ -188,8 +188,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
- setup_pinned_timer(&tw->tw_timer, tw_timer_handler,
- (unsigned long)tw);
+ timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
/*
* Because we use RCU lookups, we should not set tw_refcnt
* to a non null value before everything is setup for this
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index b20c8ac64081..914d56928578 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -284,14 +284,17 @@ EXPORT_SYMBOL(inet_peer_xrlim_allow);
void inetpeer_invalidate_tree(struct inet_peer_base *base)
{
- struct inet_peer *p, *n;
+ struct rb_node *p = rb_first(&base->rb_root);
- rbtree_postorder_for_each_entry_safe(p, n, &base->rb_root, rb_node) {
- inet_putpeer(p);
+ while (p) {
+ struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node);
+
+ p = rb_next(p);
+ rb_erase(&peer->rb_node, &base->rb_root);
+ inet_putpeer(peer);
cond_resched();
}
- base->rb_root = RB_ROOT;
base->total = 0;
}
EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 46408c220d9d..9215654a401f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -190,12 +190,13 @@ static bool frag_expire_skip_icmp(u32 user)
/*
* Oops, a fragment queue timed out. Kill it and send an ICMP reply.
*/
-static void ip_expire(unsigned long arg)
+static void ip_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct ipq *qp;
struct net *net;
- qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
+ qp = container_of(frag, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
rcu_read_lock();
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 467e44d7587d..c105a315b1a3 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -579,8 +579,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
if (gre_handle_offloads(skb, false))
goto err_free_rt;
- if (skb->len > dev->mtu) {
- pskb_trim(skb, dev->mtu);
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
truncate = true;
}
@@ -731,8 +731,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
- if (skb->len - dev->hard_header_len > dev->mtu) {
- pskb_trim(skb, dev->mtu);
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
truncate = true;
}
@@ -1011,15 +1011,14 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_net(struct net *net)
+static void __net_exit ipgre_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
- ip_tunnel_delete_net(itn, &ipgre_link_ops);
+ ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit = ipgre_exit_net,
+ .exit_batch = ipgre_exit_batch_net,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1542,15 +1541,14 @@ static int __net_init ipgre_tap_init_net(struct net *net)
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_net(struct net *net)
+static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
- ip_tunnel_delete_net(itn, &ipgre_tap_ops);
+ ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit = ipgre_tap_exit_net,
+ .exit_batch = ipgre_tap_exit_batch_net,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
@@ -1561,16 +1559,14 @@ static int __net_init erspan_init_net(struct net *net)
&erspan_link_ops, "erspan0");
}
-static void __net_exit erspan_exit_net(struct net *net)
+static void __net_exit erspan_exit_batch_net(struct list_head *net_list)
{
- struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);
-
- ip_tunnel_delete_net(itn, &erspan_link_ops);
+ ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops);
}
static struct pernet_operations erspan_net_ops = {
.init = erspan_init_net,
- .exit = erspan_exit_net,
+ .exit_batch = erspan_exit_batch_net,
.id = &erspan_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index e9805ad664ac..fe6fee728ce4 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
}
}
-void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
+void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
+ struct rtnl_link_ops *ops)
{
+ struct ip_tunnel_net *itn;
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- ip_tunnel_destroy(itn, &list, ops);
+ list_for_each_entry(net, net_list, exit_list) {
+ itn = net_generic(net, id);
+ ip_tunnel_destroy(itn, &list, ops);
+ }
unregister_netdevice_many(&list);
rtnl_unlock();
}
-EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
+EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_parm *p, __u32 fwmark)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 89453cf62158..58465c0a8682 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -453,15 +453,14 @@ static int __net_init vti_init_net(struct net *net)
return 0;
}
-static void __net_exit vti_exit_net(struct net *net)
+static void __net_exit vti_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
- ip_tunnel_delete_net(itn, &vti_link_ops);
+ ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops);
}
static struct pernet_operations vti_net_ops = {
.init = vti_init_net,
- .exit = vti_exit_net,
+ .exit_batch = vti_exit_batch_net,
.id = &vti_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index fb1ad22b5e29..1e47818e38c7 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -634,15 +634,14 @@ static int __net_init ipip_init_net(struct net *net)
return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");
}
-static void __net_exit ipip_exit_net(struct net *net)
+static void __net_exit ipip_exit_batch_net(struct list_head *list_net)
{
- struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
- ip_tunnel_delete_net(itn, &ipip_link_ops);
+ ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops);
}
static struct pernet_operations ipip_net_ops = {
.init = ipip_init_net,
- .exit = ipip_exit_net,
+ .exit_batch = ipip_exit_batch_net,
.id = &ipip_net_id,
.size = sizeof(struct ip_tunnel_net),
};
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c9b3e6e069ae..40a43ad294cb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,6 +67,7 @@
#include <net/fib_rules.h>
#include <linux/netconf.h>
#include <net/nexthop.h>
+#include <net/switchdev.h>
struct ipmr_rule {
struct fib_rule common;
@@ -264,6 +265,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
fib_rules_unregister(net->ipv4.mr_rules_ops);
rtnl_unlock();
}
+
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR);
+}
+
+static unsigned int ipmr_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, RTNL_FAMILY_IPMR);
+}
+
+bool ipmr_rule_default(const struct fib_rule *rule)
+{
+ return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT;
+}
+EXPORT_SYMBOL(ipmr_rule_default);
#else
#define ipmr_for_each_table(mrt, net) \
for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
@@ -298,6 +315,22 @@ static void __net_exit ipmr_rules_exit(struct net *net)
net->ipv4.mrt = NULL;
rtnl_unlock();
}
+
+static int ipmr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return 0;
+}
+
+static unsigned int ipmr_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+
+bool ipmr_rule_default(const struct fib_rule *rule)
+{
+ return true;
+}
+EXPORT_SYMBOL(ipmr_rule_default);
#endif
static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg,
@@ -587,6 +620,82 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
}
#endif
+static int call_ipmr_vif_entry_notifier(struct notifier_block *nb,
+ struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ vifi_t vif_index, u32 tb_id)
+{
+ struct vif_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .dev = vif->dev,
+ .vif_index = vif_index,
+ .vif_flags = vif->flags,
+ .tb_id = tb_id,
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_ipmr_vif_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ vifi_t vif_index, u32 tb_id)
+{
+ struct vif_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .dev = vif->dev,
+ .vif_index = vif_index,
+ .vif_flags = vif->flags,
+ .tb_id = tb_id,
+ };
+
+ ASSERT_RTNL();
+ net->ipv4.ipmr_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
+static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb,
+ struct net *net,
+ enum fib_event_type event_type,
+ struct mfc_cache *mfc, u32 tb_id)
+{
+ struct mfc_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .mfc = mfc,
+ .tb_id = tb_id
+ };
+
+ return call_fib_notifier(nb, net, event_type, &info.info);
+}
+
+static int call_ipmr_mfc_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct mfc_cache *mfc, u32 tb_id)
+{
+ struct mfc_entry_notifier_info info = {
+ .info = {
+ .family = RTNL_FAMILY_IPMR,
+ .net = net,
+ },
+ .mfc = mfc,
+ .tb_id = tb_id
+ };
+
+ ASSERT_RTNL();
+ net->ipv4.ipmr_seq++;
+ return call_fib_notifiers(net, event_type, &info.info);
+}
+
/**
* vif_delete - Delete a VIF entry
* @notify: Set to 1, if the caller is a notifier_call
@@ -594,6 +703,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
+ struct net *net = read_pnet(&mrt->net);
struct vif_device *v;
struct net_device *dev;
struct in_device *in_dev;
@@ -603,6 +713,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
v = &mrt->vif_table[vifi];
+ if (VIF_EXISTS(mrt, vifi))
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi,
+ mrt->id);
+
write_lock_bh(&mrt_lock);
dev = v->dev;
v->dev = NULL;
@@ -652,10 +766,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head)
kmem_cache_free(mrt_cachep, c);
}
-static inline void ipmr_cache_free(struct mfc_cache *c)
+void ipmr_cache_free(struct mfc_cache *c)
{
call_rcu(&c->rcu, ipmr_cache_free_rcu);
}
+EXPORT_SYMBOL(ipmr_cache_free);
/* Destroy an unresolved cache entry, killing queued skbs
* and reporting error to netlink readers.
@@ -754,6 +869,9 @@ static int vif_add(struct net *net, struct mr_table *mrt,
struct vifctl *vifc, int mrtsock)
{
int vifi = vifc->vifc_vifi;
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
+ };
struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct in_device *in_dev;
@@ -828,6 +946,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
/* Fill in the VIF structures */
+ attr.orig_dev = dev;
+ if (!switchdev_port_attr_get(dev, &attr)) {
+ memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len);
+ v->dev_parent_id.id_len = attr.u.ppid.id_len;
+ } else {
+ v->dev_parent_id.id_len = 0;
+ }
v->rate_limit = vifc->vifc_rate_limit;
v->local = vifc->vifc_lcl_addr.s_addr;
v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -851,6 +976,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
if (vifi+1 > mrt->maxvif)
mrt->maxvif = vifi+1;
write_unlock_bh(&mrt_lock);
+ call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id);
return 0;
}
@@ -949,6 +1075,7 @@ static struct mfc_cache *ipmr_cache_alloc(void)
if (c) {
c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
c->mfc_un.res.minvif = MAXVIFS;
+ refcount_set(&c->mfc_un.res.refcount, 1);
}
return c;
}
@@ -1150,6 +1277,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi,
static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
{
+ struct net *net = read_pnet(&mrt->net);
struct mfc_cache *c;
/* The entries are added/deleted only under RTNL */
@@ -1161,8 +1289,9 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent)
return -ENOENT;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_free(c);
+ ipmr_cache_put(c);
return 0;
}
@@ -1189,6 +1318,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c,
+ mrt->id);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1238,6 +1369,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
ipmr_cache_resolve(net, mrt, uc, c);
ipmr_cache_free(uc);
}
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id);
mroute_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1245,6 +1377,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
/* Close the multicast socket, and clear the vif tables etc */
static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
+ struct net *net = read_pnet(&mrt->net);
struct mfc_cache *c, *tmp;
LIST_HEAD(list);
int i;
@@ -1263,8 +1396,10 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all)
continue;
rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params);
list_del_rcu(&c->list);
+ call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c,
+ mrt->id);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
- ipmr_cache_free(c);
+ ipmr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
@@ -1393,6 +1528,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval,
case MRT_ADD_MFC:
case MRT_DEL_MFC:
parent = -1;
+ /* fall through */
case MRT_ADD_MFC_PROXY:
case MRT_DEL_MFC_PROXY:
if (optlen != sizeof(mfc)) {
@@ -1724,10 +1860,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
+#ifdef CONFIG_NET_SWITCHDEV
+static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
+ int in_vifi, int out_vifi)
+{
+ struct vif_device *out_vif = &mrt->vif_table[out_vifi];
+ struct vif_device *in_vif = &mrt->vif_table[in_vifi];
+
+ if (!skb->offload_mr_fwd_mark)
+ return false;
+ if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len)
+ return false;
+ return netdev_phys_item_id_same(&out_vif->dev_parent_id,
+ &in_vif->dev_parent_id);
+}
+#else
+static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt,
+ int in_vifi, int out_vifi)
+{
+ return false;
+}
+#endif
+
/* Processing handlers for ipmr_forward */
static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
- struct sk_buff *skb, struct mfc_cache *c, int vifi)
+ int in_vifi, struct sk_buff *skb,
+ struct mfc_cache *c, int vifi)
{
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &mrt->vif_table[vifi];
@@ -1748,6 +1907,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
goto out_free;
}
+ if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
+ goto out_free;
+
if (vif->flags & VIFF_TUNNEL) {
rt = ip_route_output_ports(net, &fl4, NULL,
vif->remote, vif->local,
@@ -1925,8 +2087,8 @@ forward:
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ipmr_queue_xmit(net, mrt, skb2, cache,
- psend);
+ ipmr_queue_xmit(net, mrt, true_vifi,
+ skb2, cache, psend);
}
psend = ct;
}
@@ -1937,9 +2099,10 @@ last_forward:
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ipmr_queue_xmit(net, mrt, skb2, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb2,
+ cache, psend);
} else {
- ipmr_queue_xmit(net, mrt, skb, cache, psend);
+ ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend);
return;
}
}
@@ -2156,6 +2319,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
return -EMSGSIZE;
+ if (c->mfc_flags & MFC_OFFLOAD)
+ rtm->rtm_flags |= RTNH_F_OFFLOAD;
+
if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH)))
return -EMSGSIZE;
@@ -3048,14 +3214,87 @@ static const struct net_protocol pim_protocol = {
};
#endif
+static unsigned int ipmr_seq_read(struct net *net)
+{
+ ASSERT_RTNL();
+
+ return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net);
+}
+
+static int ipmr_dump(struct net *net, struct notifier_block *nb)
+{
+ struct mr_table *mrt;
+ int err;
+
+ err = ipmr_rules_dump(net, nb);
+ if (err)
+ return err;
+
+ ipmr_for_each_table(mrt, net) {
+ struct vif_device *v = &mrt->vif_table[0];
+ struct mfc_cache *mfc;
+ int vifi;
+
+ /* Notifiy on table VIF entries */
+ read_lock(&mrt_lock);
+ for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
+ if (!v->dev)
+ continue;
+
+ call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
+ }
+ read_unlock(&mrt_lock);
+
+ /* Notify on table MFC entries */
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list)
+ call_ipmr_mfc_entry_notifier(nb, net,
+ FIB_EVENT_ENTRY_ADD, mfc,
+ mrt->id);
+ }
+
+ return 0;
+}
+
+static const struct fib_notifier_ops ipmr_notifier_ops_template = {
+ .family = RTNL_FAMILY_IPMR,
+ .fib_seq_read = ipmr_seq_read,
+ .fib_dump = ipmr_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ipmr_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ net->ipv4.ipmr_seq = 0;
+
+ ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+ net->ipv4.ipmr_notifier_ops = ops;
+
+ return 0;
+}
+
+static void __net_exit ipmr_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops);
+ net->ipv4.ipmr_notifier_ops = NULL;
+}
+
/* Setup for IP multicast routing */
static int __net_init ipmr_net_init(struct net *net)
{
int err;
+ err = ipmr_notifier_init(net);
+ if (err)
+ goto ipmr_notifier_fail;
+
err = ipmr_rules_init(net);
if (err < 0)
- goto fail;
+ goto ipmr_rules_fail;
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
@@ -3072,7 +3311,9 @@ proc_cache_fail:
proc_vif_fail:
ipmr_rules_exit(net);
#endif
-fail:
+ipmr_rules_fail:
+ ipmr_notifier_exit(net);
+ipmr_notifier_fail:
return err;
}
@@ -3082,6 +3323,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
remove_proc_entry("ip_mr_cache", net->proc_net);
remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
+ ipmr_notifier_exit(net);
ipmr_rules_exit(net);
}
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index a0f37b208268..0443ca4120b0 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -276,7 +276,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
else
return NF_ACCEPT;
}
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ /* Only ICMPs can be IP_CT_IS_REPLY: */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3d9f1c2f81c5..bc40bd411196 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1250,7 +1250,7 @@ static void set_class_tag(struct rtable *rt, u32 tag)
static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
{
unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr);
- unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size,
+ unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size,
ip_rt_min_advmss);
return min(advmss, IPV4_MAX_PMTU - header_size);
@@ -3038,7 +3038,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
int __init ip_rt_init(void)
{
- int rc = 0;
int cpu;
ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
@@ -3095,7 +3094,7 @@ int __init ip_rt_init(void)
#endif
register_pernet_subsys(&rt_genid_ops);
register_pernet_subsys(&ipv4_inetpeer_ops);
- return rc;
+ return 0;
}
#ifdef CONFIG_SYSCTL
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 0d3c038d7b04..81d218346cf7 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -251,10 +251,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret;
}
-static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
+static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
int ret;
@@ -265,7 +267,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
return -ENOMEM;
rcu_read_lock();
- ctxt = rcu_dereference(tcp_fastopen_ctx);
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
@@ -282,12 +284,8 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write,
ret = -EINVAL;
goto bad_key;
}
- /* Generate a dummy secret but don't publish it. This
- * is needed so we don't regenerate a new key on the
- * first invocation of tcp_fastopen_cookie_gen
- */
- tcp_fastopen_init_key_once(false);
- tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH);
+ tcp_fastopen_reset_cipher(net, NULL, user_key,
+ TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
@@ -358,11 +356,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
void __user *buffer,
size_t *lenp, loff_t *ppos)
{
+ struct net *net = container_of(table->data, struct net,
+ ipv4.sysctl_tcp_fastopen_blackhole_timeout);
int ret;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (write && ret == 0)
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);
return ret;
}
@@ -401,27 +401,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
- .procname = "tcp_fastopen",
- .data = &sysctl_tcp_fastopen,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_fastopen_key",
- .mode = 0600,
- .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
- .proc_handler = proc_tcp_fastopen_key,
- },
- {
- .procname = "tcp_fastopen_blackhole_timeout_sec",
- .data = &sysctl_tcp_fastopen_blackhole_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_tfo_blackhole_detect_timeout,
- .extra1 = &zero,
- },
- {
.procname = "tcp_abort_on_overflow",
.data = &sysctl_tcp_abort_on_overflow,
.maxlen = sizeof(int),
@@ -1085,6 +1064,28 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "tcp_fastopen",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_fastopen_key",
+ .mode = 0600,
+ .data = &init_net.ipv4.sysctl_tcp_fastopen,
+ .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+ .proc_handler = proc_tcp_fastopen_key,
+ },
+ {
+ .procname = "tcp_fastopen_blackhole_timeout_sec",
+ .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_tfo_blackhole_detect_timeout,
+ .extra1 = &zero,
+ },
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
.procname = "fib_multipath_use_neigh",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 5091402720ab..8b1fa4dd4538 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -413,8 +413,10 @@ void tcp_init_sock(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
tp->out_of_order_queue = RB_ROOT;
+ sk->tcp_rtx_queue = RB_ROOT;
tcp_init_xmit_timers(sk);
INIT_LIST_HEAD(&tp->tsq_node);
+ INIT_LIST_HEAD(&tp->tsorted_sent_queue);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -456,8 +458,22 @@ void tcp_init_sock(struct sock *sk)
}
EXPORT_SYMBOL(tcp_init_sock);
-static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
+void tcp_init_transfer(struct sock *sk, int bpf_op)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ tcp_mtup_init(sk);
+ icsk->icsk_af_ops->rebuild_header(sk);
+ tcp_init_metrics(sk);
+ tcp_call_bpf(sk, bpf_op);
+ tcp_init_congestion_control(sk);
+ tcp_init_buffer_space(sk);
+}
+
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
+{
+ struct sk_buff *skb = tcp_write_queue_tail(sk);
+
if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -686,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- if (!tcp_send_head(sk))
- return;
-
skb = tcp_write_queue_tail(sk);
+ if (!skb)
+ return;
if (!(flags & MSG_MORE) || forced_push(tp))
tcp_mark_push(tp, skb);
@@ -869,6 +884,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* available to the caller, no more, no less.
*/
skb->reserved_tailroom = skb->end - skb->tail - size;
+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
return skb;
}
__kfree_skb(skb);
@@ -948,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
int copy, i;
bool can_coalesce;
- if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 ||
+ if (!skb || (copy = size_goal - skb->len) <= 0 ||
!tcp_skb_can_collapse_to(skb)) {
new_segment:
if (!sk_stream_memory_free(sk))
goto wait_for_sndbuf;
skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation,
- skb_queue_empty(&sk->sk_write_queue));
+ tcp_rtx_and_write_queues_empty(sk));
if (!skb)
goto wait_for_memory;
@@ -1027,7 +1043,7 @@ wait_for_memory:
out:
if (copied) {
- tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+ tcp_tx_timestamp(sk, sk->sk_tsflags);
if (!(flags & MSG_SENDPAGE_NOTLAST))
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
@@ -1126,7 +1142,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -1183,7 +1199,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
goto out_err;
}
- skb = tcp_send_head(sk) ? tcp_write_queue_tail(sk) : NULL;
+ skb = tcp_write_queue_tail(sk);
uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb));
if (!uarg) {
err = -ENOBUFS;
@@ -1259,7 +1275,7 @@ restart:
int max = size_goal;
skb = tcp_write_queue_tail(sk);
- if (tcp_send_head(sk)) {
+ if (skb) {
if (skb->ip_summed == CHECKSUM_NONE)
max = mss_now;
copy = max - skb->len;
@@ -1279,7 +1295,7 @@ new_segment:
process_backlog = false;
goto restart;
}
- first_skb = skb_queue_empty(&sk->sk_write_queue);
+ first_skb = tcp_rtx_and_write_queues_empty(sk);
skb = sk_stream_alloc_skb(sk,
select_size(sk, sg, first_skb),
sk->sk_allocation,
@@ -1404,7 +1420,7 @@ wait_for_memory:
out:
if (copied) {
- tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
+ tcp_tx_timestamp(sk, sockc.tsflags);
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
out_nopush:
@@ -1505,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
/* XXX -- need to support SO_PEEK_OFF */
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
+ err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
+ if (err)
+ return err;
+ copied += skb->len;
+ }
+
skb_queue_walk(&sk->sk_write_queue, skb) {
err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
if (err)
@@ -2304,6 +2327,37 @@ static inline bool tcp_need_reset(int state)
TCPF_FIN_WAIT2 | TCPF_SYN_RECV);
}
+static void tcp_rtx_queue_purge(struct sock *sk)
+{
+ struct rb_node *p = rb_first(&sk->tcp_rtx_queue);
+
+ while (p) {
+ struct sk_buff *skb = rb_to_skb(p);
+
+ p = rb_next(p);
+ /* Since we are deleting whole queue, no need to
+ * list_del(&skb->tcp_tsorted_anchor)
+ */
+ tcp_rtx_queue_unlink(skb, sk);
+ sk_wmem_free_skb(sk, skb);
+ }
+}
+
+void tcp_write_queue_purge(struct sock *sk)
+{
+ struct sk_buff *skb;
+
+ tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
+ while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
+ tcp_skb_tsorted_anchor_cleanup(skb);
+ sk_wmem_free_skb(sk, skb);
+ }
+ tcp_rtx_queue_purge(sk);
+ INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue);
+ sk_mem_reclaim(sk);
+ tcp_clear_all_retrans_hints(tcp_sk(sk));
+}
+
int tcp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
@@ -2362,7 +2416,6 @@ int tcp_disconnect(struct sock *sk, int flags)
* issue in __tcp_select_window()
*/
icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
- tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
dst_release(sk->sk_rx_dst);
@@ -2518,6 +2571,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
release_sock(sk);
return err;
}
+ case TCP_FASTOPEN_KEY: {
+ __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+
+ if (optlen != sizeof(key))
+ return -EINVAL;
+
+ if (copy_from_user(key, optval, optlen))
+ return -EFAULT;
+
+ return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+ }
default:
/* fallthru */
break;
@@ -2749,7 +2813,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
TCPF_LISTEN))) {
- tcp_fastopen_init_key_once(true);
+ tcp_fastopen_init_key_once(net);
fastopen_queue_tune(sk, val);
} else {
@@ -2759,7 +2823,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
@@ -3104,6 +3168,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return -EFAULT;
return 0;
+ case TCP_FASTOPEN_KEY: {
+ __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct tcp_fastopen_context *ctx;
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ rcu_read_lock();
+ ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
+ if (ctx)
+ memcpy(key, ctx->key, sizeof(key));
+ else
+ len = 0;
+ rcu_read_unlock();
+
+ len = min_t(unsigned int, len, sizeof(key));
+ if (put_user(len, optlen))
+ return -EFAULT;
+ if (copy_to_user(optval, key, len))
+ return -EFAULT;
+ return 0;
+ }
case TCP_THIN_LINEAR_TIMEOUTS:
val = tp->thin_lto;
break;
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 66ac69f7bd19..06fbe102a425 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -389,7 +389,7 @@ static void tcp_cdg_release(struct sock *sk)
kfree(ca->gradients);
}
-struct tcp_congestion_ops tcp_cdg __read_mostly = {
+static struct tcp_congestion_ops tcp_cdg __read_mostly = {
.cong_avoid = tcp_cdg_cong_avoid,
.cwnd_event = tcp_cdg_cwnd_event,
.pkts_acked = tcp_cdg_acked,
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3c33220c418..21075ce19cb6 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -9,15 +9,18 @@
#include <net/inetpeer.h>
#include <net/tcp.h>
-int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE;
-
-struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
-
-static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock);
-
-void tcp_fastopen_init_key_once(bool publish)
+void tcp_fastopen_init_key_once(struct net *net)
{
- static u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct tcp_fastopen_context *ctxt;
+
+ rcu_read_lock();
+ ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+ if (ctxt) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
/* tcp_fastopen_reset_cipher publishes the new context
* atomically, so we allow this race happening here.
@@ -25,8 +28,8 @@ void tcp_fastopen_init_key_once(bool publish)
* All call sites of tcp_fastopen_cookie_gen also check
* for a valid cookie, so this is an acceptable risk.
*/
- if (net_get_random_once(key, sizeof(key)) && publish)
- tcp_fastopen_reset_cipher(key, sizeof(key));
+ get_random_bytes(key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
}
static void tcp_fastopen_ctx_free(struct rcu_head *head)
@@ -37,10 +40,37 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head)
kfree(ctx);
}
-int tcp_fastopen_reset_cipher(void *key, unsigned int len)
+void tcp_fastopen_destroy_cipher(struct sock *sk)
+{
+ struct tcp_fastopen_context *ctx;
+
+ ctx = rcu_dereference_protected(
+ inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1);
+ if (ctx)
+ call_rcu(&ctx->rcu, tcp_fastopen_ctx_free);
+}
+
+void tcp_fastopen_ctx_destroy(struct net *net)
+{
+ struct tcp_fastopen_context *ctxt;
+
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+
+ if (ctxt)
+ call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
+}
+
+int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
+ void *key, unsigned int len)
{
- int err;
struct tcp_fastopen_context *ctx, *octx;
+ struct fastopen_queue *q;
+ int err;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -61,26 +91,39 @@ error: kfree(ctx);
}
memcpy(ctx->key, key, len);
- spin_lock(&tcp_fastopen_ctx_lock);
- octx = rcu_dereference_protected(tcp_fastopen_ctx,
- lockdep_is_held(&tcp_fastopen_ctx_lock));
- rcu_assign_pointer(tcp_fastopen_ctx, ctx);
- spin_unlock(&tcp_fastopen_ctx_lock);
+ if (sk) {
+ q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+ spin_lock_bh(&q->lock);
+ octx = rcu_dereference_protected(q->ctx,
+ lockdep_is_held(&q->lock));
+ rcu_assign_pointer(q->ctx, ctx);
+ spin_unlock_bh(&q->lock);
+ } else {
+ spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
+ octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx,
+ lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock));
+ rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx);
+ spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock);
+ }
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
return err;
}
-static bool __tcp_fastopen_cookie_gen(const void *path,
+static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
struct tcp_fastopen_cookie *foc)
{
struct tcp_fastopen_context *ctx;
bool ok = false;
rcu_read_lock();
- ctx = rcu_dereference(tcp_fastopen_ctx);
+
+ ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+ if (!ctx)
+ ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+
if (ctx) {
crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
@@ -96,7 +139,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
*
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/
-static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+static bool tcp_fastopen_cookie_gen(struct sock *sk,
+ struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *foc)
{
@@ -104,7 +148,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct iphdr *iph = ip_hdr(syn);
__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(path, foc);
+ return __tcp_fastopen_cookie_gen(sk, path, foc);
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -112,13 +156,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
const struct ipv6hdr *ip6h = ipv6_hdr(syn);
struct tcp_fastopen_cookie tmp;
- if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+ if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
struct in6_addr *buf = &tmp.addr;
int i;
for (i = 0; i < 4; i++)
buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(buf, foc);
+ return __tcp_fastopen_cookie_gen(sk, buf, foc);
}
}
#endif
@@ -216,12 +260,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
refcount_set(&req->rsk_refcnt, 2);
/* Now finish processing the fastopen child socket. */
- inet_csk(child)->icsk_af_ops->rebuild_header(child);
- tcp_init_congestion_control(child);
- tcp_mtup_init(child);
- tcp_init_metrics(child);
- tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tcp_init_buffer_space(child);
+ tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
@@ -279,25 +318,26 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct tcp_fastopen_cookie *foc)
{
- struct tcp_fastopen_cookie valid_foc = { .len = -1 };
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+ int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
- if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+ if (!((tcp_fastopen & TFO_SERVER_ENABLE) &&
(syn_data || foc->len >= 0) &&
tcp_fastopen_queue_check(sk))) {
foc->len = -1;
return NULL;
}
- if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+ if (syn_data && (tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+ tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
foc->len == valid_foc.len &&
!memcmp(foc->val, valid_foc.val, foc->len)) {
@@ -347,7 +387,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
return false;
}
- if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+ if (sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
cookie->len = -1;
return true;
}
@@ -401,25 +441,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect);
* TFO connection with data exchanges.
*/
-/* Default to 1hr */
-unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60;
-static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0);
-static unsigned long tfo_active_disable_stamp __read_mostly;
-
/* Disable active TFO and record current jiffies and
* tfo_active_disable_times
*/
void tcp_fastopen_active_disable(struct sock *sk)
{
- atomic_inc(&tfo_active_disable_times);
- tfo_active_disable_stamp = jiffies;
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE);
-}
+ struct net *net = sock_net(sk);
-/* Reset tfo_active_disable_times to 0 */
-void tcp_fastopen_active_timeout_reset(void)
-{
- atomic_set(&tfo_active_disable_times, 0);
+ atomic_inc(&net->ipv4.tfo_active_disable_times);
+ net->ipv4.tfo_active_disable_stamp = jiffies;
+ NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}
/* Calculate timeout for tfo active disable
@@ -428,17 +459,18 @@ void tcp_fastopen_active_timeout_reset(void)
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- int tfo_da_times = atomic_read(&tfo_active_disable_times);
- int multiplier;
+ unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times);
unsigned long timeout;
+ int multiplier;
if (!tfo_da_times)
return false;
/* Limit timout to max: 2^6 * initial timeout */
multiplier = 1 << min(tfo_da_times - 1, 6);
- timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ;
- if (time_before(jiffies, tfo_active_disable_stamp + timeout))
+ timeout = multiplier * tfo_bh_timeout * HZ;
+ if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
return true;
/* Mark check bit so we can check for successful active TFO
@@ -457,27 +489,25 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
void tcp_fastopen_active_disable_ofo_check(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node *p;
- struct sk_buff *skb;
struct dst_entry *dst;
+ struct sk_buff *skb;
if (!tp->syn_fastopen)
return;
if (!tp->data_segs_in) {
- p = rb_first(&tp->out_of_order_queue);
- if (p && !rb_next(p)) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
+ if (skb && !skb_rb_next(skb)) {
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
tcp_fastopen_active_disable(sk);
return;
}
}
} else if (tp->syn_fastopen_ch &&
- atomic_read(&tfo_active_disable_times)) {
+ atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) {
dst = sk_dst_get(sk);
if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK)))
- tcp_fastopen_active_timeout_reset();
+ atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0);
dst_release(dst);
}
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7eec3383702b..ab3f12898245 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1142,6 +1142,7 @@ struct tcp_sacktag_state {
u64 last_sackt;
struct rate_sample *rate;
int flag;
+ unsigned int mss_now;
};
/* Check if skb is fully within the SACK block. In presence of GSO skbs,
@@ -1191,7 +1192,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
if (pkt_len >= skb->len && !in_sack)
return 0;
- err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC);
+ err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ pkt_len, mss, GFP_ATOMIC);
if (err < 0)
return err;
}
@@ -1288,13 +1290,13 @@ static u8 tcp_sacktag_one(struct sock *sk,
/* Shift newly-SACKed bytes from this skb to the immediately previous
* already-SACKed sk_buff. Mark the newly-SACKed bytes as such.
*/
-static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
+static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev,
+ struct sk_buff *skb,
struct tcp_sacktag_state *state,
unsigned int pcount, int shifted, int mss,
bool dup_sack)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *prev = tcp_write_queue_prev(sk, skb);
u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */
u32 end_seq = start_seq + shifted; /* end of newly-SACKed */
@@ -1363,8 +1365,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp))
TCP_SKB_CB(prev)->tx.delivered_mstamp = 0;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED);
@@ -1414,9 +1415,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
goto fallback;
/* Can only happen with delayed DSACK + discard craziness */
- if (unlikely(skb == tcp_write_queue_head(sk)))
+ prev = skb_rb_prev(skb);
+ if (!prev)
goto fallback;
- prev = tcp_write_queue_prev(sk, skb);
if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
goto fallback;
@@ -1495,18 +1496,17 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if (!skb_shift(prev, skb, len))
goto fallback;
- if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
+ if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack))
goto out;
/* Hole filled allows collapsing with the next as well, this is very
* useful when hole on every nth skb pattern happens
*/
- if (prev == tcp_write_queue_tail(sk))
+ skb = skb_rb_next(prev);
+ if (!skb)
goto out;
- skb = tcp_write_queue_next(sk, prev);
if (!skb_can_shift(skb) ||
- (skb == tcp_send_head(sk)) ||
((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) ||
(mss != tcp_skb_seglen(skb)))
goto out;
@@ -1514,7 +1514,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
len = skb->len;
if (skb_shift(prev, skb, len)) {
pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
+ tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb),
+ len, mss, 0);
}
out:
@@ -1538,13 +1539,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *tmp;
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
int in_sack = 0;
bool dup_sack = dup_sack_in;
- if (skb == tcp_send_head(sk))
- break;
-
/* queue is in-order => we can short-circuit the walk early */
if (!before(TCP_SKB_CB(skb)->seq, end_seq))
break;
@@ -1593,6 +1591,8 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
tcp_skb_pcount(skb),
skb->skb_mstamp);
tcp_rate_skb_delivered(sk, skb, state->rate);
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+ list_del_init(&skb->tcp_tsorted_anchor);
if (!before(TCP_SKB_CB(skb)->seq,
tcp_highest_sack_seq(tp)))
@@ -1604,23 +1604,44 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
return skb;
}
-/* Avoid all extra work that is being done by sacktag while walking in
- * a normal way
- */
+static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk,
+ struct tcp_sacktag_state *state,
+ u32 seq)
+{
+ struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node;
+ struct sk_buff *skb;
+ int unack_bytes;
+
+ while (*p) {
+ parent = *p;
+ skb = rb_to_skb(parent);
+ if (before(seq, TCP_SKB_CB(skb)->seq)) {
+ p = &parent->rb_left;
+ continue;
+ }
+ if (!before(seq, TCP_SKB_CB(skb)->end_seq)) {
+ p = &parent->rb_right;
+ continue;
+ }
+
+ state->fack_count = 0;
+ unack_bytes = TCP_SKB_CB(skb)->seq - tcp_sk(sk)->snd_una;
+ if (state->mss_now && unack_bytes > 0)
+ state->fack_count = unack_bytes / state->mss_now;
+
+ return skb;
+ }
+ return NULL;
+}
+
static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
struct tcp_sacktag_state *state,
u32 skip_to_seq)
{
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
- if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
- break;
+ if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq))
+ return skb;
- state->fack_count += tcp_skb_pcount(skb);
- }
- return skb;
+ return tcp_sacktag_bsearch(sk, state, skip_to_seq);
}
static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
@@ -1742,8 +1763,9 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
}
- skb = tcp_write_queue_head(sk);
+ state->mss_now = tcp_current_mss(sk);
state->fack_count = 0;
+ skb = NULL;
i = 0;
if (!tp->sacked_out) {
@@ -1967,7 +1989,7 @@ void tcp_enter_loss(struct sock *sk)
if (tcp_is_reno(tp))
tcp_reset_reno_sack(tp);
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED);
if (is_reneg) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING);
@@ -1976,10 +1998,7 @@ void tcp_enter_loss(struct sock *sk)
}
tcp_clear_all_retrans_hints(tp);
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
-
+ skb_rbtree_walk_from(skb) {
mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
is_reneg);
if (mark_lost)
@@ -2205,20 +2224,18 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq;
WARN_ON(packets > tp->packets_out);
- if (tp->lost_skb_hint) {
- skb = tp->lost_skb_hint;
- cnt = tp->lost_cnt_hint;
+ skb = tp->lost_skb_hint;
+ if (skb) {
/* Head already handled? */
- if (mark_head && skb != tcp_write_queue_head(sk))
+ if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una))
return;
+ cnt = tp->lost_cnt_hint;
} else {
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
cnt = 0;
}
- tcp_for_write_queue_from(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk_from(skb) {
/* TODO: do this better */
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
@@ -2242,7 +2259,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
/* If needed, chop off the prefix to mark as lost. */
lost = (packets - oldcnt) * mss;
if (lost < skb->len &&
- tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0)
+ tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ lost, mss, GFP_ATOMIC) < 0)
break;
cnt = packets;
}
@@ -2326,16 +2344,16 @@ static bool tcp_any_retrans_done(const struct sock *sk)
if (tp->retrans_out)
return true;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
return true;
return false;
}
-#if FASTRETRANS_DEBUG > 1
static void DBGUNDO(struct sock *sk, const char *msg)
{
+#if FASTRETRANS_DEBUG > 1
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
@@ -2357,10 +2375,8 @@ static void DBGUNDO(struct sock *sk, const char *msg)
tp->packets_out);
}
#endif
-}
-#else
-#define DBGUNDO(x...) do { } while (0)
#endif
+}
static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
{
@@ -2369,9 +2385,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss)
if (unmark_loss) {
struct sk_buff *skb;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
tp->lost_out = 0;
@@ -2616,9 +2630,7 @@ void tcp_simple_retransmit(struct sock *sk)
unsigned int mss = tcp_current_mss(sk);
u32 prior_lost = tp->lost_out;
- tcp_for_write_queue(skb, sk) {
- if (skb == tcp_send_head(sk))
- break;
+ skb_rbtree_walk(skb, &sk->tcp_rtx_queue) {
if (tcp_skb_seglen(skb) > mss &&
!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
@@ -2712,7 +2724,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack,
* is updated in tcp_ack()). Otherwise fall back to
* the conventional recovery.
*/
- if (tcp_send_head(sk) &&
+ if (!tcp_write_queue_empty(sk) &&
after(tcp_wnd_end(tp), tp->snd_nxt)) {
*rexmit = REXMIT_NEW;
return;
@@ -2804,9 +2816,9 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
(tcp_fackets_out(tp) > tp->reordering));
- if (WARN_ON(!tp->packets_out && tp->sacked_out))
+ if (!tp->packets_out && tp->sacked_out)
tp->sacked_out = 0;
- if (WARN_ON(!tp->sacked_out && tp->fackets_out))
+ if (!tp->sacked_out && tp->fackets_out)
tp->fackets_out = 0;
/* Now state machine starts.
@@ -2873,6 +2885,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
(*ack_flag & FLAG_LOST_RETRANS)))
return;
/* Change state if cwnd is undone or retransmits are lost */
+ /* fall through */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
@@ -3056,8 +3069,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb,
shinfo = skb_shinfo(skb);
if (!before(shinfo->tskey, prior_snd_una) &&
- before(shinfo->tskey, tcp_sk(sk)->snd_una))
- __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ before(shinfo->tskey, tcp_sk(sk)->snd_una)) {
+ tcp_skb_tsorted_save(skb) {
+ __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
+ } tcp_skb_tsorted_restore(skb);
+ }
}
/* Remove acknowledged frames from the retransmission queue. If our packet
@@ -3073,11 +3089,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
struct tcp_sock *tp = tcp_sk(sk);
u32 prior_sacked = tp->sacked_out;
u32 reord = tp->packets_out;
+ struct sk_buff *skb, *next;
bool fully_acked = true;
long sack_rtt_us = -1L;
long seq_rtt_us = -1L;
long ca_rtt_us = -1L;
- struct sk_buff *skb;
u32 pkts_acked = 0;
u32 last_in_flight = 0;
bool rtt_update;
@@ -3085,7 +3101,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
first_ackt = 0;
- while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
+ for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
u8 sacked = scb->sacked;
u32 acked_pcount;
@@ -3103,8 +3119,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
break;
fully_acked = false;
} else {
- /* Speedup tcp_unlink_write_queue() and next loop */
- prefetchw(skb->next);
acked_pcount = tcp_skb_pcount(skb);
}
@@ -3156,12 +3170,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (!fully_acked)
break;
- tcp_unlink_write_queue(skb, sk);
- sk_wmem_free_skb(sk, skb);
+ next = skb_rb_next(skb);
if (unlikely(skb == tp->retransmit_skb_hint))
tp->retransmit_skb_hint = NULL;
if (unlikely(skb == tp->lost_skb_hint))
tp->lost_skb_hint = NULL;
+ tcp_rtx_queue_unlink_and_free(skb, sk);
}
if (!skb)
@@ -3253,12 +3267,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
static void tcp_ack_probe(struct sock *sk)
{
- const struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *head = tcp_send_head(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
/* Was it a usable window open? */
-
- if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
+ if (!head)
+ return;
+ if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) {
icsk->icsk_backoff = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* Socket must be waked up by subsequent tcp_data_snd_check().
@@ -3378,7 +3394,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
tp->pred_flags = 0;
tcp_fast_path_check(sk);
- if (tcp_send_head(sk))
+ if (!tcp_write_queue_empty(sk))
tcp_slow_start_after_idle_check(sk);
if (nwin > tp->max_window) {
@@ -3563,8 +3579,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sack_state.first_sackt = 0;
sack_state.rate = &rs;
- /* We very likely will need to access write queue head. */
- prefetchw(sk->sk_write_queue.next);
+ /* We very likely will need to access rtx queue. */
+ prefetch(sk->tcp_rtx_queue.rb_node);
/* If the ack is older than previous acks
* then we can probably ignore it.
@@ -3678,8 +3694,7 @@ no_queue:
* being used to time the probes, and is probably far higher than
* it needs to be for normal retransmission.
*/
- if (tcp_send_head(sk))
- tcp_ack_probe(sk);
+ tcp_ack_probe(sk);
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
@@ -4268,11 +4283,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
tp->rx_opt.num_sacks = num_sacks;
}
-enum tcp_queue {
- OOO_QUEUE,
- RCV_QUEUE,
-};
-
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
@@ -4288,7 +4298,6 @@ enum tcp_queue {
* Returns true if caller should free @from instead of queueing it
*/
static bool tcp_try_coalesce(struct sock *sk,
- enum tcp_queue dest,
struct sk_buff *to,
struct sk_buff *from,
bool *fragstolen)
@@ -4313,10 +4322,7 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
- if (dest == OOO_QUEUE)
- TCP_SKB_CB(to)->swtstamp = TCP_SKB_CB(from)->swtstamp;
- else
- to->tstamp = from->tstamp;
+ to->tstamp = from->tstamp;
}
return true;
@@ -4341,7 +4347,7 @@ static void tcp_ofo_queue(struct sock *sk)
p = rb_first(&tp->out_of_order_queue);
while (p) {
- skb = rb_entry(p, struct sk_buff, rbnode);
+ skb = rb_to_skb(p);
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
@@ -4353,9 +4359,6 @@ static void tcp_ofo_queue(struct sock *sk)
}
p = rb_next(p);
rb_erase(&skb->rbnode, &tp->out_of_order_queue);
- /* Replace tstamp which was stomped by rbnode */
- if (TCP_SKB_CB(skb)->has_rxtstamp)
- skb->tstamp = TCP_SKB_CB(skb)->swtstamp;
if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
SOCK_DEBUG(sk, "ofo packet was already received\n");
@@ -4367,8 +4370,7 @@ static void tcp_ofo_queue(struct sock *sk)
TCP_SKB_CB(skb)->end_seq);
tail = skb_peek_tail(&sk->sk_receive_queue);
- eaten = tail && tcp_try_coalesce(sk, RCV_QUEUE,
- tail, skb, &fragstolen);
+ eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
if (!eaten)
@@ -4409,7 +4411,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct rb_node **p, *q, *parent;
+ struct rb_node **p, *parent;
struct sk_buff *skb1;
u32 seq, end_seq;
bool fragstolen;
@@ -4422,10 +4424,6 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
return;
}
- /* Stash tstamp to avoid being stomped on by rbnode */
- if (TCP_SKB_CB(skb)->has_rxtstamp)
- TCP_SKB_CB(skb)->swtstamp = skb->tstamp;
-
/* Disable header prediction. */
tp->pred_flags = 0;
inet_csk_schedule_ack(sk);
@@ -4453,7 +4451,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, OOO_QUEUE, tp->ooo_last_skb,
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb,
skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
@@ -4472,7 +4470,7 @@ coalesce_done:
parent = NULL;
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(seq, TCP_SKB_CB(skb1)->seq)) {
p = &parent->rb_left;
continue;
@@ -4504,7 +4502,7 @@ coalesce_done:
__kfree_skb(skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, OOO_QUEUE, skb1,
+ } else if (tcp_try_coalesce(sk, skb1,
skb, &fragstolen)) {
goto coalesce_done;
}
@@ -4517,9 +4515,7 @@ insert:
merge_right:
/* Remove other segments covered by skb. */
- while ((q = rb_next(&skb->rbnode)) != NULL) {
- skb1 = rb_entry(q, struct sk_buff, rbnode);
-
+ while ((skb1 = skb_rb_next(skb)) != NULL) {
if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
break;
if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
@@ -4534,7 +4530,7 @@ merge_right:
tcp_drop(sk, skb1);
}
/* If there is no skb after us, we are the last_skb ! */
- if (!q)
+ if (!skb1)
tp->ooo_last_skb = skb;
add_sack:
@@ -4556,7 +4552,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
__skb_pull(skb, hdrlen);
eaten = (tail &&
- tcp_try_coalesce(sk, RCV_QUEUE, tail,
+ tcp_try_coalesce(sk, tail,
skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
@@ -4720,7 +4716,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li
if (list)
return !skb_queue_is_last(list, skb) ? skb->next : NULL;
- return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+ return skb_rb_next(skb);
}
static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
@@ -4741,7 +4737,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
}
/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
-static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -4749,7 +4745,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
while (*p) {
parent = *p;
- skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ skb1 = rb_to_skb(parent);
if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
p = &parent->rb_left;
else
@@ -4868,26 +4864,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb, *head;
- struct rb_node *p;
u32 start, end;
- p = rb_first(&tp->out_of_order_queue);
- skb = rb_entry_safe(p, struct sk_buff, rbnode);
+ skb = skb_rb_first(&tp->out_of_order_queue);
new_range:
if (!skb) {
- p = rb_last(&tp->out_of_order_queue);
- /* Note: This is possible p is NULL here. We do not
- * use rb_entry_safe(), as ooo_last_skb is valid only
- * if rbtree is not empty.
- */
- tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
+ tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue);
return;
}
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
for (head = skb;;) {
- skb = tcp_skb_next(skb, NULL);
+ skb = skb_rb_next(skb);
/* Range is terminated when we see a gap or when
* we are at the queue end.
@@ -4930,14 +4919,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
do {
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
- tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
+ tcp_drop(sk, rb_to_skb(node));
sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!tcp_under_memory_pressure(sk))
break;
node = prev;
} while (node);
- tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+ tp->ooo_last_skb = rb_to_skb(prev);
/* Reset SACK state. A conforming SACK implementation will
* do the same at a timeout based retransmit. When a connection
@@ -5532,20 +5521,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
security_inet_conn_established(sk, skb);
}
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
-
- tcp_init_metrics(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB);
/* Prevent spurious tcp_cwnd_restart() on first data
* packet.
*/
tp->lsndtime = tcp_jiffies32;
- tcp_init_buffer_space(sk);
-
if (sock_flag(sk, SOCK_KEEPOPEN))
inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
@@ -5559,7 +5541,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
struct tcp_fastopen_cookie *cookie)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL;
+ struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL;
u16 mss = tp->rx_opt.mss_clamp, try_exp = 0;
bool syn_drop = false;
@@ -5594,9 +5576,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp);
if (data) { /* Retransmit unacked data in SYN */
- tcp_for_write_queue_from(data, sk) {
- if (data == tcp_send_head(sk) ||
- __tcp_retransmit_skb(sk, data, 1))
+ skb_rbtree_walk_from(data) {
+ if (__tcp_retransmit_skb(sk, data, 1))
break;
}
tcp_rearm_rto(sk);
@@ -5712,7 +5693,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
if (tcp_is_sack(tp) && sysctl_tcp_fack)
tcp_enable_fack(tp);
- tcp_mtup_init(sk);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
@@ -5938,15 +5918,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (req) {
inet_csk(sk)->icsk_retransmits = 0;
reqsk_fastopen_remove(sk, req, false);
+ /* Re-arm the timer because data may have been sent out.
+ * This is similar to the regular data transmission case
+ * when new data has just been ack'ed.
+ *
+ * (TFO) - we could try to be more aggressive and
+ * retransmitting any data sooner based on when they
+ * are sent out.
+ */
+ tcp_rearm_rto(sk);
} else {
- /* Make sure socket is routed, for correct metrics. */
- icsk->icsk_af_ops->rebuild_header(sk);
- tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tcp_init_congestion_control(sk);
-
- tcp_mtup_init(sk);
+ tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
tp->copied_seq = tp->rcv_nxt;
- tcp_init_buffer_space(sk);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
@@ -5966,19 +5949,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
- if (req) {
- /* Re-arm the timer because data may have been sent out.
- * This is similar to the regular data transmission case
- * when new data has just been ack'ed.
- *
- * (TFO) - we could try to be more aggressive and
- * retransmitting any data sooner based on when they
- * are sent out.
- */
- tcp_rearm_rto(sk);
- } else
- tcp_init_metrics(sk);
-
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_update_pacing_rate(sk);
@@ -6075,6 +6045,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
case TCP_LAST_ACK:
if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
break;
+ /* fall through */
case TCP_FIN_WAIT1:
case TCP_FIN_WAIT2:
/* RFC 793 says to queue data in these states,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4c43365c374c..e22439f05e46 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -480,7 +480,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
TCP_TIMEOUT_INIT;
icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
BUG_ON(!skb);
tcp_mstamp_refresh(tp);
@@ -1783,8 +1783,9 @@ do_time_wait:
refcounted = false;
goto process;
}
- /* Fall through to ACK */
}
+ /* to ACK */
+ /* fall through */
case TCP_TW_ACK:
tcp_v4_timewait_ack(sk, skb);
break;
@@ -1896,6 +1897,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
/* If socket is aborted during connect operation */
tcp_free_fastopen_req(tp);
+ tcp_fastopen_destroy_cipher(sk);
tcp_saved_syn_free(tp);
sk_sockets_allocated_dec(sk);
@@ -2477,6 +2479,11 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_window_scaling = 1;
net->ipv4.sysctl_tcp_timestamps = 1;
+ net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
+ spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
+ net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60;
+ atomic_set(&net->ipv4.tfo_active_disable_times, 0);
+
return 0;
fail:
tcp_sk_exit(net);
@@ -2486,7 +2493,12 @@ fail:
static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
{
+ struct net *net;
+
inet_twsk_purge(&tcp_hashinfo, AF_INET);
+
+ list_for_each_entry(net, net_exit_list, exit_list)
+ tcp_fastopen_ctx_destroy(net);
}
static struct pernet_operations __net_initdata tcp_sk_ops = {
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 102b2c90bb80..0ab78abc811b 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -892,10 +892,14 @@ static void tcp_metrics_flush_all(struct net *net)
for (row = 0; row < max_rows; row++, hb++) {
struct tcp_metrics_block __rcu **pp;
+ bool match;
+
spin_lock_bh(&tcp_metrics_lock);
pp = &hb->chain;
for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) {
- if (net_eq(tm_net(tm), net)) {
+ match = net ? net_eq(tm_net(tm), net) :
+ !atomic_read(&tm_net(tm)->count);
+ if (match) {
*pp = tm->tcpm_next;
kfree_rcu(tm, rcu_head);
} else {
@@ -1018,14 +1022,14 @@ static int __net_init tcp_net_metrics_init(struct net *net)
return 0;
}
-static void __net_exit tcp_net_metrics_exit(struct net *net)
+static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list)
{
- tcp_metrics_flush_all(net);
+ tcp_metrics_flush_all(NULL);
}
static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
- .init = tcp_net_metrics_init,
- .exit = tcp_net_metrics_exit,
+ .init = tcp_net_metrics_init,
+ .exit_batch = tcp_net_metrics_exit_batch,
};
void __init tcp_metrics_init(void)
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 188a6f31356d..2341b9f857b6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -446,6 +446,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
INIT_LIST_HEAD(&newtp->tsq_node);
+ INIT_LIST_HEAD(&newtp->tsorted_sent_queue);
tcp_init_wl(newtp, treq->rcv_isn);
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 1ff73982e28c..a978e3fa71ec 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -39,7 +39,7 @@
* nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected
* nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8
* nv_rtt_factor RTT averaging factor
- * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur
+ * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur
* nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd
* nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd
* nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping
@@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2;
static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */
static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */
static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */
-static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */
+static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */
static int nv_cwnd_growth_rate_neg __read_mostly = 8;
static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */
static int nv_dec_eval_min_calls __read_mostly = 60;
@@ -101,6 +101,11 @@ struct tcpnv {
u32 nv_last_rtt; /* last rtt */
u32 nv_min_rtt; /* active min rtt. Used to determine slope */
u32 nv_min_rtt_new; /* min rtt for future use */
+ u32 nv_base_rtt; /* If non-zero it represents the threshold for
+ * congestion */
+ u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is
+ * set to 80% of nv_base_rtt. It helps reduce
+ * unfairness between flows */
u32 nv_rtt_max_rate; /* max rate seen during current RTT */
u32 nv_rtt_start_seq; /* current RTT ends when packet arrives
* acking beyond nv_rtt_start_seq */
@@ -132,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk)
static void tcpnv_init(struct sock *sk)
{
struct tcpnv *ca = inet_csk_ca(sk);
+ int base_rtt;
tcpnv_reset(ca, sk);
+ /* See if base_rtt is available from socket_ops bpf program.
+ * It is meant to be used in environments, such as communication
+ * within a datacenter, where we have reasonable estimates of
+ * RTTs
+ */
+ base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT);
+ if (base_rtt > 0) {
+ ca->nv_base_rtt = base_rtt;
+ ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */
+ } else {
+ ca->nv_base_rtt = 0;
+ ca->nv_lower_bound_rtt = 0;
+ }
+
ca->nv_allow_cwnd_growth = 1;
ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ;
ca->nv_min_rtt = NV_INIT_RTT;
@@ -144,6 +164,19 @@ static void tcpnv_init(struct sock *sk)
ca->cwnd_growth_factor = 0;
}
+/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt)
+ * bounds to RTT.
+ */
+inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val)
+{
+ if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt)
+ return ca->nv_lower_bound_rtt;
+ else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt)
+ return ca->nv_base_rtt;
+ else
+ return val;
+}
+
static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -265,6 +298,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
if (ca->nv_eval_call_cnt < 255)
ca->nv_eval_call_cnt++;
+ /* Apply bounds to rtt. Only used to update min_rtt */
+ avg_rtt = nv_get_bounded_rtt(ca, avg_rtt);
+
/* update min rtt if necessary */
if (avg_rtt < ca->nv_min_rtt)
ca->nv_min_rtt = avg_rtt;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0bc9e46a5369..988733f289c8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,6 +42,8 @@
#include <linux/gfp.h>
#include <linux/module.h>
+#include <trace/events/tcp.h>
+
/* People can turn this off for buggy TCP's found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;
@@ -66,15 +68,17 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
/* Account for new data that has been sent to the network. */
-static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
- tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+ __skb_unlink(skb, &sk->sk_write_queue);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+
tp->packets_out += tcp_skb_pcount(skb);
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
@@ -971,6 +975,12 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
HRTIMER_MODE_ABS_PINNED);
}
+static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
+{
+ skb->skb_mstamp = tp->tcp_mstamp;
+ list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue);
+}
+
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
@@ -1003,10 +1013,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
oskb = skb;
- if (unlikely(skb_cloned(skb)))
- skb = pskb_copy(skb, gfp_mask);
- else
- skb = skb_clone(skb, gfp_mask);
+
+ tcp_skb_tsorted_save(oskb) {
+ if (unlikely(skb_cloned(oskb)))
+ skb = pskb_copy(oskb, gfp_mask);
+ else
+ skb = skb_clone(oskb, gfp_mask);
+ } tcp_skb_tsorted_restore(oskb);
+
if (unlikely(!skb))
return -ENOBUFS;
}
@@ -1127,7 +1141,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
err = net_xmit_eval(err);
}
if (!err && oskb) {
- oskb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, oskb);
tcp_rate_skb_sent(sk, oskb);
}
return err;
@@ -1239,12 +1253,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2)
TCP_SKB_CB(skb)->eor = 0;
}
+/* Insert buff after skb on the write or rtx queue of sk. */
+static void tcp_insert_write_queue_after(struct sk_buff *skb,
+ struct sk_buff *buff,
+ struct sock *sk,
+ enum tcp_queue tcp_queue)
+{
+ if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE)
+ __skb_queue_after(&sk->sk_write_queue, skb, buff);
+ else
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
* Remember, these are still headerless SKBs at this point.
*/
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, u32 len,
unsigned int mss_now, gfp_t gfp)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -1327,7 +1354,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
+ list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor);
return 0;
}
@@ -1614,10 +1642,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
* is caused by insufficient sender buffer:
* 1) just sent some data (see tcp_write_xmit)
* 2) not cwnd limited (this else condition)
- * 3) no more data to send (null tcp_send_head )
+ * 3) no more data to send (tcp_write_queue_empty())
* 4) application is hitting buffer limit (SOCK_NOSPACE)
*/
- if (!tcp_send_head(sk) && sk->sk_socket &&
+ if (tcp_write_queue_empty(sk) && sk->sk_socket &&
test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) &&
(1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT))
tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED);
@@ -1813,7 +1841,8 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
* know that all the data is in scatter-gather pages, and that the
* packet has never been sent out before (and thus is not cloned).
*/
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue,
+ struct sk_buff *skb, unsigned int len,
unsigned int mss_now, gfp_t gfp)
{
struct sk_buff *buff;
@@ -1822,7 +1851,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* All of a TSO frame must be composed of paged data. */
if (skb->len != skb->data_len)
- return tcp_fragment(sk, skb, len, mss_now, gfp);
+ return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp);
buff = sk_stream_alloc_skb(sk, 0, gfp, true);
if (unlikely(!buff))
@@ -1858,7 +1887,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
/* Link BUFF into the send queue. */
__skb_header_release(buff);
- tcp_insert_write_queue_after(skb, buff, sk);
+ tcp_insert_write_queue_after(skb, buff, sk, tcp_queue);
return 0;
}
@@ -1928,8 +1957,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
goto send_now;
}
- head = tcp_write_queue_head(sk);
-
+ /* TODO : use tsorted_sent_queue ? */
+ head = tcp_rtx_queue_head(sk);
+ if (!head)
+ goto send_now;
age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp);
/* If next ACK is likely to come too late (half srtt), do not defer */
if (age < (tp->srtt_us >> 4))
@@ -2147,13 +2178,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
- /* Always send the 1st or 2nd skb in write queue.
+ /* Always send skb if rtx queue is empty.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
* This helps when TX completions are delayed too much.
*/
- if (skb == sk->sk_write_queue.next ||
- skb->prev == sk->sk_write_queue.next)
+ if (tcp_rtx_queue_empty(sk))
return false;
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2204,7 +2234,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type)
* it's the "most interesting" or current chrono we are
* tracking and starts busy chrono if we have pending data.
*/
- if (tcp_write_queue_empty(sk))
+ if (tcp_rtx_and_write_queues_empty(sk))
tcp_chrono_set(tp, TCP_CHRONO_UNSPEC);
else if (type == tp->chrono_type)
tcp_chrono_set(tp, TCP_CHRONO_BUSY);
@@ -2260,7 +2290,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
/* "skb_mstamp" is used as a start point for the retransmit timer */
- skb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, skb);
goto repair; /* Skip network transmission */
}
@@ -2299,7 +2329,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
nonagle);
if (skb->len > limit &&
- unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+ unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, limit, mss_now, gfp)))
break;
if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
@@ -2339,7 +2370,7 @@ repair:
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
- return !tp->packets_out && tcp_send_head(sk);
+ return !tp->packets_out && !tcp_write_queue_empty(sk);
}
bool tcp_schedule_loss_probe(struct sock *sk)
@@ -2363,7 +2394,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
return false;
if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) &&
- tcp_send_head(sk))
+ !tcp_write_queue_empty(sk))
return false;
/* Probe timeout is 2*rtt. Add minimum RTO to account
@@ -2416,18 +2447,14 @@ void tcp_send_loss_probe(struct sock *sk)
int mss = tcp_current_mss(sk);
skb = tcp_send_head(sk);
- if (skb) {
- if (tcp_snd_wnd_test(tp, skb, mss)) {
- pcount = tp->packets_out;
- tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
- if (tp->packets_out > pcount)
- goto probe_sent;
- goto rearm_timer;
- }
- skb = tcp_write_queue_prev(sk, skb);
- } else {
- skb = tcp_write_queue_tail(sk);
+ if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+ pcount = tp->packets_out;
+ tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+ if (tp->packets_out > pcount)
+ goto probe_sent;
+ goto rearm_timer;
}
+ skb = skb_rb_last(&sk->tcp_rtx_queue);
/* At most one outstanding TLP retransmission. */
if (tp->tlp_high_seq)
@@ -2445,10 +2472,11 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) {
- if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
+ if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
+ (pcount - 1) * mss, mss,
GFP_ATOMIC)))
goto rearm_timer;
- skb = tcp_write_queue_next(sk, skb);
+ skb = skb_rb_next(skb);
}
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
@@ -2648,7 +2676,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+ struct sk_buff *next_skb = skb_rb_next(skb);
int skb_size, next_skb_size;
skb_size = skb->len;
@@ -2665,8 +2693,6 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
}
tcp_highest_sack_combine(sk, next_skb, skb);
- tcp_unlink_write_queue(next_skb, sk);
-
if (next_skb->ip_summed == CHECKSUM_PARTIAL)
skb->ip_summed = CHECKSUM_PARTIAL;
@@ -2694,7 +2720,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
tcp_skb_collapse_tstamp(skb, next_skb);
- sk_wmem_free_skb(sk, next_skb);
+ tcp_rtx_queue_unlink_and_free(next_skb, sk);
return true;
}
@@ -2705,8 +2731,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
return false;
if (skb_cloned(skb))
return false;
- if (skb == tcp_send_head(sk))
- return false;
/* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false;
@@ -2729,7 +2753,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
- tcp_for_write_queue_from_safe(skb, tmp, sk) {
+ skb_rbtree_walk_from_safe(skb, tmp) {
if (!tcp_can_collapse(sk, skb))
break;
@@ -2804,7 +2828,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
len = cur_mss * segs;
if (skb->len > len) {
- if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC))
+ if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
+ cur_mss, GFP_ATOMIC))
return -ENOMEM; /* We'll try again later. */
} else {
if (skb_unclone(skb, GFP_ATOMIC))
@@ -2838,17 +2863,21 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb;
- nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
- err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ tcp_skb_tsorted_save(skb) {
+ nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
+ } tcp_skb_tsorted_restore(skb);
+
if (!err)
- skb->skb_mstamp = tp->tcp_mstamp;
+ tcp_update_skb_after_send(tp, skb);
} else {
err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
if (likely(!err)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+ trace_tcp_retransmit_skb(sk, skb);
} else if (err != -EBUSY) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
@@ -2892,29 +2921,21 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
void tcp_xmit_retransmit_queue(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *skb, *rtx_head, *hole = NULL;
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
- struct sk_buff *hole = NULL;
u32 max_segs;
int mib_idx;
if (!tp->packets_out)
return;
- if (tp->retransmit_skb_hint) {
- skb = tp->retransmit_skb_hint;
- } else {
- skb = tcp_write_queue_head(sk);
- }
-
+ rtx_head = tcp_rtx_queue_head(sk);
+ skb = tp->retransmit_skb_hint ?: rtx_head;
max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
- tcp_for_write_queue_from(skb, sk) {
+ skb_rbtree_walk_from(skb) {
__u8 sacked;
int segs;
- if (skb == tcp_send_head(sk))
- break;
-
if (tcp_pacing_check(sk))
break;
@@ -2959,7 +2980,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += tcp_skb_pcount(skb);
- if (skb == tcp_write_queue_head(sk) &&
+ if (skb == rtx_head &&
icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto,
@@ -3001,12 +3022,15 @@ void tcp_send_fin(struct sock *sk)
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
- if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) {
+ if (!tskb && tcp_under_memory_pressure(sk))
+ tskb = skb_rb_last(&sk->tcp_rtx_queue);
+
+ if (tskb) {
coalesce:
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
- if (!tcp_send_head(sk)) {
+ if (tcp_write_queue_empty(sk)) {
/* This means tskb was already sent.
* Pretend we included the FIN on previous transmit.
* We need to set tp->snd_nxt to the value it would have
@@ -3023,6 +3047,7 @@ coalesce:
goto coalesce;
return;
}
+ INIT_LIST_HEAD(&skb->tcp_tsorted_anchor);
skb_reserve(skb, MAX_TCP_HEADER);
sk_forced_mem_schedule(sk, skb->truesize);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
@@ -3071,20 +3096,24 @@ int tcp_send_synack(struct sock *sk)
{
struct sk_buff *skb;
- skb = tcp_write_queue_head(sk);
+ skb = tcp_rtx_queue_head(sk);
if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
- pr_debug("%s: wrong queue state\n", __func__);
+ pr_err("%s: wrong queue state\n", __func__);
return -EFAULT;
}
if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) {
if (skb_cloned(skb)) {
- struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+ struct sk_buff *nskb;
+
+ tcp_skb_tsorted_save(skb) {
+ nskb = skb_copy(skb, GFP_ATOMIC);
+ } tcp_skb_tsorted_restore(skb);
if (!nskb)
return -ENOMEM;
- tcp_unlink_write_queue(skb, sk);
+ INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor);
+ tcp_rtx_queue_unlink_and_free(skb, sk);
__skb_header_release(nskb);
- __tcp_add_write_queue_head(sk, nskb);
- sk_wmem_free_skb(sk, skb);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
sk->sk_wmem_queued += nskb->truesize;
sk_mem_charge(sk, nskb->truesize);
skb = nskb;
@@ -3307,7 +3336,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
tcb->end_seq += skb->len;
__skb_header_release(skb);
- __tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
tp->write_seq = tcb->end_seq;
@@ -3355,6 +3383,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
int copied = copy_from_iter(skb_put(syn_data, space), space,
&fo->data->msg_iter);
if (unlikely(!copied)) {
+ tcp_skb_tsorted_anchor_cleanup(syn_data);
kfree_skb(syn_data);
goto fallback;
}
@@ -3385,12 +3414,13 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH;
if (!err) {
tp->syn_data = (fo->copied > 0);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT);
goto done;
}
- /* data was not sent, this is our new send_head */
- sk->sk_send_head = syn_data;
+ /* data was not sent, put it in write_queue */
+ __skb_queue_tail(&sk->sk_write_queue, syn_data);
tp->packets_out -= tcp_skb_pcount(syn_data);
fallback:
@@ -3433,6 +3463,7 @@ int tcp_connect(struct sock *sk)
tp->retrans_stamp = tcp_time_stamp(tp);
tcp_connect_queue_skb(sk, buff);
tcp_ecn_send_syn(sk, buff);
+ tcp_rbtree_insert(&sk->tcp_rtx_queue, buff);
/* Send off SYN; include data in Fast Open. */
err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
@@ -3627,7 +3658,8 @@ int tcp_write_wakeup(struct sock *sk, int mib)
skb->len > mss) {
seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
- if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC))
+ if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
+ skb, seg_size, mss, GFP_ATOMIC))
return -1;
} else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(skb, mss);
@@ -3657,7 +3689,7 @@ void tcp_send_probe0(struct sock *sk)
err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
- if (tp->packets_out || !tcp_send_head(sk)) {
+ if (tp->packets_out || tcp_write_queue_empty(sk)) {
/* Cancel probe timer, if it is not required. */
icsk->icsk_probes_out = 0;
icsk->icsk_backoff = 0;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index 449cd914d58e..cda6074a429a 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -45,7 +45,7 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2)
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct sk_buff *skb;
+ struct sk_buff *skb, *n;
u32 reo_wnd;
*reo_timeout = 0;
@@ -58,45 +58,31 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U)
reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd);
- tcp_for_write_queue(skb, sk) {
+ list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue,
+ tcp_tsorted_anchor) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
+ s32 remaining;
- if (skb == tcp_send_head(sk))
- break;
-
- /* Skip ones already (s)acked */
- if (!after(scb->end_seq, tp->snd_una) ||
- scb->sacked & TCPCB_SACKED_ACKED)
+ /* Skip ones marked lost but not yet retransmitted */
+ if ((scb->sacked & TCPCB_LOST) &&
+ !(scb->sacked & TCPCB_SACKED_RETRANS))
continue;
- if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
- tp->rack.end_seq, scb->end_seq)) {
- /* Step 3 in draft-cheng-tcpm-rack-00.txt:
- * A packet is lost if its elapsed time is beyond
- * the recent RTT plus the reordering window.
- */
- u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp,
- skb->skb_mstamp);
- s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed;
-
- if (remaining < 0) {
- tcp_rack_mark_skb_lost(sk, skb);
- continue;
- }
-
- /* Skip ones marked lost but not yet retransmitted */
- if ((scb->sacked & TCPCB_LOST) &&
- !(scb->sacked & TCPCB_SACKED_RETRANS))
- continue;
+ if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp,
+ tp->rack.end_seq, scb->end_seq))
+ break;
+ /* A packet is lost if it has not been s/acked beyond
+ * the recent RTT plus the reordering window.
+ */
+ remaining = tp->rack.rtt_us + reo_wnd -
+ tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp);
+ if (remaining < 0) {
+ tcp_rack_mark_skb_lost(sk, skb);
+ list_del_init(&skb->tcp_tsorted_anchor);
+ } else {
/* Record maximum wait time (+1 to avoid 0) */
*reo_timeout = max_t(u32, *reo_timeout, 1 + remaining);
-
- } else if (!(scb->sacked & TCPCB_RETRANS)) {
- /* Original data are sent sequentially so stop early
- * b/c the rest are all sent after rack_sent
- */
- break;
}
}
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 655dd8d7f064..804a8d34ce86 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -156,8 +156,13 @@ static bool retransmits_timed_out(struct sock *sk,
return false;
start_ts = tcp_sk(sk)->retrans_stamp;
- if (unlikely(!start_ts))
- start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk));
+ if (unlikely(!start_ts)) {
+ struct sk_buff *head = tcp_rtx_queue_head(sk);
+
+ if (!head)
+ return false;
+ start_ts = tcp_skb_timestamp(head);
+ }
if (likely(timeout == 0)) {
linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
@@ -283,15 +288,17 @@ out:
*
* Returns: Nothing (void)
*/
-static void tcp_delack_timer(unsigned long data)
+static void tcp_delack_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_delack_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
tcp_delack_timer_handler(sk);
} else {
- inet_csk(sk)->icsk_ack.blocked = 1;
+ icsk->icsk_ack.blocked = 1;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
/* deleguate our work to tcp_release_cb() */
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
@@ -304,11 +311,12 @@ static void tcp_delack_timer(unsigned long data)
static void tcp_probe_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct sk_buff *skb = tcp_send_head(sk);
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
u32 start_ts;
- if (tp->packets_out || !tcp_send_head(sk)) {
+ if (tp->packets_out || !skb) {
icsk->icsk_probes_out = 0;
return;
}
@@ -321,9 +329,9 @@ static void tcp_probe_timer(struct sock *sk)
* corresponding system limit. We also implement similar policy when
* we use RTO to probe window in tcp_retransmit_timer().
*/
- start_ts = tcp_skb_timestamp(tcp_send_head(sk));
+ start_ts = tcp_skb_timestamp(skb);
if (!start_ts)
- tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp;
+ skb->skb_mstamp = tp->tcp_mstamp;
else if (icsk->icsk_user_timeout &&
(s32)(tcp_time_stamp(tp) - start_ts) >
jiffies_to_msecs(icsk->icsk_user_timeout))
@@ -408,7 +416,7 @@ void tcp_retransmit_timer(struct sock *sk)
if (!tp->packets_out)
goto out;
- WARN_ON(tcp_write_queue_empty(sk));
+ WARN_ON(tcp_rtx_queue_empty(sk));
tp->tlp_high_seq = 0;
@@ -441,7 +449,7 @@ void tcp_retransmit_timer(struct sock *sk)
goto out;
}
tcp_enter_loss(sk);
- tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1);
+ tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1);
__sk_dst_reset(sk);
goto out_reset_timer;
}
@@ -473,7 +481,7 @@ void tcp_retransmit_timer(struct sock *sk)
tcp_enter_loss(sk);
- if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) {
+ if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) {
/* Retransmission failed because of local congestion,
* do not backoff.
*/
@@ -570,9 +578,11 @@ out:
sk_mem_reclaim(sk);
}
-static void tcp_write_timer(unsigned long data)
+static void tcp_write_timer(struct timer_list *t)
{
- struct sock *sk = (struct sock *)data;
+ struct inet_connection_sock *icsk =
+ from_timer(icsk, t, icsk_retransmit_timer);
+ struct sock *sk = &icsk->icsk_inet.sk;
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
@@ -607,9 +617,9 @@ void tcp_set_keepalive(struct sock *sk, int val)
EXPORT_SYMBOL_GPL(tcp_set_keepalive);
-static void tcp_keepalive_timer (unsigned long data)
+static void tcp_keepalive_timer (struct timer_list *t)
{
- struct sock *sk = (struct sock *) data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 elapsed;
@@ -647,7 +657,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
- if (tp->packets_out || tcp_send_head(sk))
+ if (tp->packets_out || !tcp_write_queue_empty(sk))
goto resched;
elapsed = keepalive_time_elapsed(tp);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 218cfcc77650..ee113ff15fd0 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event);
static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp)
{
- return min(tp->snd_ssthresh, tp->snd_cwnd-1);
+ return min(tp->snd_ssthresh, tp->snd_cwnd);
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ebfbccae62fd..a6699af05539 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1209,8 +1209,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial,
if (likely(partial)) {
up->forward_deficit += size;
size = up->forward_deficit;
- if (size < (sk->sk_rcvbuf >> 2) &&
- !skb_queue_empty(&up->reader_queue))
+ if (size < (sk->sk_rcvbuf >> 2))
return;
} else {
size += up->forward_deficit;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4a96ebbf8eda..93f9c0a61911 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -152,7 +152,7 @@ static void ipv6_regen_rndid(struct inet6_dev *idev);
static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
-static int ipv6_count_addresses(struct inet6_dev *idev);
+static int ipv6_count_addresses(const struct inet6_dev *idev);
static int ipv6_generate_stable_address(struct in6_addr *addr,
u8 dad_count,
const struct inet6_dev *idev);
@@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.disable_policy = 0,
};
-/* Check if a valid qdisc is available */
-static inline bool addrconf_qdisc_ok(const struct net_device *dev)
+/* Check if link is ready: is it up and is a valid qdisc available */
+static inline bool addrconf_link_ready(const struct net_device *dev)
{
- return !qdisc_tx_is_noop(dev);
+ return netif_oper_up(dev) && !qdisc_tx_is_noop(dev);
}
static void addrconf_del_rs_timer(struct inet6_dev *idev)
@@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->token = in6addr_any;
- if (netif_running(dev) && addrconf_qdisc_ok(dev))
+ if (netif_running(dev) && addrconf_link_ready(dev))
ndev->if_flags |= IF_READY;
ipv6_mc_init_dev(ndev);
@@ -616,23 +616,23 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
{
struct net *net = sock_net(in_skb->sk);
struct nlattr *tb[NETCONFA_MAX+1];
+ struct inet6_dev *in6_dev = NULL;
+ struct net_device *dev = NULL;
struct netconfmsg *ncm;
struct sk_buff *skb;
struct ipv6_devconf *devconf;
- struct inet6_dev *in6_dev;
- struct net_device *dev;
int ifindex;
int err;
err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
devconf_ipv6_policy, extack);
if (err < 0)
- goto errout;
+ return err;
- err = -EINVAL;
if (!tb[NETCONFA_IFINDEX])
- goto errout;
+ return -EINVAL;
+ err = -EINVAL;
ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]);
switch (ifindex) {
case NETCONFA_IFINDEX_ALL:
@@ -642,10 +642,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
devconf = net->ipv6.devconf_dflt;
break;
default:
- dev = __dev_get_by_index(net, ifindex);
+ dev = dev_get_by_index(net, ifindex);
if (!dev)
- goto errout;
- in6_dev = __in6_dev_get(dev);
+ return -EINVAL;
+ in6_dev = in6_dev_get(dev);
if (!in6_dev)
goto errout;
devconf = &in6_dev->cnf;
@@ -653,7 +653,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
}
err = -ENOBUFS;
- skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC);
+ skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL);
if (!skb)
goto errout;
@@ -669,6 +669,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
}
err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
+ if (in6_dev)
+ in6_dev_put(in6_dev);
+ if (dev)
+ dev_put(dev);
return err;
}
@@ -945,7 +949,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
break;
}
- list_add_tail(&ifp->if_list, p);
+ list_add_tail_rcu(&ifp->if_list, p);
}
static u32 inet6_addr_hash(const struct in6_addr *addr)
@@ -953,18 +957,42 @@ static u32 inet6_addr_hash(const struct in6_addr *addr)
return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT);
}
+static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
+{
+ unsigned int hash;
+ int err = 0;
+
+ spin_lock(&addrconf_hash_lock);
+
+ /* Ignore adding duplicate addresses on an interface */
+ if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev)) {
+ ADBG("ipv6_add_addr: already assigned\n");
+ err = -EEXIST;
+ goto out;
+ }
+
+ /* Add to big hash table */
+ hash = inet6_addr_hash(&ifa->addr);
+ hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
+
+out:
+ spin_unlock(&addrconf_hash_lock);
+
+ return err;
+}
+
/* On success it returns ifp with increased reference count */
static struct inet6_ifaddr *
ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
const struct in6_addr *peer_addr, int pfxlen,
- int scope, u32 flags, u32 valid_lft, u32 prefered_lft)
+ int scope, u32 flags, u32 valid_lft, u32 prefered_lft,
+ bool can_block, struct netlink_ext_ack *extack)
{
+ gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
- struct rt6_info *rt;
- struct in6_validator_info i6vi;
- unsigned int hash;
+ struct rt6_info *rt = NULL;
int err = 0;
int addr_type = ipv6_addr_type(addr);
@@ -974,42 +1002,33 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
addr_type & IPV6_ADDR_LOOPBACK))
return ERR_PTR(-EADDRNOTAVAIL);
- rcu_read_lock_bh();
-
- in6_dev_hold(idev);
-
if (idev->dead) {
err = -ENODEV; /*XXX*/
- goto out2;
+ goto out;
}
if (idev->cnf.disable_ipv6) {
err = -EACCES;
- goto out2;
- }
-
- i6vi.i6vi_addr = *addr;
- i6vi.i6vi_dev = idev;
- rcu_read_unlock_bh();
-
- err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
-
- rcu_read_lock_bh();
- err = notifier_to_errno(err);
- if (err)
- goto out2;
-
- spin_lock(&addrconf_hash_lock);
-
- /* Ignore adding duplicate addresses on an interface */
- if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
- ADBG("ipv6_add_addr: already assigned\n");
- err = -EEXIST;
goto out;
}
- ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC);
+ /* validator notifier needs to be blocking;
+ * do not call in atomic context
+ */
+ if (can_block) {
+ struct in6_validator_info i6vi = {
+ .i6vi_addr = *addr,
+ .i6vi_dev = idev,
+ .extack = extack,
+ };
+
+ err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi);
+ err = notifier_to_errno(err);
+ if (err < 0)
+ goto out;
+ }
+ ifa = kzalloc(sizeof(*ifa), gfp_flags);
if (!ifa) {
ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
@@ -1019,6 +1038,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
rt = addrconf_dst_alloc(idev, addr, false);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
+ rt = NULL;
goto out;
}
@@ -1049,16 +1069,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa->rt = rt;
ifa->idev = idev;
+ in6_dev_hold(idev);
+
/* For caller */
refcount_set(&ifa->refcnt, 1);
- /* Add to big hash table */
- hash = inet6_addr_hash(addr);
+ rcu_read_lock_bh();
- hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
- spin_unlock(&addrconf_hash_lock);
+ err = ipv6_add_addr_hash(idev->dev, ifa);
+ if (err < 0) {
+ rcu_read_unlock_bh();
+ goto out;
+ }
write_lock(&idev->lock);
+
/* Add to inet6_dev unicast addr list. */
ipv6_link_dev_addr(idev, ifa);
@@ -1069,21 +1094,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
in6_ifa_hold(ifa);
write_unlock(&idev->lock);
-out2:
+
rcu_read_unlock_bh();
- if (likely(err == 0))
- inet6addr_notifier_call_chain(NETDEV_UP, ifa);
- else {
- kfree(ifa);
- in6_dev_put(idev);
+ inet6addr_notifier_call_chain(NETDEV_UP, ifa);
+out:
+ if (unlikely(err < 0)) {
+ if (rt)
+ ip6_rt_put(rt);
+ if (ifa) {
+ if (ifa->idev)
+ in6_dev_put(ifa->idev);
+ kfree(ifa);
+ }
ifa = ERR_PTR(err);
}
return ifa;
-out:
- spin_unlock(&addrconf_hash_lock);
- goto out2;
}
enum cleanup_prefix_rt_t {
@@ -1204,7 +1231,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE))
action = check_cleanup_prefix_route(ifp, &expires);
- list_del_init(&ifp->if_list);
+ list_del_rcu(&ifp->if_list);
__in6_ifa_put(ifp);
write_unlock_bh(&ifp->idev->lock);
@@ -1330,7 +1357,7 @@ retry:
ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
ipv6_addr_scope(&addr), addr_flags,
- tmp_valid_lft, tmp_prefered_lft);
+ tmp_valid_lft, tmp_prefered_lft, true, NULL);
if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
@@ -1558,8 +1585,7 @@ static int __ipv6_dev_get_saddr(struct net *net,
{
struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
- read_lock_bh(&idev->lock);
- list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) {
int i;
/*
@@ -1609,11 +1635,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
}
break;
} else if (minihiscore < miniscore) {
- if (hiscore->ifa)
- in6_ifa_put(hiscore->ifa);
-
- in6_ifa_hold(score->ifa);
-
swap(hiscore, score);
hiscore_idx = 1 - hiscore_idx;
@@ -1625,7 +1646,6 @@ static int __ipv6_dev_get_saddr(struct net *net,
}
}
out:
- read_unlock_bh(&idev->lock);
return hiscore_idx;
}
@@ -1662,6 +1682,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
int dst_type;
bool use_oif_addr = false;
int hiscore_idx = 0;
+ int ret = 0;
dst_type = __ipv6_addr_type(daddr);
dst.addr = daddr;
@@ -1737,15 +1758,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
}
out:
- rcu_read_unlock();
-
hiscore = &scores[hiscore_idx];
if (!hiscore->ifa)
- return -EADDRNOTAVAIL;
+ ret = -EADDRNOTAVAIL;
+ else
+ *saddr = hiscore->ifa->addr;
- *saddr = hiscore->ifa->addr;
- in6_ifa_put(hiscore->ifa);
- return 0;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
@@ -1785,15 +1805,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
return err;
}
-static int ipv6_count_addresses(struct inet6_dev *idev)
+static int ipv6_count_addresses(const struct inet6_dev *idev)
{
+ const struct inet6_ifaddr *ifp;
int cnt = 0;
- struct inet6_ifaddr *ifp;
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list)
+ rcu_read_lock();
+ list_for_each_entry_rcu(ifp, &idev->addr_list, if_list)
cnt++;
- read_unlock_bh(&idev->lock);
+ rcu_read_unlock();
return cnt;
}
@@ -1859,20 +1879,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
const unsigned int prefix_len, struct net_device *dev)
{
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
+ const struct inet6_dev *idev;
bool ret = false;
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
if (ret)
break;
}
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
@@ -1882,22 +1900,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix);
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
- struct inet6_dev *idev;
- struct inet6_ifaddr *ifa;
+ const struct inet6_ifaddr *ifa;
+ const struct inet6_dev *idev;
int onlink;
onlink = 0;
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- read_lock_bh(&idev->lock);
- list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) {
onlink = ipv6_prefix_equal(addr, &ifa->addr,
ifa->prefix_len);
if (onlink)
break;
}
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
return onlink;
@@ -2025,7 +2041,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
scope, flags, valid_lft,
- preferred_lft);
+ preferred_lft, false, NULL);
if (IS_ERR(ifp2))
goto lock_errdad;
@@ -2321,24 +2337,24 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
+ rcu_read_lock();
+ fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true);
if (!fn)
goto out;
- noflags |= RTF_CACHE;
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != dev->ifindex)
continue;
if ((rt->rt6i_flags & flags) != flags)
continue;
if ((rt->rt6i_flags & noflags) != 0)
continue;
- dst_hold(&rt->dst);
+ if (!dst_hold_safe(&rt->dst))
+ rt = NULL;
break;
}
out:
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -2483,7 +2499,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
pinfo->prefix_len,
addr_type&IPV6_ADDR_SCOPE_MASK,
addr_flags, valid_lft,
- prefered_lft);
+ prefered_lft, false, NULL);
if (IS_ERR_OR_NULL(ifp))
return -1;
@@ -2793,7 +2809,8 @@ static int inet6_addr_add(struct net *net, int ifindex,
const struct in6_addr *pfx,
const struct in6_addr *peer_pfx,
unsigned int plen, __u32 ifa_flags,
- __u32 prefered_lft, __u32 valid_lft)
+ __u32 prefered_lft, __u32 valid_lft,
+ struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
struct inet6_dev *idev;
@@ -2852,7 +2869,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
}
ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
- valid_lft, prefered_lft);
+ valid_lft, prefered_lft, true, extack);
if (!IS_ERR(ifp)) {
if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
@@ -2937,7 +2954,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
rtnl_lock();
err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
ireq.ifr6_prefixlen, IFA_F_PERMANENT,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL);
rtnl_unlock();
return err;
}
@@ -2967,7 +2984,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifp = ipv6_add_addr(idev, addr, NULL, plen,
scope, IFA_F_PERMANENT,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME,
+ true, NULL);
if (!IS_ERR(ifp)) {
spin_lock_bh(&ifp->lock);
ifp->flags &= ~IFA_F_TENTATIVE;
@@ -3067,7 +3085,7 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
#endif
ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
+ INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
if (!IS_ERR(ifp)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);
addrconf_dad_start(ifp);
@@ -3297,7 +3315,7 @@ static int fixup_permanent_addr(struct inet6_dev *idev,
struct rt6_info *rt, *prev;
rt = addrconf_dst_alloc(idev, &ifp->addr, false);
- if (unlikely(IS_ERR(rt)))
+ if (IS_ERR(rt))
return PTR_ERR(rt);
/* ifp->rt can be accessed outside of rtnl */
@@ -3403,7 +3421,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
/* restore routes for permanent addresses */
addrconf_permanent_addr(dev);
- if (!addrconf_qdisc_ok(dev)) {
+ if (!addrconf_link_ready(dev)) {
/* device is not ready yet. */
pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
dev->name);
@@ -3418,7 +3436,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
} else if (event == NETDEV_CHANGE) {
- if (!addrconf_qdisc_ok(dev)) {
+ if (!addrconf_link_ready(dev)) {
/* device is still not ready. */
break;
}
@@ -3562,7 +3580,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
- struct list_head del_list;
int _keep_addr;
bool keep_addr;
int state, i;
@@ -3654,7 +3671,6 @@ restart:
*/
keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6);
- INIT_LIST_HEAD(&del_list);
list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
struct rt6_info *rt = NULL;
bool keep;
@@ -3663,8 +3679,6 @@ restart:
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr);
- if (!keep)
- list_move(&ifa->if_list, &del_list);
write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
@@ -3698,19 +3712,14 @@ restart:
}
write_lock_bh(&idev->lock);
+ if (!keep) {
+ list_del_rcu(&ifa->if_list);
+ in6_ifa_put(ifa);
+ }
}
write_unlock_bh(&idev->lock);
- /* now clean up addresses to be removed */
- while (!list_empty(&del_list)) {
- ifa = list_first_entry(&del_list,
- struct inet6_ifaddr, if_list);
- list_del(&ifa->if_list);
-
- in6_ifa_put(ifa);
- }
-
/* Step 5: Discard anycast and multicast list */
if (how) {
ipv6_ac_destroy_dev(idev);
@@ -4579,7 +4588,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
*/
return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
ifm->ifa_prefixlen, ifa_flags,
- preferred_lft, valid_lft);
+ preferred_lft, valid_lft, extack);
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
@@ -4906,17 +4915,15 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
extack);
if (err < 0)
- goto errout;
+ return err;
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr) {
- err = -EINVAL;
- goto errout;
- }
+ if (!addr)
+ return -EINVAL;
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
- dev = __dev_get_by_index(net, ifm->ifa_index);
+ dev = dev_get_by_index(net, ifm->ifa_index);
ifa = ipv6_get_ifaddr(net, addr, dev, 1);
if (!ifa) {
@@ -4942,6 +4949,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
errout_ifa:
in6_ifa_put(ifa);
errout:
+ if (dev)
+ dev_put(dev);
return err;
}
@@ -5898,10 +5907,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
spin_lock(&ifa->lock);
if (ifa->rt) {
struct rt6_info *rt = ifa->rt;
- struct fib6_table *table = rt->rt6i_table;
int cpu;
- read_lock(&table->tb6_lock);
+ rcu_read_lock();
addrconf_set_nopolicy(ifa->rt, val);
if (rt->rt6i_pcpu) {
for_each_possible_cpu(cpu) {
@@ -5911,7 +5919,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
addrconf_set_nopolicy(*rtp, val);
}
}
- read_unlock(&table->tb6_lock);
+ rcu_read_unlock();
}
spin_unlock(&ifa->lock);
}
@@ -6585,13 +6593,13 @@ int __init addrconf_init(void)
__rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
__rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
__rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
- inet6_dump_ifaddr, 0);
+ inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
__rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
inet6_dump_ifmcaddr, 0);
__rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
inet6_dump_ifacaddr, 0);
__rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf, 0);
+ inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
ipv6_addr_label_rtnl_register();
@@ -6618,9 +6626,9 @@ void addrconf_cleanup(void)
unregister_pernet_subsys(&addrconf_ops);
ipv6_addr_label_cleanup();
- rtnl_lock();
+ rtnl_af_unregister(&inet6_ops);
- __rtnl_af_unregister(&inet6_ops);
+ rtnl_lock();
/* clean dev list */
for_each_netdev(&init_net, dev) {
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 9e3488d50b15..32b564dfd02a 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -88,7 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr)
EXPORT_SYMBOL(__ipv6_addr_type);
static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
-static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain);
+static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain);
int register_inet6addr_notifier(struct notifier_block *nb)
{
@@ -110,19 +110,20 @@ EXPORT_SYMBOL(inet6addr_notifier_call_chain);
int register_inet6addr_validator_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_register(&inet6addr_validator_chain, nb);
+ return blocking_notifier_chain_register(&inet6addr_validator_chain, nb);
}
EXPORT_SYMBOL(register_inet6addr_validator_notifier);
int unregister_inet6addr_validator_notifier(struct notifier_block *nb)
{
- return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb);
+ return blocking_notifier_chain_unregister(&inet6addr_validator_chain,
+ nb);
}
EXPORT_SYMBOL(unregister_inet6addr_validator_notifier);
int inet6addr_validator_notifier_call_chain(unsigned long val, void *v)
{
- return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v);
+ return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v);
}
EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain);
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index b055bc79f56d..2606d2fa3ac4 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -18,7 +18,6 @@
#include <linux/if_addrlabel.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
-#include <linux/refcount.h>
#if 0
#define ADDRLABEL(x...) printk(x)
@@ -30,30 +29,15 @@
* Policy Table
*/
struct ip6addrlbl_entry {
- possible_net_t lbl_net;
struct in6_addr prefix;
int prefixlen;
int ifindex;
int addrtype;
u32 label;
struct hlist_node list;
- refcount_t refcnt;
struct rcu_head rcu;
};
-static struct ip6addrlbl_table
-{
- struct hlist_head head;
- spinlock_t lock;
- u32 seq;
-} ip6addrlbl_table;
-
-static inline
-struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl)
-{
- return read_pnet(&lbl->lbl_net);
-}
-
/*
* Default policy table (RFC6724 + extensions)
*
@@ -125,36 +109,11 @@ static const __net_initconst struct ip6addrlbl_init_table
}
};
-/* Object management */
-static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
-{
- kfree(p);
-}
-
-static void ip6addrlbl_free_rcu(struct rcu_head *h)
-{
- ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
-}
-
-static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p)
-{
- return refcount_inc_not_zero(&p->refcnt);
-}
-
-static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
-{
- if (refcount_dec_and_test(&p->refcnt))
- call_rcu(&p->rcu, ip6addrlbl_free_rcu);
-}
-
/* Find label */
-static bool __ip6addrlbl_match(struct net *net,
- const struct ip6addrlbl_entry *p,
+static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p,
const struct in6_addr *addr,
int addrtype, int ifindex)
{
- if (!net_eq(ip6addrlbl_net(p), net))
- return false;
if (p->ifindex && p->ifindex != ifindex)
return false;
if (p->addrtype && p->addrtype != addrtype)
@@ -169,8 +128,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net,
int type, int ifindex)
{
struct ip6addrlbl_entry *p;
- hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
- if (__ip6addrlbl_match(net, p, addr, type, ifindex))
+
+ hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
+ if (__ip6addrlbl_match(p, addr, type, ifindex))
return p;
}
return NULL;
@@ -196,8 +156,7 @@ u32 ipv6_addr_label(struct net *net,
}
/* allocate one entry */
-static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
- const struct in6_addr *prefix,
+static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
int prefixlen, int ifindex,
u32 label)
{
@@ -236,24 +195,22 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
newp->addrtype = addrtype;
newp->label = label;
INIT_HLIST_NODE(&newp->list);
- write_pnet(&newp->lbl_net, net);
- refcount_set(&newp->refcnt, 1);
return newp;
}
/* add a label */
-static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
+static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp,
+ int replace)
{
- struct hlist_node *n;
struct ip6addrlbl_entry *last = NULL, *p = NULL;
+ struct hlist_node *n;
int ret = 0;
ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
replace);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
if (p->prefixlen == newp->prefixlen &&
- net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
p->ifindex == newp->ifindex &&
ipv6_addr_equal(&p->prefix, &newp->prefix)) {
if (!replace) {
@@ -261,7 +218,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
goto out;
}
hlist_replace_rcu(&p->list, &newp->list);
- ip6addrlbl_put(p);
+ kfree_rcu(p, rcu);
goto out;
} else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
(p->prefixlen < newp->prefixlen)) {
@@ -273,10 +230,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
if (last)
hlist_add_behind_rcu(&newp->list, &last->list);
else
- hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
+ hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head);
out:
if (!ret)
- ip6addrlbl_table.seq++;
+ net->ipv6.ip6addrlbl_table.seq++;
return ret;
}
@@ -292,14 +249,14 @@ static int ip6addrlbl_add(struct net *net,
__func__, prefix, prefixlen, ifindex, (unsigned int)label,
replace);
- newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label);
+ newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
if (IS_ERR(newp))
return PTR_ERR(newp);
- spin_lock(&ip6addrlbl_table.lock);
- ret = __ip6addrlbl_add(newp, replace);
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
+ ret = __ip6addrlbl_add(net, newp, replace);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
if (ret)
- ip6addrlbl_free(newp);
+ kfree(newp);
return ret;
}
@@ -315,13 +272,12 @@ static int __ip6addrlbl_del(struct net *net,
ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n",
__func__, prefix, prefixlen, ifindex);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
if (p->prefixlen == prefixlen &&
- net_eq(ip6addrlbl_net(p), net) &&
p->ifindex == ifindex &&
ipv6_addr_equal(&p->prefix, prefix)) {
hlist_del_rcu(&p->list);
- ip6addrlbl_put(p);
+ kfree_rcu(p, rcu);
ret = 0;
break;
}
@@ -340,9 +296,9 @@ static int ip6addrlbl_del(struct net *net,
__func__, prefix, prefixlen, ifindex);
ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
- spin_lock(&ip6addrlbl_table.lock);
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex);
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
return ret;
}
@@ -354,6 +310,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
ADDRLABEL(KERN_DEBUG "%s\n", __func__);
+ spin_lock_init(&net->ipv6.ip6addrlbl_table.lock);
+ INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
+
for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
int ret = ip6addrlbl_add(net,
ip6addrlbl_init_table[i].prefix,
@@ -373,14 +332,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net)
struct hlist_node *n;
/* Remove all labels belonging to the exiting net */
- spin_lock(&ip6addrlbl_table.lock);
- hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
- if (net_eq(ip6addrlbl_net(p), net)) {
- hlist_del_rcu(&p->list);
- ip6addrlbl_put(p);
- }
+ spin_lock(&net->ipv6.ip6addrlbl_table.lock);
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
+ hlist_del_rcu(&p->list);
+ kfree_rcu(p, rcu);
}
- spin_unlock(&ip6addrlbl_table.lock);
+ spin_unlock(&net->ipv6.ip6addrlbl_table.lock);
}
static struct pernet_operations ipv6_addr_label_ops = {
@@ -390,8 +347,6 @@ static struct pernet_operations ipv6_addr_label_ops = {
int __init ipv6_addr_label_init(void)
{
- spin_lock_init(&ip6addrlbl_table.lock);
-
return register_pernet_subsys(&ipv6_addr_label_ops);
}
@@ -510,11 +465,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
int err;
rcu_read_lock();
- hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {
- if (idx >= s_idx &&
- net_eq(ip6addrlbl_net(p), net)) {
+ hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) {
+ if (idx >= s_idx) {
err = ip6addrlbl_fill(skb, p,
- ip6addrlbl_table.seq,
+ net->ipv6.ip6addrlbl_table.seq,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWADDRLABEL,
@@ -567,38 +521,28 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return -EINVAL;
addr = nla_data(tb[IFAL_ADDRESS]);
- rcu_read_lock();
- p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- if (p && !ip6addrlbl_hold(p))
- p = NULL;
- lseq = ip6addrlbl_table.seq;
- rcu_read_unlock();
-
- if (!p) {
- err = -ESRCH;
- goto out;
- }
-
skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL);
- if (!skb) {
- ip6addrlbl_put(p);
+ if (!skb)
return -ENOBUFS;
- }
- err = ip6addrlbl_fill(skb, p, lseq,
- NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- RTM_NEWADDRLABEL, 0);
+ err = -ESRCH;
- ip6addrlbl_put(p);
+ rcu_read_lock();
+ p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
+ lseq = net->ipv6.ip6addrlbl_table.seq;
+ if (p)
+ err = ip6addrlbl_fill(skb, p, lseq,
+ NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq,
+ RTM_NEWADDRLABEL, 0);
+ rcu_read_unlock();
if (err < 0) {
WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
- goto out;
+ } else {
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
}
-
- err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
-out:
return err;
}
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 7802b72196f3..37bb33fbc742 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -271,6 +271,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
case NEXTHDR_DEST:
if (dir == XFRM_POLICY_OUT)
ipv6_rearrange_destopt(iph, exthdr.opth);
+ /* fall through */
case NEXTHDR_HOP:
if (!zero_out_mutable_opts(exthdr.opth)) {
net_dbg_ratelimited("overrun %sopts\n",
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 95516138e861..7835dea930b4 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -89,6 +89,7 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff)
*/
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr))
break;
+ /* fall through */
case 2: /* send ICMP PARM PROB regardless and drop packet */
icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff);
return false;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 305e2ed730bf..11025f8d124b 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp,
break;
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH)
- hdrlen = (hp->hdrlen+2)<<2;
+ hdrlen = ipv6_authlen(hp);
else
hdrlen = ipv6_optlen(hp);
@@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
{
unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);
u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- unsigned int len;
bool found;
if (fragoff)
@@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
start = *offset + sizeof(struct ipv6hdr);
nexthdr = ip6->nexthdr;
}
- len = skb->len - start;
do {
struct ipv6_opt_hdr _hdr, *hp;
@@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
if (!found) {
nexthdr = hp->nexthdr;
- len -= hdrlen;
start += hdrlen;
}
} while (!found);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5acb54405b10..6ae5dd3f4d0d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -250,16 +250,15 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset)
return (*op & 0xC0) == 0x80;
}
-int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
- struct icmp6hdr *thdr, int len)
+void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
+ struct icmp6hdr *thdr, int len)
{
struct sk_buff *skb;
struct icmp6hdr *icmp6h;
- int err = 0;
skb = skb_peek(&sk->sk_write_queue);
if (!skb)
- goto out;
+ return;
icmp6h = icmp6_hdr(skb);
memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
@@ -287,8 +286,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
tmp_csum);
}
ip6_push_pending_frames(sk);
-out:
- return err;
}
struct icmpv6_msg {
@@ -438,7 +435,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
int iif = 0;
int addr_type = 0;
int len;
- int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
if ((u8 *)hdr < skb->head ||
@@ -575,17 +571,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
rcu_read_lock();
idev = __in6_dev_get(skb->dev);
- err = ip6_append_data(sk, icmpv6_getfrag, &msg,
- len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr),
- &ipc6, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, &sockc_unused);
- if (err) {
+ if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+ len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr),
+ &ipc6, &fl6, (struct rt6_info *)dst,
+ MSG_DONTWAIT, &sockc_unused)) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
- len + sizeof(struct icmp6hdr));
+ icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ len + sizeof(struct icmp6hdr));
}
rcu_read_unlock();
out_dst_release:
@@ -682,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct icmpv6_msg msg;
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
- int err = 0;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
struct sockcm_cookie sockc_unused = {0};
@@ -719,8 +713,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- err = ip6_dst_lookup(net, sk, &dst, &fl6);
- if (err)
+ if (ip6_dst_lookup(net, sk, &dst, &fl6))
goto out;
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
@@ -737,17 +730,16 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
ipc6.dontfrag = np->dontfrag;
ipc6.opt = NULL;
- err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
- sizeof(struct icmp6hdr), &ipc6, &fl6,
- (struct rt6_info *)dst, MSG_DONTWAIT,
- &sockc_unused);
-
- if (err) {
+ if (ip6_append_data(sk, icmpv6_getfrag, &msg,
+ skb->len + sizeof(struct icmp6hdr),
+ sizeof(struct icmp6hdr), &ipc6, &fl6,
+ (struct rt6_info *)dst, MSG_DONTWAIT,
+ &sockc_unused)) {
__ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
- skb->len + sizeof(struct icmp6hdr));
+ icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
+ skb->len + sizeof(struct icmp6hdr));
}
dst_release(dst);
out:
@@ -872,10 +864,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
goto discard_it;
hdr = icmp6_hdr(skb);
- /*
- * Drop through to notify
- */
-
+ /* to notify */
+ /* fall through */
case ICMPV6_DEST_UNREACH:
case ICMPV6_TIME_EXCEED:
case ICMPV6_PARAMPROB:
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index e5308d7cbd75..1ada9672d198 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -38,14 +38,6 @@
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
-#define RT6_DEBUG 2
-
-#if RT6_DEBUG >= 3
-#define RT6_TRACE(x...) pr_debug(x)
-#else
-#define RT6_TRACE(x...) do { ; } while (0)
-#endif
-
static struct kmem_cache *fib6_node_kmem __read_mostly;
struct fib6_cleaner {
@@ -62,9 +54,12 @@ struct fib6_cleaner {
#define FWS_INIT FWS_L
#endif
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
-static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
+static struct rt6_info *fib6_find_prefix(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn);
+static struct fib6_node *fib6_repair_tree(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn);
static int fib6_walk(struct net *net, struct fib6_walker *w);
static int fib6_walk_continue(struct fib6_walker *w);
@@ -110,6 +105,20 @@ enum {
FIB6_NO_SERNUM_CHANGE = 0,
};
+void fib6_update_sernum(struct rt6_info *rt)
+{
+ struct fib6_table *table = rt->rt6i_table;
+ struct net *net = dev_net(rt->dst.dev);
+ struct fib6_node *fn;
+
+ spin_lock_bh(&table->tb6_lock);
+ fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&table->tb6_lock));
+ if (fn)
+ fn->fn_sernum = fib6_new_sernum(net);
+ spin_unlock_bh(&table->tb6_lock);
+}
+
/*
* Auxiliary address test functions for the radix tree.
*
@@ -140,18 +149,21 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-static struct fib6_node *node_alloc(void)
+static struct fib6_node *node_alloc(struct net *net)
{
struct fib6_node *fn;
fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC);
+ if (fn)
+ net->ipv6.rt6_stats->fib_nodes++;
return fn;
}
-static void node_free_immediate(struct fib6_node *fn)
+static void node_free_immediate(struct net *net, struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
+ net->ipv6.rt6_stats->fib_nodes--;
}
static void node_free_rcu(struct rcu_head *head)
@@ -161,9 +173,10 @@ static void node_free_rcu(struct rcu_head *head)
kmem_cache_free(fib6_node_kmem, fn);
}
-static void node_free(struct fib6_node *fn)
+static void node_free(struct net *net, struct fib6_node *fn)
{
call_rcu(&fn->rcu, node_free_rcu);
+ net->ipv6.rt6_stats->fib_nodes--;
}
void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
@@ -185,9 +198,6 @@ void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
*ppcpu_rt = NULL;
}
}
-
- free_percpu(non_pcpu_rt->rt6i_pcpu);
- non_pcpu_rt->rt6i_pcpu = NULL;
}
EXPORT_SYMBOL_GPL(rt6_free_pcpu);
@@ -205,8 +215,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb)
* Initialize table lock at a single place to give lockdep a key,
* tables aren't visible prior to being linked to the list.
*/
- rwlock_init(&tb->tb6_lock);
-
+ spin_lock_init(&tb->tb6_lock);
h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1);
/*
@@ -225,7 +234,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id)
table = kzalloc(sizeof(*table), GFP_ATOMIC);
if (table) {
table->tb6_id = id;
- table->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(table->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&table->tb6_peers);
}
@@ -322,11 +332,8 @@ unsigned int fib6_tables_seq_read(struct net *net)
struct hlist_head *head = &net->ipv6.fib_table_hash[h];
struct fib6_table *tb;
- hlist_for_each_entry_rcu(tb, head, tb6_hlist) {
- read_lock_bh(&tb->tb6_lock);
+ hlist_for_each_entry_rcu(tb, head, tb6_hlist)
fib_seq += tb->fib_seq;
- read_unlock_bh(&tb->tb6_lock);
- }
}
rcu_read_unlock();
@@ -372,7 +379,7 @@ static int fib6_node_dump(struct fib6_walker *w)
{
struct rt6_info *rt;
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next)
+ for_each_fib6_walker_rt(w)
fib6_rt_dump(rt, w->args);
w->leaf = NULL;
return 0;
@@ -382,9 +389,9 @@ static void fib6_table_dump(struct net *net, struct fib6_table *tb,
struct fib6_walker *w)
{
w->root = &tb->tb6_root;
- read_lock_bh(&tb->tb6_lock);
+ spin_lock_bh(&tb->tb6_lock);
fib6_walk(net, w);
- read_unlock_bh(&tb->tb6_lock);
+ spin_unlock_bh(&tb->tb6_lock);
}
/* Called with rcu_read_lock() */
@@ -421,7 +428,7 @@ static int fib6_dump_node(struct fib6_walker *w)
int res;
struct rt6_info *rt;
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_walker_rt(w) {
res = rt6_dump_route(rt, w->args);
if (res < 0) {
/* Frame is full, suspend walking */
@@ -480,9 +487,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
w->count = 0;
w->skip = 0;
- read_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
res = fib6_walk(net, w);
- read_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
cb->args[5] = w->root->fn_sernum;
@@ -497,9 +504,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
} else
w->skip = 0;
- read_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
res = fib6_walk_continue(w);
- read_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
if (res <= 0) {
fib6_walker_unlink(net, w);
cb->args[4] = 0;
@@ -580,11 +587,13 @@ out:
* node.
*/
-static struct fib6_node *fib6_add_1(struct fib6_node *root,
- struct in6_addr *addr, int plen,
- int offset, int allow_create,
- int replace_required, int sernum,
- struct netlink_ext_ack *extack)
+static struct fib6_node *fib6_add_1(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *root,
+ struct in6_addr *addr, int plen,
+ int offset, int allow_create,
+ int replace_required,
+ struct netlink_ext_ack *extack)
{
struct fib6_node *fn, *in, *ln;
struct fib6_node *pn = NULL;
@@ -599,7 +608,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
fn = root;
do {
- key = (struct rt6key *)((u8 *)fn->leaf + offset);
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ key = (struct rt6key *)((u8 *)leaf + offset);
/*
* Prefix match
@@ -625,12 +636,10 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
if (plen == fn->fn_bit) {
/* clean up an intermediate node */
if (!(fn->fn_flags & RTN_RTINFO)) {
- rt6_release(fn->leaf);
- fn->leaf = NULL;
+ RCU_INIT_POINTER(fn->leaf, NULL);
+ rt6_release(leaf);
}
- fn->fn_sernum = sernum;
-
return fn;
}
@@ -639,10 +648,13 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
*/
/* Try to walk down on tree. */
- fn->fn_sernum = sernum;
dir = addr_bit_set(addr, fn->fn_bit);
pn = fn;
- fn = dir ? fn->right : fn->left;
+ fn = dir ?
+ rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock)) :
+ rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
} while (fn);
if (!allow_create) {
@@ -668,19 +680,17 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,
* Create new leaf node without children.
*/
- ln = node_alloc();
+ ln = node_alloc(net);
if (!ln)
return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
-
- ln->parent = pn;
- ln->fn_sernum = sernum;
+ RCU_INIT_POINTER(ln->parent, pn);
if (dir)
- pn->right = ln;
+ rcu_assign_pointer(pn->right, ln);
else
- pn->left = ln;
+ rcu_assign_pointer(pn->left, ln);
return ln;
@@ -694,7 +704,8 @@ insert_above:
* and the current
*/
- pn = fn->parent;
+ pn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
/* find 1st bit in difference between the 2 addrs.
@@ -710,14 +721,14 @@ insert_above:
* (new leaf node)[ln] (old node)[fn]
*/
if (plen > bit) {
- in = node_alloc();
- ln = node_alloc();
+ in = node_alloc(net);
+ ln = node_alloc(net);
if (!in || !ln) {
if (in)
- node_free_immediate(in);
+ node_free_immediate(net, in);
if (ln)
- node_free_immediate(ln);
+ node_free_immediate(net, ln);
return ERR_PTR(-ENOMEM);
}
@@ -731,31 +742,28 @@ insert_above:
in->fn_bit = bit;
- in->parent = pn;
+ RCU_INIT_POINTER(in->parent, pn);
in->leaf = fn->leaf;
- atomic_inc(&in->leaf->rt6i_ref);
-
- in->fn_sernum = sernum;
+ atomic_inc(&rcu_dereference_protected(in->leaf,
+ lockdep_is_held(&table->tb6_lock))->rt6i_ref);
/* update parent pointer */
if (dir)
- pn->right = in;
+ rcu_assign_pointer(pn->right, in);
else
- pn->left = in;
+ rcu_assign_pointer(pn->left, in);
ln->fn_bit = plen;
- ln->parent = in;
- fn->parent = in;
-
- ln->fn_sernum = sernum;
+ RCU_INIT_POINTER(ln->parent, in);
+ rcu_assign_pointer(fn->parent, in);
if (addr_bit_set(addr, bit)) {
- in->right = ln;
- in->left = fn;
+ rcu_assign_pointer(in->right, ln);
+ rcu_assign_pointer(in->left, fn);
} else {
- in->left = ln;
- in->right = fn;
+ rcu_assign_pointer(in->left, ln);
+ rcu_assign_pointer(in->right, fn);
}
} else { /* plen <= bit */
@@ -765,28 +773,26 @@ insert_above:
* (old node)[fn] NULL
*/
- ln = node_alloc();
+ ln = node_alloc(net);
if (!ln)
return ERR_PTR(-ENOMEM);
ln->fn_bit = plen;
- ln->parent = pn;
-
- ln->fn_sernum = sernum;
-
- if (dir)
- pn->right = ln;
- else
- pn->left = ln;
+ RCU_INIT_POINTER(ln->parent, pn);
if (addr_bit_set(&key->addr, plen))
- ln->right = fn;
+ RCU_INIT_POINTER(ln->right, fn);
else
- ln->left = fn;
+ RCU_INIT_POINTER(ln->left, fn);
- fn->parent = ln;
+ rcu_assign_pointer(fn->parent, ln);
+
+ if (dir)
+ rcu_assign_pointer(pn->right, ln);
+ else
+ rcu_assign_pointer(pn->left, ln);
}
return ln;
}
@@ -832,6 +838,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc)
static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
struct net *net)
{
+ struct fib6_table *table = rt->rt6i_table;
+
if (atomic_read(&rt->rt6i_ref) != 1) {
/* This route is used as dummy address holder in some split
* nodes. It is not leaked, but it still holds other resources,
@@ -840,12 +848,17 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
* to still alive ones.
*/
while (fn) {
- if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) {
- fn->leaf = fib6_find_prefix(net, fn);
- atomic_inc(&fn->leaf->rt6i_ref);
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *new_leaf;
+ if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
+ new_leaf = fib6_find_prefix(net, table, fn);
+ atomic_inc(&new_leaf->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, new_leaf);
rt6_release(rt);
}
- fn = fn->parent;
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
}
}
}
@@ -857,9 +870,11 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn,
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
struct nl_info *info, struct mx6_config *mxc)
{
+ struct rt6_info *leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
struct rt6_info *iter = NULL;
- struct rt6_info **ins;
- struct rt6_info **fallback_ins = NULL;
+ struct rt6_info __rcu **ins;
+ struct rt6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
int add = (!info->nlh ||
@@ -874,7 +889,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
ins = &fn->leaf;
- for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) {
+ for (iter = leaf; iter;
+ iter = rcu_dereference_protected(iter->dst.rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
/*
* Search for duplicates
*/
@@ -936,7 +953,8 @@ next_iter:
if (fallback_ins && !found) {
/* No ECMP-able route found, replace first non-ECMP one */
ins = fallback_ins;
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
found++;
}
@@ -950,7 +968,7 @@ next_iter:
struct rt6_info *sibling, *temp_sibling;
/* Find the first route that have the same metric */
- sibling = fn->leaf;
+ sibling = leaf;
while (sibling) {
if (sibling->rt6i_metric == rt->rt6i_metric &&
rt6_qualify_for_ecmp(sibling)) {
@@ -958,7 +976,8 @@ next_iter:
&sibling->rt6i_siblings);
break;
}
- sibling = sibling->dst.rt6_next;
+ sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
@@ -987,10 +1006,10 @@ add:
if (err)
return err;
- rt->dst.rt6_next = iter;
- *ins = rt;
- rcu_assign_pointer(rt->rt6i_node, fn);
+ rcu_assign_pointer(rt->dst.rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(rt->rt6i_node, fn);
+ rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
rt);
if (!info->skip_notify)
@@ -1016,10 +1035,10 @@ add:
if (err)
return err;
- *ins = rt;
+ atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rt->dst.rt6_next = iter->dst.rt6_next;
- atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
rt);
if (!info->skip_notify)
@@ -1031,14 +1050,15 @@ add:
nsiblings = iter->rt6i_nsiblings;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
- if (fn->rr_ptr == iter)
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
rt6_release(iter);
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
ins = &rt->dst.rt6_next;
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
while (iter) {
if (iter->rt6i_metric > rt->rt6i_metric)
break;
@@ -1046,14 +1066,16 @@ add:
*ins = iter->dst.rt6_next;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
- if (fn->rr_ptr == iter)
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
fn->rr_ptr = NULL;
rt6_release(iter);
nsiblings--;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
ins = &iter->dst.rt6_next;
}
- iter = *ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
WARN_ON(nsiblings != 0);
}
@@ -1077,16 +1099,33 @@ void fib6_force_start_gc(struct net *net)
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
+static void fib6_update_sernum_upto_root(struct rt6_info *rt,
+ int sernum)
+{
+ struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+
+ /* paired with smp_rmb() in rt6_get_cookie_safe() */
+ smp_wmb();
+ while (fn) {
+ fn->fn_sernum = sernum;
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ }
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
* with source addr info in sub-trees
+ * Need to own table->tb6_lock
*/
int fib6_add(struct fib6_node *root, struct rt6_info *rt,
struct nl_info *info, struct mx6_config *mxc,
struct netlink_ext_ack *extack)
{
+ struct fib6_table *table = rt->rt6i_table;
struct fib6_node *fn, *pn = NULL;
int err = -ENOMEM;
int allow_create = 1;
@@ -1095,6 +1134,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt)))
return -EINVAL;
+ if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE))
+ return -EINVAL;
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1105,9 +1146,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (!allow_create && !replace_required)
pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n");
- fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
+ fn = fib6_add_1(info->nl_net, table, root,
+ &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
offsetof(struct rt6_info, rt6i_dst), allow_create,
- replace_required, sernum, extack);
+ replace_required, extack);
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
fn = NULL;
@@ -1120,7 +1162,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (rt->rt6i_src.plen) {
struct fib6_node *sn;
- if (!fn->subtree) {
+ if (!rcu_access_pointer(fn->subtree)) {
struct fib6_node *sfn;
/*
@@ -1134,42 +1176,40 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
*/
/* Create subtree root node */
- sfn = node_alloc();
+ sfn = node_alloc(info->nl_net);
if (!sfn)
goto failure;
- sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
+ rcu_assign_pointer(sfn->leaf,
+ info->nl_net->ipv6.ip6_null_entry);
sfn->fn_flags = RTN_ROOT;
- sfn->fn_sernum = sernum;
/* Now add the first leaf node to new subtree */
- sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
- rt->rt6i_src.plen,
+ sn = fib6_add_1(info->nl_net, table, sfn,
+ &rt->rt6i_src.addr, rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum,
- extack);
+ allow_create, replace_required, extack);
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
root, and then (in failure) stale node
in main tree.
*/
- node_free_immediate(sfn);
+ node_free_immediate(info->nl_net, sfn);
err = PTR_ERR(sn);
goto failure;
}
/* Now link new subtree to main tree */
- sfn->parent = fn;
- fn->subtree = sfn;
+ rcu_assign_pointer(sfn->parent, fn);
+ rcu_assign_pointer(fn->subtree, sfn);
} else {
- sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
- rt->rt6i_src.plen,
+ sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn),
+ &rt->rt6i_src.addr, rt->rt6i_src.plen,
offsetof(struct rt6_info, rt6i_src),
- allow_create, replace_required, sernum,
- extack);
+ allow_create, replace_required, extack);
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
@@ -1177,9 +1217,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
}
}
- if (!fn->leaf) {
- fn->leaf = rt;
+ if (!rcu_access_pointer(fn->leaf)) {
atomic_inc(&rt->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, rt);
}
fn = sn;
}
@@ -1187,9 +1227,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
err = fib6_add_rt2node(fn, rt, info, mxc);
if (!err) {
+ fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
- if (!(rt->rt6i_flags & RTF_CACHE))
- fib6_prune_clones(info->nl_net, pn);
}
out:
@@ -1199,19 +1238,23 @@ out:
* If fib6_add_1 has cleared the old leaf pointer in the
* super-tree leaf node we have to find a new one for it.
*/
- if (pn != fn && pn->leaf == rt) {
- pn->leaf = NULL;
+ struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (pn != fn && pn_leaf == rt) {
+ pn_leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
atomic_dec(&rt->rt6i_ref);
}
- if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) {
- pn->leaf = fib6_find_prefix(info->nl_net, pn);
+ if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+ pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
#if RT6_DEBUG >= 2
- if (!pn->leaf) {
- WARN_ON(pn->leaf == NULL);
- pn->leaf = info->nl_net->ipv6.ip6_null_entry;
+ if (!pn_leaf) {
+ WARN_ON(!pn_leaf);
+ pn_leaf = info->nl_net->ipv6.ip6_null_entry;
}
#endif
- atomic_inc(&pn->leaf->rt6i_ref);
+ atomic_inc(&pn_leaf->rt6i_ref);
+ rcu_assign_pointer(pn->leaf, pn_leaf);
}
#endif
goto failure;
@@ -1226,7 +1269,7 @@ failure:
* fn->leaf.
*/
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
- fib6_repair_tree(info->nl_net, fn);
+ fib6_repair_tree(info->nl_net, table, fn);
/* Always release dst as dst->__refcnt is guaranteed
* to be taken before entering this function
*/
@@ -1264,7 +1307,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
dir = addr_bit_set(args->addr, fn->fn_bit);
- next = dir ? fn->right : fn->left;
+ next = dir ? rcu_dereference(fn->right) :
+ rcu_dereference(fn->left);
if (next) {
fn = next;
@@ -1274,18 +1318,22 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
}
while (fn) {
- if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) {
+ struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+ if (subtree || fn->fn_flags & RTN_RTINFO) {
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6key *key;
- key = (struct rt6key *) ((u8 *) fn->leaf +
- args->offset);
+ if (!leaf)
+ goto backtrack;
+
+ key = (struct rt6key *) ((u8 *)leaf + args->offset);
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
- if (fn->subtree) {
+ if (subtree) {
struct fib6_node *sfn;
- sfn = fib6_lookup_1(fn->subtree,
- args + 1);
+ sfn = fib6_lookup_1(subtree, args + 1);
if (!sfn)
goto backtrack;
fn = sfn;
@@ -1295,18 +1343,18 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root,
return fn;
}
}
-#ifdef CONFIG_IPV6_SUBTREES
backtrack:
-#endif
if (fn->fn_flags & RTN_ROOT)
break;
- fn = fn->parent;
+ fn = rcu_dereference(fn->parent);
}
return NULL;
}
+/* called with rcu_read_lock() held
+ */
struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr,
const struct in6_addr *saddr)
{
@@ -1337,54 +1385,87 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad
/*
* Get node with specified destination prefix (and source prefix,
* if subtrees are used)
+ * exact_match == true means we try to find fn with exact match of
+ * the passed in prefix addr
+ * exact_match == false means we try to find fn with longest prefix
+ * match of the passed in prefix addr. This is useful for finding fn
+ * for cached route as it will be stored in the exception table under
+ * the node with longest prefix length.
*/
static struct fib6_node *fib6_locate_1(struct fib6_node *root,
const struct in6_addr *addr,
- int plen, int offset)
+ int plen, int offset,
+ bool exact_match)
{
- struct fib6_node *fn;
+ struct fib6_node *fn, *prev = NULL;
for (fn = root; fn ; ) {
- struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
+ struct rt6key *key;
+
+ /* This node is being deleted */
+ if (!leaf) {
+ if (plen <= fn->fn_bit)
+ goto out;
+ else
+ goto next;
+ }
+
+ key = (struct rt6key *)((u8 *)leaf + offset);
/*
* Prefix match
*/
if (plen < fn->fn_bit ||
!ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
- return NULL;
+ goto out;
if (plen == fn->fn_bit)
return fn;
+ prev = fn;
+
+next:
/*
* We have more bits to go
*/
if (addr_bit_set(addr, fn->fn_bit))
- fn = fn->right;
+ fn = rcu_dereference(fn->right);
else
- fn = fn->left;
+ fn = rcu_dereference(fn->left);
}
- return NULL;
+out:
+ if (exact_match)
+ return NULL;
+ else
+ return prev;
}
struct fib6_node *fib6_locate(struct fib6_node *root,
const struct in6_addr *daddr, int dst_len,
- const struct in6_addr *saddr, int src_len)
+ const struct in6_addr *saddr, int src_len,
+ bool exact_match)
{
struct fib6_node *fn;
fn = fib6_locate_1(root, daddr, dst_len,
- offsetof(struct rt6_info, rt6i_dst));
+ offsetof(struct rt6_info, rt6i_dst),
+ exact_match);
#ifdef CONFIG_IPV6_SUBTREES
if (src_len) {
WARN_ON(saddr == NULL);
- if (fn && fn->subtree)
- fn = fib6_locate_1(fn->subtree, saddr, src_len,
- offsetof(struct rt6_info, rt6i_src));
+ if (fn) {
+ struct fib6_node *subtree = FIB6_SUBTREE(fn);
+
+ if (subtree) {
+ fn = fib6_locate_1(subtree, saddr, src_len,
+ offsetof(struct rt6_info, rt6i_src),
+ exact_match);
+ }
+ }
}
#endif
@@ -1400,16 +1481,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root,
*
*/
-static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
+static struct rt6_info *fib6_find_prefix(struct net *net,
+ struct fib6_table *table,
+ struct fib6_node *fn)
{
+ struct fib6_node *child_left, *child_right;
+
if (fn->fn_flags & RTN_ROOT)
return net->ipv6.ip6_null_entry;
while (fn) {
- if (fn->left)
- return fn->left->leaf;
- if (fn->right)
- return fn->right->leaf;
+ child_left = rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
+ child_right = rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock));
+ if (child_left)
+ return rcu_dereference_protected(child_left->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (child_right)
+ return rcu_dereference_protected(child_right->leaf,
+ lockdep_is_held(&table->tb6_lock));
fn = FIB6_SUBTREE(fn);
}
@@ -1419,31 +1510,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn)
/*
* Called to trim the tree of intermediate nodes when possible. "fn"
* is the node we want to try and remove.
+ * Need to own table->tb6_lock
*/
static struct fib6_node *fib6_repair_tree(struct net *net,
- struct fib6_node *fn)
+ struct fib6_table *table,
+ struct fib6_node *fn)
{
int children;
int nstate;
- struct fib6_node *child, *pn;
+ struct fib6_node *child;
struct fib6_walker *w;
int iter = 0;
for (;;) {
+ struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *fn_l = rcu_dereference_protected(fn->left,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn_r = rcu_dereference_protected(pn->right,
+ lockdep_is_held(&table->tb6_lock));
+ struct fib6_node *pn_l = rcu_dereference_protected(pn->left,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ struct rt6_info *new_fn_leaf;
+
RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
iter++;
WARN_ON(fn->fn_flags & RTN_RTINFO);
WARN_ON(fn->fn_flags & RTN_TL_ROOT);
- WARN_ON(fn->leaf);
+ WARN_ON(fn_leaf);
children = 0;
child = NULL;
- if (fn->right)
- child = fn->right, children |= 1;
- if (fn->left)
- child = fn->left, children |= 2;
+ if (fn_r)
+ child = fn_r, children |= 1;
+ if (fn_l)
+ child = fn_l, children |= 2;
if (children == 3 || FIB6_SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
@@ -1451,36 +1560,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
|| (children && fn->fn_flags & RTN_ROOT)
#endif
) {
- fn->leaf = fib6_find_prefix(net, fn);
+ new_fn_leaf = fib6_find_prefix(net, table, fn);
#if RT6_DEBUG >= 2
- if (!fn->leaf) {
- WARN_ON(!fn->leaf);
- fn->leaf = net->ipv6.ip6_null_entry;
+ if (!new_fn_leaf) {
+ WARN_ON(!new_fn_leaf);
+ new_fn_leaf = net->ipv6.ip6_null_entry;
}
#endif
- atomic_inc(&fn->leaf->rt6i_ref);
- return fn->parent;
+ atomic_inc(&new_fn_leaf->rt6i_ref);
+ rcu_assign_pointer(fn->leaf, new_fn_leaf);
+ return pn;
}
- pn = fn->parent;
#ifdef CONFIG_IPV6_SUBTREES
if (FIB6_SUBTREE(pn) == fn) {
WARN_ON(!(fn->fn_flags & RTN_ROOT));
- FIB6_SUBTREE(pn) = NULL;
+ RCU_INIT_POINTER(pn->subtree, NULL);
nstate = FWS_L;
} else {
WARN_ON(fn->fn_flags & RTN_ROOT);
#endif
- if (pn->right == fn)
- pn->right = child;
- else if (pn->left == fn)
- pn->left = child;
+ if (pn_r == fn)
+ rcu_assign_pointer(pn->right, child);
+ else if (pn_l == fn)
+ rcu_assign_pointer(pn->left, child);
#if RT6_DEBUG >= 2
else
WARN_ON(1);
#endif
if (child)
- child->parent = pn;
+ rcu_assign_pointer(child->parent, pn);
nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
}
@@ -1489,19 +1598,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (!child) {
- if (w->root == fn) {
- w->root = w->node = NULL;
- RT6_TRACE("W %p adjusted by delroot 1\n", w);
- } else if (w->node == fn) {
+ if (w->node == fn) {
RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
w->node = pn;
w->state = nstate;
}
} else {
- if (w->root == fn) {
- w->root = child;
- RT6_TRACE("W %p adjusted by delroot 2\n", w);
- }
if (w->node == fn) {
w->node = child;
if (children&2) {
@@ -1516,33 +1618,39 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
}
read_unlock(&net->ipv6.fib6_walker_lock);
- node_free(fn);
+ node_free(net, fn);
if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
return pn;
- rt6_release(pn->leaf);
- pn->leaf = NULL;
+ RCU_INIT_POINTER(pn->leaf, NULL);
+ rt6_release(pn_leaf);
fn = pn;
}
}
-static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
- struct nl_info *info)
+static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
+ struct rt6_info __rcu **rtp, struct nl_info *info)
{
struct fib6_walker *w;
- struct rt6_info *rt = *rtp;
+ struct rt6_info *rt = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
RT6_TRACE("fib6_del_route\n");
+ WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
+
/* Unlink it */
*rtp = rt->dst.rt6_next;
rt->rt6i_node = NULL;
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
+ /* Flush all cached dst in exception table */
+ rt6_flush_exceptions(rt);
+
/* Reset round-robin state, if necessary */
- if (fn->rr_ptr == rt)
+ if (rcu_access_pointer(fn->rr_ptr) == rt)
fn->rr_ptr = NULL;
/* Remove this entry from other siblings */
@@ -1561,20 +1669,19 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
- w->leaf = rt->dst.rt6_next;
+ w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+ lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
w->state = FWS_U;
}
}
read_unlock(&net->ipv6.fib6_walker_lock);
- rt->dst.rt6_next = NULL;
-
/* If it was last route, expunge its radix tree node */
- if (!fn->leaf) {
+ if (!rcu_access_pointer(fn->leaf)) {
fn->fn_flags &= ~RTN_RTINFO;
net->ipv6.rt6_stats->fib_route_nodes--;
- fn = fib6_repair_tree(net, fn);
+ fn = fib6_repair_tree(net, table, fn);
}
fib6_purge_rt(rt, fn, net);
@@ -1585,12 +1692,15 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
rt6_release(rt);
}
+/* Need to own table->tb6_lock */
int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ struct fib6_table *table = rt->rt6i_table;
struct net *net = info->nl_net;
- struct rt6_info **rtp;
+ struct rt6_info __rcu **rtp;
+ struct rt6_info __rcu **rtp_next;
#if RT6_DEBUG >= 2
if (rt->dst.obsolete > 0) {
@@ -1603,28 +1713,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
WARN_ON(!(fn->fn_flags & RTN_RTINFO));
- if (!(rt->rt6i_flags & RTF_CACHE)) {
- struct fib6_node *pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
- /* clones of this route might be in another subtree */
- if (rt->rt6i_src.plen) {
- while (!(pn->fn_flags & RTN_ROOT))
- pn = pn->parent;
- pn = pn->parent;
- }
-#endif
- fib6_prune_clones(info->nl_net, pn);
- }
+ /* remove cached dst from exception table */
+ if (rt->rt6i_flags & RTF_CACHE)
+ return rt6_remove_exception_rt(rt);
/*
* Walk the leaf entries looking for ourself
*/
- for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) {
- if (*rtp == rt) {
- fib6_del_route(fn, rtp, info);
+ for (rtp = &fn->leaf; *rtp; rtp = rtp_next) {
+ struct rt6_info *cur = rcu_dereference_protected(*rtp,
+ lockdep_is_held(&table->tb6_lock));
+ if (rt == cur) {
+ fib6_del_route(table, fn, rtp, info);
return 0;
}
+ rtp_next = &cur->dst.rt6_next;
}
return -ENOENT;
}
@@ -1651,22 +1755,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
* 0 -> walk is complete.
* >0 -> walk is incomplete (i.e. suspended)
* <0 -> walk is terminated by an error.
+ *
+ * This function is called with tb6_lock held.
*/
static int fib6_walk_continue(struct fib6_walker *w)
{
- struct fib6_node *fn, *pn;
+ struct fib6_node *fn, *pn, *left, *right;
+
+ /* w->root should always be table->tb6_root */
+ WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT));
for (;;) {
fn = w->node;
if (!fn)
return 0;
- if (w->prune && fn != w->root &&
- fn->fn_flags & RTN_RTINFO && w->state < FWS_C) {
- w->state = FWS_C;
- w->leaf = fn->leaf;
- }
switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
case FWS_S:
@@ -1676,21 +1780,26 @@ static int fib6_walk_continue(struct fib6_walker *w)
}
w->state = FWS_L;
#endif
+ /* fall through */
case FWS_L:
- if (fn->left) {
- w->node = fn->left;
+ left = rcu_dereference_protected(fn->left, 1);
+ if (left) {
+ w->node = left;
w->state = FWS_INIT;
continue;
}
w->state = FWS_R;
+ /* fall through */
case FWS_R:
- if (fn->right) {
- w->node = fn->right;
+ right = rcu_dereference_protected(fn->right, 1);
+ if (right) {
+ w->node = right;
w->state = FWS_INIT;
continue;
}
w->state = FWS_C;
- w->leaf = fn->leaf;
+ w->leaf = rcu_dereference_protected(fn->leaf, 1);
+ /* fall through */
case FWS_C:
if (w->leaf && fn->fn_flags & RTN_RTINFO) {
int err;
@@ -1709,10 +1818,13 @@ static int fib6_walk_continue(struct fib6_walker *w)
}
skip:
w->state = FWS_U;
+ /* fall through */
case FWS_U:
if (fn == w->root)
return 0;
- pn = fn->parent;
+ pn = rcu_dereference_protected(fn->parent, 1);
+ left = rcu_dereference_protected(pn->left, 1);
+ right = rcu_dereference_protected(pn->right, 1);
w->node = pn;
#ifdef CONFIG_IPV6_SUBTREES
if (FIB6_SUBTREE(pn) == fn) {
@@ -1721,13 +1833,13 @@ skip:
continue;
}
#endif
- if (pn->left == fn) {
+ if (left == fn) {
w->state = FWS_R;
continue;
}
- if (pn->right == fn) {
+ if (right == fn) {
w->state = FWS_C;
- w->leaf = w->node->leaf;
+ w->leaf = rcu_dereference_protected(w->node->leaf, 1);
continue;
}
#if RT6_DEBUG >= 2
@@ -1770,7 +1882,7 @@ static int fib6_clean_node(struct fib6_walker *w)
return 0;
}
- for (rt = w->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_walker_rt(w) {
res = c->func(rt, c->arg);
if (res < 0) {
w->leaf = rt;
@@ -1798,20 +1910,16 @@ static int fib6_clean_node(struct fib6_walker *w)
* func is called on each route.
* It may return -1 -> delete this route.
* 0 -> continue walking
- *
- * prune==1 -> only immediate children of node (certainly,
- * ignoring pure split nodes) will be scanned.
*/
static void fib6_clean_tree(struct net *net, struct fib6_node *root,
int (*func)(struct rt6_info *, void *arg),
- bool prune, int sernum, void *arg)
+ int sernum, void *arg)
{
struct fib6_cleaner c;
c.w.root = root;
c.w.func = fib6_clean_node;
- c.w.prune = prune;
c.w.count = 0;
c.w.skip = 0;
c.func = func;
@@ -1834,10 +1942,10 @@ static void __fib6_clean_all(struct net *net,
for (h = 0; h < FIB6_TABLE_HASHSZ; h++) {
head = &net->ipv6.fib_table_hash[h];
hlist_for_each_entry_rcu(table, head, tb6_hlist) {
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
fib6_clean_tree(net, &table->tb6_root,
- func, false, sernum, arg);
- write_unlock_bh(&table->tb6_lock);
+ func, sernum, arg);
+ spin_unlock_bh(&table->tb6_lock);
}
}
rcu_read_unlock();
@@ -1849,22 +1957,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *),
__fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg);
}
-static int fib6_prune_clone(struct rt6_info *rt, void *arg)
-{
- if (rt->rt6i_flags & RTF_CACHE) {
- RT6_TRACE("pruning clone %p\n", rt);
- return -1;
- }
-
- return 0;
-}
-
-static void fib6_prune_clones(struct net *net, struct fib6_node *fn)
-{
- fib6_clean_tree(net, fn, fib6_prune_clone, true,
- FIB6_NO_SERNUM_CHANGE, NULL);
-}
-
static void fib6_flush_trees(struct net *net)
{
int new_sernum = fib6_new_sernum(net);
@@ -1876,12 +1968,6 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/
-struct fib6_gc_args
-{
- int timeout;
- int more;
-};
-
static int fib6_age(struct rt6_info *rt, void *arg)
{
struct fib6_gc_args *gc_args = arg;
@@ -1890,9 +1976,6 @@ static int fib6_age(struct rt6_info *rt, void *arg)
/*
* check addrconf expiration here.
* Routes are expired even if they are in use.
- *
- * Also age clones. Note, that clones are aged out
- * only if they are not in use now.
*/
if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) {
@@ -1901,31 +1984,14 @@ static int fib6_age(struct rt6_info *rt, void *arg)
return -1;
}
gc_args->more++;
- } else if (rt->rt6i_flags & RTF_CACHE) {
- if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
- rt->dst.obsolete = DST_OBSOLETE_KILL;
- if (atomic_read(&rt->dst.__refcnt) == 1 &&
- rt->dst.obsolete == DST_OBSOLETE_KILL) {
- RT6_TRACE("aging clone %p\n", rt);
- return -1;
- } else if (rt->rt6i_flags & RTF_GATEWAY) {
- struct neighbour *neigh;
- __u8 neigh_flags = 0;
-
- neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
- if (neigh) {
- neigh_flags = neigh->flags;
- neigh_release(neigh);
- }
- if (!(neigh_flags & NTF_ROUTER)) {
- RT6_TRACE("purging route %p via non-router but gateway\n",
- rt);
- return -1;
- }
- }
- gc_args->more++;
}
+ /* Also age clones in the exception table.
+ * Note, that clones are aged out
+ * only if they are not in use now.
+ */
+ rt6_age_exceptions(rt, gc_args, now);
+
return 0;
}
@@ -1993,7 +2059,8 @@ static int __net_init fib6_net_init(struct net *net)
goto out_fib_table_hash;
net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN;
- net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
net->ipv6.fib6_main_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers);
@@ -2004,7 +2071,8 @@ static int __net_init fib6_net_init(struct net *net)
if (!net->ipv6.fib6_local_tbl)
goto out_fib6_main_tbl;
net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL;
- net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry;
+ rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf,
+ net->ipv6.ip6_null_entry);
net->ipv6.fib6_local_tbl->tb6_root.fn_flags =
RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers);
@@ -2134,7 +2202,9 @@ static int ipv6_route_yield(struct fib6_walker *w)
return 1;
do {
- iter->w.leaf = iter->w.leaf->dst.rt6_next;
+ iter->w.leaf = rcu_dereference_protected(
+ iter->w.leaf->dst.rt6_next,
+ lockdep_is_held(&iter->tbl->tb6_lock));
iter->skip--;
if (!iter->skip && iter->w.leaf)
return 1;
@@ -2199,7 +2269,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = ((struct rt6_info *)v)->dst.rt6_next;
+ n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
if (n) {
++*pos;
return n;
@@ -2207,9 +2277,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
iter_table:
ipv6_route_check_sernum(iter);
- read_lock(&iter->tbl->tb6_lock);
+ spin_lock_bh(&iter->tbl->tb6_lock);
r = fib6_walk_continue(&iter->w);
- read_unlock(&iter->tbl->tb6_lock);
+ spin_unlock_bh(&iter->tbl->tb6_lock);
if (r > 0) {
if (v)
++*pos;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1602b491b281..241841fb479c 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1156,19 +1156,21 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6gre_exit_net(struct net *net)
+static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list)
{
+ struct net *net;
LIST_HEAD(list);
rtnl_lock();
- ip6gre_destroy_tunnels(net, &list);
+ list_for_each_entry(net, net_list, exit_list)
+ ip6gre_destroy_tunnels(net, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations ip6gre_net_ops = {
.init = ip6gre_init_net,
- .exit = ip6gre_exit_net,
+ .exit_batch = ip6gre_exit_batch_net,
.id = &ip6gre_net_id,
.size = sizeof(struct ip6gre_net),
};
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index a1c24443cd9e..4212879ff35e 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -593,6 +593,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case NDISC_REDIRECT:
rel_type = ICMP_REDIRECT;
rel_code = ICMP_REDIR_HOST;
+ /* fall through */
default:
return 0;
}
@@ -2168,17 +2169,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
.priority = 1,
};
-static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
+static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
{
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
struct net_device *dev, *aux;
int h;
struct ip6_tnl *t;
- LIST_HEAD(list);
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &ip6_link_ops)
- unregister_netdevice_queue(dev, &list);
+ unregister_netdevice_queue(dev, list);
for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
@@ -2187,12 +2187,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)
* been added to the list by the previous loop.
*/
if (!net_eq(dev_net(t->dev), net))
- unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_queue(t->dev, list);
t = rtnl_dereference(t->next);
}
}
-
- unregister_netdevice_many(&list);
}
static int __net_init ip6_tnl_init_net(struct net *net)
@@ -2236,16 +2234,21 @@ err_alloc_dev:
return err;
}
-static void __net_exit ip6_tnl_exit_net(struct net *net)
+static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list)
{
+ struct net *net;
+ LIST_HEAD(list);
+
rtnl_lock();
- ip6_tnl_destroy_tunnels(net);
+ list_for_each_entry(net, net_list, exit_list)
+ ip6_tnl_destroy_tunnels(net, &list);
+ unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations ip6_tnl_net_ops = {
.init = ip6_tnl_init_net,
- .exit = ip6_tnl_exit_net,
+ .exit_batch = ip6_tnl_exit_batch_net,
.id = &ip6_tnl_net_id,
.size = sizeof(struct ip6_tnl_net),
};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index bcdc2d557de1..dbb74f3c57a7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -1053,23 +1053,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = {
.get_link_net = ip6_tnl_get_link_net,
};
-static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n)
+static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n,
+ struct list_head *list)
{
int h;
struct ip6_tnl *t;
- LIST_HEAD(list);
for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
t = rtnl_dereference(ip6n->tnls_r_l[h]);
while (t) {
- unregister_netdevice_queue(t->dev, &list);
+ unregister_netdevice_queue(t->dev, list);
t = rtnl_dereference(t->next);
}
}
t = rtnl_dereference(ip6n->tnls_wc[0]);
- unregister_netdevice_queue(t->dev, &list);
- unregister_netdevice_many(&list);
+ unregister_netdevice_queue(t->dev, list);
}
static int __net_init vti6_init_net(struct net *net)
@@ -1109,18 +1108,24 @@ err_alloc_dev:
return err;
}
-static void __net_exit vti6_exit_net(struct net *net)
+static void __net_exit vti6_exit_batch_net(struct list_head *net_list)
{
- struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct vti6_net *ip6n;
+ struct net *net;
+ LIST_HEAD(list);
rtnl_lock();
- vti6_destroy_tunnels(ip6n);
+ list_for_each_entry(net, net_list, exit_list) {
+ ip6n = net_generic(net, vti6_net_id);
+ vti6_destroy_tunnels(ip6n, &list);
+ }
+ unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations vti6_net_ops = {
.init = vti6_init_net,
- .exit = vti6_exit_net,
+ .exit_batch = vti6_exit_batch_net,
.id = &vti6_net_id,
.size = sizeof(struct vti6_net),
};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index f5500f5444e9..59fad81e5f7a 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1722,6 +1722,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
case MRT6_ADD_MFC:
case MRT6_DEL_MFC:
parent = -1;
+ /* fall through */
case MRT6_ADD_MFC_PROXY:
case MRT6_DEL_MFC_PROXY:
if (optlen < sizeof(mfc))
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a5e466d4e093..b9404feabd78 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -377,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
break;
+ case IPV6_FREEBIND:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ /* we also don't have a separate freebind bit for IPV6 */
+ inet_sk(sk)->freebind = valbool;
+ retv = 0;
+ break;
+
case IPV6_RECVORIGDSTADDR:
if (optlen < sizeof(int))
goto e_inval;
@@ -1214,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = inet_sk(sk)->transparent;
break;
+ case IPV6_FREEBIND:
+ val = inet_sk(sk)->freebind;
+ break;
+
case IPV6_RECVORIGDSTADDR:
val = np->rxopt.bits.rxorigdstaddr;
break;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b263bf3a19f7..977d8900cfd1 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q)
return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
-static void nf_ct_frag6_expire(unsigned long data)
+static void nf_ct_frag6_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
ip6_expire_frag_queue(net, fq, &nf_frags);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 46d6dba50698..1d2fb9267d6f 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -290,7 +290,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
else
return NF_ACCEPT;
}
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ /* Only ICMPs can be IP_CT_IS_REPLY: */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index ac826dd338ff..d12c55dad7d1 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -154,9 +154,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
- err = icmpv6_push_pending_frames(sk, &fl6,
- (struct icmp6hdr *) &pfh.icmph,
- len);
+ icmpv6_push_pending_frames(sk, &fl6,
+ (struct icmp6hdr *)&pfh.icmph, len);
}
release_sock(sk);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e4462b0ff801..761a473a07c5 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1055,6 +1055,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return ipv6_setsockopt(sk, level, optname, optval, optlen);
}
@@ -1077,6 +1078,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return compat_ipv6_setsockopt(sk, level, optname,
optval, optlen);
@@ -1138,6 +1140,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
@@ -1160,6 +1163,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname,
if (optname == IPV6_CHECKSUM ||
optname == IPV6_HDRINCL)
break;
+ /* fall through */
default:
return compat_ipv6_getsockopt(sk, level, optname,
optval, optlen);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 846012eae526..afbc000ad4f2 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -170,12 +170,13 @@ out:
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
-static void ip6_frag_expire(unsigned long data)
+static void ip6_frag_expire(struct timer_list *t)
{
+ struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
- fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q);
+ fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
ip6_expire_frag_queue(net, fq, &ip6_frags);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a96d5b385d8f..46c59a53c53f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -44,6 +44,7 @@
#include <linux/seq_file.h>
#include <linux/nsproxy.h>
#include <linux/slab.h>
+#include <linux/jhash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
@@ -104,6 +105,9 @@ static int rt6_fill_node(struct net *net,
struct in6_addr *dst, struct in6_addr *src,
int iif, int type, u32 portid, u32 seq,
unsigned int flags);
+static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr);
#ifdef CONFIG_IPV6_ROUTE_INFO
static struct rt6_info *rt6_add_route_info(struct net *net,
@@ -139,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt)
{
if (!list_empty(&rt->rt6i_uncached)) {
struct uncached_list *ul = rt->rt6i_uncached_list;
+ struct net *net = dev_net(rt->dst.dev);
spin_lock_bh(&ul->lock);
list_del(&rt->rt6i_uncached);
+ atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
spin_unlock_bh(&ul->lock);
}
}
@@ -355,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
1, DST_OBSOLETE_FORCE_CHK, flags);
- if (rt)
+ if (rt) {
rt6_info_init(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
+ }
return rt;
}
@@ -369,17 +377,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net,
if (rt) {
rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
- if (rt->rt6i_pcpu) {
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct rt6_info **p;
-
- p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
- /* no one shares rt */
- *p = NULL;
- }
- } else {
+ if (!rt->rt6i_pcpu) {
dst_release_immediate(&rt->dst);
return NULL;
}
@@ -392,6 +390,7 @@ EXPORT_SYMBOL(ip6_dst_alloc);
static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
+ struct rt6_exception_bucket *bucket;
struct dst_entry *from = dst->from;
struct inet6_dev *idev;
@@ -404,6 +403,11 @@ static void ip6_dst_destroy(struct dst_entry *dst)
rt->rt6i_idev = NULL;
in6_dev_put(idev);
}
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1);
+ if (bucket) {
+ rt->rt6i_exception_bucket = NULL;
+ kfree(bucket);
+ }
dst->from = NULL;
dst_release(from);
@@ -478,7 +482,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
}
/*
- * Route lookup. Any table->tb6_lock is implied.
+ * Route lookup. rcu_read_lock() should be held.
*/
static inline struct rt6_info *rt6_device_match(struct net *net,
@@ -493,7 +497,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (!oif && ipv6_addr_any(saddr))
goto out;
- for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) {
+ for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
struct net_device *dev = sprt->dst.dev;
if (oif) {
@@ -702,6 +706,7 @@ out:
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
+ struct rt6_info *leaf,
struct rt6_info *rr_head,
u32 metric, int oif, int strict,
bool *do_rr)
@@ -711,7 +716,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
cont = NULL;
- for (rt = rr_head; rt; rt = rt->dst.rt6_next) {
+ for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -720,7 +725,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
- for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) {
+ for (rt = leaf; rt && rt != rr_head;
+ rt = rcu_dereference(rt->dst.rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -732,37 +738,59 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
if (match || !cont)
return match;
- for (rt = cont; rt; rt = rt->dst.rt6_next)
+ for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
}
-static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
+static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
+ int oif, int strict)
{
+ struct rt6_info *leaf = rcu_dereference(fn->leaf);
struct rt6_info *match, *rt0;
- struct net *net;
bool do_rr = false;
+ int key_plen;
- rt0 = fn->rr_ptr;
+ if (!leaf)
+ return net->ipv6.ip6_null_entry;
+
+ rt0 = rcu_dereference(fn->rr_ptr);
if (!rt0)
- fn->rr_ptr = rt0 = fn->leaf;
+ rt0 = leaf;
- match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+ /* Double check to make sure fn is not an intermediate node
+ * and fn->leaf does not points to its child's leaf
+ * (This might happen if all routes under fn are deleted from
+ * the tree and fib6_repair_tree() is called on the node.)
+ */
+ key_plen = rt0->rt6i_dst.plen;
+#ifdef CONFIG_IPV6_SUBTREES
+ if (rt0->rt6i_src.plen)
+ key_plen = rt0->rt6i_src.plen;
+#endif
+ if (fn->fn_bit != key_plen)
+ return net->ipv6.ip6_null_entry;
+
+ match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict,
&do_rr);
if (do_rr) {
- struct rt6_info *next = rt0->dst.rt6_next;
+ struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
- next = fn->leaf;
-
- if (next != rt0)
- fn->rr_ptr = next;
+ next = leaf;
+
+ if (next != rt0) {
+ spin_lock_bh(&leaf->rt6i_table->tb6_lock);
+ /* make sure next is not being deleted from the tree */
+ if (next->rt6i_node)
+ rcu_assign_pointer(fn->rr_ptr, next);
+ spin_unlock_bh(&leaf->rt6i_table->tb6_lock);
+ }
}
- net = dev_net(rt0->dst.dev);
return match ? match : net->ipv6.ip6_null_entry;
}
@@ -850,13 +878,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
struct in6_addr *saddr)
{
- struct fib6_node *pn;
+ struct fib6_node *pn, *sn;
while (1) {
if (fn->fn_flags & RTN_TL_ROOT)
return NULL;
- pn = fn->parent;
- if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn)
- fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr);
+ pn = rcu_dereference(fn->parent);
+ sn = FIB6_SUBTREE(pn);
+ if (sn && sn != fn)
+ fn = fib6_lookup(sn, NULL, saddr);
else
fn = pn;
if (fn->fn_flags & RTN_RTINFO)
@@ -864,29 +893,59 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
}
}
+static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
+ bool null_fallback)
+{
+ struct rt6_info *rt = *prt;
+
+ if (dst_hold_safe(&rt->dst))
+ return true;
+ if (null_fallback) {
+ rt = net->ipv6.ip6_null_entry;
+ dst_hold(&rt->dst);
+ } else {
+ rt = NULL;
+ }
+ *prt = rt;
+ return false;
+}
+
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6, int flags)
{
+ struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
- struct rt6_info *rt;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- rt = fn->leaf;
- rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
- if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags);
+ rt = rcu_dereference(fn->leaf);
+ if (!rt) {
+ rt = net->ipv6.ip6_null_entry;
+ } else {
+ rt = rt6_device_match(net, rt, &fl6->saddr,
+ fl6->flowi6_oif, flags);
+ if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+ rt = rt6_multipath_select(rt, fl6,
+ fl6->flowi6_oif, flags);
+ }
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
goto restart;
}
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
+ /* Search through exception table */
+ rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+ if (rt_cache)
+ rt = rt_cache;
+
+ if (ip6_hold_safe(net, &rt, true))
+ dst_use_noref(&rt->dst, jiffies);
+
+ rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
@@ -938,9 +997,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info,
struct fib6_table *table;
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
err = fib6_add(&table->tb6_root, rt, info, mxc, extack);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
return err;
}
@@ -1038,7 +1097,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
return pcpu_rt;
}
-/* It should be called with read_lock_bh(&tb6_lock) acquired */
+/* It should be called with rcu_read_lock() acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{
struct rt6_info *pcpu_rt, **p;
@@ -1046,16 +1105,14 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
p = this_cpu_ptr(rt->rt6i_pcpu);
pcpu_rt = *p;
- if (pcpu_rt) {
- dst_hold(&pcpu_rt->dst);
+ if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false))
rt6_dst_from_metrics_check(pcpu_rt);
- }
+
return pcpu_rt;
}
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
- struct fib6_table *table = rt->rt6i_table;
struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(rt);
@@ -1066,36 +1123,526 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
return net->ipv6.ip6_null_entry;
}
- read_lock_bh(&table->tb6_lock);
- if (rt->rt6i_pcpu) {
- p = this_cpu_ptr(rt->rt6i_pcpu);
- prev = cmpxchg(p, NULL, pcpu_rt);
- if (prev) {
- /* If someone did it before us, return prev instead */
- dst_release_immediate(&pcpu_rt->dst);
- pcpu_rt = prev;
- }
- } else {
- /* rt has been removed from the fib6 tree
- * before we have a chance to acquire the read_lock.
- * In this case, don't brother to create a pcpu rt
- * since rt is going away anyway. The next
- * dst_check() will trigger a re-lookup.
- */
- dst_release_immediate(&pcpu_rt->dst);
- pcpu_rt = rt;
- }
dst_hold(&pcpu_rt->dst);
+ p = this_cpu_ptr(rt->rt6i_pcpu);
+ prev = cmpxchg(p, NULL, pcpu_rt);
+ BUG_ON(prev);
+
rt6_dst_from_metrics_check(pcpu_rt);
- read_unlock_bh(&table->tb6_lock);
return pcpu_rt;
}
+/* exception hash table implementation
+ */
+static DEFINE_SPINLOCK(rt6_exception_lock);
+
+/* Remove rt6_ex from hash table and free the memory
+ * Caller must hold rt6_exception_lock
+ */
+static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
+ struct rt6_exception *rt6_ex)
+{
+ struct net *net;
+
+ if (!bucket || !rt6_ex)
+ return;
+
+ net = dev_net(rt6_ex->rt6i->dst.dev);
+ rt6_ex->rt6i->rt6i_node = NULL;
+ hlist_del_rcu(&rt6_ex->hlist);
+ rt6_release(rt6_ex->rt6i);
+ kfree_rcu(rt6_ex, rcu);
+ WARN_ON_ONCE(!bucket->depth);
+ bucket->depth--;
+ net->ipv6.rt6_stats->fib_rt_cache--;
+}
+
+/* Remove oldest rt6_ex in bucket and free the memory
+ * Caller must hold rt6_exception_lock
+ */
+static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
+{
+ struct rt6_exception *rt6_ex, *oldest = NULL;
+
+ if (!bucket)
+ return;
+
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ if (!oldest || time_before(rt6_ex->stamp, oldest->stamp))
+ oldest = rt6_ex;
+ }
+ rt6_remove_exception(bucket, oldest);
+}
+
+static u32 rt6_exception_hash(const struct in6_addr *dst,
+ const struct in6_addr *src)
+{
+ static u32 seed __read_mostly;
+ u32 val;
+
+ net_get_random_once(&seed, sizeof(seed));
+ val = jhash(dst, sizeof(*dst), seed);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (src)
+ val = jhash(src, sizeof(*src), val);
+#endif
+ return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
+}
+
+/* Helper function to find the cached rt in the hash table
+ * and update bucket pointer to point to the bucket for this
+ * (daddr, saddr) pair
+ * Caller must hold rt6_exception_lock
+ */
+static struct rt6_exception *
+__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+{
+ struct rt6_exception *rt6_ex;
+ u32 hval;
+
+ if (!(*bucket) || !daddr)
+ return NULL;
+
+ hval = rt6_exception_hash(daddr, saddr);
+ *bucket += hval;
+
+ hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) {
+ struct rt6_info *rt6 = rt6_ex->rt6i;
+ bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (matched && saddr)
+ matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
+#endif
+ if (matched)
+ return rt6_ex;
+ }
+ return NULL;
+}
+
+/* Helper function to find the cached rt in the hash table
+ * and update bucket pointer to point to the bucket for this
+ * (daddr, saddr) pair
+ * Caller must hold rcu_read_lock()
+ */
+static struct rt6_exception *
+__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
+ const struct in6_addr *daddr,
+ const struct in6_addr *saddr)
+{
+ struct rt6_exception *rt6_ex;
+ u32 hval;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (!(*bucket) || !daddr)
+ return NULL;
+
+ hval = rt6_exception_hash(daddr, saddr);
+ *bucket += hval;
+
+ hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) {
+ struct rt6_info *rt6 = rt6_ex->rt6i;
+ bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ if (matched && saddr)
+ matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr);
+#endif
+ if (matched)
+ return rt6_ex;
+ }
+ return NULL;
+}
+
+static int rt6_insert_exception(struct rt6_info *nrt,
+ struct rt6_info *ort)
+{
+ struct net *net = dev_net(ort->dst.dev);
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ int err = 0;
+
+ /* ort can't be a cache or pcpu route */
+ if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
+ ort = (struct rt6_info *)ort->dst.from;
+ WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
+
+ spin_lock_bh(&rt6_exception_lock);
+
+ if (ort->exception_bucket_flushed) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+ if (!bucket) {
+ bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
+ GFP_ATOMIC);
+ if (!bucket) {
+ err = -ENOMEM;
+ goto out;
+ }
+ rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
+ }
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates ort is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (ort->rt6i_src.plen)
+ src_key = &nrt->rt6i_src.addr;
+#endif
+
+ /* Update rt6i_prefsrc as it could be changed
+ * in rt6_remove_prefsrc()
+ */
+ nrt->rt6i_prefsrc = ort->rt6i_prefsrc;
+ /* rt6_mtu_change() might lower mtu on ort.
+ * Only insert this exception route if its mtu
+ * is less than ort's mtu value.
+ */
+ if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex)
+ rt6_remove_exception(bucket, rt6_ex);
+
+ rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC);
+ if (!rt6_ex) {
+ err = -ENOMEM;
+ goto out;
+ }
+ rt6_ex->rt6i = nrt;
+ rt6_ex->stamp = jiffies;
+ atomic_inc(&nrt->rt6i_ref);
+ nrt->rt6i_node = ort->rt6i_node;
+ hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain);
+ bucket->depth++;
+ net->ipv6.rt6_stats->fib_rt_cache++;
+
+ if (bucket->depth > FIB6_MAX_DEPTH)
+ rt6_exception_remove_oldest(bucket);
+
+out:
+ spin_unlock_bh(&rt6_exception_lock);
+
+ /* Update fn->fn_sernum to invalidate all cached dst */
+ if (!err) {
+ fib6_update_sernum(ort);
+ fib6_force_start_gc(net);
+ }
+
+ return err;
+}
+
+void rt6_flush_exceptions(struct rt6_info *rt)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ spin_lock_bh(&rt6_exception_lock);
+ /* Prevent rt6_insert_exception() to recreate the bucket list */
+ rt->exception_bucket_flushed = 1;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+ if (!bucket)
+ goto out;
+
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
+ rt6_remove_exception(bucket, rt6_ex);
+ WARN_ON_ONCE(bucket->depth);
+ bucket++;
+ }
+
+out:
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
+/* Find cached rt in the hash table inside passed in rt
+ * Caller has to hold rcu_read_lock()
+ */
+static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
+ struct in6_addr *daddr,
+ struct in6_addr *saddr)
+{
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ struct rt6_info *res = NULL;
+
+ bucket = rcu_dereference(rt->rt6i_exception_bucket);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates rt is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (rt->rt6i_src.plen)
+ src_key = saddr;
+#endif
+ rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
+
+ if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
+ res = rt6_ex->rt6i;
+
+ return res;
+}
+
+/* Remove the passed in cached rt from the hash table that contains it */
+int rt6_remove_exception_rt(struct rt6_info *rt)
+{
+ struct rt6_info *from = (struct rt6_info *)rt->dst.from;
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+ int err;
+
+ if (!from ||
+ !(rt->rt6i_flags & RTF_CACHE))
+ return -EINVAL;
+
+ if (!rcu_access_pointer(from->rt6i_exception_bucket))
+ return -ENOENT;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates 'from' is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (from->rt6i_src.plen)
+ src_key = &rt->rt6i_src.addr;
+#endif
+ rt6_ex = __rt6_find_exception_spinlock(&bucket,
+ &rt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex) {
+ rt6_remove_exception(bucket, rt6_ex);
+ err = 0;
+ } else {
+ err = -ENOENT;
+ }
+
+ spin_unlock_bh(&rt6_exception_lock);
+ return err;
+}
+
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct rt6_info *from = (struct rt6_info *)rt->dst.from;
+ struct rt6_exception_bucket *bucket;
+ struct in6_addr *src_key = NULL;
+ struct rt6_exception *rt6_ex;
+
+ if (!from ||
+ !(rt->rt6i_flags & RTF_CACHE))
+ return;
+
+ rcu_read_lock();
+ bucket = rcu_dereference(from->rt6i_exception_bucket);
+
+#ifdef CONFIG_IPV6_SUBTREES
+ /* rt6i_src.plen != 0 indicates 'from' is in subtree
+ * and exception table is indexed by a hash of
+ * both rt6i_dst and rt6i_src.
+ * Otherwise, the exception table is indexed by
+ * a hash of only rt6i_dst.
+ */
+ if (from->rt6i_src.plen)
+ src_key = &rt->rt6i_src.addr;
+#endif
+ rt6_ex = __rt6_find_exception_rcu(&bucket,
+ &rt->rt6i_dst.addr,
+ src_key);
+ if (rt6_ex)
+ rt6_ex->stamp = jiffies;
+
+ rcu_read_unlock();
+}
+
+static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ rt6_ex->rt6i->rt6i_prefsrc.plen = 0;
+ }
+ bucket++;
+ }
+ }
+}
+
+static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ int i;
+
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+ /* For RTF_CACHE with rt6i_pmtu == 0
+ * (i.e. a redirected route),
+ * the metrics of its rt->dst.from has already
+ * been updated.
+ */
+ if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu)
+ entry->rt6i_pmtu = mtu;
+ }
+ bucket++;
+ }
+ }
+}
+
+#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
+
+static void rt6_exceptions_clean_tohost(struct rt6_info *rt,
+ struct in6_addr *gateway)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ return;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp,
+ &bucket->chain, hlist) {
+ struct rt6_info *entry = rt6_ex->rt6i;
+
+ if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) ==
+ RTF_CACHE_GATEWAY &&
+ ipv6_addr_equal(gateway,
+ &entry->rt6i_gateway)) {
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ }
+ bucket++;
+ }
+ }
+
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
+static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
+ struct rt6_exception *rt6_ex,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ struct rt6_info *rt = rt6_ex->rt6i;
+
+ /* we are pruning and obsoleting aged-out and non gateway exceptions
+ * even if others have still references to them, so that on next
+ * dst_check() such references can be dropped.
+ * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
+ * expired, independently from their aging, as per RFC 8201 section 4
+ */
+ if (!(rt->rt6i_flags & RTF_EXPIRES) &&
+ time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+ RT6_TRACE("aging clone %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ } else if (rt->rt6i_flags & RTF_GATEWAY) {
+ struct neighbour *neigh;
+ __u8 neigh_flags = 0;
+
+ neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway);
+ if (neigh) {
+ neigh_flags = neigh->flags;
+ neigh_release(neigh);
+ }
+ if (!(neigh_flags & NTF_ROUTER)) {
+ RT6_TRACE("purging route %p via non-router but gateway\n",
+ rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ } else if (__rt6_check_expired(rt)) {
+ RT6_TRACE("purging expired route %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ gc_args->more++;
+}
+
+void rt6_age_exceptions(struct rt6_info *rt,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+ struct hlist_node *tmp;
+ int i;
+
+ if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ return;
+
+ spin_lock_bh(&rt6_exception_lock);
+ bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
+
+ if (bucket) {
+ for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
+ hlist_for_each_entry_safe(rt6_ex, tmp,
+ &bucket->chain, hlist) {
+ rt6_age_examine_exception(bucket, rt6_ex,
+ gc_args, now);
+ }
+ bucket++;
+ }
+ }
+ spin_unlock_bh(&rt6_exception_lock);
+}
+
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
int oif, struct flowi6 *fl6, int flags)
{
struct fib6_node *fn, *saved_fn;
- struct rt6_info *rt;
+ struct rt6_info *rt, *rt_cache;
int strict = 0;
strict |= flags & RT6_LOOKUP_F_IFACE;
@@ -1103,7 +1650,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
if (net->ipv6.devconf_all->forwarding == 0)
strict |= RT6_LOOKUP_F_REACHABLE;
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
@@ -1112,7 +1659,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
oif = 0;
redo_rt6_select:
- rt = rt6_select(fn, oif, strict);
+ rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
rt = rt6_multipath_select(rt, fl6, oif, strict);
if (rt == net->ipv6.ip6_null_entry) {
@@ -1127,14 +1674,23 @@ redo_rt6_select:
}
}
+ /*Search through exception table */
+ rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr);
+ if (rt_cache)
+ rt = rt_cache;
- if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
-
- rt6_dst_from_metrics_check(rt);
-
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ if (rt == net->ipv6.ip6_null_entry) {
+ rcu_read_unlock();
+ dst_hold(&rt->dst);
+ trace_fib6_table_lookup(net, rt, table, fl6);
+ return rt;
+ } else if (rt->rt6i_flags & RTF_CACHE) {
+ if (ip6_hold_safe(net, &rt, true)) {
+ dst_use_noref(&rt->dst, jiffies);
+ rt6_dst_from_metrics_check(rt);
+ }
+ rcu_read_unlock();
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
!(rt->rt6i_flags & RTF_GATEWAY))) {
@@ -1146,8 +1702,14 @@ redo_rt6_select:
struct rt6_info *uncached_rt;
- dst_use(&rt->dst, jiffies);
- read_unlock_bh(&table->tb6_lock);
+ if (ip6_hold_safe(net, &rt, true)) {
+ dst_use_noref(&rt->dst, jiffies);
+ } else {
+ rcu_read_unlock();
+ uncached_rt = rt;
+ goto uncached_rt_out;
+ }
+ rcu_read_unlock();
uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
dst_release(&rt->dst);
@@ -1157,12 +1719,14 @@ redo_rt6_select:
* No need for another dst_hold()
*/
rt6_uncached_list_add(uncached_rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
} else {
uncached_rt = net->ipv6.ip6_null_entry;
dst_hold(&uncached_rt->dst);
}
- trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6);
+uncached_rt_out:
+ trace_fib6_table_lookup(net, uncached_rt, table, fl6);
return uncached_rt;
} else {
@@ -1170,26 +1734,28 @@ redo_rt6_select:
struct rt6_info *pcpu_rt;
- rt->dst.lastuse = jiffies;
- rt->dst.__use++;
+ dst_use_noref(&rt->dst, jiffies);
+ local_bh_disable();
pcpu_rt = rt6_get_pcpu_route(rt);
- if (pcpu_rt) {
- read_unlock_bh(&table->tb6_lock);
- } else {
- /* We have to do the read_unlock first
- * because rt6_make_pcpu_route() may trigger
- * ip6_dst_gc() which will take the write_lock.
- */
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- pcpu_rt = rt6_make_pcpu_route(rt);
- dst_release(&rt->dst);
+ if (!pcpu_rt) {
+ /* atomic_inc_not_zero() is needed when using rcu */
+ if (atomic_inc_not_zero(&rt->rt6i_ref)) {
+ /* No dst_hold() on rt is needed because grabbing
+ * rt->rt6i_ref makes sure rt can't be released.
+ */
+ pcpu_rt = rt6_make_pcpu_route(rt);
+ rt6_release(rt);
+ } else {
+ /* rt is already removed from tree */
+ pcpu_rt = net->ipv6.ip6_null_entry;
+ dst_hold(&pcpu_rt->dst);
+ }
}
-
- trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6);
+ local_bh_enable();
+ rcu_read_unlock();
+ trace_fib6_table_lookup(net, pcpu_rt, table, fl6);
return pcpu_rt;
-
}
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
@@ -1328,6 +1894,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
DST_OBSOLETE_DEAD, 0);
if (rt) {
rt6_info_init(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc);
new = &rt->dst;
new->__use = 1;
@@ -1491,23 +2058,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (!rt6_cache_allowed_for_pmtu(rt6)) {
rt6_do_update_pmtu(rt6, mtu);
+ /* update rt6_ex->stamp for cache */
+ if (rt6->rt6i_flags & RTF_CACHE)
+ rt6_update_exception_stamp_rt(rt6);
} else if (daddr) {
struct rt6_info *nrt6;
nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr);
if (nrt6) {
rt6_do_update_pmtu(nrt6, mtu);
-
- /* ip6_ins_rt(nrt6) will bump the
- * rt6->rt6i_node->fn_sernum
- * which will fail the next rt6_check() and
- * invalidate the sk->sk_dst_cache.
- */
- ip6_ins_rt(nrt6);
- /* Release the reference taken in
- * ip6_rt_cache_alloc()
- */
- dst_release(&nrt6->dst);
+ if (rt6_insert_exception(nrt6, rt6))
+ dst_release_immediate(&nrt6->dst);
}
}
}
@@ -1571,7 +2132,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
- struct rt6_info *rt;
+ struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
/* Get the "current" route for this destination and
@@ -1584,10 +2145,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
* routes.
*/
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt6_check_expired(rt))
continue;
if (rt->dst.error)
@@ -1596,8 +2157,23 @@ restart:
continue;
if (fl6->flowi6_oif != rt->dst.dev->ifindex)
continue;
- if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
+ /* rt_cache's gateway might be different from its 'parent'
+ * in the case of an ip redirect.
+ * So we keep searching in the exception table if the gateway
+ * is different.
+ */
+ if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) {
+ rt_cache = rt6_find_cached_rt(rt,
+ &fl6->daddr,
+ &fl6->saddr);
+ if (rt_cache &&
+ ipv6_addr_equal(&rdfl->gateway,
+ &rt_cache->rt6i_gateway)) {
+ rt = rt_cache;
+ break;
+ }
continue;
+ }
break;
}
@@ -1615,11 +2191,11 @@ restart:
}
out:
- dst_hold(&rt->dst);
+ ip6_hold_safe(net, &rt, true);
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table->tb6_id, fl6);
+ trace_fib6_table_lookup(net, rt, table, fl6);
return rt;
};
@@ -1766,6 +2342,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
* do proper release of the net_device
*/
rt6_uncached_list_add(rt);
+ atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
@@ -2216,9 +2793,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
}
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
err = fib6_del(rt, info);
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
out:
ip6_rt_put(rt);
@@ -2244,7 +2821,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
if (rt == net->ipv6.ip6_null_entry)
goto out_put;
table = rt->rt6i_table;
- write_lock_bh(&table->tb6_lock);
+ spin_lock_bh(&table->tb6_lock);
if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) {
struct rt6_info *sibling, *next_sibling;
@@ -2274,7 +2851,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
err = fib6_del(rt, info);
out_unlock:
- write_unlock_bh(&table->tb6_lock);
+ spin_unlock_bh(&table->tb6_lock);
out_put:
ip6_rt_put(rt);
@@ -2288,9 +2865,9 @@ out_put:
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
+ struct rt6_info *rt, *rt_cache;
struct fib6_table *table;
struct fib6_node *fn;
- struct rt6_info *rt;
int err = -ESRCH;
table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
@@ -2299,17 +2876,22 @@ static int ip6_route_del(struct fib6_config *cfg,
return err;
}
- read_lock_bh(&table->tb6_lock);
+ rcu_read_lock();
fn = fib6_locate(&table->tb6_root,
&cfg->fc_dst, cfg->fc_dst_len,
- &cfg->fc_src, cfg->fc_src_len);
+ &cfg->fc_src, cfg->fc_src_len,
+ !(cfg->fc_flags & RTF_CACHE));
if (fn) {
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
- if ((rt->rt6i_flags & RTF_CACHE) &&
- !(cfg->fc_flags & RTF_CACHE))
- continue;
+ for_each_fib6_node_rt_rcu(fn) {
+ if (cfg->fc_flags & RTF_CACHE) {
+ rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
+ &cfg->fc_src);
+ if (!rt_cache)
+ continue;
+ rt = rt_cache;
+ }
if (cfg->fc_ifindex &&
(!rt->dst.dev ||
rt->dst.dev->ifindex != cfg->fc_ifindex))
@@ -2321,8 +2903,9 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol)
continue;
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
+ if (!dst_hold_safe(&rt->dst))
+ break;
+ rcu_read_unlock();
/* if gateway was specified only delete the one hop */
if (cfg->fc_flags & RTF_GATEWAY)
@@ -2331,7 +2914,7 @@ static int ip6_route_del(struct fib6_config *cfg,
return __ip6_del_rt_siblings(rt, cfg);
}
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return err;
}
@@ -2435,8 +3018,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
nrt->rt6i_protocol = RTPROT_REDIRECT;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- if (ip6_ins_rt(nrt))
- goto out_release;
+ /* No need to remove rt from the exception table if rt is
+ * a cached route because rt6_insert_exception() will
+ * takes care of it
+ */
+ if (rt6_insert_exception(nrt, rt)) {
+ dst_release_immediate(&nrt->dst);
+ goto out;
+ }
netevent.old = &rt->dst;
netevent.new = &nrt->dst;
@@ -2444,17 +3033,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
netevent.neigh = neigh;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
- if (rt->rt6i_flags & RTF_CACHE) {
- rt = (struct rt6_info *) dst_clone(&rt->dst);
- ip6_del_rt(rt);
- }
-
-out_release:
- /* Release the reference taken in
- * ip6_rt_cache_alloc()
- */
- dst_release(&nrt->dst);
-
out:
neigh_release(neigh);
}
@@ -2511,23 +3089,23 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0);
+ rcu_read_lock();
+ fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true);
if (!fn)
goto out;
- for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
+ for_each_fib6_node_rt_rcu(fn) {
if (rt->dst.dev->ifindex != ifindex)
continue;
if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
continue;
if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
continue;
- dst_hold(&rt->dst);
+ ip6_hold_safe(NULL, &rt, false);
break;
}
out:
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
return rt;
}
@@ -2573,16 +3151,16 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
if (!table)
return NULL;
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ rcu_read_lock();
+ for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (dev == rt->dst.dev &&
((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
ipv6_addr_equal(&rt->rt6i_gateway, addr))
break;
}
if (rt)
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
+ ip6_hold_safe(NULL, &rt, false);
+ rcu_read_unlock();
return rt;
}
@@ -2620,17 +3198,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table)
struct rt6_info *rt;
restart:
- read_lock_bh(&table->tb6_lock);
- for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) {
+ rcu_read_lock();
+ for_each_fib6_node_rt_rcu(&table->tb6_root) {
if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
(!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) {
- dst_hold(&rt->dst);
- read_unlock_bh(&table->tb6_lock);
- ip6_del_rt(rt);
+ if (dst_hold_safe(&rt->dst)) {
+ rcu_read_unlock();
+ ip6_del_rt(rt);
+ } else {
+ rcu_read_unlock();
+ }
goto restart;
}
}
- read_unlock_bh(&table->tb6_lock);
+ rcu_read_unlock();
table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER;
}
@@ -2818,8 +3399,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg)
if (((void *)rt->dst.dev == dev || !dev) &&
rt != net->ipv6.ip6_null_entry &&
ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) {
+ spin_lock_bh(&rt6_exception_lock);
/* remove prefsrc entry */
rt->rt6i_prefsrc.plen = 0;
+ /* need to update cache as well */
+ rt6_exceptions_remove_prefsrc(rt);
+ spin_unlock_bh(&rt6_exception_lock);
}
return 0;
}
@@ -2836,18 +3421,23 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
}
#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
-#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
/* Remove routers and update dst entries when gateway turn into host. */
static int fib6_clean_tohost(struct rt6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
- if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) ||
- ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) &&
- ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
+ if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
+ ipv6_addr_equal(gateway, &rt->rt6i_gateway)) {
return -1;
}
+
+ /* Further clean up cached routes in exception table.
+ * This is needed because cached route may have a different
+ * gateway than its 'parent' in the case of an ip redirect.
+ */
+ rt6_exceptions_clean_tohost(rt, gateway);
+
return 0;
}
@@ -2926,19 +3516,14 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
if (rt->dst.dev == arg->dev &&
dst_metric_raw(&rt->dst, RTAX_MTU) &&
!dst_metric_locked(&rt->dst, RTAX_MTU)) {
- if (rt->rt6i_flags & RTF_CACHE) {
- /* For RTF_CACHE with rt6i_pmtu == 0
- * (i.e. a redirected route),
- * the metrics of its rt->dst.from has already
- * been updated.
- */
- if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu)
- rt->rt6i_pmtu = arg->mtu;
- } else if (dst_mtu(&rt->dst) >= arg->mtu ||
- (dst_mtu(&rt->dst) < arg->mtu &&
- dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
+ spin_lock_bh(&rt6_exception_lock);
+ if (dst_mtu(&rt->dst) >= arg->mtu ||
+ (dst_mtu(&rt->dst) < arg->mtu &&
+ dst_mtu(&rt->dst) == idev->cnf.mtu6)) {
dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu);
}
+ rt6_exceptions_update_pmtu(rt, arg->mtu);
+ spin_unlock_bh(&rt6_exception_lock);
}
return 0;
}
@@ -3839,7 +4424,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
net->ipv6.rt6_stats->fib_nodes,
net->ipv6.rt6_stats->fib_route_nodes,
- net->ipv6.rt6_stats->fib_rt_alloc,
+ atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc),
net->ipv6.rt6_stats->fib_rt_entries,
net->ipv6.rt6_stats->fib_rt_cache,
dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ac912bb21747..a799f5258614 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1848,19 +1848,22 @@ err_alloc_dev:
return err;
}
-static void __net_exit sit_exit_net(struct net *net)
+static void __net_exit sit_exit_batch_net(struct list_head *net_list)
{
LIST_HEAD(list);
+ struct net *net;
rtnl_lock();
- sit_destroy_tunnels(net, &list);
+ list_for_each_entry(net, net_list, exit_list)
+ sit_destroy_tunnels(net, &list);
+
unregister_netdevice_many(&list);
rtnl_unlock();
}
static struct pernet_operations sit_net_ops = {
.init = sit_init_net,
- .exit = sit_exit_net,
+ .exit_batch = sit_exit_batch_net,
.id = &sit_net_id,
.size = sizeof(struct sit_net),
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 64d94afa427f..ae83615b7f6d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1577,8 +1577,9 @@ do_time_wait:
refcounted = false;
goto process;
}
- /* Fall through to ACK */
}
+ /* to ACK */
+ /* fall through */
case TCP_TW_ACK:
tcp_v6_timewait_ack(sk, skb);
break;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 11d1314ab6c5..4ed9f8cc3b6a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -152,6 +152,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
switch (nexthdr) {
case NEXTHDR_FRAGMENT:
onlyproto = 1;
+ /* fall through */
case NEXTHDR_ROUTING:
case NEXTHDR_HOP:
case NEXTHDR_DEST:
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index ac598ec90589..d21a9d128d3e 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1867,6 +1867,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
rc = -EPERM;
if (!capable(CAP_NET_ADMIN))
break;
+ /* fall through */
case SIOCGIFADDR:
rc = ipxitf_ioctl(cmd, argp);
break;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index af4e76ac88ff..0b750a22c4b9 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1650,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info,
}
newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
- if (unlikely(IS_ERR(newfile))) {
+ if (IS_ERR(newfile)) {
err = PTR_ERR(newfile);
goto out_sock_alloc_fail;
}
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 7135f4645d3a..f5179424eaf1 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -406,7 +406,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) ||
nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport)))
goto nla_put_failure;
- /* NOBREAK */
+ /* fall through */
case L2TP_ENCAPTYPE_IP:
#if IS_ENABLED(CONFIG_IPV6)
if (np) {
diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c
index e15314e3b464..db6e0afe3a20 100644
--- a/net/lapb/lapb_iface.c
+++ b/net/lapb/lapb_iface.c
@@ -127,8 +127,8 @@ static struct lapb_cb *lapb_create_cb(void)
skb_queue_head_init(&lapb->write_queue);
skb_queue_head_init(&lapb->ack_queue);
- init_timer(&lapb->t1timer);
- init_timer(&lapb->t2timer);
+ timer_setup(&lapb->t1timer, NULL, 0);
+ timer_setup(&lapb->t2timer, NULL, 0);
lapb->t1 = LAPB_DEFAULT_T1;
lapb->t2 = LAPB_DEFAULT_T2;
diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c
index 1a5535bc3b8d..8bb469cb3abe 100644
--- a/net/lapb/lapb_timer.c
+++ b/net/lapb/lapb_timer.c
@@ -35,15 +35,14 @@
#include <linux/interrupt.h>
#include <net/lapb.h>
-static void lapb_t1timer_expiry(unsigned long);
-static void lapb_t2timer_expiry(unsigned long);
+static void lapb_t1timer_expiry(struct timer_list *);
+static void lapb_t2timer_expiry(struct timer_list *);
void lapb_start_t1timer(struct lapb_cb *lapb)
{
del_timer(&lapb->t1timer);
- lapb->t1timer.data = (unsigned long)lapb;
- lapb->t1timer.function = &lapb_t1timer_expiry;
+ lapb->t1timer.function = (TIMER_FUNC_TYPE)lapb_t1timer_expiry;
lapb->t1timer.expires = jiffies + lapb->t1;
add_timer(&lapb->t1timer);
@@ -53,8 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb)
{
del_timer(&lapb->t2timer);
- lapb->t2timer.data = (unsigned long)lapb;
- lapb->t2timer.function = &lapb_t2timer_expiry;
+ lapb->t2timer.function = (TIMER_FUNC_TYPE)lapb_t2timer_expiry;
lapb->t2timer.expires = jiffies + lapb->t2;
add_timer(&lapb->t2timer);
@@ -75,9 +73,9 @@ int lapb_t1timer_running(struct lapb_cb *lapb)
return timer_pending(&lapb->t1timer);
}
-static void lapb_t2timer_expiry(unsigned long param)
+static void lapb_t2timer_expiry(struct timer_list *t)
{
- struct lapb_cb *lapb = (struct lapb_cb *)param;
+ struct lapb_cb *lapb = from_timer(lapb, t, t2timer);
if (lapb->condition & LAPB_ACK_PENDING_CONDITION) {
lapb->condition &= ~LAPB_ACK_PENDING_CONDITION;
@@ -85,9 +83,9 @@ static void lapb_t2timer_expiry(unsigned long param)
}
}
-static void lapb_t1timer_expiry(unsigned long param)
+static void lapb_t1timer_expiry(struct timer_list *t)
{
- struct lapb_cb *lapb = (struct lapb_cb *)param;
+ struct lapb_cb *lapb = from_timer(lapb, t, t1timer);
switch (lapb->state) {
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 282912245938..80f25ff2f24b 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -6,6 +6,7 @@ mac80211-y := \
driver-ops.o \
sta_info.o \
wep.o \
+ aead_api.o \
wpa.o \
scan.o offchannel.o \
ht.o agg-tx.o agg-rx.o \
@@ -15,8 +16,6 @@ mac80211-y := \
rate.o \
michael.o \
tkip.o \
- aes_ccm.o \
- aes_gcm.o \
aes_cmac.o \
aes_gmac.o \
fils_aead.o \
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aead_api.c
index a4e0d59a40dd..160f9df30402 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aead_api.c
@@ -1,6 +1,7 @@
/*
* Copyright 2003-2004, Instant802 Networks, Inc.
* Copyright 2005-2006, Devicescape Software, Inc.
+ * Copyright 2014-2015, Qualcomm Atheros, Inc.
*
* Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
*
@@ -12,30 +13,29 @@
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/err.h>
+#include <linux/scatterlist.h>
#include <crypto/aead.h>
-#include <net/mac80211.h>
-#include "key.h"
-#include "aes_ccm.h"
+#include "aead_api.h"
-int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len)
+int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
+ u8 *data, size_t data_len, u8 *mic)
{
+ size_t mic_len = crypto_aead_authsize(tfm);
struct scatterlist sg[3];
struct aead_request *aead_req;
int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
u8 *__aad;
- aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+ aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
if (!aead_req)
return -ENOMEM;
__aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, CCM_AAD_LEN);
+ memcpy(__aad, aad, aad_len);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
+ sg_set_buf(&sg[0], __aad, aad_len);
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, mic_len);
@@ -49,10 +49,10 @@ int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
return 0;
}
-int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len)
+int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
+ u8 *data, size_t data_len, u8 *mic)
{
+ size_t mic_len = crypto_aead_authsize(tfm);
struct scatterlist sg[3];
struct aead_request *aead_req;
int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
@@ -62,15 +62,15 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
if (data_len == 0)
return -EINVAL;
- aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC);
+ aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
if (!aead_req)
return -ENOMEM;
__aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, CCM_AAD_LEN);
+ memcpy(__aad, aad, aad_len);
sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
+ sg_set_buf(&sg[0], __aad, aad_len);
sg_set_buf(&sg[1], data, data_len);
sg_set_buf(&sg[2], mic, mic_len);
@@ -84,14 +84,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
return err;
}
-struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
- size_t key_len,
- size_t mic_len)
+struct crypto_aead *
+aead_key_setup_encrypt(const char *alg, const u8 key[],
+ size_t key_len, size_t mic_len)
{
struct crypto_aead *tfm;
int err;
- tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm = crypto_alloc_aead(alg, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm))
return tfm;
@@ -109,7 +109,7 @@ free_aead:
return ERR_PTR(err);
}
-void ieee80211_aes_key_free(struct crypto_aead *tfm)
+void aead_key_free(struct crypto_aead *tfm)
{
crypto_free_aead(tfm);
}
diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h
new file mode 100644
index 000000000000..5e39ea843bbf
--- /dev/null
+++ b/net/mac80211/aead_api.h
@@ -0,0 +1,27 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _AEAD_API_H
+#define _AEAD_API_H
+
+#include <crypto/aead.h>
+#include <linux/crypto.h>
+
+struct crypto_aead *
+aead_key_setup_encrypt(const char *alg, const u8 key[],
+ size_t key_len, size_t mic_len);
+
+int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+ size_t aad_len, u8 *data,
+ size_t data_len, u8 *mic);
+
+int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
+ size_t aad_len, u8 *data,
+ size_t data_len, u8 *mic);
+
+void aead_key_free(struct crypto_aead *tfm);
+
+#endif /* _AEAD_API_H */
diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h
index fcd3254c5cf0..e9b7ca0bde5b 100644
--- a/net/mac80211/aes_ccm.h
+++ b/net/mac80211/aes_ccm.h
@@ -10,19 +10,39 @@
#ifndef AES_CCM_H
#define AES_CCM_H
-#include <linux/crypto.h>
+#include "aead_api.h"
#define CCM_AAD_LEN 32
-struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[],
- size_t key_len,
- size_t mic_len);
-int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len);
-int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic,
- size_t mic_len);
-void ieee80211_aes_key_free(struct crypto_aead *tfm);
+static inline struct crypto_aead *
+ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len)
+{
+ return aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len);
+}
+
+static inline int
+ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm,
+ u8 *b_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_encrypt(tfm, b_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline int
+ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm,
+ u8 *b_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_decrypt(tfm, b_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline void ieee80211_aes_key_free(struct crypto_aead *tfm)
+{
+ return aead_key_free(tfm);
+}
#endif /* AES_CCM_H */
diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c
deleted file mode 100644
index 8a4397cc1b08..000000000000
--- a/net/mac80211/aes_gcm.c
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright 2014-2015, Qualcomm Atheros, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/err.h>
-#include <crypto/aead.h>
-
-#include <net/mac80211.h>
-#include "key.h"
-#include "aes_gcm.h"
-
-int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic)
-{
- struct scatterlist sg[3];
- struct aead_request *aead_req;
- int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
- u8 *__aad;
-
- aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
- if (!aead_req)
- return -ENOMEM;
-
- __aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, GCM_AAD_LEN);
-
- sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
- sg_set_buf(&sg[1], data, data_len);
- sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
-
- aead_request_set_tfm(aead_req, tfm);
- aead_request_set_crypt(aead_req, sg, sg, data_len, j_0);
- aead_request_set_ad(aead_req, sg[0].length);
-
- crypto_aead_encrypt(aead_req);
- kzfree(aead_req);
- return 0;
-}
-
-int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic)
-{
- struct scatterlist sg[3];
- struct aead_request *aead_req;
- int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
- u8 *__aad;
- int err;
-
- if (data_len == 0)
- return -EINVAL;
-
- aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC);
- if (!aead_req)
- return -ENOMEM;
-
- __aad = (u8 *)aead_req + reqsize;
- memcpy(__aad, aad, GCM_AAD_LEN);
-
- sg_init_table(sg, 3);
- sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad));
- sg_set_buf(&sg[1], data, data_len);
- sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN);
-
- aead_request_set_tfm(aead_req, tfm);
- aead_request_set_crypt(aead_req, sg, sg,
- data_len + IEEE80211_GCMP_MIC_LEN, j_0);
- aead_request_set_ad(aead_req, sg[0].length);
-
- err = crypto_aead_decrypt(aead_req);
- kzfree(aead_req);
-
- return err;
-}
-
-struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
- size_t key_len)
-{
- struct crypto_aead *tfm;
- int err;
-
- tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- return tfm;
-
- err = crypto_aead_setkey(tfm, key, key_len);
- if (err)
- goto free_aead;
- err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN);
- if (err)
- goto free_aead;
-
- return tfm;
-
-free_aead:
- crypto_free_aead(tfm);
- return ERR_PTR(err);
-}
-
-void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
-{
- crypto_free_aead(tfm);
-}
diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h
index 55aed5352494..d2b096033009 100644
--- a/net/mac80211/aes_gcm.h
+++ b/net/mac80211/aes_gcm.h
@@ -9,16 +9,38 @@
#ifndef AES_GCM_H
#define AES_GCM_H
-#include <linux/crypto.h>
+#include "aead_api.h"
#define GCM_AAD_LEN 32
-int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic);
-int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad,
- u8 *data, size_t data_len, u8 *mic);
-struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[],
- size_t key_len);
-void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm);
+static inline int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm,
+ u8 *j_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_encrypt(tfm, j_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm,
+ u8 *j_0, u8 *aad, u8 *data,
+ size_t data_len, u8 *mic)
+{
+ return aead_decrypt(tfm, j_0, aad + 2,
+ be16_to_cpup((__be16 *)aad),
+ data, data_len, mic);
+}
+
+static inline struct crypto_aead *
+ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], size_t key_len)
+{
+ return aead_key_setup_encrypt("gcm(aes)", key,
+ key_len, IEEE80211_GCMP_MIC_LEN);
+}
+
+static inline void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm)
+{
+ return aead_key_free(tfm);
+}
#endif /* AES_GCM_H */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 2849a1fc41c5..88cc1ae935ea 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -459,7 +459,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
}
void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
- const u8 *addr, unsigned int bit)
+ const u8 *addr, unsigned int tid)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
struct ieee80211_local *local = sdata->local;
@@ -470,7 +470,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
if (!sta)
goto unlock;
- set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl);
+ set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl);
ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
unlock:
rcu_read_unlock();
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index d6d0b4201e40..41f5e48f8021 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -290,13 +290,15 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta,
{
int i;
+ mutex_lock(&sta->ampdu_mlme.mtx);
for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
- __ieee80211_stop_tx_ba_session(sta, i, reason);
- __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
- WLAN_REASON_QSTA_LEAVE_QBSS,
- reason != AGG_STOP_DESTROY_STA &&
- reason != AGG_STOP_PEER_REQUEST);
+ ___ieee80211_stop_tx_ba_session(sta, i, reason);
+ ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
+ WLAN_REASON_QSTA_LEAVE_QBSS,
+ reason != AGG_STOP_DESTROY_STA &&
+ reason != AGG_STOP_PEER_REQUEST);
}
+ mutex_unlock(&sta->ampdu_mlme.mtx);
/* stopping might queue the work again - so cancel only afterwards */
cancel_work_sync(&sta->ampdu_mlme.work);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9675814f64db..68f874e73561 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2009,6 +2009,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct txq_info *txq, int tid);
void ieee80211_txq_purge(struct ieee80211_local *local,
struct txq_info *txqi);
+void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
u16 transaction, u16 auth_alg, u16 status,
const u8 *extra, size_t extra_len, const u8 *bssid,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index f75029abf728..13b16f90e1cf 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -793,9 +793,7 @@ static int ieee80211_open(struct net_device *dev)
static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
bool going_down)
{
- struct ieee80211_sub_if_data *txq_sdata = sdata;
struct ieee80211_local *local = sdata->local;
- struct fq *fq = &local->fq;
unsigned long flags;
struct sk_buff *skb, *tmp;
u32 hw_reconf_flags = 0;
@@ -939,9 +937,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP_VLAN:
- txq_sdata = container_of(sdata->bss,
- struct ieee80211_sub_if_data, u.ap);
-
mutex_lock(&local->mtx);
list_del(&sdata->u.vlan.list);
mutex_unlock(&local->mtx);
@@ -998,8 +993,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
skb_queue_purge(&sdata->skb_queue);
}
- sdata->bss = NULL;
-
spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
for (i = 0; i < IEEE80211_MAX_QUEUES; i++) {
skb_queue_walk_safe(&local->pending[i], skb, tmp) {
@@ -1012,22 +1005,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
}
spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
- if (txq_sdata->vif.txq) {
- struct txq_info *txqi = to_txq_info(txq_sdata->vif.txq);
-
- /*
- * FIXME FIXME
- *
- * We really shouldn't purge the *entire* txqi since that
- * contains frames for the other AP_VLANs (and possibly
- * the AP itself) as well, but there's no API in FQ now
- * to be able to filter.
- */
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ ieee80211_txq_remove_vlan(local, sdata);
- spin_lock_bh(&fq->lock);
- ieee80211_txq_purge(local, txqi);
- spin_unlock_bh(&fq->lock);
- }
+ sdata->bss = NULL;
if (local->open_count == 0)
ieee80211_clear_tx_pending(local);
@@ -1772,7 +1753,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sizeof(void *));
int txq_size = 0;
- if (local->ops->wake_tx_queue)
+ if (local->ops->wake_tx_queue &&
+ type != NL80211_IFTYPE_AP_VLAN &&
+ type != NL80211_IFTYPE_MONITOR)
txq_size += sizeof(struct txq_info) +
local->hw.txq_data_size;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index a550c707cd8a..7a76c4a6df30 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -675,8 +675,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
enum nl80211_band band;
u8 *pos;
struct ieee80211_sub_if_data *sdata;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) +
- sizeof(mgmt->u.beacon);
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon);
sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh);
rcu_read_lock();
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 7e5f271e3c30..465b7853edc0 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -275,6 +275,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
u8 *hw_addr, struct ieee802_11_elems *ie);
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
+void mesh_plink_timer(struct timer_list *t);
void mesh_plink_broken(struct sta_info *sta);
u32 mesh_plink_deactivate(struct sta_info *sta);
u32 mesh_plink_open(struct sta_info *sta);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d8bbd0d2225a..146ec6c0f12f 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
u8 *pos, ie_len;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
- sizeof(mgmt->u.action.u.mesh_action);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.mesh_action);
skb = dev_alloc_skb(local->tx_headroom +
hdr_len +
@@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_mgmt *mgmt;
u8 *pos, ie_len;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
- sizeof(mgmt->u.action.u.mesh_action);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.mesh_action);
if (time_before(jiffies, ifmsh->next_perr))
return -EAGAIN;
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index f69c6c38ca43..e2d00cce3c17 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
bool include_plid = false;
u16 peering_proto = 0;
u8 *pos, ie_len = 4;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) +
- sizeof(mgmt->u.action.u.self_prot);
+ int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot);
int err = -ENOMEM;
skb = dev_alloc_skb(local->tx_headroom +
@@ -604,8 +603,9 @@ out:
ieee80211_mbss_info_change_notify(sdata, changed);
}
-static void mesh_plink_timer(unsigned long data)
+void mesh_plink_timer(struct timer_list *t)
{
+ struct mesh_sta *mesh = from_timer(mesh, t, plink_timer);
struct sta_info *sta;
u16 reason = 0;
struct ieee80211_sub_if_data *sdata;
@@ -617,7 +617,7 @@ static void mesh_plink_timer(unsigned long data)
* del_timer_sync() this timer after having made sure
* it cannot be readded (by deleting the plink.)
*/
- sta = (struct sta_info *) data;
+ sta = mesh->plink_sta;
if (sta->sdata->local->quiescing)
return;
@@ -697,11 +697,8 @@ static void mesh_plink_timer(unsigned long data)
static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout)
{
- sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout);
- sta->mesh->plink_timer.data = (unsigned long) sta;
- sta->mesh->plink_timer.function = mesh_plink_timer;
sta->mesh->plink_timeout = timeout;
- add_timer(&sta->mesh->plink_timer);
+ mod_timer(&sta->mesh->plink_timer, jiffies + msecs_to_jiffies(timeout));
}
static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 3b8e2709d8de..e4ededa1909d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,7 +145,6 @@ static u32
ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
struct ieee80211_channel *channel,
- const struct ieee80211_ht_cap *ht_cap,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
struct cfg80211_chan_def *chandef, bool tracking)
@@ -163,20 +162,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
chandef->center_freq1 = channel->center_freq;
chandef->center_freq2 = 0;
- if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) {
+ if (!ht_oper || !sta_ht_cap.ht_supported) {
ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
goto out;
}
chandef->width = NL80211_CHAN_WIDTH_20;
- if (!(ht_cap->cap_info &
- cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
- ret = IEEE80211_STA_DISABLE_40MHZ;
- vht_chandef = *chandef;
- goto out;
- }
-
ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
channel->band);
/* check that channel matches the right operating channel */
@@ -344,7 +336,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
/* calculate new channel (type) based on HT/VHT operation IEs */
flags = ieee80211_determine_chantype(sdata, sband, chan,
- ht_cap, ht_oper, vht_oper,
+ ht_oper, vht_oper,
&chandef, true);
/*
@@ -780,11 +772,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
WLAN_EID_HT_CAPABILITY,
WLAN_EID_BSS_COEX_2040,
+ /* luckily this is almost always there */
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_QOS_TRAFFIC_CAPA,
WLAN_EID_TIM_BCAST_REQ,
WLAN_EID_INTERWORKING,
- /* 60GHz doesn't happen right now */
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
WLAN_EID_VHT_CAPABILITY,
WLAN_EID_OPMODE_NOTIF,
};
@@ -811,22 +804,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
/* if present, add any custom IEs that go before VHT */
if (assoc_data->ie_len) {
static const u8 before_vht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_EXT_SUPP_RATES,
- WLAN_EID_PWR_CAPABILITY,
- WLAN_EID_SUPPORTED_CHANNELS,
- WLAN_EID_RSN,
- WLAN_EID_QOS_CAPA,
- WLAN_EID_RRM_ENABLED_CAPABILITIES,
- WLAN_EID_MOBILITY_DOMAIN,
- WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
- WLAN_EID_HT_CAPABILITY,
+ /*
+ * no need to list the ones split off before HT
+ * or generated here
+ */
WLAN_EID_BSS_COEX_2040,
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_QOS_TRAFFIC_CAPA,
WLAN_EID_TIM_BCAST_REQ,
WLAN_EID_INTERWORKING,
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
};
/* RIC already taken above, so no need to handle here anymore */
@@ -4317,7 +4304,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
cbss->channel,
- ht_cap, ht_oper, vht_oper,
+ ht_oper, vht_oper,
&chandef, false);
sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 47d2ed570470..ef2becaade50 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -7,7 +7,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
- * Copyright 2016 Intel Deutschland GmbH
+ * Copyright 2016-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
return bss;
}
+static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+ u32 scan_flags, const u8 *da)
+{
+ if (!sdata)
+ return false;
+ /* accept broadcast for OCE */
+ if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
+ is_broadcast_ether_addr(da))
+ return true;
+ if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
+ return true;
+ return ether_addr_equal(da, sdata->vif.addr);
+}
+
void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
{
struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
@@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
+ u32 scan_req_flags = 0, sched_scan_req_flags = 0;
scan_req = rcu_dereference(local->scan_req);
sched_scan_req = rcu_dereference(local->sched_scan_req);
- /* ignore ProbeResp to foreign address unless scanning
- * with randomised address
+ if (scan_req)
+ scan_req_flags = scan_req->flags;
+
+ if (sched_scan_req)
+ sched_scan_req_flags = sched_scan_req->flags;
+
+ /* ignore ProbeResp to foreign address or non-bcast (OCE)
+ * unless scanning with randomised address
*/
- if (!(sdata1 &&
- (ether_addr_equal(mgmt->da, sdata1->vif.addr) ||
- scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) &&
- !(sdata2 &&
- (ether_addr_equal(mgmt->da, sdata2->vif.addr) ||
- sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)))
+ if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+ mgmt->da) &&
+ !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+ mgmt->da))
return;
elements = mgmt->u.probe_resp.variable;
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 69615016d5bf..9673e157bf8f 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -329,10 +329,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
if (!sta->mesh)
goto free;
+ sta->mesh->plink_sta = sta;
spin_lock_init(&sta->mesh->plink_lock);
if (ieee80211_vif_is_mesh(&sdata->vif) &&
!sdata->u.mesh.user_mpm)
- init_timer(&sta->mesh->plink_timer);
+ timer_setup(&sta->mesh->plink_timer, mesh_plink_timer,
+ 0);
sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
}
#endif
@@ -515,6 +517,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local,
return err;
}
+static void
+ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ bool allow_p2p_go_ps = sdata->vif.p2p;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (sdata != sta->sdata ||
+ !test_sta_flag(sta, WLAN_STA_ASSOC))
+ continue;
+ if (!sta->sta.support_p2p_ps) {
+ allow_p2p_go_ps = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
+ sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
+ ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
+ }
+}
+
/*
* should be called with sta_mtx locked
* this function replaces the mutex lock
@@ -561,6 +588,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
goto out_remove;
set_sta_flag(sta, WLAN_STA_INSERTED);
+
+ if (sta->sta_state >= IEEE80211_STA_ASSOC) {
+ ieee80211_recalc_min_chandef(sta->sdata);
+ if (!sta->sta.support_p2p_ps)
+ ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+ }
+
/* accept BA sessions now */
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
@@ -1788,31 +1822,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta,
}
EXPORT_SYMBOL(ieee80211_sta_set_buffered);
-static void
-ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata)
-{
- struct ieee80211_local *local = sdata->local;
- bool allow_p2p_go_ps = sdata->vif.p2p;
- struct sta_info *sta;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sta, &local->sta_list, list) {
- if (sdata != sta->sdata ||
- !test_sta_flag(sta, WLAN_STA_ASSOC))
- continue;
- if (!sta->sta.support_p2p_ps) {
- allow_p2p_go_ps = false;
- break;
- }
- }
- rcu_read_unlock();
-
- if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) {
- sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps;
- ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS);
- }
-}
-
int sta_info_move_state(struct sta_info *sta,
enum ieee80211_sta_state new_state)
{
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 3acbdfa9f649..5c54acd10562 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -344,6 +344,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
* @plink_state: peer link state
* @plink_timeout: timeout of peer link
* @plink_timer: peer link watch timer
+ * @plink_sta: peer link watch timer's sta_info
* @t_offset: timing offset relative to this host
* @t_offset_setpoint: reference timing offset of this sta to be used when
* calculating clockdrift
@@ -356,6 +357,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8)
*/
struct mesh_sta {
struct timer_list plink_timer;
+ struct sta_info *plink_sta;
s64 t_offset;
s64 t_offset_setpoint;
@@ -398,7 +400,7 @@ struct ieee80211_sta_rx_stats {
u64 msdu[IEEE80211_NUM_TIDS + 1];
};
-/**
+/*
* The bandwidth threshold below which the per-station CoDel parameters will be
* scaled to be more lenient (to prevent starvation of slow stations). This
* value will be scaled by the number of active stations when it is being
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 94826680cf2b..7b8154474b9e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1396,6 +1396,40 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
fq_flow_get_default_func);
}
+static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
+ struct fq_flow *flow, struct sk_buff *skb,
+ void *data)
+{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+
+ return info->control.vif == data;
+}
+
+void ieee80211_txq_remove_vlan(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ struct fq *fq = &local->fq;
+ struct txq_info *txqi;
+ struct fq_tin *tin;
+ struct ieee80211_sub_if_data *ap;
+
+ if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
+ return;
+
+ ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap);
+
+ if (!ap->vif.txq)
+ return;
+
+ txqi = to_txq_info(ap->vif.txq);
+ tin = &txqi->tin;
+
+ spin_lock_bh(&fq->lock);
+ fq_tin_filter(fq, tin, fq_vlan_filter_func, &sdata->vif,
+ fq_skb_free_func);
+ spin_unlock_bh(&fq->lock);
+}
+
void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
struct txq_info *txqi, int tid)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6aef6793d052..d57e5f6bd8b6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
/* insert custom IEs that go before HT */
if (ie && ie_len) {
static const u8 before_ht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_REQUEST,
- WLAN_EID_EXT_SUPP_RATES,
+ /*
+ * no need to list the ones split off already
+ * (or generated here)
+ */
WLAN_EID_DS_PARAMS,
WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
};
@@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
/* insert custom IEs that go before VHT */
if (ie && ie_len) {
static const u8 before_vht[] = {
- WLAN_EID_SSID,
- WLAN_EID_SUPP_RATES,
- WLAN_EID_REQUEST,
- WLAN_EID_EXT_SUPP_RATES,
- WLAN_EID_DS_PARAMS,
- WLAN_EID_SUPPORTED_REGULATORY_CLASSES,
- WLAN_EID_HT_CAPABILITY,
+ /*
+ * no need to list the ones split off already
+ * (or generated here)
+ */
WLAN_EID_BSS_COEX_2040,
WLAN_EID_EXT_CAPABILITY,
WLAN_EID_SSID_LIST,
WLAN_EID_CHANNEL_USAGE,
WLAN_EID_INTERWORKING,
WLAN_EID_MESH_ID,
- /* 60 GHz can't happen here right now */
+ /* 60 GHz (Multi-band, DMG, MMS) can't happen */
};
noffset = ieee80211_ie_split(ie, ie_len,
before_vht, ARRAY_SIZE(before_vht),
@@ -2980,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt;
struct ieee80211_local *local = sdata->local;
int freq;
- int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) +
- sizeof(mgmt->u.action.u.chan_switch);
+ int hdr_len = offsetofend(struct ieee80211_mgmt,
+ u.action.u.chan_switch);
u8 *pos;
if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 19ec2189d3ac..b9276ac849fa 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
bw = ieee80211_sta_cap_rx_bw(sta);
bw = min(bw, sta->cur_max_bandwidth);
+
+ /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of
+ * IEEE80211-2016 specification makes higher bandwidth operation
+ * possible on the TDLS link if the peers have wider bandwidth
+ * capability.
+ */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+ test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
+ return bw;
+
bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
return bw;
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 0d722ea98a1b..b58722d9de37 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -464,7 +464,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb,
pos += IEEE80211_CCMP_HDR_LEN;
ccmp_special_blocks(skb, pn, b_0, aad);
return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len,
- skb_put(skb, mic_len), mic_len);
+ skb_put(skb, mic_len));
}
@@ -543,7 +543,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
key->u.ccmp.tfm, b_0, aad,
skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
data_len,
- skb->data + skb->len - mic_len, mic_len))
+ skb->data + skb->len - mic_len))
return RX_DROP_UNUSABLE;
}
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 1e1c9b20bab7..2fb703d70803 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec,
u8 iv[16];
struct scatterlist src;
SKCIPHER_REQUEST_ON_STACK(req, key->tfm0);
- int err;
+ int err, datalen;
+ unsigned char *data;
llsec_geniv(iv, sec->params.hwaddr, &hdr->sec);
- sg_init_one(&src, skb->data, skb->len);
+ /* Compute data payload offset and data length */
+ data = skb_mac_header(skb) + skb->mac_len;
+ datalen = skb_tail_pointer(skb) - data;
+ sg_init_one(&src, data, datalen);
+
skcipher_request_set_tfm(req, key->tfm0);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, &src, &src, skb->len, iv);
+ skcipher_request_set_crypt(req, &src, &src, datalen, iv);
err = crypto_skcipher_encrypt(req);
skcipher_request_zero(req);
return err;
@@ -713,7 +718,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
return -EINVAL;
- if (!hdr.fc.security_enabled || hdr.sec.level == 0) {
+ if (!hdr.fc.security_enabled ||
+ (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) {
skb_push(skb, hlen);
return 0;
}
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 5c467ef97311..801ea9098387 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -24,6 +24,7 @@ config NET_MPLS_GSO
config MPLS_ROUTING
tristate "MPLS: routing support"
+ depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n
---help---
Add support for forwarding of mpls packets.
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index c5b9ce41d66f..8ca9915befc8 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -16,6 +16,7 @@
#include <net/arp.h>
#include <net/ip_fib.h>
#include <net/netevent.h>
+#include <net/ip_tunnels.h>
#include <net/netns/generic.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -39,6 +40,36 @@ static int one = 1;
static int label_limit = (1 << 20) - 1;
static int ttl_max = 255;
+#if IS_ENABLED(CONFIG_NET_IP_TUNNEL)
+static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e)
+{
+ return sizeof(struct mpls_shim_hdr);
+}
+
+static const struct ip_tunnel_encap_ops mpls_iptun_ops = {
+ .encap_hlen = ipgre_mpls_encap_hlen,
+};
+
+static int ipgre_tunnel_encap_add_mpls_ops(void)
+{
+ return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
+}
+
+static void ipgre_tunnel_encap_del_mpls_ops(void)
+{
+ ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS);
+}
+#else
+static int ipgre_tunnel_encap_add_mpls_ops(void)
+{
+ return 0;
+}
+
+static void ipgre_tunnel_encap_del_mpls_ops(void)
+{
+}
+#endif
+
static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
struct nlmsghdr *nlh, struct net *net, u32 portid,
unsigned int nlm_flags);
@@ -2485,6 +2516,10 @@ static int __init mpls_init(void)
0);
rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf,
mpls_netconf_dump_devconf, 0);
+ err = ipgre_tunnel_encap_add_mpls_ops();
+ if (err)
+ pr_err("Can't add mpls over gre tunnel ops\n");
+
err = 0;
out:
return err;
@@ -2502,6 +2537,7 @@ static void __exit mpls_exit(void)
dev_remove_pack(&mpls_packet_type);
unregister_netdevice_notifier(&mpls_dev_notifier);
unregister_pernet_subsys(&mpls_net_ops);
+ ipgre_tunnel_encap_del_mpls_ops();
}
module_exit(mpls_exit);
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 8ad2b52a0b32..5ca18f07683b 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -37,11 +37,11 @@
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
static void
-mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct mtype *map = set->data;
- setup_timer(&map->gc, gc, (unsigned long)set);
+ timer_setup(&map->gc, gc, 0);
mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
}
@@ -272,10 +272,10 @@ out:
}
static void
-mtype_gc(unsigned long ul_set)
+mtype_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct mtype *map = set->data;
+ struct mtype *map = from_timer(map, t, gc);
+ struct ip_set *set = map->set;
void *x;
u32 id;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 4783efff0bde..d8975a0b4282 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -48,6 +48,7 @@ struct bitmap_ip {
size_t memsize; /* members size */
u8 netmask; /* subnet netmask */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* data extensions */
__aligned(__alignof__(u64));
};
@@ -232,6 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
map->netmask = netmask;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_IPV4;
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 9a065f672d3a..4c279fbd2d5d 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -52,6 +52,7 @@ struct bitmap_ipmac {
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collector */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* MAC + data extensions */
__aligned(__alignof__(u64));
};
@@ -307,6 +308,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map,
map->elements = elements;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_IPV4;
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7f0c733358a4..7f9bbd7c98b5 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -40,6 +40,7 @@ struct bitmap_port {
u32 elements; /* number of max elements in the set */
size_t memsize; /* members size */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
unsigned char extensions[0] /* data extensions */
__aligned(__alignof__(u64));
};
@@ -214,6 +215,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map,
map->last_port = last_port;
set->timeout = IPSET_NO_TIMEOUT;
+ map->set = set;
set->data = map;
set->family = NFPROTO_UNSPEC;
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 51063d9ed0f7..efffc8eabafe 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -280,6 +280,7 @@ htable_bits(u32 hashsize)
struct htype {
struct htable __rcu *table; /* the hash table */
struct timer_list gc; /* garbage collection when timeout enabled */
+ struct ip_set *set; /* attached to this ip_set */
u32 maxelem; /* max elements in the hash */
u32 initval; /* random jhash init value */
#ifdef IP_SET_HASH_WITH_MARKMASK
@@ -429,11 +430,11 @@ mtype_destroy(struct ip_set *set)
}
static void
-mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct htype *h = set->data;
- setup_timer(&h->gc, gc, (unsigned long)set);
+ timer_setup(&h->gc, gc, 0);
mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
pr_debug("gc initialized, run in every %u\n",
IPSET_GC_PERIOD(set->timeout));
@@ -526,10 +527,10 @@ mtype_expire(struct ip_set *set, struct htype *h)
}
static void
-mtype_gc(unsigned long ul_set)
+mtype_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct htype *h = set->data;
+ struct htype *h = from_timer(h, t, gc);
+ struct ip_set *set = h->set;
pr_debug("called\n");
spin_lock_bh(&set->lock);
@@ -1314,6 +1315,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
t->htable_bits = hbits;
RCU_INIT_POINTER(h->table, t);
+ h->set = set;
set->data = h;
#ifndef IP_SET_PROTO_UNDEF
if (set->family == NFPROTO_IPV4) {
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 178d4eba013b..c9b4e05ad940 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -44,6 +44,7 @@ struct set_adt_elem {
struct list_set {
u32 size; /* size of set list array */
struct timer_list gc; /* garbage collection */
+ struct ip_set *set; /* attached to this ip_set */
struct net *net; /* namespace */
struct list_head members; /* the set members */
};
@@ -571,10 +572,10 @@ static const struct ip_set_type_variant set_variant = {
};
static void
-list_set_gc(unsigned long ul_set)
+list_set_gc(struct timer_list *t)
{
- struct ip_set *set = (struct ip_set *)ul_set;
- struct list_set *map = set->data;
+ struct list_set *map = from_timer(map, t, gc);
+ struct ip_set *set = map->set;
spin_lock_bh(&set->lock);
set_cleanup_entries(set);
@@ -585,11 +586,11 @@ list_set_gc(unsigned long ul_set)
}
static void
-list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
+list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t))
{
struct list_set *map = set->data;
- setup_timer(&map->gc, gc, (unsigned long)set);
+ timer_setup(&map->gc, gc, 0);
mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ);
}
@@ -606,6 +607,7 @@ init_list_set(struct net *net, struct ip_set *set, u32 size)
map->size = size;
map->net = net;
+ map->set = set;
INIT_LIST_HEAD(&map->members);
set->data = map;
diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c
index 29123934887b..041da0d9c06f 100644
--- a/net/netfilter/xt_bpf.c
+++ b/net/netfilter/xt_bpf.c
@@ -56,7 +56,7 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret)
int retval, fd;
set_fs(KERNEL_DS);
- fd = bpf_obj_get_user(path);
+ fd = bpf_obj_get_user(path, 0);
set_fs(oldfs);
if (fd < 0)
return fd;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index b93148e8e9fb..767c84e10e20 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2336,16 +2336,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
if (!skb) {
- struct sock *sk;
-
- sk = netlink_lookup(sock_net(in_skb->sk),
- in_skb->sk->sk_protocol,
- NETLINK_CB(in_skb).portid);
- if (sk) {
- sk->sk_err = ENOBUFS;
- sk->sk_error_report(sk);
- sock_put(sk);
- }
+ NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
+ NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk);
return;
}
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ebf16f7f9089..2dec3583c97d 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -241,9 +241,9 @@ void nr_destroy_socket(struct sock *);
/*
* Handler for deferred kills.
*/
-static void nr_destroy_timer(unsigned long data)
+static void nr_destroy_timer(struct timer_list *t)
{
- struct sock *sk=(struct sock *)data;
+ struct sock *sk = from_timer(sk, t, sk_timer);
bh_lock_sock(sk);
sock_hold(sk);
nr_destroy_socket(sk);
@@ -284,7 +284,7 @@ void nr_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
- sk->sk_timer.function = nr_destroy_timer;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_destroy_timer;
sk->sk_timer.expires = jiffies + 2 * HZ;
add_timer(&sk->sk_timer);
} else
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 80dbd0beb516..fbfdae452ff9 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -125,7 +125,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
case NR_DISCREQ:
nr_write_internal(sk, NR_DISCACK);
-
+ /* fall through */
case NR_DISCACK:
nr_disconnect(sk, 0);
break;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 94d05806a9a2..43569aea0f5e 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -29,24 +29,23 @@
#include <linux/interrupt.h>
#include <net/netrom.h>
-static void nr_heartbeat_expiry(unsigned long);
-static void nr_t1timer_expiry(unsigned long);
-static void nr_t2timer_expiry(unsigned long);
-static void nr_t4timer_expiry(unsigned long);
-static void nr_idletimer_expiry(unsigned long);
+static void nr_heartbeat_expiry(struct timer_list *);
+static void nr_t1timer_expiry(struct timer_list *);
+static void nr_t2timer_expiry(struct timer_list *);
+static void nr_t4timer_expiry(struct timer_list *);
+static void nr_idletimer_expiry(struct timer_list *);
void nr_init_timers(struct sock *sk)
{
struct nr_sock *nr = nr_sk(sk);
- setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk);
- setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk);
- setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk);
- setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk);
+ timer_setup(&nr->t1timer, nr_t1timer_expiry, 0);
+ timer_setup(&nr->t2timer, nr_t2timer_expiry, 0);
+ timer_setup(&nr->t4timer, nr_t4timer_expiry, 0);
+ timer_setup(&nr->idletimer, nr_idletimer_expiry, 0);
/* initialized by sock_init_data */
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &nr_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_heartbeat_expiry;
}
void nr_start_t1timer(struct sock *sk)
@@ -113,9 +112,9 @@ int nr_t1timer_running(struct sock *sk)
return timer_pending(&nr_sk(sk)->t1timer);
}
-static void nr_heartbeat_expiry(unsigned long param)
+static void nr_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct nr_sock *nr = nr_sk(sk);
bh_lock_sock(sk);
@@ -152,10 +151,10 @@ static void nr_heartbeat_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t2timer_expiry(unsigned long param)
+static void nr_t2timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, t2timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
if (nr->condition & NR_COND_ACK_PENDING) {
@@ -165,19 +164,20 @@ static void nr_t2timer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t4timer_expiry(unsigned long param)
+static void nr_t4timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct nr_sock *nr = from_timer(nr, t, t4timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
bh_unlock_sock(sk);
}
-static void nr_idletimer_expiry(unsigned long param)
+static void nr_idletimer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, idletimer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
@@ -202,10 +202,10 @@ static void nr_idletimer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void nr_t1timer_expiry(unsigned long param)
+static void nr_t1timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct nr_sock *nr = nr_sk(sk);
+ struct nr_sock *nr = from_timer(nr, t, t1timer);
+ struct sock *sk = &nr->sock;
bh_lock_sock(sk);
switch (nr->state) {
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 5cf33df888c3..e5e23c2cbe74 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -1094,9 +1094,8 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops,
dev->targets_generation = 1;
if (ops->check_presence) {
- init_timer(&dev->check_pres_timer);
- dev->check_pres_timer.data = (unsigned long)dev;
- dev->check_pres_timer.function = nfc_check_pres_timeout;
+ setup_timer(&dev->check_pres_timer, nfc_check_pres_timeout,
+ (unsigned long)dev);
INIT_WORK(&dev->check_pres_work, nfc_check_pres_work);
}
diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c
index b740fef0acc5..a8a6e7814e09 100644
--- a/net/nfc/hci/core.c
+++ b/net/nfc/hci/core.c
@@ -1004,9 +1004,8 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev)
INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work);
- init_timer(&hdev->cmd_timer);
- hdev->cmd_timer.data = (unsigned long)hdev;
- hdev->cmd_timer.function = nfc_hci_cmd_timeout;
+ setup_timer(&hdev->cmd_timer, nfc_hci_cmd_timeout,
+ (unsigned long)hdev);
skb_queue_head_init(&hdev->rx_hcp_frags);
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index 17e59a009ce6..58df37eae1e8 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -763,17 +763,14 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
mutex_init(&shdlc->state_mutex);
shdlc->state = SHDLC_DISCONNECTED;
- init_timer(&shdlc->connect_timer);
- shdlc->connect_timer.data = (unsigned long)shdlc;
- shdlc->connect_timer.function = llc_shdlc_connect_timeout;
+ setup_timer(&shdlc->connect_timer, llc_shdlc_connect_timeout,
+ (unsigned long)shdlc);
- init_timer(&shdlc->t1_timer);
- shdlc->t1_timer.data = (unsigned long)shdlc;
- shdlc->t1_timer.function = llc_shdlc_t1_timeout;
+ setup_timer(&shdlc->t1_timer, llc_shdlc_t1_timeout,
+ (unsigned long)shdlc);
- init_timer(&shdlc->t2_timer);
- shdlc->t2_timer.data = (unsigned long)shdlc;
- shdlc->t2_timer.function = llc_shdlc_t2_timeout;
+ setup_timer(&shdlc->t2_timer, llc_shdlc_t2_timeout,
+ (unsigned long)shdlc);
shdlc->w = SHDLC_MAX_WINDOW;
shdlc->srej_support = SHDLC_SREJ_SUPPORT;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 02eef5cf3cce..7988185072e5 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -1573,9 +1573,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
INIT_LIST_HEAD(&local->list);
kref_init(&local->ref);
mutex_init(&local->sdp_lock);
- init_timer(&local->link_timer);
- local->link_timer.data = (unsigned long) local;
- local->link_timer.function = nfc_llcp_symm_timer;
+ setup_timer(&local->link_timer, nfc_llcp_symm_timer,
+ (unsigned long)local);
skb_queue_head_init(&local->tx_queue);
INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
@@ -1601,9 +1600,8 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
mutex_init(&local->sdreq_lock);
INIT_HLIST_HEAD(&local->pending_sdreqs);
- init_timer(&local->sdreq_timer);
- local->sdreq_timer.data = (unsigned long) local;
- local->sdreq_timer.function = nfc_llcp_sdreq_timer;
+ setup_timer(&local->sdreq_timer, nfc_llcp_sdreq_timer,
+ (unsigned long)local);
INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work);
list_add(&local->list, &llcp_devices);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a54a556fcdb5..a551232daf61 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1203,6 +1203,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
return err == -EINPROGRESS ? 0 : err;
break;
+ case OVS_ACTION_ATTR_CT_CLEAR:
+ err = ovs_ct_clear(skb, key);
+ break;
+
case OVS_ACTION_ATTR_PUSH_ETH:
err = push_eth(skb, key, nla_data(a));
break;
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index d558e882ca0c..b27c5c6d9cab 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -752,6 +752,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct,
}
}
/* Non-ICMP, fall thru to initialize if needed. */
+ /* fall through */
case IP_CT_NEW:
/* Seen it before? This can happen for loopback, retrans,
* or local packets.
@@ -1129,6 +1130,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
return err;
}
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
+{
+ if (skb_nfct(skb)) {
+ nf_conntrack_put(skb_nfct(skb));
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+ ovs_ct_fill_key(skb, key);
+ }
+
+ return 0;
+}
+
static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name,
const struct sw_flow_key *key, bool log)
{
diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h
index bc7efd1867ab..399dfdd2c4f9 100644
--- a/net/openvswitch/conntrack.h
+++ b/net/openvswitch/conntrack.h
@@ -30,6 +30,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *);
int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
const struct ovs_conntrack_info *);
+int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
int ovs_ct_put_key(const struct sw_flow_key *swkey,
@@ -73,6 +74,12 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb,
return -ENOTSUPP;
}
+static inline int ovs_ct_clear(struct sk_buff *skb,
+ struct sw_flow_key *key)
+{
+ return -ENOTSUPP;
+}
+
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
struct sw_flow_key *key)
{
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index e8eb427ce6d1..dc0d79092e74 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@
#include <net/ndisc.h>
#include <net/mpls.h>
#include <net/vxlan.h>
+#include <net/erspan.h>
#include "flow_netlink.h"
@@ -75,6 +76,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
break;
case OVS_ACTION_ATTR_CT:
+ case OVS_ACTION_ATTR_CT_CLEAR:
case OVS_ACTION_ATTR_HASH:
case OVS_ACTION_ATTR_POP_ETH:
case OVS_ACTION_ATTR_POP_MPLS:
@@ -319,7 +321,8 @@ size_t ovs_tun_key_attr_size(void)
* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
*/
+ nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
- + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */
+ + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
}
size_t ovs_key_attr_size(void)
@@ -371,6 +374,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
.next = ovs_vxlan_ext_key_lens },
[OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
[OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) },
};
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -593,6 +597,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
+static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
+{
+ unsigned long opt_key_offset;
+ struct erspan_metadata opts;
+
+ BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
+
+ memset(&opts, 0, sizeof(opts));
+ opts.index = nla_get_be32(attr);
+
+ /* Index has only 20-bit */
+ if (ntohl(opts.index) & ~INDEX_MASK) {
+ OVS_NLERR(log, "ERSPAN index number %x too large.",
+ ntohl(opts.index));
+ return -EINVAL;
+ }
+
+ SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
+ opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
+ SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
+ is_mask);
+
+ return 0;
+}
+
static int ip_tun_from_nlattr(const struct nlattr *attr,
struct sw_flow_match *match, bool is_mask,
bool log)
@@ -700,6 +731,19 @@ static int ip_tun_from_nlattr(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_PAD:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ if (opts_type) {
+ OVS_NLERR(log, "Multiple metadata blocks provided");
+ return -EINVAL;
+ }
+
+ err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
+ if (err)
+ return err;
+
+ tun_flags |= TUNNEL_ERSPAN_OPT;
+ opts_type = type;
+ break;
default:
OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
@@ -824,6 +868,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb,
else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
return -EMSGSIZE;
+ else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
+ nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+ ((struct erspan_metadata *)tun_opts)->index))
+ return -EMSGSIZE;
}
return 0;
@@ -2195,6 +2243,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
break;
case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
break;
+ case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
+ break;
}
};
@@ -2479,6 +2529,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
[OVS_ACTION_ATTR_CT] = (u32)-1,
+ [OVS_ACTION_ATTR_CT_CLEAR] = 0,
[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
[OVS_ACTION_ATTR_POP_ETH] = 0,
@@ -2620,6 +2671,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
skip_copy = true;
break;
+ case OVS_ACTION_ATTR_CT_CLEAR:
+ break;
+
case OVS_ACTION_ATTR_PUSH_ETH:
/* Disallow pushing an Ethernet header if one
* is already present */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 0389398fa4ab..2e5e7a41d8ef 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name)
rtnl_lock();
err = netdev_master_upper_dev_link(vport->dev,
- get_dpdev(vport->dp), NULL, NULL);
+ get_dpdev(vport->dp),
+ NULL, NULL, NULL);
if (err)
goto error_unlock;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2986941164b1..4f4fa323171d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -544,9 +544,7 @@ static void prb_init_blk_timer(struct packet_sock *po,
struct tpacket_kbdq_core *pkc,
void (*func) (unsigned long))
{
- init_timer(&pkc->retire_blk_timer);
- pkc->retire_blk_timer.data = (long)po;
- pkc->retire_blk_timer.function = func;
+ setup_timer(&pkc->retire_blk_timer, func, (long)po);
pkc->retire_blk_timer.expires = jiffies;
}
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index f925753668a7..3b0ef691f5b1 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -35,11 +35,11 @@
#include <net/phonet/pn_dev.h>
/* Transport protocol registration */
-static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
+static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly;
-static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
+static const struct phonet_protocol *phonet_proto_get(unsigned int protocol)
{
- struct phonet_protocol *pp;
+ const struct phonet_protocol *pp;
if (protocol >= PHONET_NPROTO)
return NULL;
@@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol)
return pp;
}
-static inline void phonet_proto_put(struct phonet_protocol *pp)
+static inline void phonet_proto_put(const struct phonet_protocol *pp)
{
module_put(pp->prot->owner);
}
@@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
struct pn_sock *pn;
- struct phonet_protocol *pnp;
+ const struct phonet_protocol *pnp;
int err;
if (!capable(CAP_SYS_ADMIN))
@@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr)
return 1;
}
-struct header_ops phonet_header_ops = {
+const struct header_ops phonet_header_ops = {
.create = pn_header_create,
.parse = pn_header_parse,
};
@@ -470,7 +470,7 @@ static struct packet_type phonet_packet_type __read_mostly = {
static DEFINE_MUTEX(proto_tab_lock);
int __init_or_module phonet_proto_register(unsigned int protocol,
- struct phonet_protocol *pp)
+ const struct phonet_protocol *pp)
{
int err = 0;
@@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol,
}
EXPORT_SYMBOL(phonet_proto_register);
-void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp)
+void phonet_proto_unregister(unsigned int protocol,
+ const struct phonet_protocol *pp)
{
mutex_lock(&proto_tab_lock);
BUG_ON(proto_tab[protocol] != pp);
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 5e710435ffa9..b44fb9018fb8 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -195,7 +195,7 @@ static struct proto pn_proto = {
.name = "PHONET",
};
-static struct phonet_protocol pn_dgram_proto = {
+static const struct phonet_protocol pn_dgram_proto = {
.ops = &phonet_dgram_ops,
.prot = &pn_proto,
.sock_type = SOCK_DGRAM,
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e81537991ddf..9fc76b19cd3c 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -1351,7 +1351,7 @@ static struct proto pep_proto = {
.name = "PNPIPE",
};
-static struct phonet_protocol pep_pn_proto = {
+static const struct phonet_protocol pep_pn_proto = {
.ops = &phonet_stream_ops,
.prot = &pep_proto,
.sock_type = SOCK_SEQPACKET,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index c2f5c13550c0..e458ece96d3d 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -20,26 +20,15 @@
#include "qrtr.h"
-#define QRTR_PROTO_VER 1
+#define QRTR_PROTO_VER_1 1
+#define QRTR_PROTO_VER_2 3
/* auto-bind range */
#define QRTR_MIN_EPH_SOCKET 0x4000
#define QRTR_MAX_EPH_SOCKET 0x7fff
-enum qrtr_pkt_type {
- QRTR_TYPE_DATA = 1,
- QRTR_TYPE_HELLO = 2,
- QRTR_TYPE_BYE = 3,
- QRTR_TYPE_NEW_SERVER = 4,
- QRTR_TYPE_DEL_SERVER = 5,
- QRTR_TYPE_DEL_CLIENT = 6,
- QRTR_TYPE_RESUME_TX = 7,
- QRTR_TYPE_EXIT = 8,
- QRTR_TYPE_PING = 9,
-};
-
/**
- * struct qrtr_hdr - (I|R)PCrouter packet header
+ * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
* @version: protocol version
* @type: packet type; one of QRTR_TYPE_*
* @src_node_id: source node
@@ -49,7 +38,7 @@ enum qrtr_pkt_type {
* @dst_node_id: destination node
* @dst_port_id: destination port
*/
-struct qrtr_hdr {
+struct qrtr_hdr_v1 {
__le32 version;
__le32 type;
__le32 src_node_id;
@@ -60,9 +49,44 @@ struct qrtr_hdr {
__le32 dst_port_id;
} __packed;
-#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr)
-#define QRTR_NODE_BCAST ((unsigned int)-1)
-#define QRTR_PORT_CTRL ((unsigned int)-2)
+/**
+ * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions
+ * @version: protocol version
+ * @type: packet type; one of QRTR_TYPE_*
+ * @flags: bitmask of QRTR_FLAGS_*
+ * @optlen: length of optional header data
+ * @size: length of packet, excluding this header and optlen
+ * @src_node_id: source node
+ * @src_port_id: source port
+ * @dst_node_id: destination node
+ * @dst_port_id: destination port
+ */
+struct qrtr_hdr_v2 {
+ u8 version;
+ u8 type;
+ u8 flags;
+ u8 optlen;
+ __le32 size;
+ __le16 src_node_id;
+ __le16 src_port_id;
+ __le16 dst_node_id;
+ __le16 dst_port_id;
+};
+
+#define QRTR_FLAGS_CONFIRM_RX BIT(0)
+
+struct qrtr_cb {
+ u32 src_node;
+ u32 src_port;
+ u32 dst_node;
+ u32 dst_port;
+
+ u8 type;
+ u8 confirm_rx;
+};
+
+#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \
+ sizeof(struct qrtr_hdr_v2))
struct qrtr_sock {
/* WARNING: sk must be the first member */
@@ -111,8 +135,12 @@ struct qrtr_node {
struct list_head item;
};
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb);
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb);
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to);
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to);
/* Release node resources and free the node.
*
@@ -150,10 +178,27 @@ static void qrtr_node_release(struct qrtr_node *node)
}
/* Pass an outgoing packet socket buffer to the endpoint driver. */
-static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
+ struct qrtr_hdr_v1 *hdr;
+ size_t len = skb->len;
int rc = -ENODEV;
+ hdr = skb_push(skb, sizeof(*hdr));
+ hdr->version = cpu_to_le32(QRTR_PROTO_VER_1);
+ hdr->type = cpu_to_le32(type);
+ hdr->src_node_id = cpu_to_le32(from->sq_node);
+ hdr->src_port_id = cpu_to_le32(from->sq_port);
+ hdr->dst_node_id = cpu_to_le32(to->sq_node);
+ hdr->dst_port_id = cpu_to_le32(to->sq_port);
+
+ hdr->size = cpu_to_le32(len);
+ hdr->confirm_rx = 0;
+
+ skb_put_padto(skb, ALIGN(len, 4));
+
mutex_lock(&node->ep_lock);
if (node->ep)
rc = node->ep->xmit(node->ep, skb);
@@ -207,125 +252,103 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid)
int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
{
struct qrtr_node *node = ep->node;
- const struct qrtr_hdr *phdr = data;
+ const struct qrtr_hdr_v1 *v1;
+ const struct qrtr_hdr_v2 *v2;
struct sk_buff *skb;
- unsigned int psize;
+ struct qrtr_cb *cb;
unsigned int size;
- unsigned int type;
unsigned int ver;
- unsigned int dst;
-
- if (len < QRTR_HDR_SIZE || len & 3)
- return -EINVAL;
-
- ver = le32_to_cpu(phdr->version);
- size = le32_to_cpu(phdr->size);
- type = le32_to_cpu(phdr->type);
- dst = le32_to_cpu(phdr->dst_port_id);
-
- psize = (size + 3) & ~3;
+ size_t hdrlen;
- if (ver != QRTR_PROTO_VER)
- return -EINVAL;
-
- if (len != psize + QRTR_HDR_SIZE)
- return -EINVAL;
-
- if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA)
+ if (len & 3)
return -EINVAL;
skb = netdev_alloc_skb(NULL, len);
if (!skb)
return -ENOMEM;
- skb_reset_transport_header(skb);
- skb_put_data(skb, data, len);
-
- skb_queue_tail(&node->rx_queue, skb);
- schedule_work(&node->work);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
+ cb = (struct qrtr_cb *)skb->cb;
-static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len,
- u32 src_node, u32 dst_node)
-{
- struct qrtr_hdr *hdr;
- struct sk_buff *skb;
-
- skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL);
- if (!skb)
- return NULL;
- skb_reset_transport_header(skb);
+ /* Version field in v1 is little endian, so this works for both cases */
+ ver = *(u8*)data;
- hdr = skb_put(skb, QRTR_HDR_SIZE);
- hdr->version = cpu_to_le32(QRTR_PROTO_VER);
- hdr->type = cpu_to_le32(type);
- hdr->src_node_id = cpu_to_le32(src_node);
- hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL);
- hdr->confirm_rx = cpu_to_le32(0);
- hdr->size = cpu_to_le32(pkt_len);
- hdr->dst_node_id = cpu_to_le32(dst_node);
- hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
+ switch (ver) {
+ case QRTR_PROTO_VER_1:
+ v1 = data;
+ hdrlen = sizeof(*v1);
- return skb;
-}
+ cb->type = le32_to_cpu(v1->type);
+ cb->src_node = le32_to_cpu(v1->src_node_id);
+ cb->src_port = le32_to_cpu(v1->src_port_id);
+ cb->confirm_rx = !!v1->confirm_rx;
+ cb->dst_node = le32_to_cpu(v1->dst_node_id);
+ cb->dst_port = le32_to_cpu(v1->dst_port_id);
-/* Allocate and construct a resume-tx packet. */
-static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node,
- u32 dst_node, u32 port)
-{
- const int pkt_len = 20;
- struct sk_buff *skb;
- __le32 *buf;
+ size = le32_to_cpu(v1->size);
+ break;
+ case QRTR_PROTO_VER_2:
+ v2 = data;
+ hdrlen = sizeof(*v2) + v2->optlen;
+
+ cb->type = v2->type;
+ cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX);
+ cb->src_node = le16_to_cpu(v2->src_node_id);
+ cb->src_port = le16_to_cpu(v2->src_port_id);
+ cb->dst_node = le16_to_cpu(v2->dst_node_id);
+ cb->dst_port = le16_to_cpu(v2->dst_port_id);
+
+ if (cb->src_port == (u16)QRTR_PORT_CTRL)
+ cb->src_port = QRTR_PORT_CTRL;
+ if (cb->dst_port == (u16)QRTR_PORT_CTRL)
+ cb->dst_port = QRTR_PORT_CTRL;
+
+ size = le32_to_cpu(v2->size);
+ break;
+ default:
+ pr_err("qrtr: Invalid version %d\n", ver);
+ goto err;
+ }
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len,
- src_node, dst_node);
- if (!skb)
- return NULL;
+ if (len != ALIGN(size, 4) + hdrlen)
+ goto err;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX);
- buf[1] = cpu_to_le32(src_node);
- buf[2] = cpu_to_le32(port);
+ if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
+ goto err;
- return skb;
-}
+ skb_put_data(skb, data + hdrlen, size);
-/* Allocate and construct a BYE message to signal remote termination */
-static struct sk_buff *qrtr_alloc_local_bye(u32 src_node)
-{
- const int pkt_len = 20;
- struct sk_buff *skb;
- __le32 *buf;
+ skb_queue_tail(&node->rx_queue, skb);
+ schedule_work(&node->work);
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len,
- src_node, qrtr_local_nid);
- if (!skb)
- return NULL;
+ return 0;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_BYE);
+err:
+ kfree_skb(skb);
+ return -EINVAL;
- return skb;
}
+EXPORT_SYMBOL_GPL(qrtr_endpoint_post);
-static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq)
+/**
+ * qrtr_alloc_ctrl_packet() - allocate control packet skb
+ * @pkt: reference to qrtr_ctrl_pkt pointer
+ *
+ * Returns newly allocated sk_buff, or NULL on failure
+ *
+ * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and
+ * on success returns a reference to the control packet in @pkt.
+ */
+static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt)
{
- const int pkt_len = 20;
+ const int pkt_len = sizeof(struct qrtr_ctrl_pkt);
struct sk_buff *skb;
- __le32 *buf;
- skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len,
- sq->sq_node, QRTR_NODE_BCAST);
+ skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL);
if (!skb)
return NULL;
- buf = skb_put_zero(skb, pkt_len);
- buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
- buf[1] = cpu_to_le32(sq->sq_node);
- buf[2] = cpu_to_le32(sq->sq_port);
+ skb_reserve(skb, QRTR_HDR_MAX_SIZE);
+ *pkt = skb_put_zero(skb, pkt_len);
return skb;
}
@@ -340,24 +363,26 @@ static void qrtr_port_put(struct qrtr_sock *ipc);
static void qrtr_node_rx_work(struct work_struct *work)
{
struct qrtr_node *node = container_of(work, struct qrtr_node, work);
+ struct qrtr_ctrl_pkt *pkt;
+ struct sockaddr_qrtr dst;
+ struct sockaddr_qrtr src;
struct sk_buff *skb;
while ((skb = skb_dequeue(&node->rx_queue)) != NULL) {
- const struct qrtr_hdr *phdr;
- u32 dst_node, dst_port;
struct qrtr_sock *ipc;
- u32 src_node;
+ struct qrtr_cb *cb;
int confirm;
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
- src_node = le32_to_cpu(phdr->src_node_id);
- dst_node = le32_to_cpu(phdr->dst_node_id);
- dst_port = le32_to_cpu(phdr->dst_port_id);
- confirm = !!phdr->confirm_rx;
+ cb = (struct qrtr_cb *)skb->cb;
+ src.sq_node = cb->src_node;
+ src.sq_port = cb->src_port;
+ dst.sq_node = cb->dst_node;
+ dst.sq_port = cb->dst_port;
+ confirm = !!cb->confirm_rx;
- qrtr_node_assign(node, src_node);
+ qrtr_node_assign(node, cb->src_node);
- ipc = qrtr_port_lookup(dst_port);
+ ipc = qrtr_port_lookup(cb->dst_port);
if (!ipc) {
kfree_skb(skb);
} else {
@@ -368,10 +393,16 @@ static void qrtr_node_rx_work(struct work_struct *work)
}
if (confirm) {
- skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port);
+ skb = qrtr_alloc_ctrl_packet(&pkt);
if (!skb)
break;
- if (qrtr_node_enqueue(node, skb))
+
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX);
+ pkt->client.node = cpu_to_le32(dst.sq_node);
+ pkt->client.port = cpu_to_le32(dst.sq_port);
+
+ if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX,
+ &dst, &src))
break;
}
}
@@ -421,6 +452,9 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register);
void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
{
struct qrtr_node *node = ep->node;
+ struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL};
+ struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL};
+ struct qrtr_ctrl_pkt *pkt;
struct sk_buff *skb;
mutex_lock(&node->ep_lock);
@@ -428,9 +462,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep)
mutex_unlock(&node->ep_lock);
/* Notify the local controller about the event */
- skb = qrtr_alloc_local_bye(node->nid);
- if (skb)
- qrtr_local_enqueue(NULL, skb);
+ skb = qrtr_alloc_ctrl_packet(&pkt);
+ if (skb) {
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE);
+ qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst);
+ }
qrtr_node_release(node);
ep->node = NULL;
@@ -466,13 +502,24 @@ static void qrtr_port_put(struct qrtr_sock *ipc)
/* Remove port assignment. */
static void qrtr_port_remove(struct qrtr_sock *ipc)
{
+ struct qrtr_ctrl_pkt *pkt;
struct sk_buff *skb;
int port = ipc->us.sq_port;
+ struct sockaddr_qrtr to;
- skb = qrtr_alloc_del_client(&ipc->us);
+ to.sq_family = AF_QIPCRTR;
+ to.sq_node = QRTR_NODE_BCAST;
+ to.sq_port = QRTR_PORT_CTRL;
+
+ skb = qrtr_alloc_ctrl_packet(&pkt);
if (skb) {
+ pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT);
+ pkt->client.node = cpu_to_le32(ipc->us.sq_node);
+ pkt->client.port = cpu_to_le32(ipc->us.sq_port);
+
skb_set_owner_w(skb, &ipc->sk);
- qrtr_bcast_enqueue(NULL, skb);
+ qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us,
+ &to);
}
if (port == QRTR_PORT_CTRL)
@@ -541,7 +588,7 @@ static void qrtr_reset_ports(void)
sock_hold(&ipc->sk);
ipc->sk.sk_err = ENETRESET;
- wake_up_interruptible(sk_sleep(&ipc->sk));
+ ipc->sk.sk_error_report(&ipc->sk);
sock_put(&ipc->sk);
}
mutex_unlock(&qrtr_port_lock);
@@ -620,19 +667,23 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len)
}
/* Queue packet to local peer socket. */
-static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
- const struct qrtr_hdr *phdr;
struct qrtr_sock *ipc;
+ struct qrtr_cb *cb;
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
-
- ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id));
+ ipc = qrtr_port_lookup(to->sq_port);
if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */
kfree_skb(skb);
return -ENODEV;
}
+ cb = (struct qrtr_cb *)skb->cb;
+ cb->src_node = from->sq_node;
+ cb->src_port = from->sq_port;
+
if (sock_queue_rcv_skb(&ipc->sk, skb)) {
qrtr_port_put(ipc);
kfree_skb(skb);
@@ -645,7 +696,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb)
}
/* Queue packet for broadcast. */
-static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
+static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb,
+ int type, struct sockaddr_qrtr *from,
+ struct sockaddr_qrtr *to)
{
struct sk_buff *skbn;
@@ -655,11 +708,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
if (!skbn)
break;
skb_set_owner_w(skbn, skb->sk);
- qrtr_node_enqueue(node, skbn);
+ qrtr_node_enqueue(node, skbn, type, from, to);
}
mutex_unlock(&qrtr_node_lock);
- qrtr_local_enqueue(node, skb);
+ qrtr_local_enqueue(node, skb, type, from, to);
return 0;
}
@@ -667,13 +720,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb)
static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
- int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *);
+ int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int,
+ struct sockaddr_qrtr *, struct sockaddr_qrtr *);
struct qrtr_sock *ipc = qrtr_sk(sock->sk);
struct sock *sk = sock->sk;
struct qrtr_node *node;
- struct qrtr_hdr *hdr;
struct sk_buff *skb;
size_t plen;
+ u32 type = QRTR_TYPE_DATA;
int rc;
if (msg->msg_flags & ~(MSG_DONTWAIT))
@@ -722,37 +776,19 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
plen = (len + 3) & ~3;
- skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE,
+ skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
msg->msg_flags & MSG_DONTWAIT, &rc);
if (!skb)
goto out_node;
- skb_reset_transport_header(skb);
- skb_put(skb, len + QRTR_HDR_SIZE);
-
- hdr = (struct qrtr_hdr *)skb_transport_header(skb);
- hdr->version = cpu_to_le32(QRTR_PROTO_VER);
- hdr->src_node_id = cpu_to_le32(ipc->us.sq_node);
- hdr->src_port_id = cpu_to_le32(ipc->us.sq_port);
- hdr->confirm_rx = cpu_to_le32(0);
- hdr->size = cpu_to_le32(len);
- hdr->dst_node_id = cpu_to_le32(addr->sq_node);
- hdr->dst_port_id = cpu_to_le32(addr->sq_port);
+ skb_reserve(skb, QRTR_HDR_MAX_SIZE);
- rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE,
- &msg->msg_iter, len);
+ rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc) {
kfree_skb(skb);
goto out_node;
}
- if (plen != len) {
- rc = skb_pad(skb, plen - len);
- if (rc)
- goto out_node;
- skb_put(skb, plen - len);
- }
-
if (ipc->us.sq_port == QRTR_PORT_CTRL) {
if (len < 4) {
rc = -EINVAL;
@@ -761,12 +797,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
/* control messages already require the type as 'command' */
- skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4);
- } else {
- hdr->type = cpu_to_le32(QRTR_TYPE_DATA);
+ skb_copy_bits(skb, 0, &type, 4);
+ type = le32_to_cpu(type);
}
- rc = enqueue_fn(node, skb);
+ rc = enqueue_fn(node, skb, type, &ipc->us, addr);
if (rc >= 0)
rc = len;
@@ -781,9 +816,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name);
- const struct qrtr_hdr *phdr;
struct sock *sk = sock->sk;
struct sk_buff *skb;
+ struct qrtr_cb *cb;
int copied, rc;
lock_sock(sk);
@@ -800,22 +835,22 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
return rc;
}
- phdr = (const struct qrtr_hdr *)skb_transport_header(skb);
- copied = le32_to_cpu(phdr->size);
+ copied = skb->len;
if (copied > size) {
copied = size;
msg->msg_flags |= MSG_TRUNC;
}
- rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied);
+ rc = skb_copy_datagram_msg(skb, 0, msg, copied);
if (rc < 0)
goto out;
rc = copied;
if (addr) {
+ cb = (struct qrtr_cb *)skb->cb;
addr->sq_family = AF_QIPCRTR;
- addr->sq_node = le32_to_cpu(phdr->src_node_id);
- addr->sq_port = le32_to_cpu(phdr->src_port_id);
+ addr->sq_node = cb->src_node;
+ addr->sq_port = cb->src_port;
msg->msg_namelen = sizeof(*addr);
}
@@ -908,7 +943,7 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case TIOCINQ:
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
- len = skb->len - QRTR_HDR_SIZE;
+ len = skb->len;
rc = put_user(len, (int __user *)argp);
break;
case SIOCGIFADDR:
diff --git a/net/rds/ib.c b/net/rds/ib.c
index a0954ace3774..36dd2099048a 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -126,6 +126,7 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
static void rds_ib_add_one(struct ib_device *device)
{
struct rds_ib_device *rds_ibdev;
+ bool has_fr, has_fmr;
/* Only handle IB (no iWARP) devices */
if (device->node_type != RDMA_NODE_IB_CA)
@@ -143,11 +144,11 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_wrs = device->attrs.max_qp_wr;
rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
- rds_ibdev->has_fr = (device->attrs.device_cap_flags &
- IB_DEVICE_MEM_MGT_EXTENSIONS);
- rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
- device->map_phys_fmr && device->unmap_fmr);
- rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr);
+ has_fr = (device->attrs.device_cap_flags &
+ IB_DEVICE_MEM_MGT_EXTENSIONS);
+ has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
+ device->map_phys_fmr && device->unmap_fmr);
+ rds_ibdev->use_fastreg = (has_fr && !has_fmr);
rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32;
rds_ibdev->max_1m_mrs = device->attrs.max_mr ?
diff --git a/net/rds/ib.h b/net/rds/ib.h
index bf4822407567..6ea6a27891b0 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -215,8 +215,6 @@ struct rds_ib_device {
struct list_head conn_list;
struct ib_device *dev;
struct ib_pd *pd;
- bool has_fmr;
- bool has_fr;
bool use_fastreg;
unsigned int max_mrs;
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 9a3c54e659e9..e678699268a2 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -601,11 +601,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
if (pool_type == RDS_IB_MR_1M_POOL) {
/* +1 allows for unaligned MRs */
pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1;
- pool->max_items = RDS_MR_1M_POOL_SIZE;
+ pool->max_items = rds_ibdev->max_1m_mrs;
} else {
/* pool_type == RDS_IB_MR_8K_POOL */
pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1;
- pool->max_items = RDS_MR_8K_POOL_SIZE;
+ pool->max_items = rds_ibdev->max_8k_mrs;
}
pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 4a9729257023..6a5c4992cf61 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *);
/*
* Handler for deferred kills.
*/
-static void rose_destroy_timer(unsigned long data)
+static void rose_destroy_timer(struct timer_list *t)
{
- rose_destroy_socket((struct sock *)data);
+ struct sock *sk = from_timer(sk, t, sk_timer);
+
+ rose_destroy_socket(sk);
}
/*
@@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
- setup_timer(&sk->sk_timer, rose_destroy_timer,
- (unsigned long)sk);
+ timer_setup(&sk->sk_timer, rose_destroy_timer, 0);
sk->sk_timer.expires = jiffies + 10 * HZ;
add_timer(&sk->sk_timer);
} else
@@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol,
sock->ops = &rose_proto_ops;
sk->sk_protocol = protocol;
- init_timer(&rose->timer);
- init_timer(&rose->idletimer);
+ timer_setup(&rose->timer, NULL, 0);
+ timer_setup(&rose->idletimer, NULL, 0);
rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout);
rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout);
@@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk)
sk->sk_state = TCP_ESTABLISHED;
sock_copy_flags(sk, osk);
- init_timer(&rose->timer);
- init_timer(&rose->idletimer);
+ timer_setup(&rose->timer, NULL, 0);
+ timer_setup(&rose->idletimer, NULL, 0);
orose = rose_sk(osk);
rose->t1 = orose->t1;
diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c
index 0a6394754e81..9bbbfe325c5a 100644
--- a/net/rose/rose_in.c
+++ b/net/rose/rose_in.c
@@ -219,6 +219,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety
switch (frametype) {
case ROSE_RESET_REQUEST:
rose_write_internal(sk, ROSE_RESET_CONFIRMATION);
+ /* fall through */
case ROSE_RESET_CONFIRMATION:
rose_stop_timer(sk);
rose_start_idletimer(sk);
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index c76638cc2cd5..cda4c6678ef1 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -27,8 +27,8 @@
#include <linux/interrupt.h>
#include <net/rose.h>
-static void rose_ftimer_expiry(unsigned long);
-static void rose_t0timer_expiry(unsigned long);
+static void rose_ftimer_expiry(struct timer_list *);
+static void rose_t0timer_expiry(struct timer_list *);
static void rose_transmit_restart_confirmation(struct rose_neigh *neigh);
static void rose_transmit_restart_request(struct rose_neigh *neigh);
@@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh)
{
del_timer(&neigh->ftimer);
- neigh->ftimer.data = (unsigned long)neigh;
- neigh->ftimer.function = &rose_ftimer_expiry;
+ neigh->ftimer.function = (TIMER_FUNC_TYPE)rose_ftimer_expiry;
neigh->ftimer.expires =
jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout);
@@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh)
{
del_timer(&neigh->t0timer);
- neigh->t0timer.data = (unsigned long)neigh;
- neigh->t0timer.function = &rose_t0timer_expiry;
+ neigh->t0timer.function = (TIMER_FUNC_TYPE)rose_t0timer_expiry;
neigh->t0timer.expires =
jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout);
@@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh)
return timer_pending(&neigh->t0timer);
}
-static void rose_ftimer_expiry(unsigned long param)
+static void rose_ftimer_expiry(struct timer_list *t)
{
}
-static void rose_t0timer_expiry(unsigned long param)
+static void rose_t0timer_expiry(struct timer_list *t)
{
- struct rose_neigh *neigh = (struct rose_neigh *)param;
+ struct rose_neigh *neigh = from_timer(neigh, t, t0timer);
rose_transmit_restart_request(neigh);
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 344456206b70..7af4f99c4a93 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -19,12 +19,13 @@ static struct sk_buff_head loopback_queue;
static struct timer_list loopback_timer;
static void rose_set_loopback_timer(void);
+static void rose_loopback_timer(struct timer_list *unused);
void rose_loopback_init(void)
{
skb_queue_head_init(&loopback_queue);
- init_timer(&loopback_timer);
+ timer_setup(&loopback_timer, rose_loopback_timer, 0);
}
static int rose_loopback_running(void)
@@ -50,20 +51,16 @@ int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh)
return 1;
}
-static void rose_loopback_timer(unsigned long);
static void rose_set_loopback_timer(void)
{
del_timer(&loopback_timer);
- loopback_timer.data = 0;
- loopback_timer.function = &rose_loopback_timer;
loopback_timer.expires = jiffies + 10;
-
add_timer(&loopback_timer);
}
-static void rose_loopback_timer(unsigned long param)
+static void rose_loopback_timer(struct timer_list *unused)
{
struct sk_buff *skb;
struct net_device *dev;
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index 452bbb38d943..8ca3124df83f 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route,
skb_queue_head_init(&rose_neigh->queue);
- init_timer(&rose_neigh->ftimer);
- init_timer(&rose_neigh->t0timer);
+ timer_setup(&rose_neigh->ftimer, NULL, 0);
+ timer_setup(&rose_neigh->t0timer, NULL, 0);
if (rose_route->ndigis != 0) {
rose_neigh->digipeat =
@@ -346,6 +346,7 @@ static int rose_del_node(struct rose_route_struct *rose_route,
case 0:
rose_node->neighbour[0] =
rose_node->neighbour[1];
+ /* fall through */
case 1:
rose_node->neighbour[1] =
rose_node->neighbour[2];
@@ -390,8 +391,8 @@ void rose_add_loopback_neigh(void)
skb_queue_head_init(&sn->queue);
- init_timer(&sn->ftimer);
- init_timer(&sn->t0timer);
+ timer_setup(&sn->ftimer, NULL, 0);
+ timer_setup(&sn->t0timer, NULL, 0);
spin_lock_bh(&rose_neigh_list_lock);
sn->next = rose_neigh_list;
@@ -507,6 +508,7 @@ void rose_rt_device_down(struct net_device *dev)
switch (i) {
case 0:
t->neighbour[0] = t->neighbour[1];
+ /* fall through */
case 1:
t->neighbour[1] = t->neighbour[2];
case 2:
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index bc5469d6d9cb..ea613b2a9735 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -28,16 +28,15 @@
#include <linux/interrupt.h>
#include <net/rose.h>
-static void rose_heartbeat_expiry(unsigned long);
-static void rose_timer_expiry(unsigned long);
-static void rose_idletimer_expiry(unsigned long);
+static void rose_heartbeat_expiry(struct timer_list *t);
+static void rose_timer_expiry(struct timer_list *);
+static void rose_idletimer_expiry(struct timer_list *);
void rose_start_heartbeat(struct sock *sk)
{
del_timer(&sk->sk_timer);
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &rose_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)rose_heartbeat_expiry;
sk->sk_timer.expires = jiffies + 5 * HZ;
add_timer(&sk->sk_timer);
@@ -49,8 +48,7 @@ void rose_start_t1timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t1;
add_timer(&rose->timer);
@@ -62,8 +60,7 @@ void rose_start_t2timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t2;
add_timer(&rose->timer);
@@ -75,8 +72,7 @@ void rose_start_t3timer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->t3;
add_timer(&rose->timer);
@@ -88,8 +84,7 @@ void rose_start_hbtimer(struct sock *sk)
del_timer(&rose->timer);
- rose->timer.data = (unsigned long)sk;
- rose->timer.function = &rose_timer_expiry;
+ rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry;
rose->timer.expires = jiffies + rose->hb;
add_timer(&rose->timer);
@@ -102,8 +97,7 @@ void rose_start_idletimer(struct sock *sk)
del_timer(&rose->idletimer);
if (rose->idle > 0) {
- rose->idletimer.data = (unsigned long)sk;
- rose->idletimer.function = &rose_idletimer_expiry;
+ rose->idletimer.function = (TIMER_FUNC_TYPE)rose_idletimer_expiry;
rose->idletimer.expires = jiffies + rose->idle;
add_timer(&rose->idletimer);
@@ -125,9 +119,9 @@ void rose_stop_idletimer(struct sock *sk)
del_timer(&rose_sk(sk)->idletimer);
}
-static void rose_heartbeat_expiry(unsigned long param)
+static void rose_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
struct rose_sock *rose = rose_sk(sk);
bh_lock_sock(sk);
@@ -163,10 +157,10 @@ static void rose_heartbeat_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void rose_timer_expiry(unsigned long param)
+static void rose_timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
- struct rose_sock *rose = rose_sk(sk);
+ struct rose_sock *rose = from_timer(rose, t, timer);
+ struct sock *sk = &rose->sock;
bh_lock_sock(sk);
switch (rose->state) {
@@ -192,9 +186,10 @@ static void rose_timer_expiry(unsigned long param)
bh_unlock_sock(sk);
}
-static void rose_idletimer_expiry(unsigned long param)
+static void rose_idletimer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct rose_sock *rose = from_timer(rose, t, idletimer);
+ struct sock *sk = &rose->sock;
bh_lock_sock(sk);
rose_clear_queues(sk);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 4b0a8288c98a..054e32872808 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -265,6 +265,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @tx_total_len: Total length of data to transmit during the call (or -1)
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
+ * @upgrade: Request service upgrade for call
*
* Allow a kernel service to begin a call on the nominated socket. This just
* sets up all the internal tracking structures and allocates connection and
@@ -279,7 +280,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
unsigned long user_call_ID,
s64 tx_total_len,
gfp_t gfp,
- rxrpc_notify_rx_t notify_rx)
+ rxrpc_notify_rx_t notify_rx,
+ bool upgrade)
{
struct rxrpc_conn_parameters cp;
struct rxrpc_call *call;
@@ -304,6 +306,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.key = key;
cp.security_level = 0;
cp.exclusive = false;
+ cp.upgrade = upgrade;
cp.service_id = srx->srx_service;
call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len,
gfp);
@@ -338,6 +341,25 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
EXPORT_SYMBOL(rxrpc_kernel_end_call);
/**
+ * rxrpc_kernel_check_life - Check to see whether a call is still alive
+ * @sock: The socket the call is on
+ * @call: The call to check
+ *
+ * Allow a kernel service to find out whether a call is still alive - ie. we're
+ * getting ACKs from the server. Returns a number representing the life state
+ * which can be compared to that returned by a previous call.
+ *
+ * If this is a client call, ping ACKs will be sent to the server to find out
+ * whether it's still responsive and whether the call is still alive on the
+ * server.
+ */
+u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->acks_latest;
+}
+EXPORT_SYMBOL(rxrpc_kernel_check_life);
+
+/**
* rxrpc_kernel_check_call - Check a call's state
* @sock: The socket the call is on
* @call: The call to check
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 5787f97f5330..d02a99f37f5f 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
*_srx = call->peer->srx;
}
EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_get_rtt - Get a call's peer RTT
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ *
+ * Get the call's peer RTT.
+ */
+u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call)
+{
+ return call->peer->rtt;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_rtt);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index bdece21f313d..e4937b3f3685 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -607,6 +607,7 @@ wait_error:
* @_offset: The running offset into the buffer.
* @want_more: True if more data is expected to be read
* @_abort: Where the abort code is stored if -ECONNABORTED is returned
+ * @_service: Where to store the actual service ID (may be upgraded)
*
* Allow a kernel service to receive data and pick up information about the
* state of a call. Returns 0 if got what was asked for and there's more
@@ -624,7 +625,7 @@ wait_error:
*/
int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
void *buf, size_t size, size_t *_offset,
- bool want_more, u32 *_abort)
+ bool want_more, u32 *_abort, u16 *_service)
{
struct iov_iter iter;
struct kvec iov;
@@ -680,6 +681,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
read_phase_complete:
ret = 1;
out:
+ if (_service)
+ *_service = call->service_id;
mutex_unlock(&call->user_mutex);
_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
return ret;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 9ea6f972767e..2d9edc656ca3 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -38,12 +38,86 @@ struct rxrpc_send_params {
};
/*
+ * Wait for space to appear in the Tx queue or a signal to occur.
+ */
+static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (call->tx_top - call->tx_hard_ack <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ if (signal_pending(current))
+ return sock_intr_errno(*timeo);
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ mutex_unlock(&call->user_mutex);
+ *timeo = schedule_timeout(*timeo);
+ if (mutex_lock_interruptible(&call->user_mutex) < 0)
+ return sock_intr_errno(*timeo);
+ }
+}
+
+/*
+ * Wait for space to appear in the Tx queue uninterruptibly, but with
+ * a timeout of 2*RTT if no progress was made and a signal occurred.
+ */
+static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
+ struct rxrpc_call *call)
+{
+ rxrpc_seq_t tx_start, tx_win;
+ signed long rtt2, timeout;
+ u64 rtt;
+
+ rtt = READ_ONCE(call->peer->rtt);
+ rtt2 = nsecs_to_jiffies64(rtt) * 2;
+ if (rtt2 < 1)
+ rtt2 = 1;
+
+ timeout = rtt2;
+ tx_start = READ_ONCE(call->tx_hard_ack);
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ tx_win = READ_ONCE(call->tx_hard_ack);
+ if (call->tx_top - tx_win <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ return 0;
+
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return call->error;
+
+ if (timeout == 0 &&
+ tx_win == tx_start && signal_pending(current))
+ return -EINTR;
+
+ if (tx_win != tx_start) {
+ timeout = rtt2;
+ tx_start = tx_win;
+ }
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ timeout = schedule_timeout(timeout);
+ }
+}
+
+/*
* wait for space to appear in the transmit/ACK window
* - caller holds the socket locked
*/
static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
struct rxrpc_call *call,
- long *timeo)
+ long *timeo,
+ bool waitall)
{
DECLARE_WAITQUEUE(myself, current);
int ret;
@@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
add_wait_queue(&call->waitq, &myself);
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- ret = 0;
- if (call->tx_top - call->tx_hard_ack <
- min_t(unsigned int, call->tx_winsize,
- call->cong_cwnd + call->cong_extra))
- break;
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = call->error;
- break;
- }
- if (signal_pending(current)) {
- ret = sock_intr_errno(*timeo);
- break;
- }
-
- trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- mutex_unlock(&call->user_mutex);
- *timeo = schedule_timeout(*timeo);
- if (mutex_lock_interruptible(&call->user_mutex) < 0) {
- ret = sock_intr_errno(*timeo);
- break;
- }
- }
+ if (waitall)
+ ret = rxrpc_wait_for_tx_window_nonintr(rx, call);
+ else
+ ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo);
remove_wait_queue(&call->waitq, &myself);
set_current_state(TASK_RUNNING);
@@ -254,7 +308,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
if (msg->msg_flags & MSG_DONTWAIT)
goto maybe_error;
ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo);
+ &timeo,
+ msg->msg_flags & MSG_WAITALL);
if (ret < 0)
goto maybe_error;
}
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index da6fa82c98a8..c67b820a8307 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -21,6 +21,8 @@
#include <linux/kmod.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/rhashtable.h>
+#include <linux/list.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/sch_generic.h>
@@ -1249,8 +1251,227 @@ out_module_put:
return skb->len;
}
+struct tcf_action_net {
+ struct rhashtable egdev_ht;
+};
+
+static unsigned int tcf_action_net_id;
+
+struct tcf_action_egdev_cb {
+ struct list_head list;
+ tc_setup_cb_t *cb;
+ void *cb_priv;
+};
+
+struct tcf_action_egdev {
+ struct rhash_head ht_node;
+ const struct net_device *dev;
+ unsigned int refcnt;
+ struct list_head cb_list;
+};
+
+static const struct rhashtable_params tcf_action_egdev_ht_params = {
+ .key_offset = offsetof(struct tcf_action_egdev, dev),
+ .head_offset = offsetof(struct tcf_action_egdev, ht_node),
+ .key_len = sizeof(const struct net_device *),
+};
+
+static struct tcf_action_egdev *
+tcf_action_egdev_lookup(const struct net_device *dev)
+{
+ struct net *net = dev_net(dev);
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ return rhashtable_lookup_fast(&tan->egdev_ht, &dev,
+ tcf_action_egdev_ht_params);
+}
+
+static struct tcf_action_egdev *
+tcf_action_egdev_get(const struct net_device *dev)
+{
+ struct tcf_action_egdev *egdev;
+ struct tcf_action_net *tan;
+
+ egdev = tcf_action_egdev_lookup(dev);
+ if (egdev)
+ goto inc_ref;
+
+ egdev = kzalloc(sizeof(*egdev), GFP_KERNEL);
+ if (!egdev)
+ return NULL;
+ INIT_LIST_HEAD(&egdev->cb_list);
+ egdev->dev = dev;
+ tan = net_generic(dev_net(dev), tcf_action_net_id);
+ rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node,
+ tcf_action_egdev_ht_params);
+
+inc_ref:
+ egdev->refcnt++;
+ return egdev;
+}
+
+static void tcf_action_egdev_put(struct tcf_action_egdev *egdev)
+{
+ struct tcf_action_net *tan;
+
+ if (--egdev->refcnt)
+ return;
+ tan = net_generic(dev_net(egdev->dev), tcf_action_net_id);
+ rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node,
+ tcf_action_egdev_ht_params);
+ kfree(egdev);
+}
+
+static struct tcf_action_egdev_cb *
+tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ list_for_each_entry(egdev_cb, &egdev->cb_list, list)
+ if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv)
+ return egdev_cb;
+ return NULL;
+}
+
+static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev,
+ enum tc_setup_type type,
+ void *type_data, bool err_stop)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+ int ok_count = 0;
+ int err;
+
+ list_for_each_entry(egdev_cb, &egdev->cb_list, list) {
+ err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv);
+ if (err) {
+ if (err_stop)
+ return err;
+ } else {
+ ok_count++;
+ }
+ }
+ return ok_count;
+}
+
+static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+ if (WARN_ON(egdev_cb))
+ return -EEXIST;
+ egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL);
+ if (!egdev_cb)
+ return -ENOMEM;
+ egdev_cb->cb = cb;
+ egdev_cb->cb_priv = cb_priv;
+ list_add(&egdev_cb->list, &egdev->cb_list);
+ return 0;
+}
+
+static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev_cb *egdev_cb;
+
+ egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv);
+ if (WARN_ON(!egdev_cb))
+ return;
+ list_del(&egdev_cb->list);
+ kfree(egdev_cb);
+}
+
+static int __tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev);
+ int err;
+
+ if (!egdev)
+ return -ENOMEM;
+ err = tcf_action_egdev_cb_add(egdev, cb, cb_priv);
+ if (err)
+ goto err_cb_add;
+ return 0;
+
+err_cb_add:
+ tcf_action_egdev_put(egdev);
+ return err;
+}
+int tc_setup_cb_egdev_register(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ int err;
+
+ rtnl_lock();
+ err = __tc_setup_cb_egdev_register(dev, cb, cb_priv);
+ rtnl_unlock();
+ return err;
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register);
+
+static void __tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+ if (WARN_ON(!egdev))
+ return;
+ tcf_action_egdev_cb_del(egdev, cb, cb_priv);
+ tcf_action_egdev_put(egdev);
+}
+void tc_setup_cb_egdev_unregister(const struct net_device *dev,
+ tc_setup_cb_t *cb, void *cb_priv)
+{
+ rtnl_lock();
+ __tc_setup_cb_egdev_unregister(dev, cb, cb_priv);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister);
+
+int tc_setup_cb_egdev_call(const struct net_device *dev,
+ enum tc_setup_type type, void *type_data,
+ bool err_stop)
+{
+ struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev);
+
+ if (!egdev)
+ return 0;
+ return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop);
+}
+EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call);
+
+static __net_init int tcf_action_net_init(struct net *net)
+{
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params);
+}
+
+static void __net_exit tcf_action_net_exit(struct net *net)
+{
+ struct tcf_action_net *tan = net_generic(net, tcf_action_net_id);
+
+ rhashtable_destroy(&tan->egdev_ht);
+}
+
+static struct pernet_operations tcf_action_net_ops = {
+ .init = tcf_action_net_init,
+ .exit = tcf_action_net_exit,
+ .id = &tcf_action_net_id,
+ .size = sizeof(struct tcf_action_net),
+};
+
static int __init tc_action_init(void)
{
+ int err;
+
+ err = register_pernet_subsys(&tcf_action_net_ops);
+ if (err)
+ return err;
+
rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action,
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c0c707eb2c96..5ef8ce8c83d4 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -49,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 8ccd35825b6b..3007cb1310ea 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -248,6 +248,22 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len)
return ret;
}
+#ifdef CONFIG_MODULES
+static const char *ife_meta_id2name(u32 metaid)
+{
+ switch (metaid) {
+ case IFE_META_SKBMARK:
+ return "skbmark";
+ case IFE_META_PRIO:
+ return "skbprio";
+ case IFE_META_TCINDEX:
+ return "tcindex";
+ default:
+ return "unknown";
+ }
+}
+#endif
+
/* called when adding new meta information
* under ife->tcf_lock for existing action
*/
@@ -263,7 +279,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid,
if (exists)
spin_unlock_bh(&ife->tcf_lock);
rtnl_unlock();
- request_module("ifemeta%u", metaid);
+ request_module("ife-meta-%s", ife_meta_id2name(metaid));
rtnl_lock();
if (exists)
spin_lock_bh(&ife->tcf_lock);
@@ -392,10 +408,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind)
static void tcf_ife_cleanup(struct tc_action *a, int bind)
{
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p;
spin_lock_bh(&ife->tcf_lock);
_tcf_ife_cleanup(a, bind);
spin_unlock_bh(&ife->tcf_lock);
+
+ p = rcu_dereference_protected(ife->params, 1);
+ kfree_rcu(p, rcu);
}
/* under ife->tcf_lock for existing action */
@@ -432,6 +452,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
+ struct tcf_ife_params *p, *p_old;
struct tcf_ife_info *ife;
u16 ife_type = ETH_P_IFE;
struct tc_ife *parm;
@@ -450,24 +471,41 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_IFE_PARMS]);
+ /* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because
+ * they cannot run as the same time. Check on all other values which
+ * are not supported right now.
+ */
+ if (parm->flags & ~IFE_ENCODE)
+ return -EINVAL;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
exists = tcf_idr_check(tn, parm->index, a, bind);
- if (exists && bind)
+ if (exists && bind) {
+ kfree(p);
return 0;
+ }
if (!exists) {
ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops,
- bind, false);
- if (ret)
+ bind, true);
+ if (ret) {
+ kfree(p);
return ret;
+ }
ret = ACT_P_CREATED;
} else {
tcf_idr_release(*a, bind);
- if (!ovr)
+ if (!ovr) {
+ kfree(p);
return -EEXIST;
+ }
}
ife = to_ife(*a);
- ife->flags = parm->flags;
+ p->flags = parm->flags;
if (parm->flags & IFE_ENCODE) {
if (tb[TCA_IFE_TYPE])
@@ -478,24 +516,25 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
saddr = nla_data(tb[TCA_IFE_SMAC]);
}
- if (exists)
- spin_lock_bh(&ife->tcf_lock);
ife->tcf_action = parm->action;
if (parm->flags & IFE_ENCODE) {
if (daddr)
- ether_addr_copy(ife->eth_dst, daddr);
+ ether_addr_copy(p->eth_dst, daddr);
else
- eth_zero_addr(ife->eth_dst);
+ eth_zero_addr(p->eth_dst);
if (saddr)
- ether_addr_copy(ife->eth_src, saddr);
+ ether_addr_copy(p->eth_src, saddr);
else
- eth_zero_addr(ife->eth_src);
+ eth_zero_addr(p->eth_src);
- ife->eth_type = ife_type;
+ p->eth_type = ife_type;
}
+ if (exists)
+ spin_lock_bh(&ife->tcf_lock);
+
if (ret == ACT_P_CREATED)
INIT_LIST_HEAD(&ife->metalist);
@@ -511,6 +550,7 @@ metadata_parse_err:
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ kfree(p);
return err;
}
@@ -531,6 +571,7 @@ metadata_parse_err:
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ kfree(p);
return err;
}
}
@@ -538,6 +579,11 @@ metadata_parse_err:
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ p_old = rtnl_dereference(ife->params);
+ rcu_assign_pointer(ife->params, p);
+ if (p_old)
+ kfree_rcu(p_old, rcu);
+
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
@@ -549,12 +595,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p = rtnl_dereference(ife->params);
struct tc_ife opt = {
.index = ife->tcf_index,
.refcnt = ife->tcf_refcnt - ref,
.bindcnt = ife->tcf_bindcnt - bind,
.action = ife->tcf_action,
- .flags = ife->flags,
+ .flags = p->flags,
};
struct tcf_t t;
@@ -565,17 +612,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD))
goto nla_put_failure;
- if (!is_zero_ether_addr(ife->eth_dst)) {
- if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst))
+ if (!is_zero_ether_addr(p->eth_dst)) {
+ if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst))
goto nla_put_failure;
}
- if (!is_zero_ether_addr(ife->eth_src)) {
- if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src))
+ if (!is_zero_ether_addr(p->eth_src)) {
+ if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src))
goto nla_put_failure;
}
- if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type))
+ if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type))
goto nla_put_failure;
if (dump_metalist(skb, ife)) {
@@ -617,19 +664,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
u8 *tlv_data;
u16 metalen;
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
+ bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
- spin_unlock(&ife->tcf_lock);
if (skb_at_tc_ingress(skb))
skb_push(skb, skb->dev->hard_header_len);
tlv_data = ife_decode(skb, &metalen);
if (unlikely(!tlv_data)) {
- spin_lock(&ife->tcf_lock);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -647,14 +690,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a,
*/
pr_info_ratelimited("Unknown metaid %d dlen %d\n",
mtype, dlen);
- ife->tcf_qstats.overlimits++;
+ qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
}
}
if (WARN_ON(tlv_data != ifehdr_end)) {
- spin_lock(&ife->tcf_lock);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -683,7 +724,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife)
}
static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
- struct tcf_result *res)
+ struct tcf_result *res, struct tcf_ife_params *p)
{
struct tcf_ife_info *ife = to_ife(a);
int action = ife->tcf_action;
@@ -706,23 +747,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
exceed_mtu = true;
}
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
+ bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb);
tcf_lastuse_update(&ife->tcf_tm);
if (!metalen) { /* no metadata to send */
/* abuse overlimits to count when we allow packet
* with no metadata
*/
- ife->tcf_qstats.overlimits++;
- spin_unlock(&ife->tcf_lock);
+ qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats));
return action;
}
/* could be stupid policy setup or mtu config
* so lets be conservative.. */
if ((action == TC_ACT_SHOT) || exceed_mtu) {
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
@@ -731,6 +769,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
ife_meta = ife_encode(skb, metalen);
+ spin_lock(&ife->tcf_lock);
+
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
* ops->presence_check...
@@ -742,25 +782,24 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a,
}
if (err < 0) {
/* too corrupt to keep around if overwritten */
- ife->tcf_qstats.drops++;
spin_unlock(&ife->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats));
return TC_ACT_SHOT;
}
skboff += err;
}
+ spin_unlock(&ife->tcf_lock);
oethh = (struct ethhdr *)skb->data;
- if (!is_zero_ether_addr(ife->eth_src))
- ether_addr_copy(oethh->h_source, ife->eth_src);
- if (!is_zero_ether_addr(ife->eth_dst))
- ether_addr_copy(oethh->h_dest, ife->eth_dst);
- oethh->h_proto = htons(ife->eth_type);
+ if (!is_zero_ether_addr(p->eth_src))
+ ether_addr_copy(oethh->h_source, p->eth_src);
+ if (!is_zero_ether_addr(p->eth_dst))
+ ether_addr_copy(oethh->h_dest, p->eth_dst);
+ oethh->h_proto = htons(p->eth_type);
if (skb_at_tc_ingress(skb))
skb_pull(skb, skb->dev->hard_header_len);
- spin_unlock(&ife->tcf_lock);
-
return action;
}
@@ -768,21 +807,19 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_ife_info *ife = to_ife(a);
+ struct tcf_ife_params *p;
+ int ret;
+
+ rcu_read_lock();
+ p = rcu_dereference(ife->params);
+ if (p->flags & IFE_ENCODE) {
+ ret = tcf_ife_encode(skb, a, res, p);
+ rcu_read_unlock();
+ return ret;
+ }
+ rcu_read_unlock();
- if (ife->flags & IFE_ENCODE)
- return tcf_ife_encode(skb, a, res);
-
- if (!(ife->flags & IFE_ENCODE))
- return tcf_ife_decode(skb, a, res);
-
- pr_info_ratelimited("unknown failure(policy neither de/encode\n");
- spin_lock(&ife->tcf_lock);
- bstats_update(&ife->tcf_bstats, skb);
- tcf_lastuse_update(&ife->tcf_tm);
- ife->tcf_qstats.drops++;
- spin_unlock(&ife->tcf_lock);
-
- return TC_ACT_SHOT;
+ return tcf_ife_decode(skb, a, res);
}
static int tcf_ife_walker(struct net *net, struct sk_buff *skb,
diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c
index 82892170ce4f..1e3f10e5da99 100644
--- a/net/sched/act_meta_mark.c
+++ b/net/sched/act_meta_mark.c
@@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2015)");
MODULE_DESCRIPTION("Inter-FE skb mark metadata module");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBMARK);
+MODULE_ALIAS_IFE_META("skbmark");
diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c
index 26bf4d86030b..4033f9fc4d4a 100644
--- a/net/sched/act_meta_skbprio.c
+++ b/net/sched/act_meta_skbprio.c
@@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2015)");
MODULE_DESCRIPTION("Inter-FE skb prio metadata action");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_PRIO);
+MODULE_ALIAS_IFE_META("skbprio");
diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c
index 3b35774ce890..2ea1f26c9e96 100644
--- a/net/sched/act_meta_skbtcindex.c
+++ b/net/sched/act_meta_skbtcindex.c
@@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module);
MODULE_AUTHOR("Jamal Hadi Salim(2016)");
MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module");
MODULE_LICENSE("GPL");
-MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX);
+MODULE_ALIAS_IFE_META("tcindex");
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 416627c66f08..8b3e59388480 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -140,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
m->tcfm_ifindex = parm->ifindex;
+ m->net = net;
if (ret != ACT_P_CREATED)
dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
@@ -313,15 +314,11 @@ static struct notifier_block mirred_device_notifier = {
.notifier_call = mirred_device_event,
};
-static int tcf_mirred_device(const struct tc_action *a, struct net *net,
- struct net_device **mirred_dev)
+static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
{
- int ifindex = tcf_mirred_ifindex(a);
+ struct tcf_mirred *m = to_mirred(a);
- *mirred_dev = __dev_get_by_index(net, ifindex);
- if (!*mirred_dev)
- return -EINVAL;
- return 0;
+ return __dev_get_by_index(m->net, m->tcfm_ifindex);
}
static struct tc_action_ops act_mirred_ops = {
@@ -336,7 +333,7 @@ static struct tc_action_ops act_mirred_ops = {
.walk = tcf_mirred_walker,
.lookup = tcf_mirred_search,
.size = sizeof(struct tcf_mirred),
- .get_dev = tcf_mirred_device,
+ .get_dev = tcf_mirred_get_dev,
};
static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 0b2219adf520..cdfdc24b89cf 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -240,8 +240,36 @@ tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain,
chain->p_filter_chain = p_filter_chain;
}
-int tcf_block_get(struct tcf_block **p_block,
- struct tcf_proto __rcu **p_filter_chain)
+static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei,
+ enum tc_block_command command)
+{
+ struct net_device *dev = q->dev_queue->dev;
+ struct tc_block_offload bo = {};
+
+ if (!tc_can_offload(dev))
+ return;
+ bo.command = command;
+ bo.binder_type = ei->binder_type;
+ bo.block = block;
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+}
+
+static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+ tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
+}
+
+static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
+{
+ tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
+}
+
+int tcf_block_get_ext(struct tcf_block **p_block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
{
struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
struct tcf_chain *chain;
@@ -250,6 +278,8 @@ int tcf_block_get(struct tcf_block **p_block,
if (!block)
return -ENOMEM;
INIT_LIST_HEAD(&block->chain_list);
+ INIT_LIST_HEAD(&block->cb_list);
+
/* Create chain 0 by default, it has to be always present. */
chain = tcf_chain_create(block, 0);
if (!chain) {
@@ -257,6 +287,9 @@ int tcf_block_get(struct tcf_block **p_block,
goto err_chain_create;
}
tcf_chain_filter_chain_ptr_set(chain, p_filter_chain);
+ block->net = qdisc_net(q);
+ block->q = q;
+ tcf_block_offload_bind(block, q, ei);
*p_block = block;
return 0;
@@ -264,15 +297,28 @@ err_chain_create:
kfree(block);
return err;
}
+EXPORT_SYMBOL(tcf_block_get_ext);
+
+int tcf_block_get(struct tcf_block **p_block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
+{
+ struct tcf_block_ext_info ei = {0, };
+
+ return tcf_block_get_ext(p_block, p_filter_chain, q, &ei);
+}
EXPORT_SYMBOL(tcf_block_get);
-void tcf_block_put(struct tcf_block *block)
+void tcf_block_put_ext(struct tcf_block *block,
+ struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ struct tcf_block_ext_info *ei)
{
struct tcf_chain *chain, *tmp;
if (!block)
return;
+ tcf_block_offload_unbind(block, q, ei);
+
/* XXX: Standalone actions are not allowed to jump to any chain, and
* bound actions should be all removed after flushing. However,
* filters are destroyed in RCU callbacks, we have to hold the chains
@@ -300,8 +346,119 @@ void tcf_block_put(struct tcf_block *block)
tcf_chain_put(chain);
kfree(block);
}
+EXPORT_SYMBOL(tcf_block_put_ext);
+
+void tcf_block_put(struct tcf_block *block)
+{
+ struct tcf_block_ext_info ei = {0, };
+
+ tcf_block_put_ext(block, NULL, block->q, &ei);
+}
EXPORT_SYMBOL(tcf_block_put);
+struct tcf_block_cb {
+ struct list_head list;
+ tc_setup_cb_t *cb;
+ void *cb_ident;
+ void *cb_priv;
+ unsigned int refcnt;
+};
+
+void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
+{
+ return block_cb->cb_priv;
+}
+EXPORT_SYMBOL(tcf_block_cb_priv);
+
+struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{ struct tcf_block_cb *block_cb;
+
+ list_for_each_entry(block_cb, &block->cb_list, list)
+ if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
+ return block_cb;
+ return NULL;
+}
+EXPORT_SYMBOL(tcf_block_cb_lookup);
+
+void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
+{
+ block_cb->refcnt++;
+}
+EXPORT_SYMBOL(tcf_block_cb_incref);
+
+unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
+{
+ return --block_cb->refcnt;
+}
+EXPORT_SYMBOL(tcf_block_cb_decref);
+
+struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
+ if (!block_cb)
+ return NULL;
+ block_cb->cb = cb;
+ block_cb->cb_ident = cb_ident;
+ block_cb->cb_priv = cb_priv;
+ list_add(&block_cb->list, &block->cb_list);
+ return block_cb;
+}
+EXPORT_SYMBOL(__tcf_block_cb_register);
+
+int tcf_block_cb_register(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident,
+ void *cb_priv)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
+ return block_cb ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(tcf_block_cb_register);
+
+void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
+{
+ list_del(&block_cb->list);
+ kfree(block_cb);
+}
+EXPORT_SYMBOL(__tcf_block_cb_unregister);
+
+void tcf_block_cb_unregister(struct tcf_block *block,
+ tc_setup_cb_t *cb, void *cb_ident)
+{
+ struct tcf_block_cb *block_cb;
+
+ block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
+ if (!block_cb)
+ return;
+ __tcf_block_cb_unregister(block_cb);
+}
+EXPORT_SYMBOL(tcf_block_cb_unregister);
+
+static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
+ void *type_data, bool err_stop)
+{
+ struct tcf_block_cb *block_cb;
+ int ok_count = 0;
+ int err;
+
+ list_for_each_entry(block_cb, &block->cb_list, list) {
+ err = block_cb->cb(type, type_data, block_cb->cb_priv);
+ if (err) {
+ if (err_stop)
+ return err;
+ } else {
+ ok_count++;
+ }
+ }
+ return ok_count;
+}
+
/* Main classifier routine: scans classifier chain attached
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
@@ -418,8 +575,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
}
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
- struct tcf_proto *tp, void *fh, u32 portid,
- u32 seq, u16 flags, int event)
+ struct tcf_proto *tp, struct Qdisc *q, u32 parent,
+ void *fh, u32 portid, u32 seq, u16 flags, int event)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -432,8 +589,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
tcm->tcm_family = AF_UNSPEC;
tcm->tcm__pad1 = 0;
tcm->tcm__pad2 = 0;
- tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex;
- tcm->tcm_parent = tp->classid;
+ tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
+ tcm->tcm_parent = parent;
tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
goto nla_put_failure;
@@ -456,6 +613,7 @@ nla_put_failure:
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
+ struct Qdisc *q, u32 parent,
void *fh, int event, bool unicast)
{
struct sk_buff *skb;
@@ -465,7 +623,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
n->nlmsg_flags, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
@@ -480,6 +638,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
+ struct Qdisc *q, u32 parent,
void *fh, bool unicast, bool *last)
{
struct sk_buff *skb;
@@ -490,7 +649,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq,
+ if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
kfree_skb(skb);
return -EINVAL;
@@ -510,6 +669,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
}
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
+ struct Qdisc *q, u32 parent,
struct nlmsghdr *n,
struct tcf_chain *chain, int event)
{
@@ -517,7 +677,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
for (tp = rtnl_dereference(chain->filter_chain);
tp; tp = rtnl_dereference(tp->next))
- tfilter_notify(net, oskb, n, tp, 0, event, false);
+ tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
}
/* Add/change/delete/get a filter node */
@@ -636,7 +796,8 @@ replay:
}
if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
- tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER);
+ tfilter_notify_chain(net, skb, q, parent, n,
+ chain, RTM_DELTFILTER);
tcf_chain_flush(chain);
err = 0;
goto errout;
@@ -683,7 +844,7 @@ replay:
if (!fh) {
if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
tcf_chain_tp_remove(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, fh,
+ tfilter_notify(net, skb, n, tp, q, parent, fh,
RTM_DELTFILTER, false);
tcf_proto_destroy(tp);
err = 0;
@@ -708,8 +869,8 @@ replay:
}
break;
case RTM_DELTFILTER:
- err = tfilter_del_notify(net, skb, n, tp, fh, false,
- &last);
+ err = tfilter_del_notify(net, skb, n, tp, q, parent,
+ fh, false, &last);
if (err)
goto errout;
if (last) {
@@ -718,7 +879,7 @@ replay:
}
goto errout;
case RTM_GETTFILTER:
- err = tfilter_notify(net, skb, n, tp, fh,
+ err = tfilter_notify(net, skb, n, tp, q, parent, fh,
RTM_NEWTFILTER, true);
goto errout;
default:
@@ -732,7 +893,8 @@ replay:
if (err == 0) {
if (tp_created)
tcf_chain_tp_insert(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);
+ tfilter_notify(net, skb, n, tp, q, parent, fh,
+ RTM_NEWTFILTER, false);
} else {
if (tp_created)
tcf_proto_destroy(tp);
@@ -751,6 +913,8 @@ struct tcf_dump_args {
struct tcf_walker w;
struct sk_buff *skb;
struct netlink_callback *cb;
+ struct Qdisc *q;
+ u32 parent;
};
static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -758,13 +922,14 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
struct tcf_dump_args *a = (void *)arg;
struct net *net = sock_net(a->skb->sk);
- return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid,
+ return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
+ n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER);
}
-static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
- struct netlink_callback *cb,
+static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
+ struct sk_buff *skb, struct netlink_callback *cb,
long index_start, long *p_index)
{
struct net *net = sock_net(skb->sk);
@@ -786,7 +951,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, 0,
+ if (tcf_fill_node(net, skb, tp, q, parent, 0,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
@@ -799,6 +964,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb,
arg.w.fn = tcf_node_dump;
arg.skb = skb;
arg.cb = cb;
+ arg.q = q;
+ arg.parent = parent;
arg.w.stop = 0;
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
@@ -824,6 +991,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
const struct Qdisc_class_ops *cops;
long index_start;
long index;
+ u32 parent;
int err;
if (nlmsg_len(cb->nlh) < sizeof(*tcm))
@@ -837,10 +1005,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (!dev)
return skb->len;
- if (!tcm->tcm_parent)
+ parent = tcm->tcm_parent;
+ if (!parent) {
q = dev->qdisc;
- else
+ parent = q->handle;
+ } else {
q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
+ }
if (!q)
goto out;
cops = q->ops->cl_ops;
@@ -864,7 +1035,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
if (tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
- if (!tcf_chain_dump(chain, skb, cb, index_start, &index))
+ if (!tcf_chain_dump(chain, q, parent, skb, cb,
+ index_start, &index))
break;
}
@@ -1004,29 +1176,57 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
-int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts,
- struct net_device **hw_dev)
+static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
+ enum tc_setup_type type,
+ void *type_data, bool err_stop)
{
+ int ok_count = 0;
#ifdef CONFIG_NET_CLS_ACT
const struct tc_action *a;
+ struct net_device *dev;
LIST_HEAD(actions);
+ int ret;
if (!tcf_exts_has_actions(exts))
- return -EINVAL;
+ return 0;
tcf_exts_to_list(exts, &actions);
list_for_each_entry(a, &actions, list) {
- if (a->ops->get_dev) {
- a->ops->get_dev(a, dev_net(dev), hw_dev);
- break;
- }
+ if (!a->ops->get_dev)
+ continue;
+ dev = a->ops->get_dev(a);
+ if (!dev || !tc_can_offload(dev))
+ continue;
+ ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count += ret;
}
- if (*hw_dev)
- return 0;
#endif
- return -EOPNOTSUPP;
+ return ok_count;
+}
+
+int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
+ enum tc_setup_type type, void *type_data, bool err_stop)
+{
+ int ok_count;
+ int ret;
+
+ ret = tcf_block_cb_call(block, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count = ret;
+
+ if (!exts)
+ return ok_count;
+ ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count += ret;
+
+ return ok_count;
}
-EXPORT_SYMBOL(tcf_exts_get_dev);
+EXPORT_SYMBOL(tc_setup_cb_call);
static int __init tc_filter_init(void)
{
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index d89ebafd2239..700b345b07f9 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -17,13 +17,14 @@
#include <linux/errno.h>
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
+#include <linux/idr.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
struct basic_head {
- u32 hgenerator;
struct list_head flist;
+ struct idr handle_idr;
struct rcu_head rcu;
};
@@ -78,6 +79,7 @@ static int basic_init(struct tcf_proto *tp)
if (head == NULL)
return -ENOBUFS;
INIT_LIST_HEAD(&head->flist);
+ idr_init(&head->handle_idr);
rcu_assign_pointer(tp->root, head);
return 0;
}
@@ -99,8 +101,10 @@ static void basic_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(f, n, &head->flist, link) {
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
+ idr_remove_ext(&head->handle_idr, f->handle);
call_rcu(&f->rcu, basic_delete_filter);
}
+ idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
}
@@ -111,6 +115,7 @@ static int basic_delete(struct tcf_proto *tp, void *arg, bool *last)
list_del_rcu(&f->link);
tcf_unbind_filter(tp, &f->res);
+ idr_remove_ext(&head->handle_idr, f->handle);
call_rcu(&f->rcu, basic_delete_filter);
*last = list_empty(&head->flist);
return 0;
@@ -154,6 +159,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
struct nlattr *tb[TCA_BASIC_MAX + 1];
struct basic_filter *fold = (struct basic_filter *) *arg;
struct basic_filter *fnew;
+ unsigned long idr_index;
if (tca[TCA_OPTIONS] == NULL)
return -EINVAL;
@@ -176,33 +182,33 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout;
- err = -EINVAL;
if (handle) {
fnew->handle = handle;
- } else if (fold) {
- fnew->handle = fold->handle;
+ if (!fold) {
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ handle, handle + 1, GFP_KERNEL);
+ if (err)
+ goto errout;
+ }
} else {
- unsigned int i = 0x80000000;
- do {
- if (++head->hgenerator == 0x7FFFFFFF)
- head->hgenerator = 1;
- } while (--i > 0 && basic_get(tp, head->hgenerator));
-
- if (i <= 0) {
- pr_err("Insufficient number of handles\n");
+ err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index,
+ 1, 0x7FFFFFFF, GFP_KERNEL);
+ if (err)
goto errout;
- }
-
- fnew->handle = head->hgenerator;
+ fnew->handle = idr_index;
}
err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr);
- if (err < 0)
+ if (err < 0) {
+ if (!fold)
+ idr_remove_ext(&head->handle_idr, fnew->handle);
goto errout;
+ }
*arg = fnew;
if (fold) {
+ idr_replace_ext(&head->handle_idr, fnew, fnew->handle);
list_replace_rcu(&fold->link, &fnew->link);
tcf_unbind_filter(tp, &fold->res);
call_rcu(&fold->rcu, basic_delete_filter);
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 520c5027646a..0f8b51061c39 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -17,6 +17,7 @@
#include <linux/skbuff.h>
#include <linux/filter.h>
#include <linux/bpf.h>
+#include <linux/idr.h>
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier");
struct cls_bpf_head {
struct list_head plist;
- u32 hgen;
+ struct idr handle_idr;
struct rcu_head rcu;
};
@@ -99,11 +100,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
} else if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
- bpf_compute_data_end(skb);
+ bpf_compute_data_pointers(skb);
filter_res = BPF_PROG_RUN(prog->filter, skb);
}
@@ -146,7 +147,9 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
enum tc_clsbpf_command cmd)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE;
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(prog->gen_flags);
struct tc_cls_bpf_offload cls_bpf = {};
int err;
@@ -158,17 +161,25 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog,
cls_bpf.exts_integrated = prog->exts_integrated;
cls_bpf.gen_flags = prog->gen_flags;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSBPF, &cls_bpf);
- if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE))
- prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw);
+ if (addorrep) {
+ if (err < 0) {
+ cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY);
+ return err;
+ } else if (err > 0) {
+ prog->gen_flags |= TCA_CLS_FLAGS_IN_HW;
+ }
+ }
+
+ if (addorrep && skip_sw && !(prog->gen_flags && TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
- return err;
+ return 0;
}
static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
struct cls_bpf_prog *oldprog)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct cls_bpf_prog *obj = prog;
enum tc_clsbpf_command cmd;
bool skip_sw;
@@ -178,7 +189,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
(oldprog && tc_skip_sw(oldprog->gen_flags));
if (oldprog && oldprog->offloaded) {
- if (tc_should_offload(dev, prog->gen_flags)) {
+ if (!tc_skip_hw(prog->gen_flags)) {
cmd = TC_CLSBPF_REPLACE;
} else if (!tc_skip_sw(prog->gen_flags)) {
obj = oldprog;
@@ -187,14 +198,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog,
return -EINVAL;
}
} else {
- if (!tc_should_offload(dev, prog->gen_flags))
+ if (tc_skip_hw(prog->gen_flags))
return skip_sw ? -EINVAL : 0;
cmd = TC_CLSBPF_ADD;
}
ret = cls_bpf_offload_cmd(tp, obj, cmd);
if (ret)
- return skip_sw ? ret : 0;
+ return ret;
obj->offloaded = true;
if (oldprog)
@@ -238,6 +249,7 @@ static int cls_bpf_init(struct tcf_proto *tp)
return -ENOBUFS;
INIT_LIST_HEAD_RCU(&head->plist);
+ idr_init(&head->handle_idr);
rcu_assign_pointer(tp->root, head);
return 0;
@@ -264,6 +276,9 @@ static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu)
static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog)
{
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
+
+ idr_remove_ext(&head->handle_idr, prog->handle);
cls_bpf_stop_offload(tp, prog);
list_del_rcu(&prog->link);
tcf_unbind_filter(tp, &prog->res);
@@ -287,6 +302,7 @@ static void cls_bpf_destroy(struct tcf_proto *tp)
list_for_each_entry_safe(prog, tmp, &head->plist, link)
__cls_bpf_delete(tp, prog);
+ idr_destroy(&head->handle_idr);
kfree_rcu(head, rcu);
}
@@ -421,27 +437,6 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
return 0;
}
-static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
- struct cls_bpf_head *head)
-{
- unsigned int i = 0x80000000;
- u32 handle;
-
- do {
- if (++head->hgen == 0x7FFFFFFF)
- head->hgen = 1;
- } while (--i > 0 && cls_bpf_get(tp, head->hgen));
-
- if (unlikely(i == 0)) {
- pr_err("Insufficient number of handles\n");
- handle = 0;
- } else {
- handle = head->hgen;
- }
-
- return handle;
-}
-
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -451,6 +446,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct cls_bpf_prog *oldprog = *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
struct cls_bpf_prog *prog;
+ unsigned long idr_index;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -476,21 +472,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
}
}
- if (handle == 0)
- prog->handle = cls_bpf_grab_new_handle(tp, head);
- else
+ if (handle == 0) {
+ ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
+ 1, 0x7FFFFFFF, GFP_KERNEL);
+ if (ret)
+ goto errout;
+ prog->handle = idr_index;
+ } else {
+ if (!oldprog) {
+ ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index,
+ handle, handle + 1, GFP_KERNEL);
+ if (ret)
+ goto errout;
+ }
prog->handle = handle;
- if (prog->handle == 0) {
- ret = -EINVAL;
- goto errout;
}
ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr);
if (ret < 0)
- goto errout;
+ goto errout_idr;
ret = cls_bpf_offload(tp, prog, oldprog);
if (ret) {
+ if (!oldprog)
+ idr_remove_ext(&head->handle_idr, prog->handle);
__cls_bpf_delete_prog(prog);
return ret;
}
@@ -499,6 +504,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;
if (oldprog) {
+ idr_replace_ext(&head->handle_idr, prog, handle);
list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu);
@@ -509,6 +515,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
*arg = prog;
return 0;
+errout_idr:
+ if (!oldprog)
+ idr_remove_ext(&head->handle_idr, prog->handle);
errout:
tcf_exts_destroy(&prog->exts);
kfree(prog);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2a3a60ec5b86..6b29cef90bec 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -345,9 +345,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
}
-static void flow_perturbation(unsigned long arg)
+static void flow_perturbation(struct timer_list *t)
{
- struct flow_filter *f = (struct flow_filter *)arg;
+ struct flow_filter *f = from_timer(f, t, perturb_timer);
get_random_bytes(&f->hashrnd, 4);
if (f->perturb_period)
@@ -491,8 +491,11 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ;
}
- if (TC_H_MAJ(baseclass) == 0)
- baseclass = TC_H_MAKE(tp->q->handle, baseclass);
+ if (TC_H_MAJ(baseclass) == 0) {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
+ baseclass = TC_H_MAKE(q->handle, baseclass);
+ }
if (TC_H_MIN(baseclass) == 0)
baseclass = TC_H_MAKE(baseclass, 1);
@@ -502,8 +505,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
get_random_bytes(&fnew->hashrnd, 4);
}
- setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation,
- (unsigned long)fnew);
+ timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE);
netif_keep_dst(qdisc_dev(tp->q));
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index b480d7c792ba..16f58abaa697 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -88,7 +88,6 @@ struct cls_fl_filter {
u32 handle;
u32 flags;
struct rcu_head rcu;
- struct net_device *hw_dev;
};
static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
@@ -152,37 +151,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct cls_fl_filter *f;
struct fl_flow_key skb_key;
struct fl_flow_key skb_mkey;
- struct ip_tunnel_info *info;
if (!atomic_read(&head->ht.nelems))
return -1;
fl_clear_masked_range(&skb_key, &head->mask);
- info = skb_tunnel_info(skb);
- if (info) {
- struct ip_tunnel_key *key = &info->key;
-
- switch (ip_tunnel_info_af(info)) {
- case AF_INET:
- skb_key.enc_control.addr_type =
- FLOW_DISSECTOR_KEY_IPV4_ADDRS;
- skb_key.enc_ipv4.src = key->u.ipv4.src;
- skb_key.enc_ipv4.dst = key->u.ipv4.dst;
- break;
- case AF_INET6:
- skb_key.enc_control.addr_type =
- FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- skb_key.enc_ipv6.src = key->u.ipv6.src;
- skb_key.enc_ipv6.dst = key->u.ipv6.dst;
- break;
- }
-
- skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id);
- skb_key.enc_tp.src = key->tp_src;
- skb_key.enc_tp.dst = key->tp_dst;
- }
-
skb_key.indev_ifindex = skb->skb_iif;
/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
* so do it rather here.
@@ -226,17 +200,14 @@ static void fl_destroy_filter(struct rcu_head *head)
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f)
{
struct tc_cls_flower_offload cls_flower = {};
- struct net_device *dev = f->hw_dev;
-
- if (!tc_can_offload(dev))
- return;
+ struct tcf_block *block = tp->chain->block;
tc_cls_common_offload_init(&cls_flower.common, tp);
cls_flower.command = TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long) f;
- cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, &cls_flower);
+ tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, false);
}
static int fl_hw_replace_filter(struct tcf_proto *tp,
@@ -244,22 +215,11 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
struct fl_flow_key *mask,
struct cls_fl_filter *f)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_flower_offload cls_flower = {};
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(f->flags);
int err;
- if (!tc_can_offload(dev)) {
- if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) ||
- (f->hw_dev && !tc_can_offload(f->hw_dev))) {
- f->hw_dev = dev;
- return tc_skip_sw(f->flags) ? -EINVAL : 0;
- }
- dev = f->hw_dev;
- cls_flower.egress_dev = true;
- } else {
- f->hw_dev = dev;
- }
-
tc_cls_common_offload_init(&cls_flower.common, tp);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
@@ -268,32 +228,33 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
cls_flower.key = &f->mkey;
cls_flower.exts = &f->exts;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
- &cls_flower);
- if (!err)
+ err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, skip_sw);
+ if (err < 0) {
+ fl_hw_destroy_filter(tp, f);
+ return err;
+ } else if (err > 0) {
f->flags |= TCA_CLS_FLAGS_IN_HW;
+ }
+
+ if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
- if (tc_skip_sw(f->flags))
- return err;
return 0;
}
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
{
struct tc_cls_flower_offload cls_flower = {};
- struct net_device *dev = f->hw_dev;
-
- if (!tc_can_offload(dev))
- return;
+ struct tcf_block *block = tp->chain->block;
tc_cls_common_offload_init(&cls_flower.common, tp);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
cls_flower.exts = &f->exts;
- cls_flower.egress_dev = f->hw_dev != tp->q->dev_queue->dev;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER,
- &cls_flower);
+ tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER,
+ &cls_flower, false);
}
static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f)
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 941245ad07fd..aa1e1f3bd20c 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -28,6 +28,7 @@
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
#define HTSIZE 256
@@ -83,9 +84,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
}
}
} else {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
/* Old method: classify the packet using its skb mark. */
if (id && (TC_H_MAJ(id) == 0 ||
- !(TC_H_MAJ(id ^ tp->q->handle)))) {
+ !(TC_H_MAJ(id ^ q->handle)))) {
res->classid = id;
res->class = 0;
return 0;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index eeac606c95ab..70e78d74f6d3 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -50,12 +50,27 @@ static void mall_destroy_rcu(struct rcu_head *rcu)
kfree(head);
}
+static void mall_destroy_hw_filter(struct tcf_proto *tp,
+ struct cls_mall_head *head,
+ unsigned long cookie)
+{
+ struct tc_cls_matchall_offload cls_mall = {};
+ struct tcf_block *block = tp->chain->block;
+
+ tc_cls_common_offload_init(&cls_mall.common, tp);
+ cls_mall.command = TC_CLSMATCHALL_DESTROY;
+ cls_mall.cookie = cookie;
+
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false);
+}
+
static int mall_replace_hw_filter(struct tcf_proto *tp,
struct cls_mall_head *head,
unsigned long cookie)
{
- struct net_device *dev = tp->q->dev_queue->dev;
struct tc_cls_matchall_offload cls_mall = {};
+ struct tcf_block *block = tp->chain->block;
+ bool skip_sw = tc_skip_sw(head->flags);
int err;
tc_cls_common_offload_init(&cls_mall.common, tp);
@@ -63,37 +78,29 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
cls_mall.exts = &head->exts;
cls_mall.cookie = cookie;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL,
- &cls_mall);
- if (!err)
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL,
+ &cls_mall, skip_sw);
+ if (err < 0) {
+ mall_destroy_hw_filter(tp, head, cookie);
+ return err;
+ } else if (err > 0) {
head->flags |= TCA_CLS_FLAGS_IN_HW;
+ }
- return err;
-}
-
-static void mall_destroy_hw_filter(struct tcf_proto *tp,
- struct cls_mall_head *head,
- unsigned long cookie)
-{
- struct net_device *dev = tp->q->dev_queue->dev;
- struct tc_cls_matchall_offload cls_mall = {};
-
- tc_cls_common_offload_init(&cls_mall.common, tp);
- cls_mall.command = TC_CLSMATCHALL_DESTROY;
- cls_mall.cookie = cookie;
+ if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSMATCHALL, &cls_mall);
+ return 0;
}
static void mall_destroy(struct tcf_proto *tp)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct net_device *dev = tp->q->dev_queue->dev;
if (!head)
return;
- if (tc_should_offload(dev, head->flags))
+ if (!tc_skip_hw(head->flags))
mall_destroy_hw_filter(tp, head, (unsigned long) head);
call_rcu(&head->rcu, mall_destroy_rcu);
@@ -133,7 +140,6 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
void **arg, bool ovr)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
- struct net_device *dev = tp->q->dev_queue->dev;
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
struct cls_mall_head *new;
u32 flags = 0;
@@ -173,14 +179,10 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto err_set_parms;
- if (tc_should_offload(dev, flags)) {
+ if (!tc_skip_hw(new->flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long) new);
- if (err) {
- if (tc_skip_sw(flags))
- goto err_replace_hw_filter;
- else
- err = 0;
- }
+ if (err)
+ goto err_replace_hw_filter;
}
if (!tc_in_hw(new->flags))
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 14a7e08b2fa9..d732b5474a4d 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,6 +13,7 @@
#include <net/act_api.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
+#include <net/sch_generic.h>
/*
* Passing parameters to the root seems to be done more awkwardly than really
@@ -90,9 +91,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
f = tcindex_lookup(p, key);
if (!f) {
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
if (!p->fall_through)
return -1;
- res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key);
+ res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
res->class = 0;
pr_debug("alg 0x%x\n", res->classid);
return 0;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 10b8d851fc6b..9ff17159fb61 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -46,6 +46,7 @@
#include <net/act_api.h>
#include <net/pkt_cls.h>
#include <linux/netdevice.h>
+#include <linux/idr.h>
struct tc_u_knode {
struct tc_u_knode __rcu *next;
@@ -82,6 +83,7 @@ struct tc_u_hnode {
struct tc_u_common *tp_c;
int refcnt;
unsigned int divisor;
+ struct idr handle_idr;
struct rcu_head rcu;
/* The 'ht' field MUST be the last field in structure to allow for
* more entries allocated at end of structure.
@@ -91,9 +93,9 @@ struct tc_u_hnode {
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
- struct Qdisc *q;
+ struct tcf_block *block;
int refcnt;
- u32 hgenerator;
+ struct idr handle_idr;
struct hlist_node hnode;
struct rcu_head rcu;
};
@@ -311,19 +313,19 @@ static void *u32_get(struct tcf_proto *tp, u32 handle)
return u32_lookup_key(ht, handle);
}
-static u32 gen_new_htid(struct tc_u_common *tp_c)
+static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
{
- int i = 0x800;
+ unsigned long idr_index;
+ int err;
- /* hgenerator only used inside rtnl lock it is safe to increment
+ /* This is only used inside rtnl lock it is safe to increment
* without read _copy_ update semantics
*/
- do {
- if (++tp_c->hgenerator == 0x7FF)
- tp_c->hgenerator = 1;
- } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
-
- return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
+ err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index,
+ 1, 0x7FF, GFP_KERNEL);
+ if (err)
+ return 0;
+ return (u32)(idr_index | 0x800) << 20;
}
static struct hlist_head *tc_u_common_hash;
@@ -333,11 +335,7 @@ static struct hlist_head *tc_u_common_hash;
static unsigned int tc_u_hash(const struct tcf_proto *tp)
{
- struct net_device *dev = tp->q->dev_queue->dev;
- u32 qhandle = tp->q->handle;
- int ifindex = dev->ifindex;
-
- return hash_64((u64)ifindex << 32 | qhandle, U32_HASH_SHIFT);
+ return hash_ptr(tp->chain->block, U32_HASH_SHIFT);
}
static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
@@ -347,7 +345,7 @@ static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp)
h = tc_u_hash(tp);
hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) {
- if (tc->q == tp->q)
+ if (tc->block == tp->chain->block)
return tc;
}
return NULL;
@@ -366,8 +364,9 @@ static int u32_init(struct tcf_proto *tp)
return -ENOBUFS;
root_ht->refcnt++;
- root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000;
+ root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
+ idr_init(&root_ht->handle_idr);
if (tp_c == NULL) {
tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL);
@@ -375,8 +374,9 @@ static int u32_init(struct tcf_proto *tp)
kfree(root_ht);
return -ENOBUFS;
}
- tp_c->q = tp->q;
+ tp_c->block = tp->chain->block;
INIT_HLIST_NODE(&tp_c->hnode);
+ idr_init(&tp_c->handle_idr);
h = tc_u_hash(tp);
hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]);
@@ -462,71 +462,69 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key)
return 0;
}
-static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
+static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- if (!tc_should_offload(dev, 0))
- return;
-
tc_cls_common_offload_init(&cls_u32.common, tp);
- cls_u32.command = TC_CLSU32_DELETE_KNODE;
- cls_u32.knode.handle = handle;
+ cls_u32.command = TC_CLSU32_DELETE_HNODE;
+ cls_u32.hnode.divisor = h->divisor;
+ cls_u32.hnode.handle = h->handle;
+ cls_u32.hnode.prio = h->prio;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
}
static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h,
u32 flags)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
+ bool skip_sw = tc_skip_sw(flags);
+ bool offloaded = false;
int err;
- if (!tc_should_offload(dev, flags))
- return tc_skip_sw(flags) ? -EINVAL : 0;
-
tc_cls_common_offload_init(&cls_u32.common, tp);
cls_u32.command = TC_CLSU32_NEW_HNODE;
cls_u32.hnode.divisor = h->divisor;
cls_u32.hnode.handle = h->handle;
cls_u32.hnode.prio = h->prio;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
- if (tc_skip_sw(flags))
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ if (err < 0) {
+ u32_clear_hw_hnode(tp, h);
return err;
+ } else if (err > 0) {
+ offloaded = true;
+ }
+
+ if (skip_sw && !offloaded)
+ return -EINVAL;
return 0;
}
-static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h)
+static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
- if (!tc_should_offload(dev, 0))
- return;
-
tc_cls_common_offload_init(&cls_u32.common, tp);
- cls_u32.command = TC_CLSU32_DELETE_HNODE;
- cls_u32.hnode.divisor = h->divisor;
- cls_u32.hnode.handle = h->handle;
- cls_u32.hnode.prio = h->prio;
+ cls_u32.command = TC_CLSU32_DELETE_KNODE;
+ cls_u32.knode.handle = handle;
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
+ tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false);
}
static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
u32 flags)
{
- struct net_device *dev = tp->q->dev_queue->dev;
+ struct tcf_block *block = tp->chain->block;
struct tc_cls_u32_offload cls_u32 = {};
+ bool skip_sw = tc_skip_sw(flags);
int err;
- if (!tc_should_offload(dev, flags))
- return tc_skip_sw(flags) ? -EINVAL : 0;
-
tc_cls_common_offload_init(&cls_u32.common, tp);
cls_u32.command = TC_CLSU32_REPLACE_KNODE;
cls_u32.knode.handle = n->handle;
@@ -543,13 +541,16 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n,
if (n->ht_down)
cls_u32.knode.link_handle = n->ht_down->handle;
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSU32, &cls_u32);
-
- if (!err)
+ err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw);
+ if (err < 0) {
+ u32_remove_hw_knode(tp, n->handle);
+ return err;
+ } else if (err > 0) {
n->flags |= TCA_CLS_FLAGS_IN_HW;
+ }
- if (tc_skip_sw(flags))
- return err;
+ if (skip_sw && !(n->flags && TCA_CLS_FLAGS_IN_HW))
+ return -EINVAL;
return 0;
}
@@ -565,6 +566,7 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
rtnl_dereference(n->next));
tcf_unbind_filter(tp, &n->res);
u32_remove_hw_knode(tp, n->handle);
+ idr_remove_ext(&ht->handle_idr, n->handle);
call_rcu(&n->rcu, u32_delete_key_freepf_rcu);
}
}
@@ -586,6 +588,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
hn = &phn->next, phn = rtnl_dereference(*hn)) {
if (phn == ht) {
u32_clear_hw_hnode(tp, ht);
+ idr_destroy(&ht->handle_idr);
+ idr_remove_ext(&tp_c->handle_idr, ht->handle);
RCU_INIT_POINTER(*hn, ht->next);
kfree_rcu(ht, rcu);
return 0;
@@ -633,6 +637,7 @@ static void u32_destroy(struct tcf_proto *tp)
kfree_rcu(ht, rcu);
}
+ idr_destroy(&tp_c->handle_idr);
kfree(tp_c);
}
@@ -701,27 +706,21 @@ ret:
return ret;
}
-#define NR_U32_NODE (1<<12)
-static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
+static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid)
{
- struct tc_u_knode *n;
- unsigned long i;
- unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long),
- GFP_KERNEL);
- if (!bitmap)
- return handle | 0xFFF;
-
- for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]);
- n;
- n = rtnl_dereference(n->next))
- set_bit(TC_U32_NODE(n->handle), bitmap);
-
- i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800);
- if (i >= NR_U32_NODE)
- i = find_next_zero_bit(bitmap, NR_U32_NODE, 1);
+ unsigned long idr_index;
+ u32 start = htid | 0x800;
+ u32 max = htid | 0xFFF;
+ u32 min = htid;
+
+ if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
+ start, max + 1, GFP_KERNEL)) {
+ if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index,
+ min + 1, max + 1, GFP_KERNEL))
+ return max;
+ }
- kfree(bitmap);
- return handle | (i >= NR_U32_NODE ? 0xFFF : i);
+ return (u32)idr_index;
}
static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -806,6 +805,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
if (pins->handle == n->handle)
break;
+ idr_replace_ext(&ht->handle_idr, n, n->handle);
RCU_INIT_POINTER(n->next, pins->next);
rcu_assign_pointer(*ins, n);
}
@@ -937,22 +937,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
if (TC_U32_KEY(handle))
return -EINVAL;
- if (handle == 0) {
- handle = gen_new_htid(tp->data);
- if (handle == 0)
- return -ENOMEM;
- }
ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
if (ht == NULL)
return -ENOBUFS;
+ if (handle == 0) {
+ handle = gen_new_htid(tp->data, ht);
+ if (handle == 0) {
+ kfree(ht);
+ return -ENOMEM;
+ }
+ } else {
+ err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL,
+ handle, handle + 1, GFP_KERNEL);
+ if (err) {
+ kfree(ht);
+ return err;
+ }
+ }
ht->tp_c = tp_c;
ht->refcnt = 1;
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
+ idr_init(&ht->handle_idr);
err = u32_replace_hw_hnode(tp, ht, flags);
if (err) {
+ idr_remove_ext(&tp_c->handle_idr, handle);
kfree(ht);
return err;
}
@@ -986,24 +997,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid))
return -EINVAL;
handle = htid | TC_U32_NODE(handle);
+ err = idr_alloc_ext(&ht->handle_idr, NULL, NULL,
+ handle, handle + 1,
+ GFP_KERNEL);
+ if (err)
+ return err;
} else
handle = gen_new_kid(ht, htid);
- if (tb[TCA_U32_SEL] == NULL)
- return -EINVAL;
+ if (tb[TCA_U32_SEL] == NULL) {
+ err = -EINVAL;
+ goto erridr;
+ }
s = nla_data(tb[TCA_U32_SEL]);
n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL);
- if (n == NULL)
- return -ENOBUFS;
+ if (n == NULL) {
+ err = -ENOBUFS;
+ goto erridr;
+ }
#ifdef CONFIG_CLS_U32_PERF
size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64);
n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt));
if (!n->pf) {
- kfree(n);
- return -ENOBUFS;
+ err = -ENOBUFS;
+ goto errfree;
}
#endif
@@ -1066,9 +1086,12 @@ errhw:
errout:
tcf_exts_destroy(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
+errfree:
free_percpu(n->pf);
#endif
kfree(n);
+erridr:
+ idr_remove_ext(&ht->handle_idr, handle);
return err;
}
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 03b677bc0700..1331a4c2d8ff 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp,
struct tcf_ematch_hdr *em_hdr = nla_data(nla);
int data_len = nla_len(nla) - sizeof(*em_hdr);
void *data = (void *) em_hdr + sizeof(*em_hdr);
- struct net *net = dev_net(qdisc_dev(tp->q));
+ struct net *net = tp->chain->block->net;
if (!TCF_EM_REL_VALID(em_hdr->flags))
goto errout;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index c6deb74e3d2f..a9ac912f1d67 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1500,7 +1500,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
int s_idx, s_q_idx;
struct net_device *dev;
const struct nlmsghdr *nlh = cb->nlh;
- struct tcmsg *tcm = nlmsg_data(nlh);
struct nlattr *tca[TCA_MAX + 1];
int err;
@@ -1510,7 +1509,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
ASSERT_RTNL();
- err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
+ err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
if (err < 0)
return err;
@@ -1662,9 +1661,11 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
struct tcf_bind_args *a = (void *)arg;
if (tp->ops->bind_class) {
- tcf_tree_lock(tp);
+ struct Qdisc *q = tcf_block_q(tp->chain->block);
+
+ sch_tree_lock(q);
tp->ops->bind_class(n, a->classid, a->cl);
- tcf_tree_unlock(tp);
+ sch_tree_unlock(q);
}
return 0;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index c5fcdf1a58a0..2dbd249c0b2f 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -281,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
goto err_out;
}
- error = tcf_block_get(&flow->block, &flow->filter_list);
+ error = tcf_block_get(&flow->block, &flow->filter_list, sch);
if (error) {
kfree(flow);
goto err_out;
@@ -546,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
p->link.q = &noop_qdisc;
pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- err = tcf_block_get(&p->link.block, &p->link.filter_list);
+ err = tcf_block_get(&p->link.block, &p->link.filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index dcef97fa8047..6361be7881f1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -255,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
case TC_ACT_RECLASSIFY:
@@ -1566,7 +1567,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
if (cl == NULL)
goto failure;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
return err;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 2d0e8d4bdc29..5bbcef3dcd8c 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -321,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -412,7 +413,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
struct drr_sched *q = qdisc_priv(sch);
int err;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
err = qdisc_class_hash_init(&q->clhash);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 2836c80c7aa5..fb4fb71c68cf 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
goto errout;
- err = tcf_block_get(&p->block, &p->filter_list);
+ err = tcf_block_get(&p->block, &p->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index de3b57ceca7b..0305d791ea94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return 0;
}
@@ -481,7 +482,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
return err;
}
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index bf8c81e07c70..6ced7c89c141 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -288,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev)
}
EXPORT_SYMBOL(dev_trans_start);
-static void dev_watchdog(unsigned long arg)
+static void dev_watchdog(struct timer_list *t)
{
- struct net_device *dev = (struct net_device *)arg;
+ struct net_device *dev = from_timer(dev, t, watchdog_timer);
netif_tx_lock(dev);
if (!qdisc_tx_is_noop(dev)) {
@@ -689,10 +689,8 @@ void qdisc_reset(struct Qdisc *qdisc)
}
EXPORT_SYMBOL(qdisc_reset);
-static void qdisc_rcu_free(struct rcu_head *head)
+static void qdisc_free(struct Qdisc *qdisc)
{
- struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
-
if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
free_percpu(qdisc->cpu_qstats);
@@ -725,11 +723,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
kfree_skb_list(qdisc->gso_skb);
kfree_skb(qdisc->skb_bad_txq);
- /*
- * gen_estimator est_timer() might access qdisc->q.lock,
- * wait a RCU grace period before freeing qdisc.
- */
- call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
+ qdisc_free(qdisc);
}
EXPORT_SYMBOL(qdisc_destroy);
@@ -960,7 +954,7 @@ void dev_init_scheduler(struct net_device *dev)
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
+ timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}
static void shutdown_scheduler_queue(struct net_device *dev,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3f88b75488b0..d04068a97d81 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1033,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl == NULL)
return -ENOBUFS;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
return err;
@@ -1144,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -1405,7 +1406,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
return err;
q->eligible = RB_ROOT;
- err = tcf_block_get(&q->root.block, &q->root.filter_list);
+ err = tcf_block_get(&q->root.block, &q->root.filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 7e148376ba52..fa0380730ff0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -142,6 +142,7 @@ struct htb_class {
struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
unsigned int drops ____cacheline_aligned_in_smp;
+ unsigned int overlimits;
};
struct htb_level {
@@ -243,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -533,6 +535,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
if (new_mode == cl->cmode)
return;
+ if (new_mode == HTB_CANT_SEND)
+ cl->overlimits++;
+
if (cl->prio_activity) { /* not necessary: speed optimization */
if (cl->cmode != HTB_CANT_SEND)
htb_deactivate_prios(q, cl);
@@ -1026,7 +1031,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
@@ -1143,6 +1148,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
struct htb_class *cl = (struct htb_class *)arg;
struct gnet_stats_queue qs = {
.drops = cl->drops,
+ .overlimits = cl->overlimits,
};
__u32 qlen = 0;
@@ -1388,7 +1394,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!cl)
goto failure;
- err = tcf_block_get(&cl->block, &cl->filter_list);
+ err = tcf_block_get(&cl->block, &cl->filter_list, sch);
if (err) {
kfree(cl);
goto failure;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index 44de4ee51ce9..b599db26d34b 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -20,6 +20,7 @@
struct ingress_sched_data {
struct tcf_block *block;
+ struct tcf_block_ext_info block_info;
};
static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg)
@@ -59,7 +60,10 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
struct net_device *dev = qdisc_dev(sch);
int err;
- err = tcf_block_get(&q->block, &dev->ingress_cl_list);
+ q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+
+ err = tcf_block_get_ext(&q->block, &dev->ingress_cl_list,
+ sch, &q->block_info);
if (err)
return err;
@@ -72,8 +76,10 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
- tcf_block_put(q->block);
+ tcf_block_put_ext(q->block, &dev->ingress_cl_list,
+ sch, &q->block_info);
net_dec_ingress_queue();
}
@@ -114,6 +120,8 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
struct clsact_sched_data {
struct tcf_block *ingress_block;
struct tcf_block *egress_block;
+ struct tcf_block_ext_info ingress_block_info;
+ struct tcf_block_ext_info egress_block_info;
};
static unsigned long clsact_find(struct Qdisc *sch, u32 classid)
@@ -153,13 +161,19 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
struct net_device *dev = qdisc_dev(sch);
int err;
- err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list);
+ q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
+
+ err = tcf_block_get_ext(&q->ingress_block, &dev->ingress_cl_list,
+ sch, &q->ingress_block_info);
if (err)
return err;
- err = tcf_block_get(&q->egress_block, &dev->egress_cl_list);
+ q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS;
+
+ err = tcf_block_get_ext(&q->egress_block, &dev->egress_cl_list,
+ sch, &q->egress_block_info);
if (err)
- return err;
+ goto err_egress_block_get;
net_inc_ingress_queue();
net_inc_egress_queue();
@@ -167,14 +181,22 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt)
sch->flags |= TCQ_F_CPUSTATS;
return 0;
+
+err_egress_block_get:
+ tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list,
+ sch, &q->ingress_block_info);
+ return err;
}
static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
- tcf_block_put(q->egress_block);
- tcf_block_put(q->ingress_block);
+ tcf_block_put_ext(q->egress_block, &dev->egress_cl_list,
+ sch, &q->egress_block_info);
+ tcf_block_put_ext(q->ingress_block, &dev->ingress_cl_list,
+ sch, &q->ingress_block_info);
net_dec_ingress_queue();
net_dec_egress_queue();
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 6bcdfe6e7b63..51c2b289c69b 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -18,10 +18,16 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
struct mqprio_sched {
struct Qdisc **qdiscs;
+ u16 mode;
+ u16 shaper;
int hw_offload;
+ u32 flags;
+ u64 min_rate[TC_QOPT_MAX_QUEUE];
+ u64 max_rate[TC_QOPT_MAX_QUEUE];
};
static void mqprio_destroy(struct Qdisc *sch)
@@ -39,9 +45,17 @@ static void mqprio_destroy(struct Qdisc *sch)
}
if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
- struct tc_mqprio_qopt mqprio = {};
+ struct tc_mqprio_qopt_offload mqprio = { { 0 } };
- dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO, &mqprio);
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ case TC_MQPRIO_MODE_CHANNEL:
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
+ &mqprio);
+ break;
+ default:
+ return;
+ }
} else {
netdev_set_num_tc(dev, 0);
}
@@ -97,6 +111,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
return 0;
}
+static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = {
+ [TCA_MQPRIO_MODE] = { .len = sizeof(u16) },
+ [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) },
+ [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED },
+ [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED },
+};
+
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+ const struct nla_policy *policy, int len)
+{
+ int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+ if (nested_len >= nla_attr_size(0))
+ return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+ nested_len, policy, NULL);
+
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+ return 0;
+}
+
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
{
struct net_device *dev = qdisc_dev(sch);
@@ -105,6 +139,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *qdisc;
int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL;
+ struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+ struct nlattr *attr;
+ int rem;
+ int len;
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
@@ -115,6 +153,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
if (!netif_is_multiqueue(dev))
return -EOPNOTSUPP;
+ /* make certain can allocate enough classids to handle queues */
+ if (dev->num_tx_queues >= TC_H_MIN_PRIORITY)
+ return -ENOMEM;
+
if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
@@ -122,6 +164,59 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
if (mqprio_parse_opt(dev, qopt))
return -EINVAL;
+ len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
+ if (len > 0) {
+ err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+ sizeof(*qopt));
+ if (err < 0)
+ return err;
+
+ if (!qopt->hw)
+ return -EINVAL;
+
+ if (tb[TCA_MQPRIO_MODE]) {
+ priv->flags |= TC_MQPRIO_F_MODE;
+ priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+ }
+
+ if (tb[TCA_MQPRIO_SHAPER]) {
+ priv->flags |= TC_MQPRIO_F_SHAPER;
+ priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+ }
+
+ if (tb[TCA_MQPRIO_MIN_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->min_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MIN_RATE;
+ }
+
+ if (tb[TCA_MQPRIO_MAX_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->max_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MAX_RATE;
+ }
+ }
+
/* pre-allocate qdisc, attachment can't fail */
priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
GFP_KERNEL);
@@ -146,14 +241,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_mqprio_qopt mqprio = *qopt;
+ struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_MQPRIO,
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
+ return -EINVAL;
+ break;
+ case TC_MQPRIO_MODE_CHANNEL:
+ mqprio.flags = priv->flags;
+ if (priv->flags & TC_MQPRIO_F_MODE)
+ mqprio.mode = priv->mode;
+ if (priv->flags & TC_MQPRIO_F_SHAPER)
+ mqprio.shaper = priv->shaper;
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.min_rate[i] = priv->min_rate[i];
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.max_rate[i] = priv->max_rate[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+ err = dev->netdev_ops->ndo_setup_tc(dev,
+ TC_SETUP_MQPRIO,
&mqprio);
if (err)
return err;
- priv->hw_offload = mqprio.hw;
+ priv->hw_offload = mqprio.qopt.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -193,7 +310,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
unsigned long cl)
{
struct net_device *dev = qdisc_dev(sch);
- unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
+ unsigned long ntx = cl - 1;
if (ntx >= dev->num_tx_queues)
return NULL;
@@ -223,11 +340,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
return 0;
}
+static int dump_rates(struct mqprio_sched *priv,
+ struct tc_mqprio_qopt *opt, struct sk_buff *skb)
+{
+ struct nlattr *nest;
+ int i;
+
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE) {
+ nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64);
+ if (!nest)
+ goto nla_put_failure;
+
+ for (i = 0; i < opt->num_tc; i++) {
+ if (nla_put(skb, TCA_MQPRIO_MIN_RATE64,
+ sizeof(priv->min_rate[i]),
+ &priv->min_rate[i]))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE) {
+ nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64);
+ if (!nest)
+ goto nla_put_failure;
+
+ for (i = 0; i < opt->num_tc; i++) {
+ if (nla_put(skb, TCA_MQPRIO_MAX_RATE64,
+ sizeof(priv->max_rate[i]),
+ &priv->max_rate[i]))
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, nest);
+ }
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct net_device *dev = qdisc_dev(sch);
struct mqprio_sched *priv = qdisc_priv(sch);
- unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc;
unsigned int i;
@@ -258,12 +415,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
opt.offset[i] = dev->tc_to_txq[i].offset;
}
- if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
+ if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt))
goto nla_put_failure;
- return skb->len;
+ if ((priv->flags & TC_MQPRIO_F_MODE) &&
+ nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode))
+ goto nla_put_failure;
+
+ if ((priv->flags & TC_MQPRIO_F_SHAPER) &&
+ nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper))
+ goto nla_put_failure;
+
+ if ((priv->flags & TC_MQPRIO_F_MIN_RATE ||
+ priv->flags & TC_MQPRIO_F_MAX_RATE) &&
+ (dump_rates(priv, &opt, skb) != 0))
+ goto nla_put_failure;
+
+ return nla_nest_end(skb, nla);
nla_put_failure:
- nlmsg_trim(skb, b);
+ nlmsg_trim(skb, nla);
return -1;
}
@@ -282,38 +452,35 @@ static unsigned long mqprio_find(struct Qdisc *sch, u32 classid)
struct net_device *dev = qdisc_dev(sch);
unsigned int ntx = TC_H_MIN(classid);
- if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
- return 0;
- return ntx;
+ /* There are essentially two regions here that have valid classid
+ * values. The first region will have a classid value of 1 through
+ * num_tx_queues. All of these are backed by actual Qdiscs.
+ */
+ if (ntx < TC_H_MIN_PRIORITY)
+ return (ntx <= dev->num_tx_queues) ? ntx : 0;
+
+ /* The second region represents the hardware traffic classes. These
+ * are represented by classid values of TC_H_MIN_PRIORITY through
+ * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1
+ */
+ return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0;
}
static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
struct sk_buff *skb, struct tcmsg *tcm)
{
- struct net_device *dev = qdisc_dev(sch);
+ if (cl < TC_H_MIN_PRIORITY) {
+ struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+ struct net_device *dev = qdisc_dev(sch);
+ int tc = netdev_txq_to_tc(dev, cl - 1);
- if (cl <= netdev_get_num_tc(dev)) {
+ tcm->tcm_parent = (tc < 0) ? 0 :
+ TC_H_MAKE(TC_H_MAJ(sch->handle),
+ TC_H_MIN(tc + TC_H_MIN_PRIORITY));
+ tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+ } else {
tcm->tcm_parent = TC_H_ROOT;
tcm->tcm_info = 0;
- } else {
- int i;
- struct netdev_queue *dev_queue;
-
- dev_queue = mqprio_queue_get(sch, cl);
- tcm->tcm_parent = 0;
- for (i = 0; i < netdev_get_num_tc(dev); i++) {
- struct netdev_tc_txq tc = dev->tc_to_txq[i];
- int q_idx = cl - netdev_get_num_tc(dev);
-
- if (q_idx > tc.offset &&
- q_idx <= tc.offset + tc.count) {
- tcm->tcm_parent =
- TC_H_MAKE(TC_H_MAJ(sch->handle),
- TC_H_MIN(i + 1));
- break;
- }
- }
- tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
}
tcm->tcm_handle |= TC_H_MIN(cl);
return 0;
@@ -324,15 +491,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
__releases(d->lock)
__acquires(d->lock)
{
- struct net_device *dev = qdisc_dev(sch);
-
- if (cl <= netdev_get_num_tc(dev)) {
+ if (cl >= TC_H_MIN_PRIORITY) {
int i;
__u32 qlen = 0;
struct Qdisc *qdisc;
struct gnet_stats_queue qstats = {0};
struct gnet_stats_basic_packed bstats = {0};
- struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
+ struct net_device *dev = qdisc_dev(sch);
+ struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK];
/* Drop lock here it will be reclaimed before touching
* statistics this is required because the d->lock we
@@ -385,12 +551,25 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
/* Walk hierarchy with a virtual class per tc */
arg->count = arg->skip;
- for (ntx = arg->skip;
- ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
- ntx++) {
+ for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) {
+ if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) {
+ arg->stop = 1;
+ return;
+ }
+ arg->count++;
+ }
+
+ /* Pad the values and skip over unused traffic classes */
+ if (ntx < TC_MAX_QUEUE) {
+ arg->count = TC_MAX_QUEUE;
+ ntx = TC_MAX_QUEUE;
+ }
+
+ /* Reset offset, sort out remaining per-queue qdiscs */
+ for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) {
if (arg->fn(sch, ntx + 1, arg) < 0) {
arg->stop = 1;
- break;
+ return;
}
arg->count++;
}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index ff4fc3e0facd..012216386c0b 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -245,7 +246,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b1266e75ca43..db0228a65e8c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -146,15 +146,8 @@ struct netem_sched_data {
*/
struct netem_skb_cb {
psched_time_t time_to_send;
- ktime_t tstamp_save;
};
-
-static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
-{
- return rb_entry(rb, struct sk_buff, rbnode);
-}
-
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
/* we assume we can use skb next/prev/tstamp as storage for rb_node */
@@ -362,12 +355,13 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
static void tfifo_reset(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
- struct rb_node *p;
+ struct rb_node *p = rb_first(&q->t_root);
- while ((p = rb_first(&q->t_root))) {
- struct sk_buff *skb = netem_rb_to_skb(p);
+ while (p) {
+ struct sk_buff *skb = rb_to_skb(p);
- rb_erase(p, &q->t_root);
+ p = rb_next(p);
+ rb_erase(&skb->rbnode, &q->t_root);
rtnl_kfree_skbs(skb, skb);
}
}
@@ -382,7 +376,7 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
struct sk_buff *skb;
parent = *p;
- skb = netem_rb_to_skb(parent);
+ skb = rb_to_skb(parent);
if (tnext >= netem_skb_cb(skb)->time_to_send)
p = &parent->rb_right;
else
@@ -538,7 +532,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff *t_skb;
struct netem_skb_cb *t_last;
- t_skb = netem_rb_to_skb(rb_last(&q->t_root));
+ t_skb = skb_rb_last(&q->t_root);
t_last = netem_skb_cb(t_skb);
if (!last ||
t_last->time_to_send > last->time_to_send) {
@@ -561,7 +555,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
cb->time_to_send = now + delay;
- cb->tstamp_save = skb->tstamp;
++q->counter;
tfifo_enqueue(skb, sch);
} else {
@@ -618,7 +611,7 @@ deliver:
if (p) {
psched_time_t time_to_send;
- skb = netem_rb_to_skb(p);
+ skb = rb_to_skb(p);
/* if more time remaining? */
time_to_send = netem_skb_cb(skb)->time_to_send;
@@ -629,7 +622,10 @@ deliver:
qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
skb->prev = NULL;
- skb->tstamp = netem_skb_cb(skb)->tstamp_save;
+ /* skb->dev shares skb->rbnode area,
+ * we need to restore its value.
+ */
+ skb->dev = qdisc_dev(sch);
#ifdef CONFIG_NET_CLS_ACT
/*
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index 6c2791d6102d..776c694c77c7 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -74,6 +74,7 @@ struct pie_sched_data {
struct pie_vars vars;
struct pie_stats stats;
struct timer_list adapt_timer;
+ struct Qdisc *sch;
};
static void pie_params_init(struct pie_params *params)
@@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch)
pie_vars_init(&q->vars);
}
-static void pie_timer(unsigned long arg)
+static void pie_timer(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct pie_sched_data *q = qdisc_priv(sch);
+ struct pie_sched_data *q = from_timer(q, t, adapt_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt)
pie_vars_init(&q->vars);
sch->limit = q->params.limit;
- setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch);
+ q->sch = sch;
+ timer_setup(&q->adapt_timer, pie_timer, 0);
if (opt) {
int err = pie_change(sch, opt);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2dd6c68ae91e..2c79559a0d31 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -212,7 +213,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
if (!opt)
return -EINVAL;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 6ddfd4991108..6962b37a3ad3 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -709,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_STOLEN:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return NULL;
}
@@ -1419,7 +1420,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
int i, j, err;
u32 max_cl_shift, maxbudg_shift, max_classes;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 93b9d70a9b28..fdfdb56aaae2 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -40,6 +40,7 @@ struct red_sched_data {
u32 limit; /* HARD maximal queue length */
unsigned char flags;
struct timer_list adapt_timer;
+ struct Qdisc *sch;
struct red_parms parms;
struct red_vars vars;
struct red_stats stats;
@@ -221,10 +222,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
return 0;
}
-static inline void red_adaptative_timer(unsigned long arg)
+static inline void red_adaptative_timer(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct red_sched_data *q = qdisc_priv(sch);
+ struct red_sched_data *q = from_timer(q, t, adapt_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -238,7 +239,8 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt)
struct red_sched_data *q = qdisc_priv(sch);
q->qdisc = &noop_qdisc;
- setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
+ q->sch = sch;
+ timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
return red_change(sch, opt);
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index cc39e170b4aa..0678debdd856 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -268,6 +268,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return false;
}
@@ -553,7 +554,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
struct sfb_sched_data *q = qdisc_priv(sch);
int err;
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 74ea863b8240..890f4a4564e7 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -145,6 +145,7 @@ struct sfq_sched_data {
int perturb_period;
unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
struct timer_list perturb_timer;
+ struct Qdisc *sch;
};
/*
@@ -189,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
case TC_ACT_SHOT:
return 0;
}
@@ -604,10 +606,10 @@ drop:
qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}
-static void sfq_perturbation(unsigned long arg)
+static void sfq_perturbation(struct timer_list *t)
{
- struct Qdisc *sch = (struct Qdisc *)arg;
- struct sfq_sched_data *q = qdisc_priv(sch);
+ struct sfq_sched_data *q = from_timer(q, t, perturb_timer);
+ struct Qdisc *sch = q->sch;
spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
@@ -722,10 +724,9 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
int i;
int err;
- setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
- (unsigned long)sch);
+ timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE);
- err = tcf_block_get(&q->block, &q->filter_list);
+ err = tcf_block_get(&q->block, &q->filter_list, sch);
if (err)
return err;
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 70f1b570bab9..bf90c5397719 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -12,7 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
inqueue.o outqueue.o ulpqueue.o \
tsnmap.o bind_addr.o socket.o primitive.o \
output.o input.o debug.o stream.o auth.o \
- offload.o
+ offload.o stream_sched.o stream_sched_prio.o \
+ stream_sched_rr.o
sctp_probe-y := probe.o
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3afac275ee82..7b261afc47b9 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
if (chunk->sent_count) {
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++;
} else {
chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
- streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
+ streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++;
}
return 1;
} else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) &&
@@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk)
&chunk->asoc->stream.out[chunk->sinfo.sinfo_stream];
chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++;
return 1;
} else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) &&
chunk->msg->expires_at &&
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 2966ff400755..4db012aa25f7 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -50,6 +50,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
/* Declare internal functions here. */
static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
@@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp);
/* Add data to the front of the queue. */
static inline void sctp_outq_head_data(struct sctp_outq *q,
- struct sctp_chunk *ch)
+ struct sctp_chunk *ch)
{
+ struct sctp_stream_out_ext *oute;
+ __u16 stream;
+
list_add(&ch->list, &q->out_chunk_list);
q->out_qlen += ch->skb->len;
+
+ stream = sctp_chunk_stream_no(ch);
+ oute = q->asoc->stream.out[stream].ext;
+ list_add(&ch->stream_list, &oute->outq);
}
/* Take data from the front of the queue. */
static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
{
- struct sctp_chunk *ch = NULL;
-
- if (!list_empty(&q->out_chunk_list)) {
- struct list_head *entry = q->out_chunk_list.next;
-
- ch = list_entry(entry, struct sctp_chunk, list);
- list_del_init(entry);
- q->out_qlen -= ch->skb->len;
- }
- return ch;
+ return q->sched->dequeue(q);
}
+
/* Add data chunk to the end of the queue. */
static inline void sctp_outq_tail_data(struct sctp_outq *q,
struct sctp_chunk *ch)
{
+ struct sctp_stream_out_ext *oute;
+ __u16 stream;
+
list_add_tail(&ch->list, &q->out_chunk_list);
q->out_qlen += ch->skb->len;
+
+ stream = sctp_chunk_stream_no(ch);
+ oute = q->asoc->stream.out[stream].ext;
+ list_add_tail(&ch->stream_list, &oute->outq);
}
/*
@@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
INIT_LIST_HEAD(&q->retransmit);
INIT_LIST_HEAD(&q->sacked);
INIT_LIST_HEAD(&q->abandoned);
+ sctp_sched_set_sched(asoc, SCTP_SS_FCFS);
}
/* Free the outqueue structure and any related pending chunks.
@@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q)
/* Throw away any leftover data chunks. */
while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
+ sctp_sched_dequeue_done(q, chunk);
/* Mark as send failure. */
sctp_chunk_fail(chunk, q->error);
@@ -366,7 +375,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc,
streamout = &asoc->stream.out[chk->sinfo.sinfo_stream];
asoc->sent_cnt_removable--;
asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
- streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++;
if (!chk->tsn_gap_acked) {
if (chk->transport)
@@ -391,20 +400,21 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
struct sctp_outq *q = &asoc->outqueue;
struct sctp_chunk *chk, *temp;
+ q->sched->unsched_all(&asoc->stream);
+
list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) {
if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) ||
chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive)
continue;
- list_del_init(&chk->list);
- q->out_qlen -= chk->skb->len;
+ sctp_sched_dequeue_common(q, chk);
asoc->sent_cnt_removable--;
asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) {
struct sctp_stream_out *streamout =
&asoc->stream.out[chk->sinfo.sinfo_stream];
- streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
+ streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++;
}
msg_len -= SCTP_DATA_SNDSIZE(chk) +
@@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc,
break;
}
+ q->sched->sched_all(&asoc->stream);
+
return msg_len;
}
@@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
__u32 sid = ntohs(chunk->subh.data_hdr->stream);
- /* RFC 2960 6.5 Every DATA chunk MUST carry a valid
- * stream identifier.
- */
- if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) {
-
- /* Mark as failed send. */
- sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM);
- if (asoc->peer.prsctp_capable &&
- SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags))
- asoc->sent_cnt_removable--;
- sctp_chunk_free(chunk);
- continue;
- }
-
/* Has this chunk expired? */
if (sctp_chunk_abandoned(chunk)) {
+ sctp_sched_dequeue_done(q, chunk);
sctp_chunk_fail(chunk, 0);
sctp_chunk_free(chunk);
continue;
@@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
new_transport = asoc->peer.active_path;
if (new_transport->state == SCTP_UNCONFIRMED) {
WARN_ONCE(1, "Attempt to send packet on unconfirmed path.");
+ sctp_sched_dequeue_done(q, chunk);
sctp_chunk_fail(chunk, 0);
sctp_chunk_free(chunk);
continue;
@@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp)
else
asoc->stats.oodchunks++;
+ /* Only now it's safe to consider this
+ * chunk as sent, sched-wise.
+ */
+ sctp_sched_dequeue_done(q, chunk);
+
break;
default:
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index e6a2974e020e..402bfbb888cd 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -50,6 +50,7 @@
#include <net/sock.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
static int sctp_cmd_interpreter(enum sctp_event event_type,
union sctp_subtype subtype,
@@ -1089,6 +1090,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc,
list_for_each_entry(chunk, &msg->chunks, frag_list)
sctp_outq_tail(&asoc->outqueue, chunk, gfp);
+
+ asoc->outqueue.sched->enqueue(&asoc->outqueue, msg);
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 17841ab30798..c75acdf71a6f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -79,6 +79,7 @@
#include <net/sock.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
/* Forward declarations for internal helper functions. */
static int sctp_writeable(struct sock *sk);
@@ -1927,6 +1928,13 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len)
goto out_free;
}
+ /* Allocate sctp_stream_out_ext if not already done */
+ if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) {
+ err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream);
+ if (err)
+ goto out_free;
+ }
+
if (sctp_wspace(asoc) < msg_len)
sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc));
@@ -3907,6 +3915,64 @@ out:
return retval;
}
+static int sctp_setsockopt_scheduler(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_assoc_value params;
+ int retval = -EINVAL;
+
+ if (optlen < sizeof(params))
+ goto out;
+
+ optlen = sizeof(params);
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (params.assoc_value > SCTP_SS_MAX)
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc)
+ goto out;
+
+ retval = sctp_sched_set_sched(asoc, params.assoc_value);
+
+out:
+ return retval;
+}
+
+static int sctp_setsockopt_scheduler_value(struct sock *sk,
+ char __user *optval,
+ unsigned int optlen)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_value params;
+ int retval = -EINVAL;
+
+ if (optlen < sizeof(params))
+ goto out;
+
+ optlen = sizeof(params);
+ if (copy_from_user(&params, optval, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc)
+ goto out;
+
+ retval = sctp_sched_set_value(asoc, params.stream_id,
+ params.stream_value, GFP_KERNEL);
+
+out:
+ return retval;
+}
+
/* API 6.2 setsockopt(), getsockopt()
*
* Applications use setsockopt() and getsockopt() to set or retrieve
@@ -4088,6 +4154,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname,
case SCTP_ADD_STREAMS:
retval = sctp_setsockopt_add_streams(sk, optval, optlen);
break;
+ case SCTP_STREAM_SCHEDULER:
+ retval = sctp_setsockopt_scheduler(sk, optval, optlen);
+ break;
+ case SCTP_STREAM_SCHEDULER_VALUE:
+ retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
@@ -6649,7 +6721,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
{
- struct sctp_stream_out *streamout;
+ struct sctp_stream_out_ext *streamoute;
struct sctp_association *asoc;
struct sctp_prstatus params;
int retval = -EINVAL;
@@ -6672,21 +6744,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len,
if (!asoc || params.sprstat_sid >= asoc->stream.outcnt)
goto out;
- streamout = &asoc->stream.out[params.sprstat_sid];
+ streamoute = asoc->stream.out[params.sprstat_sid].ext;
+ if (!streamoute) {
+ /* Not allocated yet, means all stats are 0 */
+ params.sprstat_abandoned_unsent = 0;
+ params.sprstat_abandoned_sent = 0;
+ retval = 0;
+ goto out;
+ }
+
if (policy == SCTP_PR_SCTP_NONE) {
params.sprstat_abandoned_unsent = 0;
params.sprstat_abandoned_sent = 0;
for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) {
params.sprstat_abandoned_unsent +=
- streamout->abandoned_unsent[policy];
+ streamoute->abandoned_unsent[policy];
params.sprstat_abandoned_sent +=
- streamout->abandoned_sent[policy];
+ streamoute->abandoned_sent[policy];
}
} else {
params.sprstat_abandoned_unsent =
- streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)];
+ streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)];
params.sprstat_abandoned_sent =
- streamout->abandoned_sent[__SCTP_PR_INDEX(policy)];
+ streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)];
}
if (put_user(len, optlen) || copy_to_user(optval, &params, len)) {
@@ -6782,6 +6862,85 @@ out:
return retval;
}
+static int sctp_getsockopt_scheduler(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_assoc_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ params.assoc_value = sctp_sched_get_sched(asoc);
+
+ if (put_user(len, optlen))
+ goto out;
+
+ if (copy_to_user(optval, &params, len))
+ goto out;
+
+ retval = 0;
+
+out:
+ return retval;
+}
+
+static int sctp_getsockopt_scheduler_value(struct sock *sk, int len,
+ char __user *optval,
+ int __user *optlen)
+{
+ struct sctp_stream_value params;
+ struct sctp_association *asoc;
+ int retval = -EFAULT;
+
+ if (len < sizeof(params)) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ len = sizeof(params);
+ if (copy_from_user(&params, optval, len))
+ goto out;
+
+ asoc = sctp_id2assoc(sk, params.assoc_id);
+ if (!asoc) {
+ retval = -EINVAL;
+ goto out;
+ }
+
+ retval = sctp_sched_get_value(asoc, params.stream_id,
+ &params.stream_value);
+ if (retval)
+ goto out;
+
+ if (put_user(len, optlen)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+ if (copy_to_user(optval, &params, len)) {
+ retval = -EFAULT;
+ goto out;
+ }
+
+out:
+ return retval;
+}
+
static int sctp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
@@ -6964,6 +7123,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
retval = sctp_getsockopt_enable_strreset(sk, len, optval,
optlen);
break;
+ case SCTP_STREAM_SCHEDULER:
+ retval = sctp_getsockopt_scheduler(sk, len, optval,
+ optlen);
+ break;
+ case SCTP_STREAM_SCHEDULER_VALUE:
+ retval = sctp_getsockopt_scheduler_value(sk, len, optval,
+ optlen);
+ break;
default:
retval = -ENOPROTOOPT;
break;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index 63ea15503714..5ea33a2c453b 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -32,44 +32,181 @@
* Xin Long <lucien.xin@gmail.com>
*/
+#include <linux/list.h>
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Migrates chunks from stream queues to new stream queues if needed,
+ * but not across associations. Also, removes those chunks to streams
+ * higher than the new max.
+ */
+static void sctp_stream_outq_migrate(struct sctp_stream *stream,
+ struct sctp_stream *new, __u16 outcnt)
+{
+ struct sctp_association *asoc;
+ struct sctp_chunk *ch, *temp;
+ struct sctp_outq *outq;
+ int i;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ outq = &asoc->outqueue;
+
+ list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) {
+ __u16 sid = sctp_chunk_stream_no(ch);
+
+ if (sid < outcnt)
+ continue;
+
+ sctp_sched_dequeue_common(outq, ch);
+ /* No need to call dequeue_done here because
+ * the chunks are not scheduled by now.
+ */
+
+ /* Mark as failed send. */
+ sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM);
+ if (asoc->peer.prsctp_capable &&
+ SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags))
+ asoc->sent_cnt_removable--;
+
+ sctp_chunk_free(ch);
+ }
+
+ if (new) {
+ /* Here we actually move the old ext stuff into the new
+ * buffer, because we want to keep it. Then
+ * sctp_stream_update will swap ->out pointers.
+ */
+ for (i = 0; i < outcnt; i++) {
+ kfree(new->out[i].ext);
+ new->out[i].ext = stream->out[i].ext;
+ stream->out[i].ext = NULL;
+ }
+ }
+
+ for (i = outcnt; i < stream->outcnt; i++)
+ kfree(stream->out[i].ext);
+}
+
+static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt,
+ gfp_t gfp)
+{
+ struct sctp_stream_out *out;
+
+ out = kmalloc_array(outcnt, sizeof(*out), gfp);
+ if (!out)
+ return -ENOMEM;
+
+ if (stream->out) {
+ memcpy(out, stream->out, min(outcnt, stream->outcnt) *
+ sizeof(*out));
+ kfree(stream->out);
+ }
+
+ if (outcnt > stream->outcnt)
+ memset(out + stream->outcnt, 0,
+ (outcnt - stream->outcnt) * sizeof(*out));
+
+ stream->out = out;
+
+ return 0;
+}
+
+static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt,
+ gfp_t gfp)
+{
+ struct sctp_stream_in *in;
+
+ in = kmalloc_array(incnt, sizeof(*stream->in), gfp);
+
+ if (!in)
+ return -ENOMEM;
+
+ if (stream->in) {
+ memcpy(in, stream->in, min(incnt, stream->incnt) *
+ sizeof(*in));
+ kfree(stream->in);
+ }
+
+ if (incnt > stream->incnt)
+ memset(in + stream->incnt, 0,
+ (incnt - stream->incnt) * sizeof(*in));
+
+ stream->in = in;
+
+ return 0;
+}
int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
gfp_t gfp)
{
- int i;
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+ int i, ret = 0;
+
+ gfp |= __GFP_NOWARN;
/* Initial stream->out size may be very big, so free it and alloc
- * a new one with new outcnt to save memory.
+ * a new one with new outcnt to save memory if needed.
*/
- kfree(stream->out);
+ if (outcnt == stream->outcnt)
+ goto in;
- stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp);
- if (!stream->out)
- return -ENOMEM;
+ /* Filter out chunks queued on streams that won't exist anymore */
+ sched->unsched_all(stream);
+ sctp_stream_outq_migrate(stream, NULL, outcnt);
+ sched->sched_all(stream);
+
+ i = sctp_stream_alloc_out(stream, outcnt, gfp);
+ if (i)
+ return i;
stream->outcnt = outcnt;
for (i = 0; i < stream->outcnt; i++)
stream->out[i].state = SCTP_STREAM_OPEN;
+ sched->init(stream);
+
+in:
if (!incnt)
- return 0;
+ goto out;
- stream->in = kcalloc(incnt, sizeof(*stream->in), gfp);
- if (!stream->in) {
- kfree(stream->out);
- stream->out = NULL;
- return -ENOMEM;
+ i = sctp_stream_alloc_in(stream, incnt, gfp);
+ if (i) {
+ ret = -ENOMEM;
+ goto free;
}
stream->incnt = incnt;
+ goto out;
- return 0;
+free:
+ sched->free(stream);
+ kfree(stream->out);
+ stream->out = NULL;
+out:
+ return ret;
+}
+
+int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
+{
+ struct sctp_stream_out_ext *soute;
+
+ soute = kzalloc(sizeof(*soute), GFP_KERNEL);
+ if (!soute)
+ return -ENOMEM;
+ stream->out[sid].ext = soute;
+
+ return sctp_sched_init_sid(stream, sid, GFP_KERNEL);
}
void sctp_stream_free(struct sctp_stream *stream)
{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+ int i;
+
+ sched->free(stream);
+ for (i = 0; i < stream->outcnt; i++)
+ kfree(stream->out[i].ext);
kfree(stream->out);
kfree(stream->in);
}
@@ -87,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream)
void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+
+ sched->unsched_all(stream);
+ sctp_stream_outq_migrate(stream, new, new->outcnt);
sctp_stream_free(stream);
stream->out = new->out;
@@ -94,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
stream->outcnt = new->outcnt;
stream->incnt = new->incnt;
+ sched->sched_all(stream);
+
new->out = NULL;
new->in = NULL;
}
@@ -270,15 +413,9 @@ int sctp_send_add_streams(struct sctp_association *asoc,
}
if (out) {
- struct sctp_stream_out *streamout;
-
- streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
- GFP_KERNEL);
- if (!streamout)
+ retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL);
+ if (retval)
goto out;
-
- memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
- stream->out = streamout;
}
chunk = sctp_make_strreset_addstrm(asoc, out, in);
@@ -601,7 +738,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
struct sctp_strreset_addstrm *addstrm = param.v;
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
- struct sctp_stream_in *streamin;
__u32 request_seq, incnt;
__u16 in, i;
@@ -648,13 +784,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out(
if (!in || incnt > SCTP_MAX_STREAM)
goto out;
- streamin = krealloc(stream->in, incnt * sizeof(*streamin),
- GFP_ATOMIC);
- if (!streamin)
+ if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC))
goto out;
- memset(streamin + stream->incnt, 0, in * sizeof(*streamin));
- stream->in = streamin;
stream->incnt = incnt;
result = SCTP_STRRESET_PERFORMED;
@@ -676,10 +808,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
struct sctp_strreset_addstrm *addstrm = param.v;
struct sctp_stream *stream = &asoc->stream;
__u32 result = SCTP_STRRESET_DENIED;
- struct sctp_stream_out *streamout;
struct sctp_chunk *chunk = NULL;
__u32 request_seq, outcnt;
__u16 out, i;
+ int ret;
request_seq = ntohl(addstrm->request_seq);
if (TSN_lt(asoc->strreset_inseq, request_seq) ||
@@ -708,14 +840,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in(
if (!out || outcnt > SCTP_MAX_STREAM)
goto out;
- streamout = krealloc(stream->out, outcnt * sizeof(*streamout),
- GFP_ATOMIC);
- if (!streamout)
+ ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC);
+ if (ret)
goto out;
- memset(streamout + stream->outcnt, 0, out * sizeof(*streamout));
- stream->out = streamout;
-
chunk = sctp_make_strreset_addstrm(asoc, out, 0);
if (!chunk)
goto out;
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
new file mode 100644
index 000000000000..0b83ec51e43b
--- /dev/null
+++ b/net/sctp/stream_sched.c
@@ -0,0 +1,275 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* First Come First Serve (a.k.a. FIFO)
+ * RFC DRAFT ndata Section 3.1
+ */
+static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid,
+ __u16 value, gfp_t gfp)
+{
+ return 0;
+}
+
+static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ *value = 0;
+ return 0;
+}
+
+static int sctp_sched_fcfs_init(struct sctp_stream *stream)
+{
+ return 0;
+}
+
+static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ return 0;
+}
+
+static void sctp_sched_fcfs_free(struct sctp_stream *stream)
+{
+}
+
+static void sctp_sched_fcfs_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+}
+
+static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_chunk *ch = NULL;
+ struct list_head *entry;
+
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ if (stream->out_curr) {
+ ch = list_entry(stream->out_curr->ext->outq.next,
+ struct sctp_chunk, stream_list);
+ } else {
+ entry = q->out_chunk_list.next;
+ ch = list_entry(entry, struct sctp_chunk, list);
+ }
+
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *chunk)
+{
+}
+
+static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream)
+{
+}
+
+static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream)
+{
+}
+
+static struct sctp_sched_ops sctp_sched_fcfs = {
+ .set = sctp_sched_fcfs_set,
+ .get = sctp_sched_fcfs_get,
+ .init = sctp_sched_fcfs_init,
+ .init_sid = sctp_sched_fcfs_init_sid,
+ .free = sctp_sched_fcfs_free,
+ .enqueue = sctp_sched_fcfs_enqueue,
+ .dequeue = sctp_sched_fcfs_dequeue,
+ .dequeue_done = sctp_sched_fcfs_dequeue_done,
+ .sched_all = sctp_sched_fcfs_sched_all,
+ .unsched_all = sctp_sched_fcfs_unsched_all,
+};
+
+/* API to other parts of the stack */
+
+extern struct sctp_sched_ops sctp_sched_prio;
+extern struct sctp_sched_ops sctp_sched_rr;
+
+static struct sctp_sched_ops *sctp_sched_ops[] = {
+ &sctp_sched_fcfs,
+ &sctp_sched_prio,
+ &sctp_sched_rr,
+};
+
+int sctp_sched_set_sched(struct sctp_association *asoc,
+ enum sctp_sched_type sched)
+{
+ struct sctp_sched_ops *n = sctp_sched_ops[sched];
+ struct sctp_sched_ops *old = asoc->outqueue.sched;
+ struct sctp_datamsg *msg = NULL;
+ struct sctp_chunk *ch;
+ int i, ret = 0;
+
+ if (old == n)
+ return ret;
+
+ if (sched > SCTP_SS_MAX)
+ return -EINVAL;
+
+ if (old) {
+ old->free(&asoc->stream);
+
+ /* Give the next scheduler a clean slate. */
+ for (i = 0; i < asoc->stream.outcnt; i++) {
+ void *p = asoc->stream.out[i].ext;
+
+ if (!p)
+ continue;
+
+ p += offsetofend(struct sctp_stream_out_ext, outq);
+ memset(p, 0, sizeof(struct sctp_stream_out_ext) -
+ offsetofend(struct sctp_stream_out_ext, outq));
+ }
+ }
+
+ asoc->outqueue.sched = n;
+ n->init(&asoc->stream);
+ for (i = 0; i < asoc->stream.outcnt; i++) {
+ if (!asoc->stream.out[i].ext)
+ continue;
+
+ ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ if (ret)
+ goto err;
+ }
+
+ /* We have to requeue all chunks already queued. */
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ if (ch->msg == msg)
+ continue;
+ msg = ch->msg;
+ n->enqueue(&asoc->outqueue, msg);
+ }
+
+ return ret;
+
+err:
+ n->free(&asoc->stream);
+ asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */
+
+ return ret;
+}
+
+int sctp_sched_get_sched(struct sctp_association *asoc)
+{
+ int i;
+
+ for (i = 0; i <= SCTP_SS_MAX; i++)
+ if (asoc->outqueue.sched == sctp_sched_ops[i])
+ return i;
+
+ return 0;
+}
+
+int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
+ __u16 value, gfp_t gfp)
+{
+ if (sid >= asoc->stream.outcnt)
+ return -EINVAL;
+
+ if (!asoc->stream.out[sid].ext) {
+ int ret;
+
+ ret = sctp_stream_init_ext(&asoc->stream, sid);
+ if (ret)
+ return ret;
+ }
+
+ return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp);
+}
+
+int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
+ __u16 *value)
+{
+ if (sid >= asoc->stream.outcnt)
+ return -EINVAL;
+
+ if (!asoc->stream.out[sid].ext)
+ return 0;
+
+ return asoc->outqueue.sched->get(&asoc->stream, sid, value);
+}
+
+void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
+{
+ if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) {
+ struct sctp_stream_out *sout;
+ __u16 sid;
+
+ /* datamsg is not finish, so save it as current one,
+ * in case application switch scheduler or a higher
+ * priority stream comes in.
+ */
+ sid = sctp_chunk_stream_no(ch);
+ sout = &q->asoc->stream.out[sid];
+ q->asoc->stream.out_curr = sout;
+ return;
+ }
+
+ q->asoc->stream.out_curr = NULL;
+ q->sched->dequeue_done(q, ch);
+}
+
+/* Auxiliary functions for the schedulers */
+void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch)
+{
+ list_del_init(&ch->list);
+ list_del_init(&ch->stream_list);
+ q->out_qlen -= ch->skb->len;
+}
+
+int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp)
+{
+ struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
+
+ INIT_LIST_HEAD(&stream->out[sid].ext->outq);
+ return sched->init_sid(stream, sid, gfp);
+}
+
+struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+
+ return asoc->outqueue.sched;
+}
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
new file mode 100644
index 000000000000..384dbf3c8760
--- /dev/null
+++ b/net/sctp/stream_sched_prio.c
@@ -0,0 +1,347 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Priority handling
+ * RFC DRAFT ndata section 3.4
+ */
+
+static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
+
+static struct sctp_stream_priorities *sctp_sched_prio_new_head(
+ struct sctp_stream *stream, int prio, gfp_t gfp)
+{
+ struct sctp_stream_priorities *p;
+
+ p = kmalloc(sizeof(*p), gfp);
+ if (!p)
+ return NULL;
+
+ INIT_LIST_HEAD(&p->prio_sched);
+ INIT_LIST_HEAD(&p->active);
+ p->next = NULL;
+ p->prio = prio;
+
+ return p;
+}
+
+static struct sctp_stream_priorities *sctp_sched_prio_get_head(
+ struct sctp_stream *stream, int prio, gfp_t gfp)
+{
+ struct sctp_stream_priorities *p;
+ int i;
+
+ /* Look into scheduled priorities first, as they are sorted and
+ * we can find it fast IF it's scheduled.
+ */
+ list_for_each_entry(p, &stream->prio_list, prio_sched) {
+ if (p->prio == prio)
+ return p;
+ if (p->prio > prio)
+ break;
+ }
+
+ /* No luck. So we search on all streams now. */
+ for (i = 0; i < stream->outcnt; i++) {
+ if (!stream->out[i].ext)
+ continue;
+
+ p = stream->out[i].ext->prio_head;
+ if (!p)
+ /* Means all other streams won't be initialized
+ * as well.
+ */
+ break;
+ if (p->prio == prio)
+ return p;
+ }
+
+ /* If not even there, allocate a new one. */
+ return sctp_sched_prio_new_head(stream, prio, gfp);
+}
+
+static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p)
+{
+ struct list_head *pos;
+
+ pos = p->next->prio_list.next;
+ if (pos == &p->active)
+ pos = pos->next;
+ p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list);
+}
+
+static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute)
+{
+ bool scheduled = false;
+
+ if (!list_empty(&soute->prio_list)) {
+ struct sctp_stream_priorities *prio_head = soute->prio_head;
+
+ /* Scheduled */
+ scheduled = true;
+
+ if (prio_head->next == soute)
+ /* Try to move to the next stream */
+ sctp_sched_prio_next_stream(prio_head);
+
+ list_del_init(&soute->prio_list);
+
+ /* Also unsched the priority if this was the last stream */
+ if (list_empty(&prio_head->active)) {
+ list_del_init(&prio_head->prio_sched);
+ /* If there is no stream left, clear next */
+ prio_head->next = NULL;
+ }
+ }
+
+ return scheduled;
+}
+
+static void sctp_sched_prio_sched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ struct sctp_stream_priorities *prio, *prio_head;
+
+ prio_head = soute->prio_head;
+
+ /* Nothing to do if already scheduled */
+ if (!list_empty(&soute->prio_list))
+ return;
+
+ /* Schedule the stream. If there is a next, we schedule the new
+ * one before it, so it's the last in round robin order.
+ * If there isn't, we also have to schedule the priority.
+ */
+ if (prio_head->next) {
+ list_add(&soute->prio_list, prio_head->next->prio_list.prev);
+ return;
+ }
+
+ list_add(&soute->prio_list, &prio_head->active);
+ prio_head->next = soute;
+
+ list_for_each_entry(prio, &stream->prio_list, prio_sched) {
+ if (prio->prio > prio_head->prio) {
+ list_add(&prio_head->prio_sched, prio->prio_sched.prev);
+ return;
+ }
+ }
+
+ list_add_tail(&prio_head->prio_sched, &stream->prio_list);
+}
+
+static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
+ __u16 prio, gfp_t gfp)
+{
+ struct sctp_stream_out *sout = &stream->out[sid];
+ struct sctp_stream_out_ext *soute = sout->ext;
+ struct sctp_stream_priorities *prio_head, *old;
+ bool reschedule = false;
+ int i;
+
+ prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
+ if (!prio_head)
+ return -ENOMEM;
+
+ reschedule = sctp_sched_prio_unsched(soute);
+ old = soute->prio_head;
+ soute->prio_head = prio_head;
+ if (reschedule)
+ sctp_sched_prio_sched(stream, soute);
+
+ if (!old)
+ /* Happens when we set the priority for the first time */
+ return 0;
+
+ for (i = 0; i < stream->outcnt; i++) {
+ soute = stream->out[i].ext;
+ if (soute && soute->prio_head == old)
+ /* It's still in use, nothing else to do here. */
+ return 0;
+ }
+
+ /* No hits, we are good to free it. */
+ kfree(old);
+
+ return 0;
+}
+
+static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ *value = stream->out[sid].ext->prio_head->prio;
+ return 0;
+}
+
+static int sctp_sched_prio_init(struct sctp_stream *stream)
+{
+ INIT_LIST_HEAD(&stream->prio_list);
+
+ return 0;
+}
+
+static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ INIT_LIST_HEAD(&stream->out[sid].ext->prio_list);
+ return sctp_sched_prio_set(stream, sid, 0, gfp);
+}
+
+static void sctp_sched_prio_free(struct sctp_stream *stream)
+{
+ struct sctp_stream_priorities *prio, *n;
+ LIST_HEAD(list);
+ int i;
+
+ /* As we don't keep a list of priorities, to avoid multiple
+ * frees we have to do it in 3 steps:
+ * 1. unsched everyone, so the lists are free to use in 2.
+ * 2. build the list of the priorities
+ * 3. free the list
+ */
+ sctp_sched_prio_unsched_all(stream);
+ for (i = 0; i < stream->outcnt; i++) {
+ if (!stream->out[i].ext)
+ continue;
+ prio = stream->out[i].ext->prio_head;
+ if (prio && list_empty(&prio->prio_sched))
+ list_add(&prio->prio_sched, &list);
+ }
+ list_for_each_entry_safe(prio, n, &list, prio_sched) {
+ list_del_init(&prio->prio_sched);
+ kfree(prio);
+ }
+}
+
+static void sctp_sched_prio_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *ch;
+ __u16 sid;
+
+ ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+ sid = sctp_chunk_stream_no(ch);
+ stream = &q->asoc->stream;
+ sctp_sched_prio_sched(stream, stream->out[sid].ext);
+}
+
+static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_stream_priorities *prio;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch = NULL;
+
+ /* Bail out quickly if queue is empty */
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ /* Find which chunk is next. It's easy, it's either the current
+ * one or the first chunk on the next active stream.
+ */
+ if (stream->out_curr) {
+ soute = stream->out_curr->ext;
+ } else {
+ prio = list_entry(stream->prio_list.next,
+ struct sctp_stream_priorities, prio_sched);
+ soute = prio->next;
+ }
+ ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_prio_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *ch)
+{
+ struct sctp_stream_priorities *prio;
+ struct sctp_stream_out_ext *soute;
+ __u16 sid;
+
+ /* Last chunk on that msg, move to the next stream on
+ * this priority.
+ */
+ sid = sctp_chunk_stream_no(ch);
+ soute = q->asoc->stream.out[sid].ext;
+ prio = soute->prio_head;
+
+ sctp_sched_prio_next_stream(prio);
+
+ if (list_empty(&soute->outq))
+ sctp_sched_prio_unsched(soute);
+}
+
+static void sctp_sched_prio_sched_all(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out *sout;
+ struct sctp_chunk *ch;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ __u16 sid;
+
+ sid = sctp_chunk_stream_no(ch);
+ sout = &stream->out[sid];
+ if (sout->ext)
+ sctp_sched_prio_sched(stream, sout->ext);
+ }
+}
+
+static void sctp_sched_prio_unsched_all(struct sctp_stream *stream)
+{
+ struct sctp_stream_priorities *p, *tmp;
+ struct sctp_stream_out_ext *soute, *souttmp;
+
+ list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched)
+ list_for_each_entry_safe(soute, souttmp, &p->active, prio_list)
+ sctp_sched_prio_unsched(soute);
+}
+
+struct sctp_sched_ops sctp_sched_prio = {
+ .set = sctp_sched_prio_set,
+ .get = sctp_sched_prio_get,
+ .init = sctp_sched_prio_init,
+ .init_sid = sctp_sched_prio_init_sid,
+ .free = sctp_sched_prio_free,
+ .enqueue = sctp_sched_prio_enqueue,
+ .dequeue = sctp_sched_prio_dequeue,
+ .dequeue_done = sctp_sched_prio_dequeue_done,
+ .sched_all = sctp_sched_prio_sched_all,
+ .unsched_all = sctp_sched_prio_unsched_all,
+};
diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c
new file mode 100644
index 000000000000..7612a438c5b9
--- /dev/null
+++ b/net/sctp/stream_sched_rr.c
@@ -0,0 +1,201 @@
+/* SCTP kernel implementation
+ * (C) Copyright Red Hat Inc. 2017
+ *
+ * This file is part of the SCTP kernel implementation
+ *
+ * These functions manipulate sctp stream queue/scheduling.
+ *
+ * This SCTP implementation is free software;
+ * you can redistribute it and/or modify it under the terms of
+ * the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * This SCTP implementation is distributed in the hope that it
+ * will be useful, but WITHOUT ANY WARRANTY; without even the implied
+ * ************************
+ * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU CC; see the file COPYING. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * Please send any bug reports or fixes you make to the
+ * email addresched(es):
+ * lksctp developers <linux-sctp@vger.kernel.org>
+ *
+ * Written or modified by:
+ * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+ */
+
+#include <linux/list.h>
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+#include <net/sctp/stream_sched.h>
+
+/* Priority handling
+ * RFC DRAFT ndata section 3.2
+ */
+static void sctp_sched_rr_unsched_all(struct sctp_stream *stream);
+
+static void sctp_sched_rr_next_stream(struct sctp_stream *stream)
+{
+ struct list_head *pos;
+
+ pos = stream->rr_next->rr_list.next;
+ if (pos == &stream->rr_list)
+ pos = pos->next;
+ stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list);
+}
+
+static void sctp_sched_rr_unsched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ if (stream->rr_next == soute)
+ /* Try to move to the next stream */
+ sctp_sched_rr_next_stream(stream);
+
+ list_del_init(&soute->rr_list);
+
+ /* If we have no other stream queued, clear next */
+ if (list_empty(&stream->rr_list))
+ stream->rr_next = NULL;
+}
+
+static void sctp_sched_rr_sched(struct sctp_stream *stream,
+ struct sctp_stream_out_ext *soute)
+{
+ if (!list_empty(&soute->rr_list))
+ /* Already scheduled. */
+ return;
+
+ /* Schedule the stream */
+ list_add_tail(&soute->rr_list, &stream->rr_list);
+
+ if (!stream->rr_next)
+ stream->rr_next = soute;
+}
+
+static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid,
+ __u16 prio, gfp_t gfp)
+{
+ return 0;
+}
+
+static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid,
+ __u16 *value)
+{
+ return 0;
+}
+
+static int sctp_sched_rr_init(struct sctp_stream *stream)
+{
+ INIT_LIST_HEAD(&stream->rr_list);
+ stream->rr_next = NULL;
+
+ return 0;
+}
+
+static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid,
+ gfp_t gfp)
+{
+ INIT_LIST_HEAD(&stream->out[sid].ext->rr_list);
+
+ return 0;
+}
+
+static void sctp_sched_rr_free(struct sctp_stream *stream)
+{
+ sctp_sched_rr_unsched_all(stream);
+}
+
+static void sctp_sched_rr_enqueue(struct sctp_outq *q,
+ struct sctp_datamsg *msg)
+{
+ struct sctp_stream *stream;
+ struct sctp_chunk *ch;
+ __u16 sid;
+
+ ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list);
+ sid = sctp_chunk_stream_no(ch);
+ stream = &q->asoc->stream;
+ sctp_sched_rr_sched(stream, stream->out[sid].ext);
+}
+
+static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q)
+{
+ struct sctp_stream *stream = &q->asoc->stream;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch = NULL;
+
+ /* Bail out quickly if queue is empty */
+ if (list_empty(&q->out_chunk_list))
+ goto out;
+
+ /* Find which chunk is next */
+ if (stream->out_curr)
+ soute = stream->out_curr->ext;
+ else
+ soute = stream->rr_next;
+ ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list);
+
+ sctp_sched_dequeue_common(q, ch);
+
+out:
+ return ch;
+}
+
+static void sctp_sched_rr_dequeue_done(struct sctp_outq *q,
+ struct sctp_chunk *ch)
+{
+ struct sctp_stream_out_ext *soute;
+ __u16 sid;
+
+ /* Last chunk on that msg, move to the next stream */
+ sid = sctp_chunk_stream_no(ch);
+ soute = q->asoc->stream.out[sid].ext;
+
+ sctp_sched_rr_next_stream(&q->asoc->stream);
+
+ if (list_empty(&soute->outq))
+ sctp_sched_rr_unsched(&q->asoc->stream, soute);
+}
+
+static void sctp_sched_rr_sched_all(struct sctp_stream *stream)
+{
+ struct sctp_association *asoc;
+ struct sctp_stream_out_ext *soute;
+ struct sctp_chunk *ch;
+
+ asoc = container_of(stream, struct sctp_association, stream);
+ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
+ __u16 sid;
+
+ sid = sctp_chunk_stream_no(ch);
+ soute = stream->out[sid].ext;
+ if (soute)
+ sctp_sched_rr_sched(stream, soute);
+ }
+}
+
+static void sctp_sched_rr_unsched_all(struct sctp_stream *stream)
+{
+ struct sctp_stream_out_ext *soute, *tmp;
+
+ list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list)
+ sctp_sched_rr_unsched(stream, soute);
+}
+
+struct sctp_sched_ops sctp_sched_rr = {
+ .set = sctp_sched_rr_set,
+ .get = sctp_sched_rr_get,
+ .init = sctp_sched_rr_init,
+ .init_sid = sctp_sched_rr_init_sid,
+ .free = sctp_sched_rr_free,
+ .enqueue = sctp_sched_rr_enqueue,
+ .dequeue = sctp_sched_rr_dequeue,
+ .dequeue_done = sctp_sched_rr_dequeue_done,
+ .sched_all = sctp_sched_rr_sched_all,
+ .unsched_all = sctp_sched_rr_unsched_all,
+};
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index a7294edbc221..5ef97e5a5f78 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -62,10 +62,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
bh_unlock_sock(&smc->sk);
}
-int smc_cdc_get_free_slot(struct smc_link *link,
+int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_wr_buf **wr_buf,
struct smc_cdc_tx_pend **pend)
{
+ struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+
return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
(struct smc_wr_tx_pend_priv **)pend);
}
@@ -118,8 +120,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn)
struct smc_wr_buf *wr_buf;
int rc;
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
- &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (rc)
return rc;
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 8e1d76f26007..56f883d1159c 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -206,7 +206,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local,
struct smc_cdc_tx_pend;
-int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf,
+int smc_cdc_get_free_slot(struct smc_connection *conn,
+ struct smc_wr_buf **wr_buf,
struct smc_cdc_tx_pend **pend);
void smc_cdc_tx_dismiss_slots(struct smc_connection *conn);
int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 20b66e79c5d6..5f6a20084157 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -380,10 +380,14 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
&gattr))
continue;
- if (gattr.ndev &&
- (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) {
- lnk->gid = gid;
- return 0;
+ if (gattr.ndev) {
+ if (is_vlan_dev(gattr.ndev) &&
+ vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
+ lnk->gid = gid;
+ dev_put(gattr.ndev);
+ return 0;
+ }
+ dev_put(gattr.ndev);
}
}
return -ENODEV;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 0b5852299158..468e1d725d97 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -369,26 +369,17 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
- struct net_device *ndev;
+ struct ib_gid_attr gattr;
int rc;
rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
- &smcibdev->gid[ibport - 1], NULL);
- /* the SMC protocol requires specification of the roce MAC address;
- * if net_device cannot be determined, it can be derived from gid 0
- */
- ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport);
- if (ndev) {
- memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN);
- dev_put(ndev);
- } else if (!rc) {
- memcpy(&smcibdev->mac[ibport - 1][0],
- &smcibdev->gid[ibport - 1].raw[8], 3);
- memcpy(&smcibdev->mac[ibport - 1][3],
- &smcibdev->gid[ibport - 1].raw[13], 3);
- smcibdev->mac[ibport - 1][0] &= ~0x02;
- }
- return rc;
+ &smcibdev->gid[ibport - 1], &gattr);
+ if (rc || !gattr.ndev)
+ return -ENODEV;
+
+ memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
+ dev_put(gattr.ndev);
+ return 0;
}
/* Create an identifier unique for this instance of SMC-R.
@@ -419,6 +410,7 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
&smcibdev->pattr[ibport - 1]);
if (rc)
goto out;
+ /* the SMC protocol requires specification of the RoCE MAC address */
rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
if (rc)
goto out;
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3866573288dd..ec49bc3c3283 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -396,8 +396,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
int rc;
spin_lock_bh(&conn->send_lock);
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf,
- &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (rc < 0) {
if (rc == -EBUSY) {
struct smc_sock *smc =
@@ -466,8 +465,7 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
- rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
- &wr_buf, &pend);
+ rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend);
if (!rc)
rc = smc_cdc_msg_send(conn, wr_buf, pend);
if (rc < 0) {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 0cc83839c13c..5dea47eb31bb 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -44,7 +44,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly;
static void rpc_async_schedule(struct work_struct *);
static void rpc_release_task(struct rpc_task *task);
-static void __rpc_queue_timer_fn(unsigned long ptr);
+static void __rpc_queue_timer_fn(struct timer_list *t);
/*
* RPC tasks sit here while waiting for conditions to improve.
@@ -228,7 +228,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
queue->maxpriority = nr_queues - 1;
rpc_reset_waitqueue_priority(queue);
queue->qlen = 0;
- setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue);
+ timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0);
INIT_LIST_HEAD(&queue->timer_list.list);
rpc_assign_waitqueue_name(queue, qname);
}
@@ -635,9 +635,9 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
}
EXPORT_SYMBOL_GPL(rpc_wake_up_status);
-static void __rpc_queue_timer_fn(unsigned long ptr)
+static void __rpc_queue_timer_fn(struct timer_list *t)
{
- struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr;
+ struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer);
struct rpc_task *task, *n;
unsigned long expires, now, timeo;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index aa04666f929d..33f4ae68426d 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -455,7 +455,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
serv->sv_xdrsize = xdrsize;
INIT_LIST_HEAD(&serv->sv_tempsocks);
INIT_LIST_HEAD(&serv->sv_permsocks);
- init_timer(&serv->sv_temptimer);
+ timer_setup(&serv->sv_temptimer, NULL, 0);
spin_lock_init(&serv->sv_lock);
__svc_init_bc(serv);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d16a8b423c20..71de77bd4423 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -28,7 +28,7 @@ module_param(svc_rpc_per_connection_limit, uint, 0644);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
-static void svc_age_temp_xprts(unsigned long closure);
+static void svc_age_temp_xprts(struct timer_list *t);
static void svc_delete_xprt(struct svc_xprt *xprt);
/* apparently the "standard" is that clients close
@@ -785,8 +785,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt
serv->sv_tmpcnt++;
if (serv->sv_temptimer.function == NULL) {
/* setup timer to age temp transports */
- setup_timer(&serv->sv_temptimer, svc_age_temp_xprts,
- (unsigned long)serv);
+ serv->sv_temptimer.function = (TIMER_FUNC_TYPE)svc_age_temp_xprts;
mod_timer(&serv->sv_temptimer,
jiffies + svc_conn_age_period * HZ);
}
@@ -960,9 +959,9 @@ out:
* Timer function to close old temporary transports, using
* a mark-and-sweep algorithm.
*/
-static void svc_age_temp_xprts(unsigned long closure)
+static void svc_age_temp_xprts(struct timer_list *t)
{
- struct svc_serv *serv = (struct svc_serv *)closure;
+ struct svc_serv *serv = from_timer(serv, t, sv_temptimer);
struct svc_xprt *xprt;
struct list_head *le, *next;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index e741ec2b4d8e..4b00302e1867 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -696,9 +696,9 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
}
static void
-xprt_init_autodisconnect(unsigned long data)
+xprt_init_autodisconnect(struct timer_list *t)
{
- struct rpc_xprt *xprt = (struct rpc_xprt *)data;
+ struct rpc_xprt *xprt = from_timer(xprt, t, timer);
spin_lock(&xprt->transport_lock);
if (!list_empty(&xprt->recv))
@@ -1422,10 +1422,9 @@ found:
xprt->idle_timeout = 0;
INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
if (xprt_has_timer(xprt))
- setup_timer(&xprt->timer, xprt_init_autodisconnect,
- (unsigned long)xprt);
+ timer_setup(&xprt->timer, xprt_init_autodisconnect, 0);
else
- init_timer(&xprt->timer);
+ timer_setup(&xprt->timer, NULL, 0);
if (strlen(args->servername) > RPC_MAXNETNAMELEN) {
xprt_destroy(xprt);
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 31b9f9c52974..a3af73ec0b78 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -8,7 +8,7 @@ tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- server.o socket.o
+ server.o socket.o group.o
tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o
tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a140dd4a84af..329325bd553e 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -258,20 +258,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts,
static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_nlist *dests, u16 *cong_link_cnt)
{
+ struct tipc_dest *dst, *tmp;
struct sk_buff_head _pkts;
- struct u32_item *n, *tmp;
- u32 dst, selector;
+ u32 dnode, selector;
selector = msg_link_selector(buf_msg(skb_peek(pkts)));
skb_queue_head_init(&_pkts);
- list_for_each_entry_safe(n, tmp, &dests->list, list) {
- dst = n->value;
- if (!tipc_msg_pskb_copy(dst, pkts, &_pkts))
+ list_for_each_entry_safe(dst, tmp, &dests->list, list) {
+ dnode = dst->node;
+ if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts))
return -ENOMEM;
/* Any other return value than -ELINKCONG is ignored */
- if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG)
+ if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG)
(*cong_link_cnt)++;
}
return 0;
@@ -554,7 +554,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node)
{
if (node == nl->self)
nl->local = true;
- else if (u32_push(&nl->list, node))
+ else if (tipc_dest_push(&nl->list, node, 0))
nl->remote++;
}
@@ -562,13 +562,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node)
{
if (node == nl->self)
nl->local = false;
- else if (u32_del(&nl->list, node))
+ else if (tipc_dest_del(&nl->list, node, 0))
nl->remote--;
}
void tipc_nlist_purge(struct tipc_nlist *nl)
{
- u32_list_purge(&nl->list);
+ tipc_dest_list_purge(&nl->list);
nl->remote = 0;
nl->local = 0;
}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 5cc5398be722..964342689f2c 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -132,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net)
return &tipc_net(net)->node_list;
}
+static inline struct tipc_server *tipc_topsrv(struct net *net)
+{
+ return tipc_net(net)->topsrv;
+}
+
static inline unsigned int tipc_hashfn(u32 addr)
{
return addr & (NODE_HTABLE_SIZE - 1);
diff --git a/net/tipc/group.c b/net/tipc/group.c
new file mode 100644
index 000000000000..7821085a7dd8
--- /dev/null
+++ b/net/tipc/group.c
@@ -0,0 +1,871 @@
+/*
+ * net/tipc/group.c: TIPC group messaging code
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "core.h"
+#include "addr.h"
+#include "group.h"
+#include "bcast.h"
+#include "server.h"
+#include "msg.h"
+#include "socket.h"
+#include "node.h"
+#include "name_table.h"
+#include "subscr.h"
+
+#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1)
+#define ADV_IDLE ADV_UNIT
+#define ADV_ACTIVE (ADV_UNIT * 12)
+
+enum mbr_state {
+ MBR_QUARANTINED,
+ MBR_DISCOVERED,
+ MBR_JOINING,
+ MBR_PUBLISHED,
+ MBR_JOINED,
+ MBR_PENDING,
+ MBR_ACTIVE,
+ MBR_RECLAIMING,
+ MBR_REMITTED,
+ MBR_LEAVING
+};
+
+struct tipc_member {
+ struct rb_node tree_node;
+ struct list_head list;
+ struct list_head congested;
+ struct sk_buff *event_msg;
+ struct sk_buff_head deferredq;
+ struct tipc_group *group;
+ u32 node;
+ u32 port;
+ u32 instance;
+ enum mbr_state state;
+ u16 advertised;
+ u16 window;
+ u16 bc_rcv_nxt;
+ u16 bc_syncpt;
+ u16 bc_acked;
+ bool usr_pending;
+};
+
+struct tipc_group {
+ struct rb_root members;
+ struct list_head congested;
+ struct list_head pending;
+ struct list_head active;
+ struct list_head reclaiming;
+ struct tipc_nlist dests;
+ struct net *net;
+ int subid;
+ u32 type;
+ u32 instance;
+ u32 domain;
+ u32 scope;
+ u32 portid;
+ u16 member_cnt;
+ u16 active_cnt;
+ u16 max_active;
+ u16 bc_snd_nxt;
+ u16 bc_ackers;
+ bool loopback;
+ bool events;
+};
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+ int mtyp, struct sk_buff_head *xmitq);
+
+static void tipc_group_decr_active(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING)
+ grp->active_cnt--;
+}
+
+static int tipc_group_rcvbuf_limit(struct tipc_group *grp)
+{
+ int max_active, active_pool, idle_pool;
+ int mcnt = grp->member_cnt + 1;
+
+ /* Limit simultaneous reception from other members */
+ max_active = min(mcnt / 8, 64);
+ max_active = max(max_active, 16);
+ grp->max_active = max_active;
+
+ /* Reserve blocks for active and idle members */
+ active_pool = max_active * ADV_ACTIVE;
+ idle_pool = (mcnt - max_active) * ADV_IDLE;
+
+ /* Scale to bytes, considering worst-case truesize/msgsize ratio */
+ return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4;
+}
+
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp)
+{
+ return grp->bc_snd_nxt;
+}
+
+static bool tipc_group_is_enabled(struct tipc_member *m)
+{
+ return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING;
+}
+
+static bool tipc_group_is_receiver(struct tipc_member *m)
+{
+ return m && m->state >= MBR_JOINED;
+}
+
+u32 tipc_group_exclude(struct tipc_group *grp)
+{
+ if (!grp->loopback)
+ return grp->portid;
+ return 0;
+}
+
+int tipc_group_size(struct tipc_group *grp)
+{
+ return grp->member_cnt;
+}
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+ struct tipc_group_req *mreq)
+{
+ struct tipc_group *grp;
+ u32 type = mreq->type;
+
+ grp = kzalloc(sizeof(*grp), GFP_ATOMIC);
+ if (!grp)
+ return NULL;
+ tipc_nlist_init(&grp->dests, tipc_own_addr(net));
+ INIT_LIST_HEAD(&grp->congested);
+ INIT_LIST_HEAD(&grp->active);
+ INIT_LIST_HEAD(&grp->pending);
+ INIT_LIST_HEAD(&grp->reclaiming);
+ grp->members = RB_ROOT;
+ grp->net = net;
+ grp->portid = portid;
+ grp->domain = addr_domain(net, mreq->scope);
+ grp->type = type;
+ grp->instance = mreq->instance;
+ grp->scope = mreq->scope;
+ grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK;
+ grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS;
+ if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid))
+ return grp;
+ kfree(grp);
+ return NULL;
+}
+
+void tipc_group_delete(struct net *net, struct tipc_group *grp)
+{
+ struct rb_root *tree = &grp->members;
+ struct tipc_member *m, *tmp;
+ struct sk_buff_head xmitq;
+
+ __skb_queue_head_init(&xmitq);
+
+ rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) {
+ tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq);
+ list_del(&m->list);
+ kfree(m);
+ }
+ tipc_node_distr_xmit(net, &xmitq);
+ tipc_nlist_purge(&grp->dests);
+ tipc_topsrv_kern_unsubscr(net, grp->subid);
+ kfree(grp);
+}
+
+struct tipc_member *tipc_group_find_member(struct tipc_group *grp,
+ u32 node, u32 port)
+{
+ struct rb_node *n = grp->members.rb_node;
+ u64 nkey, key = (u64)node << 32 | port;
+ struct tipc_member *m;
+
+ while (n) {
+ m = container_of(n, struct tipc_member, tree_node);
+ nkey = (u64)m->node << 32 | m->port;
+ if (key < nkey)
+ n = n->rb_left;
+ else if (key > nkey)
+ n = n->rb_right;
+ else
+ return m;
+ }
+ return NULL;
+}
+
+static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp,
+ u32 node, u32 port)
+{
+ struct tipc_member *m;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (m && tipc_group_is_enabled(m))
+ return m;
+ return NULL;
+}
+
+static struct tipc_member *tipc_group_find_node(struct tipc_group *grp,
+ u32 node)
+{
+ struct tipc_member *m;
+ struct rb_node *n;
+
+ for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+ m = container_of(n, struct tipc_member, tree_node);
+ if (m->node == node)
+ return m;
+ }
+ return NULL;
+}
+
+static void tipc_group_add_to_tree(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ u64 nkey, key = (u64)m->node << 32 | m->port;
+ struct rb_node **n, *parent = NULL;
+ struct tipc_member *tmp;
+
+ n = &grp->members.rb_node;
+ while (*n) {
+ tmp = container_of(*n, struct tipc_member, tree_node);
+ parent = *n;
+ tmp = container_of(parent, struct tipc_member, tree_node);
+ nkey = (u64)tmp->node << 32 | tmp->port;
+ if (key < nkey)
+ n = &(*n)->rb_left;
+ else if (key > nkey)
+ n = &(*n)->rb_right;
+ else
+ return;
+ }
+ rb_link_node(&m->tree_node, parent, n);
+ rb_insert_color(&m->tree_node, &grp->members);
+}
+
+static struct tipc_member *tipc_group_create_member(struct tipc_group *grp,
+ u32 node, u32 port,
+ int state)
+{
+ struct tipc_member *m;
+
+ m = kzalloc(sizeof(*m), GFP_ATOMIC);
+ if (!m)
+ return NULL;
+ INIT_LIST_HEAD(&m->list);
+ INIT_LIST_HEAD(&m->congested);
+ __skb_queue_head_init(&m->deferredq);
+ m->group = grp;
+ m->node = node;
+ m->port = port;
+ m->bc_acked = grp->bc_snd_nxt - 1;
+ grp->member_cnt++;
+ tipc_group_add_to_tree(grp, m);
+ tipc_nlist_add(&grp->dests, m->node);
+ m->state = state;
+ return m;
+}
+
+void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port)
+{
+ tipc_group_create_member(grp, node, port, MBR_DISCOVERED);
+}
+
+static void tipc_group_delete_member(struct tipc_group *grp,
+ struct tipc_member *m)
+{
+ rb_erase(&m->tree_node, &grp->members);
+ grp->member_cnt--;
+
+ /* Check if we were waiting for replicast ack from this member */
+ if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1))
+ grp->bc_ackers--;
+
+ list_del_init(&m->list);
+ list_del_init(&m->congested);
+ tipc_group_decr_active(grp, m);
+
+ /* If last member on a node, remove node from dest list */
+ if (!tipc_group_find_node(grp, m->node))
+ tipc_nlist_del(&grp->dests, m->node);
+
+ kfree(m);
+}
+
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp)
+{
+ return &grp->dests;
+}
+
+void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+ int *scope)
+{
+ seq->type = grp->type;
+ seq->lower = grp->instance;
+ seq->upper = grp->instance;
+ *scope = grp->scope;
+}
+
+void tipc_group_update_member(struct tipc_member *m, int len)
+{
+ struct tipc_group *grp = m->group;
+ struct tipc_member *_m, *tmp;
+
+ if (!tipc_group_is_enabled(m))
+ return;
+
+ m->window -= len;
+
+ if (m->window >= ADV_IDLE)
+ return;
+
+ if (!list_empty(&m->congested))
+ return;
+
+ /* Sort member into congested members' list */
+ list_for_each_entry_safe(_m, tmp, &grp->congested, congested) {
+ if (m->window > _m->window)
+ continue;
+ list_add_tail(&m->congested, &_m->congested);
+ return;
+ }
+ list_add_tail(&m->congested, &grp->congested);
+}
+
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack)
+{
+ u16 prev = grp->bc_snd_nxt - 1;
+ struct tipc_member *m;
+ struct rb_node *n;
+
+ for (n = rb_first(&grp->members); n; n = rb_next(n)) {
+ m = container_of(n, struct tipc_member, tree_node);
+ if (tipc_group_is_enabled(m)) {
+ tipc_group_update_member(m, len);
+ m->bc_acked = prev;
+ }
+ }
+
+ /* Mark number of acknowledges to expect, if any */
+ if (ack)
+ grp->bc_ackers = grp->member_cnt;
+ grp->bc_snd_nxt++;
+}
+
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+ int len, struct tipc_member **mbr)
+{
+ struct sk_buff_head xmitq;
+ struct tipc_member *m;
+ int adv, state;
+
+ m = tipc_group_find_dest(grp, dnode, dport);
+ *mbr = m;
+ if (!m)
+ return false;
+ if (m->usr_pending)
+ return true;
+ if (m->window >= len)
+ return false;
+ m->usr_pending = true;
+
+ /* If not fully advertised, do it now to prevent mutual blocking */
+ adv = m->advertised;
+ state = m->state;
+ if (state < MBR_JOINED)
+ return true;
+ if (state == MBR_JOINED && adv == ADV_IDLE)
+ return true;
+ if (state == MBR_ACTIVE && adv == ADV_ACTIVE)
+ return true;
+ if (state == MBR_PENDING && adv == ADV_IDLE)
+ return true;
+ skb_queue_head_init(&xmitq);
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq);
+ tipc_node_distr_xmit(grp->net, &xmitq);
+ return true;
+}
+
+bool tipc_group_bc_cong(struct tipc_group *grp, int len)
+{
+ struct tipc_member *m = NULL;
+
+ /* If prev bcast was replicast, reject until all receivers have acked */
+ if (grp->bc_ackers)
+ return true;
+
+ if (list_empty(&grp->congested))
+ return false;
+
+ m = list_first_entry(&grp->congested, struct tipc_member, congested);
+ if (m->window >= len)
+ return false;
+
+ return tipc_group_cong(grp, m->node, m->port, len, &m);
+}
+
+/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number
+ */
+static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq)
+{
+ struct tipc_msg *_hdr, *hdr = buf_msg(skb);
+ u16 bc_seqno = msg_grp_bc_seqno(hdr);
+ struct sk_buff *_skb, *tmp;
+ int mtyp = msg_type(hdr);
+
+ /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */
+ if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) {
+ skb_queue_walk_safe(defq, _skb, tmp) {
+ _hdr = buf_msg(_skb);
+ if (!less(bc_seqno, msg_grp_bc_seqno(_hdr)))
+ continue;
+ __skb_queue_before(defq, _skb, skb);
+ return;
+ }
+ /* Bcast was not bypassed, - add to tail */
+ }
+ /* Unicasts are never bypassed, - always add to tail */
+ __skb_queue_tail(defq, skb);
+}
+
+/* tipc_group_filter_msg() - determine if we should accept arriving message
+ */
+void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = __skb_dequeue(inputq);
+ bool ack, deliver, update, leave = false;
+ struct sk_buff_head *defq;
+ struct tipc_member *m;
+ struct tipc_msg *hdr;
+ u32 node, port;
+ int mtyp, blks;
+
+ if (!skb)
+ return;
+
+ hdr = buf_msg(skb);
+ node = msg_orignode(hdr);
+ port = msg_origport(hdr);
+
+ if (!msg_in_group(hdr))
+ goto drop;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (!tipc_group_is_receiver(m))
+ goto drop;
+
+ if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
+ goto drop;
+
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ defq = &m->deferredq;
+ tipc_group_sort_msg(skb, defq);
+
+ while ((skb = skb_peek(defq))) {
+ hdr = buf_msg(skb);
+ mtyp = msg_type(hdr);
+ deliver = true;
+ ack = false;
+ update = false;
+
+ if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt))
+ break;
+
+ /* Decide what to do with message */
+ switch (mtyp) {
+ case TIPC_GRP_MCAST_MSG:
+ if (msg_nameinst(hdr) != grp->instance) {
+ update = true;
+ deliver = false;
+ }
+ /* Fall thru */
+ case TIPC_GRP_BCAST_MSG:
+ m->bc_rcv_nxt++;
+ ack = msg_grp_bc_ack_req(hdr);
+ break;
+ case TIPC_GRP_UCAST_MSG:
+ break;
+ case TIPC_GRP_MEMBER_EVT:
+ if (m->state == MBR_LEAVING)
+ leave = true;
+ if (!grp->events)
+ deliver = false;
+ break;
+ default:
+ break;
+ }
+
+ /* Execute decisions */
+ __skb_dequeue(defq);
+ if (deliver)
+ __skb_queue_tail(inputq, skb);
+ else
+ kfree_skb(skb);
+
+ if (ack)
+ tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq);
+
+ if (leave) {
+ tipc_group_delete_member(grp, m);
+ __skb_queue_purge(defq);
+ break;
+ }
+ if (!update)
+ continue;
+
+ blks = msg_blocks(hdr);
+ tipc_group_update_rcv_win(grp, blks, node, port, xmitq);
+ }
+ return;
+drop:
+ kfree_skb(skb);
+}
+
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+ u32 port, struct sk_buff_head *xmitq)
+{
+ struct list_head *active = &grp->active;
+ int max_active = grp->max_active;
+ int reclaim_limit = max_active * 3 / 4;
+ int active_cnt = grp->active_cnt;
+ struct tipc_member *m, *rm;
+
+ m = tipc_group_find_member(grp, node, port);
+ if (!m)
+ return;
+
+ m->advertised -= blks;
+
+ switch (m->state) {
+ case MBR_JOINED:
+ /* Reclaim advertised space from least active member */
+ if (!list_empty(active) && active_cnt >= reclaim_limit) {
+ rm = list_first_entry(active, struct tipc_member, list);
+ rm->state = MBR_RECLAIMING;
+ list_move_tail(&rm->list, &grp->reclaiming);
+ tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq);
+ }
+ /* If max active, become pending and wait for reclaimed space */
+ if (active_cnt >= max_active) {
+ m->state = MBR_PENDING;
+ list_add_tail(&m->list, &grp->pending);
+ break;
+ }
+ /* Otherwise become active */
+ m->state = MBR_ACTIVE;
+ list_add_tail(&m->list, &grp->active);
+ grp->active_cnt++;
+ /* Fall through */
+ case MBR_ACTIVE:
+ if (!list_is_last(&m->list, &grp->active))
+ list_move_tail(&m->list, &grp->active);
+ if (m->advertised > (ADV_ACTIVE * 3 / 4))
+ break;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ break;
+ case MBR_REMITTED:
+ if (m->advertised > ADV_IDLE)
+ break;
+ m->state = MBR_JOINED;
+ if (m->advertised < ADV_IDLE) {
+ pr_warn_ratelimited("Rcv unexpected msg after REMIT\n");
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ }
+ break;
+ case MBR_RECLAIMING:
+ case MBR_DISCOVERED:
+ case MBR_JOINING:
+ case MBR_LEAVING:
+ default:
+ break;
+ }
+}
+
+static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m,
+ int mtyp, struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ int adv = 0;
+
+ skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0,
+ m->node, tipc_own_addr(grp->net),
+ m->port, grp->portid, 0);
+ if (!skb)
+ return;
+
+ if (m->state == MBR_ACTIVE)
+ adv = ADV_ACTIVE - m->advertised;
+ else if (m->state == MBR_JOINED || m->state == MBR_PENDING)
+ adv = ADV_IDLE - m->advertised;
+
+ hdr = buf_msg(skb);
+
+ if (mtyp == GRP_JOIN_MSG) {
+ msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+ msg_set_adv_win(hdr, adv);
+ m->advertised += adv;
+ } else if (mtyp == GRP_LEAVE_MSG) {
+ msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt);
+ } else if (mtyp == GRP_ADV_MSG) {
+ msg_set_adv_win(hdr, adv);
+ m->advertised += adv;
+ } else if (mtyp == GRP_ACK_MSG) {
+ msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt);
+ } else if (mtyp == GRP_REMIT_MSG) {
+ msg_set_grp_remitted(hdr, m->window);
+ }
+ __skb_queue_tail(xmitq, skb);
+}
+
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
+ struct tipc_msg *hdr, struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ u32 node = msg_orignode(hdr);
+ u32 port = msg_origport(hdr);
+ struct tipc_member *m, *pm;
+ struct tipc_msg *ehdr;
+ u16 remitted, in_flight;
+
+ if (!grp)
+ return;
+
+ m = tipc_group_find_member(grp, node, port);
+
+ switch (msg_type(hdr)) {
+ case GRP_JOIN_MSG:
+ if (!m)
+ m = tipc_group_create_member(grp, node, port,
+ MBR_QUARANTINED);
+ if (!m)
+ return;
+ m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+ m->bc_rcv_nxt = m->bc_syncpt;
+ m->window += msg_adv_win(hdr);
+
+ /* Wait until PUBLISH event is received */
+ if (m->state == MBR_DISCOVERED) {
+ m->state = MBR_JOINING;
+ } else if (m->state == MBR_PUBLISHED) {
+ m->state = MBR_JOINED;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+ ehdr = buf_msg(m->event_msg);
+ msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, m->event_msg);
+ }
+ if (m->window < ADV_IDLE)
+ tipc_group_update_member(m, 0);
+ else
+ list_del_init(&m->congested);
+ return;
+ case GRP_LEAVE_MSG:
+ if (!m)
+ return;
+ m->bc_syncpt = msg_grp_bc_syncpt(hdr);
+
+ /* Wait until WITHDRAW event is received */
+ if (m->state != MBR_LEAVING) {
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ return;
+ }
+ /* Otherwise deliver already received WITHDRAW event */
+ ehdr = buf_msg(m->event_msg);
+ msg_set_grp_bc_seqno(ehdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, m->event_msg);
+ *usr_wakeup = true;
+ list_del_init(&m->congested);
+ return;
+ case GRP_ADV_MSG:
+ if (!m)
+ return;
+ m->window += msg_adv_win(hdr);
+ *usr_wakeup = m->usr_pending;
+ m->usr_pending = false;
+ list_del_init(&m->congested);
+ return;
+ case GRP_ACK_MSG:
+ if (!m)
+ return;
+ m->bc_acked = msg_grp_bc_acked(hdr);
+ if (--grp->bc_ackers)
+ break;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ return;
+ case GRP_RECLAIM_MSG:
+ if (!m)
+ return;
+ *usr_wakeup = m->usr_pending;
+ m->usr_pending = false;
+ tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq);
+ m->window = ADV_IDLE;
+ return;
+ case GRP_REMIT_MSG:
+ if (!m || m->state != MBR_RECLAIMING)
+ return;
+
+ list_del_init(&m->list);
+ grp->active_cnt--;
+ remitted = msg_grp_remitted(hdr);
+
+ /* Messages preceding the REMIT still in receive queue */
+ if (m->advertised > remitted) {
+ m->state = MBR_REMITTED;
+ in_flight = m->advertised - remitted;
+ }
+ /* All messages preceding the REMIT have been read */
+ if (m->advertised <= remitted) {
+ m->state = MBR_JOINED;
+ in_flight = 0;
+ }
+ /* ..and the REMIT overtaken by more messages => re-advertise */
+ if (m->advertised < remitted)
+ tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq);
+
+ m->advertised = ADV_IDLE + in_flight;
+
+ /* Set oldest pending member to active and advertise */
+ if (list_empty(&grp->pending))
+ return;
+ pm = list_first_entry(&grp->pending, struct tipc_member, list);
+ pm->state = MBR_ACTIVE;
+ list_move_tail(&pm->list, &grp->active);
+ grp->active_cnt++;
+ if (pm->advertised <= (ADV_ACTIVE * 3 / 4))
+ tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq);
+ return;
+ default:
+ pr_warn("Received unknown GROUP_PROTO message\n");
+ }
+}
+
+/* tipc_group_member_evt() - receive and handle a member up/down event
+ */
+void tipc_group_member_evt(struct tipc_group *grp,
+ bool *usr_wakeup,
+ int *sk_rcvbuf,
+ struct sk_buff *skb,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_event *evt = (void *)msg_data(hdr);
+ u32 instance = evt->found_lower;
+ u32 node = evt->port.node;
+ u32 port = evt->port.ref;
+ int event = evt->event;
+ struct tipc_member *m;
+ struct net *net;
+ bool node_up;
+ u32 self;
+
+ if (!grp)
+ goto drop;
+
+ net = grp->net;
+ self = tipc_own_addr(net);
+ if (!grp->loopback && node == self && port == grp->portid)
+ goto drop;
+
+ /* Convert message before delivery to user */
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
+ msg_set_type(hdr, TIPC_GRP_MEMBER_EVT);
+ msg_set_origport(hdr, port);
+ msg_set_orignode(hdr, node);
+ msg_set_nametype(hdr, grp->type);
+ msg_set_grp_evt(hdr, event);
+
+ m = tipc_group_find_member(grp, node, port);
+
+ if (event == TIPC_PUBLISHED) {
+ if (!m)
+ m = tipc_group_create_member(grp, node, port,
+ MBR_DISCOVERED);
+ if (!m)
+ goto drop;
+
+ /* Hold back event if JOIN message not yet received */
+ if (m->state == MBR_DISCOVERED) {
+ m->event_msg = skb;
+ m->state = MBR_PUBLISHED;
+ } else {
+ msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+ __skb_queue_tail(inputq, skb);
+ m->state = MBR_JOINED;
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ }
+ m->instance = instance;
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+ tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq);
+ if (m->window < ADV_IDLE)
+ tipc_group_update_member(m, 0);
+ else
+ list_del_init(&m->congested);
+ } else if (event == TIPC_WITHDRAWN) {
+ if (!m)
+ goto drop;
+
+ TIPC_SKB_CB(skb)->orig_member = m->instance;
+
+ *usr_wakeup = true;
+ m->usr_pending = false;
+ node_up = tipc_node_is_up(net, node);
+
+ /* Hold back event if more messages might be expected */
+ if (m->state != MBR_LEAVING && node_up) {
+ m->event_msg = skb;
+ tipc_group_decr_active(grp, m);
+ m->state = MBR_LEAVING;
+ } else {
+ if (node_up)
+ msg_set_grp_bc_seqno(hdr, m->bc_syncpt);
+ else
+ msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt);
+ __skb_queue_tail(inputq, skb);
+ }
+ list_del_init(&m->congested);
+ }
+ *sk_rcvbuf = tipc_group_rcvbuf_limit(grp);
+ return;
+drop:
+ kfree_skb(skb);
+}
diff --git a/net/tipc/group.h b/net/tipc/group.h
new file mode 100644
index 000000000000..d525e1cd7de5
--- /dev/null
+++ b/net/tipc/group.h
@@ -0,0 +1,73 @@
+/*
+ * net/tipc/group.h: Include file for TIPC group unicast/multicast functions
+ *
+ * Copyright (c) 2017, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _TIPC_GROUP_H
+#define _TIPC_GROUP_H
+
+#include "core.h"
+
+struct tipc_group;
+struct tipc_member;
+struct tipc_msg;
+
+struct tipc_group *tipc_group_create(struct net *net, u32 portid,
+ struct tipc_group_req *mreq);
+void tipc_group_delete(struct net *net, struct tipc_group *grp);
+void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port);
+struct tipc_nlist *tipc_group_dests(struct tipc_group *grp);
+void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq,
+ int *scope);
+u32 tipc_group_exclude(struct tipc_group *grp);
+void tipc_group_filter_msg(struct tipc_group *grp,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup,
+ int *sk_rcvbuf, struct sk_buff *skb,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup,
+ struct tipc_msg *hdr,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq);
+void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack);
+bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport,
+ int len, struct tipc_member **m);
+bool tipc_group_bc_cong(struct tipc_group *grp, int len);
+void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node,
+ u32 port, struct sk_buff_head *xmitq);
+u16 tipc_group_bc_snd_nxt(struct tipc_group *grp);
+void tipc_group_update_member(struct tipc_member *m, int len);
+int tipc_group_size(struct tipc_group *grp);
+#endif
diff --git a/net/tipc/link.c b/net/tipc/link.c
index ac0144f532aa..870b9b8f877a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1039,6 +1039,7 @@ int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker,
static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *inputq)
{
+ struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq;
struct tipc_msg *hdr = buf_msg(skb);
switch (msg_user(hdr)) {
@@ -1046,13 +1047,16 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb,
case TIPC_MEDIUM_IMPORTANCE:
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
- if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) {
- skb_queue_tail(l->bc_rcvlink->inputq, skb);
+ if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) {
+ skb_queue_tail(mc_inputq, skb);
return true;
}
case CONN_MANAGER:
skb_queue_tail(inputq, skb);
return true;
+ case GROUP_PROTOCOL:
+ skb_queue_tail(mc_inputq, skb);
+ return true;
case NAME_DISTRIBUTOR:
l->bc_rcvlink->state = LINK_ESTABLISHED;
skb_queue_tail(l->namedq, skb);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 17146c16ee2d..1649d456e22d 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -666,3 +666,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
}
kfree_skb(skb);
}
+
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ if (tipc_msg_reverse(tipc_own_addr(net), &skb, err))
+ __skb_queue_tail(xmitq, skb);
+}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c843fd2bc48d..cedf811317fb 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1,7 +1,7 @@
/*
* net/tipc/msg.h: Include file for TIPC message header routines
*
- * Copyright (c) 2000-2007, 2014-2015 Ericsson AB
+ * Copyright (c) 2000-2007, 2014-2017 Ericsson AB
* Copyright (c) 2005-2008, 2010-2011, Wind River Systems
* All rights reserved.
*
@@ -61,10 +61,14 @@ struct plist;
/*
* Payload message types
*/
-#define TIPC_CONN_MSG 0
-#define TIPC_MCAST_MSG 1
-#define TIPC_NAMED_MSG 2
-#define TIPC_DIRECT_MSG 3
+#define TIPC_CONN_MSG 0
+#define TIPC_MCAST_MSG 1
+#define TIPC_NAMED_MSG 2
+#define TIPC_DIRECT_MSG 3
+#define TIPC_GRP_MEMBER_EVT 4
+#define TIPC_GRP_BCAST_MSG 5
+#define TIPC_GRP_MCAST_MSG 6
+#define TIPC_GRP_UCAST_MSG 7
/*
* Internal message users
@@ -73,11 +77,13 @@ struct plist;
#define MSG_BUNDLER 6
#define LINK_PROTOCOL 7
#define CONN_MANAGER 8
+#define GROUP_PROTOCOL 9
#define TUNNEL_PROTOCOL 10
#define NAME_DISTRIBUTOR 11
#define MSG_FRAGMENTER 12
#define LINK_CONFIG 13
#define SOCK_WAKEUP 14 /* pseudo user */
+#define TOP_SRV 15 /* pseudo user */
/*
* Message header sizes
@@ -86,6 +92,7 @@ struct plist;
#define BASIC_H_SIZE 32 /* Basic payload message */
#define NAMED_H_SIZE 40 /* Named payload message */
#define MCAST_H_SIZE 44 /* Multicast payload message */
+#define GROUP_H_SIZE 44 /* Group payload message */
#define INT_H_SIZE 40 /* Internal messages */
#define MIN_H_SIZE 24 /* Smallest legal TIPC header size */
#define MAX_H_SIZE 60 /* Largest possible TIPC header size */
@@ -96,6 +103,7 @@ struct plist;
struct tipc_skb_cb {
u32 bytes_read;
+ u32 orig_member;
struct sk_buff *tail;
bool validated;
u16 chain_imp;
@@ -188,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m)
return msg_bits(m, 0, 0, 0x1ffff);
}
+static inline u32 msg_blocks(struct tipc_msg *m)
+{
+ return (msg_size(m) / 1024) + 1;
+}
+
static inline u32 msg_data_sz(struct tipc_msg *m)
{
return msg_size(m) - msg_hdr_sz(m);
@@ -251,6 +264,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n)
msg_set_bits(m, 1, 29, 0x7, n);
}
+static inline int msg_in_group(struct tipc_msg *m)
+{
+ int mtyp = msg_type(m);
+
+ return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG;
+}
+
+static inline bool msg_is_grp_evt(struct tipc_msg *m)
+{
+ return msg_type(m) == TIPC_GRP_MEMBER_EVT;
+}
+
static inline u32 msg_named(struct tipc_msg *m)
{
return msg_type(m) == TIPC_NAMED_MSG;
@@ -258,7 +283,10 @@ static inline u32 msg_named(struct tipc_msg *m)
static inline u32 msg_mcast(struct tipc_msg *m)
{
- return msg_type(m) == TIPC_MCAST_MSG;
+ int mtyp = msg_type(m);
+
+ return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) ||
+ (mtyp == TIPC_GRP_MCAST_MSG));
}
static inline u32 msg_connected(struct tipc_msg *m)
@@ -514,6 +542,16 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n)
#define DSC_RESP_MSG 1
/*
+ * Group protocol message types
+ */
+#define GRP_JOIN_MSG 0
+#define GRP_LEAVE_MSG 1
+#define GRP_ADV_MSG 2
+#define GRP_ACK_MSG 3
+#define GRP_RECLAIM_MSG 4
+#define GRP_REMIT_MSG 5
+
+/*
* Word 1
*/
static inline u32 msg_seq_gap(struct tipc_msg *m)
@@ -764,12 +802,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 16, 0xffff, n);
}
-static inline u32 msg_adv_win(struct tipc_msg *m)
+static inline u16 msg_adv_win(struct tipc_msg *m)
{
return msg_bits(m, 9, 0, 0xffff);
}
-static inline void msg_set_adv_win(struct tipc_msg *m, u32 n)
+static inline void msg_set_adv_win(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 9, 0, 0xffff, n);
}
@@ -794,6 +832,68 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 0, 0xffff, n);
}
+static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_bc_acked(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+static inline u16 msg_grp_remitted(struct tipc_msg *m)
+{
+ return msg_bits(m, 9, 16, 0xffff);
+}
+
+static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n)
+{
+ msg_set_bits(m, 9, 16, 0xffff, n);
+}
+
+/* Word 10
+ */
+static inline u16 msg_grp_evt(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 0, 0x3);
+}
+
+static inline void msg_set_grp_evt(struct tipc_msg *m, int n)
+{
+ msg_set_bits(m, 10, 0, 0x3, n);
+}
+
+static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 0, 0x1);
+}
+
+static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n)
+{
+ msg_set_bits(m, 10, 0, 0x1, n);
+}
+
+static inline u16 msg_grp_bc_seqno(struct tipc_msg *m)
+{
+ return msg_bits(m, 10, 16, 0xffff);
+}
+
+static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n)
+{
+ msg_set_bits(m, 10, 16, 0xffff, n);
+}
+
static inline bool msg_peer_link_is_up(struct tipc_msg *m)
{
if (likely(msg_user(m) != LINK_PROTOCOL))
@@ -818,6 +918,8 @@ static inline bool msg_is_reset(struct tipc_msg *hdr)
struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp);
bool tipc_msg_validate(struct sk_buff *skb);
bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
+void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
u32 hsize, u32 destnode);
struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index bd0aac87b41a..2856e19e036e 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -43,6 +43,7 @@
#include "bcast.h"
#include "addr.h"
#include "node.h"
+#include "group.h"
#include <net/genetlink.h>
#define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */
@@ -596,18 +597,47 @@ not_found:
return ref;
}
-/**
- * tipc_nametbl_mc_translate - find multicast destinations
- *
- * Creates list of all local ports that overlap the given multicast address;
- * also determines if any off-node ports overlap.
- *
- * Note: Publications with a scope narrower than 'limit' are ignored.
- * (i.e. local node-scope publications mustn't receive messages arriving
- * from another node, even if the multcast link brought it here)
- *
- * Returns non-zero if any off-node ports overlap
- */
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+ struct list_head *dsts, int *dstcnt, u32 exclude,
+ bool all)
+{
+ u32 self = tipc_own_addr(net);
+ struct publication *publ;
+ struct name_info *info;
+ struct name_seq *seq;
+ struct sub_seq *sseq;
+
+ if (!tipc_in_scope(domain, self))
+ return false;
+
+ *dstcnt = 0;
+ rcu_read_lock();
+ seq = nametbl_find_seq(net, type);
+ if (unlikely(!seq))
+ goto exit;
+ spin_lock_bh(&seq->lock);
+ sseq = nameseq_find_subseq(seq, instance);
+ if (likely(sseq)) {
+ info = sseq->info;
+ list_for_each_entry(publ, &info->zone_list, zone_list) {
+ if (!tipc_in_scope(domain, publ->node))
+ continue;
+ if (publ->ref == exclude && publ->node == self)
+ continue;
+ tipc_dest_push(dsts, publ->node, publ->ref);
+ (*dstcnt)++;
+ if (all)
+ continue;
+ list_move_tail(&publ->zone_list, &info->zone_list);
+ break;
+ }
+ }
+ spin_unlock_bh(&seq->lock);
+exit:
+ rcu_read_unlock();
+ return !list_empty(dsts);
+}
+
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
u32 limit, struct list_head *dports)
{
@@ -634,7 +664,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
info = sseq->info;
list_for_each_entry(publ, &info->node_list, node_list) {
if (publ->scope <= limit)
- u32_push(dports, publ->ref);
+ tipc_dest_push(dports, 0, publ->ref);
}
if (info->cluster_list_size != info->node_list_size)
@@ -679,6 +709,37 @@ exit:
rcu_read_unlock();
}
+/* tipc_nametbl_build_group - build list of communication group members
+ */
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+ u32 type, u32 domain)
+{
+ struct sub_seq *sseq, *stop;
+ struct name_info *info;
+ struct publication *p;
+ struct name_seq *seq;
+
+ rcu_read_lock();
+ seq = nametbl_find_seq(net, type);
+ if (!seq)
+ goto exit;
+
+ spin_lock_bh(&seq->lock);
+ sseq = seq->sseqs;
+ stop = seq->sseqs + seq->first_free;
+ for (; sseq != stop; sseq++) {
+ info = sseq->info;
+ list_for_each_entry(p, &info->zone_list, zone_list) {
+ if (!tipc_in_scope(domain, p->node))
+ continue;
+ tipc_group_add_member(grp, p->node, p->ref);
+ }
+ }
+ spin_unlock_bh(&seq->lock);
+exit:
+ rcu_read_unlock();
+}
+
/*
* tipc_nametbl_publish - add name publication to network name tables
*/
@@ -1057,78 +1118,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-bool u32_find(struct list_head *l, u32 value)
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item;
+ u64 value = (u64)node << 32 | port;
+ struct tipc_dest *dst;
- list_for_each_entry(item, l, list) {
- if (item->value == value)
- return true;
+ list_for_each_entry(dst, l, list) {
+ if (dst->value != value)
+ continue;
+ return dst;
}
- return false;
+ return NULL;
}
-bool u32_push(struct list_head *l, u32 value)
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item;
+ u64 value = (u64)node << 32 | port;
+ struct tipc_dest *dst;
- list_for_each_entry(item, l, list) {
- if (item->value == value)
- return false;
- }
- item = kmalloc(sizeof(*item), GFP_ATOMIC);
- if (unlikely(!item))
+ if (tipc_dest_find(l, node, port))
return false;
- item->value = value;
- list_add(&item->list, l);
+ dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
+ if (unlikely(!dst))
+ return false;
+ dst->value = value;
+ list_add(&dst->list, l);
return true;
}
-u32 u32_pop(struct list_head *l)
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port)
{
- struct u32_item *item;
- u32 value = 0;
+ struct tipc_dest *dst;
if (list_empty(l))
- return 0;
- item = list_first_entry(l, typeof(*item), list);
- value = item->value;
- list_del(&item->list);
- kfree(item);
- return value;
+ return false;
+ dst = list_first_entry(l, typeof(*dst), list);
+ if (port)
+ *port = dst->port;
+ if (node)
+ *node = dst->node;
+ list_del(&dst->list);
+ kfree(dst);
+ return true;
}
-bool u32_del(struct list_head *l, u32 value)
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port)
{
- struct u32_item *item, *tmp;
+ struct tipc_dest *dst;
- list_for_each_entry_safe(item, tmp, l, list) {
- if (item->value != value)
- continue;
- list_del(&item->list);
- kfree(item);
- return true;
- }
- return false;
+ dst = tipc_dest_find(l, node, port);
+ if (!dst)
+ return false;
+ list_del(&dst->list);
+ kfree(dst);
+ return true;
}
-void u32_list_purge(struct list_head *l)
+void tipc_dest_list_purge(struct list_head *l)
{
- struct u32_item *item, *tmp;
+ struct tipc_dest *dst, *tmp;
- list_for_each_entry_safe(item, tmp, l, list) {
- list_del(&item->list);
- kfree(item);
+ list_for_each_entry_safe(dst, tmp, l, list) {
+ list_del(&dst->list);
+ kfree(dst);
}
}
-int u32_list_len(struct list_head *l)
+int tipc_dest_list_len(struct list_head *l)
{
- struct u32_item *item;
+ struct tipc_dest *dst;
int i = 0;
- list_for_each_entry(item, l, list) {
+ list_for_each_entry(dst, l, list) {
i++;
}
return i;
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 6ebdeb1d84a5..71926e429446 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -40,6 +40,7 @@
struct tipc_subscription;
struct tipc_plist;
struct tipc_nlist;
+struct tipc_group;
/*
* TIPC name types reserved for internal TIPC use (both current and planned)
@@ -101,9 +102,14 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb);
u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node);
int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper,
u32 limit, struct list_head *dports);
+void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp,
+ u32 type, u32 domain);
void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
u32 upper, u32 domain,
struct tipc_nlist *nodes);
+bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain,
+ struct list_head *dsts, int *dstcnt, u32 exclude,
+ bool all);
struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower,
u32 upper, u32 scope, u32 port_ref,
u32 key);
@@ -120,16 +126,22 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s);
int tipc_nametbl_init(struct net *net);
void tipc_nametbl_stop(struct net *net);
-struct u32_item {
+struct tipc_dest {
struct list_head list;
- u32 value;
+ union {
+ struct {
+ u32 port;
+ u32 node;
+ };
+ u64 value;
+ };
};
-bool u32_push(struct list_head *l, u32 value);
-u32 u32_pop(struct list_head *l);
-bool u32_find(struct list_head *l, u32 value);
-bool u32_del(struct list_head *l, u32 value);
-void u32_list_purge(struct list_head *l);
-int u32_list_len(struct list_head *l);
+struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_push(struct list_head *l, u32 node, u32 port);
+bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port);
+bool tipc_dest_del(struct list_head *l, u32 node, u32 port);
+void tipc_dest_list_purge(struct list_head *l);
+int tipc_dest_list_len(struct list_head *l);
#endif
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 198dbc7adbe1..89f8ac73bf65 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -157,7 +157,7 @@ static void tipc_node_timeout(unsigned long data);
static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
static struct tipc_node *tipc_node_find(struct net *net, u32 addr);
static void tipc_node_put(struct tipc_node *node);
-static bool tipc_node_is_up(struct tipc_node *n);
+static bool node_is_up(struct tipc_node *n);
struct tipc_sock_conn {
u32 port;
@@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
*slot1 = i;
}
- if (!tipc_node_is_up(n)) {
+ if (!node_is_up(n)) {
if (tipc_link_peer_is_down(l))
tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
@@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
tipc_sk_rcv(n->net, &le->inputq);
}
-static bool tipc_node_is_up(struct tipc_node *n)
+static bool node_is_up(struct tipc_node *n)
{
return n->active_links[0] != INVALID_BEARER_ID;
}
+bool tipc_node_is_up(struct net *net, u32 addr)
+{
+ struct tipc_node *n;
+ bool retval = false;
+
+ if (in_own_node(net, addr))
+ return true;
+
+ n = tipc_node_find(net, addr);
+ if (!n)
+ return false;
+ retval = node_is_up(n);
+ tipc_node_put(n);
+ return retval;
+}
+
void tipc_node_check_dest(struct net *net, u32 onode,
struct tipc_bearer *b,
u16 capabilities, u32 signature,
@@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node)
if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr))
goto attr_msg_full;
- if (tipc_node_is_up(node))
+ if (node_is_up(node))
if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP))
goto attr_msg_full;
@@ -1238,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
return 0;
}
+/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations
+ * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected
+ */
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb;
+ u32 selector, dnode;
+
+ while ((skb = __skb_dequeue(xmitq))) {
+ selector = msg_origport(buf_msg(skb));
+ dnode = msg_destnode(buf_msg(skb));
+ tipc_node_xmit_skb(net, skb, dnode, selector);
+ }
+ return 0;
+}
+
void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
{
struct sk_buff *txskb;
@@ -1249,7 +1281,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb)
dst = n->addr;
if (in_own_node(net, dst))
continue;
- if (!tipc_node_is_up(n))
+ if (!node_is_up(n))
continue;
txskb = pskb_copy(skb, GFP_ATOMIC);
if (!txskb)
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 898c22916984..acd58d23a70e 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -48,7 +48,8 @@ enum {
TIPC_BCAST_SYNCH = (1 << 1),
TIPC_BCAST_STATE_NACK = (1 << 2),
TIPC_BLOCK_FLOWCTL = (1 << 3),
- TIPC_BCAST_RCAST = (1 << 4)
+ TIPC_BCAST_RCAST = (1 << 4),
+ TIPC_MCAST_GROUPS = (1 << 5)
};
#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \
@@ -68,6 +69,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
int selector);
+int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list);
int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
u32 selector);
void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr);
@@ -76,6 +78,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel);
+bool tipc_node_is_up(struct net *net, u32 addr);
u16 tipc_node_get_capabilities(struct net *net, u32 addr);
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 3cd6402e812c..acaef80fb88c 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -36,6 +36,8 @@
#include "server.h"
#include "core.h"
#include "socket.h"
+#include "addr.h"
+#include "msg.h"
#include <net/sock.h>
#include <linux/module.h>
@@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref)
kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
sock_release(sock);
con->sock = NULL;
-
- spin_lock_bh(&s->idr_lock);
- idr_remove(&s->conn_idr, con->conid);
- s->idr_in_use--;
- spin_unlock_bh(&s->idr_lock);
}
-
+ spin_lock_bh(&s->idr_lock);
+ idr_remove(&s->conn_idr, con->conid);
+ s->idr_in_use--;
+ spin_unlock_bh(&s->idr_lock);
tipc_clean_outqueues(con);
kfree(con);
}
@@ -197,7 +197,8 @@ static void tipc_close_conn(struct tipc_conn *con)
struct tipc_server *s = con->server;
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
- tipc_unregister_callbacks(con);
+ if (con->sock)
+ tipc_unregister_callbacks(con);
if (con->conid)
s->tipc_conn_release(con->conid, con->usr_data);
@@ -207,8 +208,8 @@ static void tipc_close_conn(struct tipc_conn *con)
* are harmless for us here as we have already deleted this
* connection from server connection list.
*/
- kernel_sock_shutdown(con->sock, SHUT_RDWR);
-
+ if (con->sock)
+ kernel_sock_shutdown(con->sock, SHUT_RDWR);
conn_put(con);
}
}
@@ -487,38 +488,104 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
}
}
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
+ u32 lower, u32 upper, int *conid)
+{
+ struct tipc_subscriber *scbr;
+ struct tipc_subscr sub;
+ struct tipc_server *s;
+ struct tipc_conn *con;
+
+ sub.seq.type = type;
+ sub.seq.lower = lower;
+ sub.seq.upper = upper;
+ sub.timeout = TIPC_WAIT_FOREVER;
+ sub.filter = TIPC_SUB_PORTS;
+ *(u32 *)&sub.usr_handle = port;
+
+ con = tipc_alloc_conn(tipc_topsrv(net));
+ if (IS_ERR(con))
+ return false;
+
+ *conid = con->conid;
+ s = con->server;
+ scbr = s->tipc_conn_new(*conid);
+ if (!scbr) {
+ tipc_close_conn(con);
+ return false;
+ }
+
+ con->usr_data = scbr;
+ con->sock = NULL;
+ s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub));
+ return true;
+}
+
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
+{
+ struct tipc_conn *con;
+
+ con = tipc_conn_lookup(tipc_topsrv(net), conid);
+ if (!con)
+ return;
+ tipc_close_conn(con);
+ conn_put(con);
+}
+
+static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt)
+{
+ u32 port = *(u32 *)&evt->s.usr_handle;
+ u32 self = tipc_own_addr(net);
+ struct sk_buff_head evtq;
+ struct sk_buff *skb;
+
+ skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt),
+ self, self, port, port, 0);
+ if (!skb)
+ return;
+ msg_set_dest_droppable(buf_msg(skb), true);
+ memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt));
+ skb_queue_head_init(&evtq);
+ __skb_queue_tail(&evtq, skb);
+ tipc_sk_rcv(net, &evtq);
+}
+
static void tipc_send_to_sock(struct tipc_conn *con)
{
- int count = 0;
struct tipc_server *s = con->server;
struct outqueue_entry *e;
+ struct tipc_event *evt;
struct msghdr msg;
+ int count = 0;
int ret;
spin_lock_bh(&con->outqueue_lock);
while (test_bit(CF_CONNECTED, &con->flags)) {
- e = list_entry(con->outqueue.next, struct outqueue_entry,
- list);
+ e = list_entry(con->outqueue.next, struct outqueue_entry, list);
if ((struct list_head *) e == &con->outqueue)
break;
- spin_unlock_bh(&con->outqueue_lock);
- memset(&msg, 0, sizeof(msg));
- msg.msg_flags = MSG_DONTWAIT;
+ spin_unlock_bh(&con->outqueue_lock);
- if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
- msg.msg_name = &e->dest;
- msg.msg_namelen = sizeof(struct sockaddr_tipc);
- }
- ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
- e->iov.iov_len);
- if (ret == -EWOULDBLOCK || ret == 0) {
- cond_resched();
- goto out;
- } else if (ret < 0) {
- goto send_err;
+ if (con->sock) {
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_flags = MSG_DONTWAIT;
+ if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) {
+ msg.msg_name = &e->dest;
+ msg.msg_namelen = sizeof(struct sockaddr_tipc);
+ }
+ ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1,
+ e->iov.iov_len);
+ if (ret == -EWOULDBLOCK || ret == 0) {
+ cond_resched();
+ goto out;
+ } else if (ret < 0) {
+ goto send_err;
+ }
+ } else {
+ evt = e->iov.iov_base;
+ tipc_send_kern_top_evt(s->net, evt);
}
-
/* Don't starve users filling buffers */
if (++count >= MAX_SEND_MSG_COUNT) {
cond_resched();
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 34f8055afa3b..2113c9192633 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -83,13 +83,16 @@ struct tipc_server {
int tipc_conn_sendmsg(struct tipc_server *s, int conid,
struct sockaddr_tipc *addr, void *data, size_t len);
+bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
+ u32 lower, u32 upper, int *conid);
+void tipc_topsrv_kern_unsubscr(struct net *net, int conid);
+
/**
* tipc_conn_terminate - terminate connection with server
*
* Note: Must call it in process context since it might sleep
*/
void tipc_conn_terminate(struct tipc_server *s, int conid);
-
int tipc_server_start(struct tipc_server *s);
void tipc_server_stop(struct tipc_server *s);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index d50edd6e0019..b3b72d8e9543 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1,7 +1,7 @@
/*
* net/tipc/socket.c: TIPC socket API
*
- * Copyright (c) 2001-2007, 2012-2016, Ericsson AB
+ * Copyright (c) 2001-2007, 2012-2017, Ericsson AB
* Copyright (c) 2004-2008, 2010-2013, Wind River Systems
* All rights reserved.
*
@@ -45,9 +45,10 @@
#include "socket.h"
#include "bcast.h"
#include "netlink.h"
+#include "group.h"
#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
-#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */
+#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */
#define TIPC_FWD_MSG 1
#define TIPC_MAX_PORT 0xffffffff
#define TIPC_MIN_PORT 1
@@ -61,6 +62,11 @@ enum {
TIPC_CONNECTING = TCP_SYN_SENT,
};
+struct sockaddr_pair {
+ struct sockaddr_tipc sock;
+ struct sockaddr_tipc member;
+};
+
/**
* struct tipc_sock - TIPC socket structure
* @sk: socket - interacts with 'port' and with user via the socket API
@@ -78,7 +84,7 @@ enum {
* @conn_timeout: the time we can wait for an unresponded setup request
* @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
* @cong_link_cnt: number of congested links
- * @sent_unacked: # messages sent by socket, and not yet acked by peer
+ * @snt_unacked: # messages sent by socket, and not yet acked by peer
* @rcv_unacked: # messages read by user, but not yet acked back to peer
* @peer: 'connected' peer for dgram/rdm
* @node: hash table node
@@ -109,9 +115,10 @@ struct tipc_sock {
struct rhash_head node;
struct tipc_mc_method mc_method;
struct rcu_head rcu;
+ struct tipc_group *group;
};
-static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
static void tipc_write_space(struct sock *sk);
static void tipc_sock_destruct(struct sock *sk);
@@ -123,6 +130,7 @@ static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq);
static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
struct tipc_name_seq const *seq);
+static int tipc_sk_leave(struct tipc_sock *tsk);
static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
static int tipc_sk_insert(struct tipc_sock *tsk);
static void tipc_sk_remove(struct tipc_sock *tsk);
@@ -193,6 +201,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk)
return tsk->snt_unacked > tsk->snd_win;
}
+static u16 tsk_blocks(int len)
+{
+ return ((len / FLOWCTL_BLK_SZ) + 1);
+}
+
/* tsk_blocks(): translate a buffer size in bytes to number of
* advertisable blocks, taking into account the ratio truesize(len)/len
* We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ
@@ -453,7 +466,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
msg_set_origport(msg, tsk->portid);
setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk);
sk->sk_shutdown = 0;
- sk->sk_backlog_rcv = tipc_backlog_rcv;
+ sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
@@ -559,13 +572,14 @@ static int tipc_release(struct socket *sock)
__tipc_shutdown(sock, TIPC_ERR_NO_PORT);
sk->sk_shutdown = SHUTDOWN_MASK;
+ tipc_sk_leave(tsk);
tipc_sk_withdraw(tsk, 0, NULL);
sk_stop_timer(sk, &sk->sk_timer);
tipc_sk_remove(tsk);
/* Reject any messages that accumulated in backlog queue */
release_sock(sk);
- u32_list_purge(&tsk->cong_links);
+ tipc_dest_list_purge(&tsk->cong_links);
tsk->cong_link_cnt = 0;
call_rcu(&tsk->rcu, tipc_sk_callback);
sock->sk = NULL;
@@ -601,7 +615,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
res = tipc_sk_withdraw(tsk, 0, NULL);
goto exit;
}
-
+ if (tsk->group) {
+ res = -EACCES;
+ goto exit;
+ }
if (uaddr_len < sizeof(struct sockaddr_tipc)) {
res = -EINVAL;
goto exit;
@@ -698,38 +715,41 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- u32 mask = 0;
+ struct tipc_group *grp = tsk->group;
+ u32 revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= POLLRDHUP | POLLIN | POLLRDNORM;
+ revents |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
- mask |= POLLHUP;
+ revents |= POLLHUP;
switch (sk->sk_state) {
case TIPC_ESTABLISHED:
if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk))
- mask |= POLLOUT;
+ revents |= POLLOUT;
/* fall thru' */
case TIPC_LISTEN:
case TIPC_CONNECTING:
if (!skb_queue_empty(&sk->sk_receive_queue))
- mask |= (POLLIN | POLLRDNORM);
+ revents |= POLLIN | POLLRDNORM;
break;
case TIPC_OPEN:
- if (!tsk->cong_link_cnt)
- mask |= POLLOUT;
- if (tipc_sk_type_connectionless(sk) &&
- (!skb_queue_empty(&sk->sk_receive_queue)))
- mask |= (POLLIN | POLLRDNORM);
+ if (!grp || tipc_group_size(grp))
+ if (!tsk->cong_link_cnt)
+ revents |= POLLOUT;
+ if (!tipc_sk_type_connectionless(sk))
+ break;
+ if (skb_queue_empty(&sk->sk_receive_queue))
+ break;
+ revents |= POLLIN | POLLRDNORM;
break;
case TIPC_DISCONNECTING:
- mask = (POLLIN | POLLRDNORM | POLLHUP);
+ revents = POLLIN | POLLRDNORM | POLLHUP;
break;
}
-
- return mask;
+ return revents;
}
/**
@@ -757,6 +777,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct tipc_nlist dsts;
int rc;
+ if (tsk->group)
+ return -EACCES;
+
/* Block or return if any destination link is congested */
rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt);
if (unlikely(rc))
@@ -794,6 +817,296 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
}
/**
+ * tipc_send_group_msg - send a message to a member in the group
+ * @net: network namespace
+ * @m: message to send
+ * @mb: group member
+ * @dnode: destination node
+ * @dport: destination port
+ * @dlen: total length of message data
+ */
+static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk,
+ struct msghdr *m, struct tipc_member *mb,
+ u32 dnode, u32 dport, int dlen)
+{
+ u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group);
+ struct tipc_mc_method *method = &tsk->mc_method;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct sk_buff_head pkts;
+ int mtu, rc;
+
+ /* Complete message header */
+ msg_set_type(hdr, TIPC_GRP_UCAST_MSG);
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_destport(hdr, dport);
+ msg_set_destnode(hdr, dnode);
+ msg_set_grp_bc_seqno(hdr, bc_snd_nxt);
+
+ /* Build message as chain of buffers */
+ skb_queue_head_init(&pkts);
+ mtu = tipc_node_get_mtu(net, dnode, tsk->portid);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+
+ /* Send message */
+ rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
+ if (unlikely(rc == -ELINKCONG)) {
+ tipc_dest_push(&tsk->cong_links, dnode, 0);
+ tsk->cong_link_cnt++;
+ }
+
+ /* Update send window */
+ tipc_group_update_member(mb, blks);
+
+ /* A broadcast sent within next EXPIRE period must follow same path */
+ method->rcast = true;
+ method->mandatory = true;
+ return dlen;
+}
+
+/**
+ * tipc_send_group_unicast - send message to a member in the group
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ struct sock *sk = sock->sk;
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct net *net = sock_net(sk);
+ struct tipc_member *mb = NULL;
+ u32 node, port;
+ int rc;
+
+ node = dest->addr.id.node;
+ port = dest->addr.id.ref;
+ if (!port && !node)
+ return -EHOSTUNREACH;
+
+ /* Block or return if destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(&tsk->cong_links, node, 0) &&
+ !tipc_group_cong(grp, node, port, blks, &mb));
+ if (unlikely(rc))
+ return rc;
+
+ if (unlikely(!mb))
+ return -EHOSTUNREACH;
+
+ rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen);
+
+ return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_anycast - send message to any member with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct sock *sk = sock->sk;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct list_head *cong_links = &tsk->cong_links;
+ int blks = tsk_blocks(GROUP_H_SIZE + dlen);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_member *first = NULL;
+ struct tipc_member *mbr = NULL;
+ struct net *net = sock_net(sk);
+ u32 node, port, exclude;
+ u32 type, inst, domain;
+ struct list_head dsts;
+ int lookups = 0;
+ int dstcnt, rc;
+ bool cong;
+
+ INIT_LIST_HEAD(&dsts);
+
+ type = dest->addr.name.name.type;
+ inst = dest->addr.name.name.instance;
+ domain = addr_domain(net, dest->scope);
+ exclude = tipc_group_exclude(grp);
+
+ while (++lookups < 4) {
+ first = NULL;
+
+ /* Look for a non-congested destination member, if any */
+ while (1) {
+ if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+ &dstcnt, exclude, false))
+ return -EHOSTUNREACH;
+ tipc_dest_pop(&dsts, &node, &port);
+ cong = tipc_group_cong(grp, node, port, blks, &mbr);
+ if (!cong)
+ break;
+ if (mbr == first)
+ break;
+ if (!first)
+ first = mbr;
+ }
+
+ /* Start over if destination was not in member list */
+ if (unlikely(!mbr))
+ continue;
+
+ if (likely(!cong && !tipc_dest_find(cong_links, node, 0)))
+ break;
+
+ /* Block or return if destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(cong_links, node, 0) &&
+ !tipc_group_cong(grp, node, port,
+ blks, &mbr));
+ if (unlikely(rc))
+ return rc;
+
+ /* Send, unless destination disappeared while waiting */
+ if (likely(mbr))
+ break;
+ }
+
+ if (unlikely(lookups >= 4))
+ return -EHOSTUNREACH;
+
+ rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen);
+
+ return rc ? rc : dlen;
+}
+
+/**
+ * tipc_send_group_bcast - send message to all members in communication group
+ * @sk: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct sock *sk = sock->sk;
+ struct net *net = sock_net(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_nlist *dsts = tipc_group_dests(grp);
+ struct tipc_mc_method *method = &tsk->mc_method;
+ bool ack = method->mandatory && method->rcast;
+ int blks = tsk_blocks(MCAST_H_SIZE + dlen);
+ struct tipc_msg *hdr = &tsk->phdr;
+ int mtu = tipc_bcast_get_mtu(net);
+ struct sk_buff_head pkts;
+ int rc = -EHOSTUNREACH;
+
+ if (!dsts->local && !dsts->remote)
+ return -EHOSTUNREACH;
+
+ /* Block or return if any destination link or member is congested */
+ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt &&
+ !tipc_group_bc_cong(grp, blks));
+ if (unlikely(rc))
+ return rc;
+
+ /* Complete message header */
+ if (dest) {
+ msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
+ msg_set_nameinst(hdr, dest->addr.name.name.instance);
+ } else {
+ msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
+ msg_set_nameinst(hdr, 0);
+ }
+ msg_set_hdr_sz(hdr, GROUP_H_SIZE);
+ msg_set_destport(hdr, 0);
+ msg_set_destnode(hdr, 0);
+ msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp));
+
+ /* Avoid getting stuck with repeated forced replicasts */
+ msg_set_grp_bc_ack_req(hdr, ack);
+
+ /* Build message as chain of buffers */
+ skb_queue_head_init(&pkts);
+ rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
+ if (unlikely(rc != dlen))
+ return rc;
+
+ /* Send message */
+ rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt);
+ if (unlikely(rc))
+ return rc;
+
+ /* Update broadcast sequence number and send windows */
+ tipc_group_update_bc_members(tsk->group, blks, ack);
+
+ /* Broadcast link is now free to choose method for next broadcast */
+ method->mandatory = false;
+ method->expires = jiffies;
+
+ return dlen;
+}
+
+/**
+ * tipc_send_group_mcast - send message to all members with given identity
+ * @sock: socket structure
+ * @m: message to send
+ * @dlen: total length of message data
+ * @timeout: timeout to wait for wakeup
+ *
+ * Called from function tipc_sendmsg(), which has done all sanity checks
+ * Returns the number of bytes sent on success, or errno
+ */
+static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
+ int dlen, long timeout)
+{
+ struct sock *sk = sock->sk;
+ DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
+ struct tipc_name_seq *seq = &dest->addr.nameseq;
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
+ struct net *net = sock_net(sk);
+ u32 domain, exclude, dstcnt;
+ struct list_head dsts;
+
+ INIT_LIST_HEAD(&dsts);
+
+ if (seq->lower != seq->upper)
+ return -ENOTSUPP;
+
+ domain = addr_domain(net, dest->scope);
+ exclude = tipc_group_exclude(grp);
+ if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
+ &dsts, &dstcnt, exclude, true))
+ return -EHOSTUNREACH;
+
+ if (dstcnt == 1) {
+ tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
+ return tipc_send_group_unicast(sock, m, dlen, timeout);
+ }
+
+ tipc_dest_list_purge(&dsts);
+ return tipc_send_group_bcast(sock, m, dlen, timeout);
+}
+
+/**
* tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets
* @arrvq: queue with arriving messages, to be cloned after destination lookup
* @inputq: queue with cloned messages, delivered to socket after dest lookup
@@ -803,13 +1116,15 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq)
{
- struct tipc_msg *msg;
- struct list_head dports;
- u32 portid;
u32 scope = TIPC_CLUSTER_SCOPE;
- struct sk_buff_head tmpq;
- uint hsz;
+ u32 self = tipc_own_addr(net);
struct sk_buff *skb, *_skb;
+ u32 lower = 0, upper = ~0;
+ struct sk_buff_head tmpq;
+ u32 portid, oport, onode;
+ struct list_head dports;
+ struct tipc_msg *msg;
+ int user, mtyp, hsz;
__skb_queue_head_init(&tmpq);
INIT_LIST_HEAD(&dports);
@@ -817,17 +1132,32 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
skb = tipc_skb_peek(arrvq, &inputq->lock);
for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
msg = buf_msg(skb);
+ user = msg_user(msg);
+ mtyp = msg_type(msg);
+ if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
+ spin_lock_bh(&inputq->lock);
+ if (skb_peek(arrvq) == skb) {
+ __skb_dequeue(arrvq);
+ __skb_queue_tail(inputq, skb);
+ }
+ refcount_dec(&skb->users);
+ spin_unlock_bh(&inputq->lock);
+ continue;
+ }
hsz = skb_headroom(skb) + msg_hdr_sz(msg);
-
- if (in_own_node(net, msg_orignode(msg)))
+ oport = msg_origport(msg);
+ onode = msg_orignode(msg);
+ if (onode == self)
scope = TIPC_NODE_SCOPE;
/* Create destination port list and message clones: */
- tipc_nametbl_mc_translate(net,
- msg_nametype(msg), msg_namelower(msg),
- msg_nameupper(msg), scope, &dports);
- portid = u32_pop(&dports);
- for (; portid; portid = u32_pop(&dports)) {
+ if (!msg_in_group(msg)) {
+ lower = msg_namelower(msg);
+ upper = msg_nameupper(msg);
+ }
+ tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
+ scope, &dports);
+ while (tipc_dest_pop(&dports, NULL, &portid)) {
_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
if (_skb) {
msg_set_destport(buf_msg(_skb), portid);
@@ -850,16 +1180,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
}
/**
- * tipc_sk_proto_rcv - receive a connection mng protocol message
+ * tipc_sk_conn_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
* @skb: pointer to message buffer.
*/
-static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
- struct sk_buff_head *xmitq)
+static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
- struct sock *sk = &tsk->sk;
- u32 onode = tsk_own_node(tsk);
struct tipc_msg *hdr = buf_msg(skb);
+ u32 onode = tsk_own_node(tsk);
+ struct sock *sk = &tsk->sk;
int mtyp = msg_type(hdr);
bool conn_cong;
@@ -931,6 +1261,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
struct list_head *clinks = &tsk->cong_links;
bool syn = !tipc_sk_type_connectionless(sk);
+ struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
@@ -941,18 +1272,31 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
return -EMSGSIZE;
+ if (likely(dest)) {
+ if (unlikely(m->msg_namelen < sizeof(*dest)))
+ return -EINVAL;
+ if (unlikely(dest->family != AF_TIPC))
+ return -EINVAL;
+ }
+
+ if (grp) {
+ if (!dest)
+ return tipc_send_group_bcast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_NAME)
+ return tipc_send_group_anycast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_ID)
+ return tipc_send_group_unicast(sock, m, dlen, timeout);
+ if (dest->addrtype == TIPC_ADDR_MCAST)
+ return tipc_send_group_mcast(sock, m, dlen, timeout);
+ return -EINVAL;
+ }
+
if (unlikely(!dest)) {
dest = &tsk->peer;
if (!syn || dest->family != AF_TIPC)
return -EDESTADDRREQ;
}
- if (unlikely(m->msg_namelen < sizeof(*dest)))
- return -EINVAL;
-
- if (unlikely(dest->family != AF_TIPC))
- return -EINVAL;
-
if (unlikely(syn)) {
if (sk->sk_state == TIPC_LISTEN)
return -EPIPE;
@@ -985,7 +1329,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
msg_set_destport(hdr, dport);
if (unlikely(!dport && !dnode))
return -EHOSTUNREACH;
-
} else if (dest->addrtype == TIPC_ADDR_ID) {
dnode = dest->addr.id.node;
msg_set_type(hdr, TIPC_DIRECT_MSG);
@@ -996,7 +1339,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
}
/* Block or return if destination link is congested */
- rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode));
+ rc = tipc_wait_for_cond(sock, &timeout,
+ !tipc_dest_find(clinks, dnode, 0));
if (unlikely(rc))
return rc;
@@ -1008,7 +1352,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
if (unlikely(rc == -ELINKCONG)) {
- u32_push(clinks, dnode);
+ tipc_dest_push(clinks, dnode, 0);
tsk->cong_link_cnt++;
rc = 0;
}
@@ -1128,7 +1472,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
msg_set_lookup_scope(msg, 0);
msg_set_hdr_sz(msg, SHORT_H_SIZE);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
tipc_set_sk_state(sk, TIPC_ESTABLISHED);
tipc_node_add_conn(net, peer_node, tsk->portid, peer_port);
tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid);
@@ -1142,26 +1486,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
}
/**
- * set_orig_addr - capture sender's address for received message
+ * tipc_sk_set_orig_addr - capture sender's address for received message
* @m: descriptor for message info
- * @msg: received message header
+ * @hdr: received message header
*
* Note: Address is not captured if not requested by receiver.
*/
-static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
+static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb)
{
- DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name);
+ DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name);
+ struct tipc_msg *hdr = buf_msg(skb);
- if (addr) {
- addr->family = AF_TIPC;
- addr->addrtype = TIPC_ADDR_ID;
- memset(&addr->addr, 0, sizeof(addr->addr));
- addr->addr.id.ref = msg_origport(msg);
- addr->addr.id.node = msg_orignode(msg);
- addr->addr.name.domain = 0; /* could leave uninitialized */
- addr->scope = 0; /* could leave uninitialized */
- m->msg_namelen = sizeof(struct sockaddr_tipc);
- }
+ if (!srcaddr)
+ return;
+
+ srcaddr->sock.family = AF_TIPC;
+ srcaddr->sock.addrtype = TIPC_ADDR_ID;
+ srcaddr->sock.addr.id.ref = msg_origport(hdr);
+ srcaddr->sock.addr.id.node = msg_orignode(hdr);
+ srcaddr->sock.addr.name.domain = 0;
+ srcaddr->sock.scope = 0;
+ m->msg_namelen = sizeof(struct sockaddr_tipc);
+
+ if (!msg_in_group(hdr))
+ return;
+
+ /* Group message users may also want to know sending member's id */
+ srcaddr->member.family = AF_TIPC;
+ srcaddr->member.addrtype = TIPC_ADDR_NAME;
+ srcaddr->member.addr.name.name.type = msg_nametype(hdr);
+ srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member;
+ srcaddr->member.addr.name.domain = 0;
+ m->msg_namelen = sizeof(*srcaddr);
}
/**
@@ -1318,11 +1674,13 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
size_t buflen, int flags)
{
struct sock *sk = sock->sk;
- struct tipc_sock *tsk = tipc_sk(sk);
- struct sk_buff *skb;
- struct tipc_msg *hdr;
bool connected = !tipc_sk_type_connectionless(sk);
+ struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+ struct sk_buff_head xmitq;
+ struct tipc_msg *hdr;
+ struct sk_buff *skb;
+ bool grp_evt;
long timeout;
/* Catch invalid receive requests */
@@ -1336,8 +1694,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
}
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ /* Step rcv queue to first msg with data or error; wait if necessary */
do {
- /* Look at first msg in receive queue; wait if necessary */
rc = tipc_wait_for_rcvmsg(sock, &timeout);
if (unlikely(rc))
goto exit;
@@ -1346,13 +1704,14 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
err = msg_errcode(hdr);
+ grp_evt = msg_is_grp_evt(hdr);
if (likely(dlen || err))
break;
tsk_advance_rx_queue(sk);
} while (1);
/* Collect msg meta data, including error code and rejected data */
- set_orig_addr(m, hdr);
+ tipc_sk_set_orig_addr(m, skb);
rc = tipc_sk_anc_data_recv(m, hdr, tsk);
if (unlikely(rc))
goto exit;
@@ -1372,15 +1731,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
if (unlikely(rc))
goto exit;
+ /* Mark message as group event if applicable */
+ if (unlikely(grp_evt)) {
+ if (msg_grp_evt(hdr) == TIPC_WITHDRAWN)
+ m->msg_flags |= MSG_EOR;
+ m->msg_flags |= MSG_OOB;
+ copy = 0;
+ }
+
/* Caption of data or error code/rejected data was successful */
if (unlikely(flags & MSG_PEEK))
goto exit;
+ /* Send group flow control advertisement when applicable */
+ if (tsk->group && msg_in_group(hdr) && !grp_evt) {
+ skb_queue_head_init(&xmitq);
+ tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen),
+ msg_orignode(hdr), msg_origport(hdr),
+ &xmitq);
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
+ }
+
tsk_advance_rx_queue(sk);
+
if (likely(!connected))
goto exit;
- /* Send connection flow control ack when applicable */
+ /* Send connection flow control advertisement when applicable */
tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen);
if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE)
tipc_sk_send_ack(tsk);
@@ -1446,7 +1823,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m,
/* Collect msg meta data, incl. error code and rejected data */
if (!copied) {
- set_orig_addr(m, hdr);
+ tipc_sk_set_orig_addr(m, skb);
rc = tipc_sk_anc_data_recv(m, hdr, tsk);
if (rc)
break;
@@ -1532,14 +1909,51 @@ static void tipc_sock_destruct(struct sock *sk)
__skb_queue_purge(&sk->sk_receive_queue);
}
+static void tipc_sk_proto_rcv(struct sock *sk,
+ struct sk_buff_head *inputq,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = __skb_dequeue(inputq);
+ struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct tipc_group *grp = tsk->group;
+ bool wakeup = false;
+
+ switch (msg_user(hdr)) {
+ case CONN_MANAGER:
+ tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
+ return;
+ case SOCK_WAKEUP:
+ tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
+ tsk->cong_link_cnt--;
+ wakeup = true;
+ break;
+ case GROUP_PROTOCOL:
+ tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
+ break;
+ case TOP_SRV:
+ tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
+ skb, inputq, xmitq);
+ skb = NULL;
+ break;
+ default:
+ break;
+ }
+
+ if (wakeup)
+ sk->sk_write_space(sk);
+
+ kfree_skb(skb);
+}
+
/**
- * filter_connect - Handle all incoming messages for a connection-based socket
+ * tipc_filter_connect - Handle incoming message for a connection-based socket
* @tsk: TIPC socket
* @skb: pointer to message buffer. Set to NULL if buffer is consumed
*
* Returns true if everything ok, false otherwise
*/
-static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
+static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
@@ -1643,6 +2057,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
struct tipc_sock *tsk = tipc_sk(sk);
struct tipc_msg *hdr = buf_msg(skb);
+ if (unlikely(msg_in_group(hdr)))
+ return sk->sk_rcvbuf;
+
if (unlikely(!msg_connected(hdr)))
return sk->sk_rcvbuf << msg_importance(hdr);
@@ -1653,7 +2070,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
}
/**
- * filter_rcv - validate incoming message
+ * tipc_sk_filter_rcv - validate incoming message
* @sk: socket
* @skb: pointer to message.
*
@@ -1662,99 +2079,71 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
*
* Called with socket lock already taken
*
- * Returns true if message was added to socket receive queue, otherwise false
*/
-static bool filter_rcv(struct sock *sk, struct sk_buff *skb,
- struct sk_buff_head *xmitq)
+static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
+ bool sk_conn = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group *grp = tsk->group;
struct tipc_msg *hdr = buf_msg(skb);
- unsigned int limit = rcvbuf_limit(sk, skb);
- int err = TIPC_OK;
- int usr = msg_user(hdr);
- u32 onode;
+ struct net *net = sock_net(sk);
+ struct sk_buff_head inputq;
+ int limit, err = TIPC_OK;
- if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
- tipc_sk_proto_rcv(tsk, skb, xmitq);
- return false;
- }
+ TIPC_SKB_CB(skb)->bytes_read = 0;
+ __skb_queue_head_init(&inputq);
+ __skb_queue_tail(&inputq, skb);
- if (unlikely(usr == SOCK_WAKEUP)) {
- onode = msg_orignode(hdr);
- kfree_skb(skb);
- u32_del(&tsk->cong_links, onode);
- tsk->cong_link_cnt--;
- sk->sk_write_space(sk);
- return false;
- }
+ if (unlikely(!msg_isdata(hdr)))
+ tipc_sk_proto_rcv(sk, &inputq, xmitq);
- /* Drop if illegal message type */
- if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
- kfree_skb(skb);
- return false;
- }
+ if (unlikely(grp))
+ tipc_group_filter_msg(grp, &inputq, xmitq);
- /* Reject if wrong message type for current socket state */
- if (tipc_sk_type_connectionless(sk)) {
- if (msg_connected(hdr)) {
+ /* Validate and add to receive buffer if there is space */
+ while ((skb = __skb_dequeue(&inputq))) {
+ hdr = buf_msg(skb);
+ limit = rcvbuf_limit(sk, skb);
+ if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) ||
+ (!sk_conn && msg_connected(hdr)) ||
+ (!grp && msg_in_group(hdr)))
err = TIPC_ERR_NO_PORT;
- goto reject;
- }
- } else if (unlikely(!filter_connect(tsk, skb))) {
- err = TIPC_ERR_NO_PORT;
- goto reject;
- }
+ else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit)
+ err = TIPC_ERR_OVERLOAD;
- /* Reject message if there isn't room to queue it */
- if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
- err = TIPC_ERR_OVERLOAD;
- goto reject;
+ if (unlikely(err)) {
+ tipc_skb_reject(net, err, skb, xmitq);
+ err = TIPC_OK;
+ continue;
+ }
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
+ sk->sk_data_ready(sk);
}
-
- /* Enqueue message */
- TIPC_SKB_CB(skb)->bytes_read = 0;
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- skb_set_owner_r(skb, sk);
-
- sk->sk_data_ready(sk);
- return true;
-
-reject:
- if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err))
- __skb_queue_tail(xmitq, skb);
- return false;
}
/**
- * tipc_backlog_rcv - handle incoming message from backlog queue
+ * tipc_sk_backlog_rcv - handle incoming message from backlog queue
* @sk: socket
* @skb: message
*
* Caller must hold socket lock
- *
- * Returns 0
*/
-static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
+static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
- unsigned int truesize = skb->truesize;
+ unsigned int before = sk_rmem_alloc_get(sk);
struct sk_buff_head xmitq;
- u32 dnode, selector;
+ unsigned int added;
__skb_queue_head_init(&xmitq);
- if (likely(filter_rcv(sk, skb, &xmitq))) {
- atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
- return 0;
- }
-
- if (skb_queue_empty(&xmitq))
- return 0;
+ tipc_sk_filter_rcv(sk, skb, &xmitq);
+ added = sk_rmem_alloc_get(sk) - before;
+ atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt);
- /* Send response/rejected message */
- skb = __skb_dequeue(&xmitq);
- dnode = msg_destnode(buf_msg(skb));
- selector = msg_origport(buf_msg(skb));
- tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
+ /* Send pending response/rejected messages, if any */
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
return 0;
}
@@ -1786,7 +2175,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
/* Add message directly to receive queue if possible */
if (!sock_owned_by_user(sk)) {
- filter_rcv(sk, skb, xmitq);
+ tipc_sk_filter_rcv(sk, skb, xmitq);
continue;
}
@@ -1833,14 +2222,10 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
spin_unlock_bh(&sk->sk_lock.slock);
}
/* Send pending response/rejected messages, if any */
- while ((skb = __skb_dequeue(&xmitq))) {
- dnode = msg_destnode(buf_msg(skb));
- tipc_node_xmit_skb(net, skb, dnode, dport);
- }
+ tipc_node_distr_xmit(sock_net(sk), &xmitq);
sock_put(sk);
continue;
}
-
/* No destination socket => dequeue skb if still there */
skb = tipc_skb_dequeue(inputq, dport);
if (!skb)
@@ -1903,28 +2288,32 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
int previous;
int res = 0;
+ if (destlen != sizeof(struct sockaddr_tipc))
+ return -EINVAL;
+
lock_sock(sk);
- /* DGRAM/RDM connect(), just save the destaddr */
- if (tipc_sk_type_connectionless(sk)) {
- if (dst->family == AF_UNSPEC) {
- memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
- } else if (destlen != sizeof(struct sockaddr_tipc)) {
- res = -EINVAL;
- } else {
- memcpy(&tsk->peer, dest, destlen);
- }
+ if (tsk->group) {
+ res = -EINVAL;
goto exit;
}
- /*
- * Reject connection attempt using multicast address
- *
- * Note: send_msg() validates the rest of the address fields,
- * so there's no need to do it here
- */
- if (dst->addrtype == TIPC_ADDR_MCAST) {
+ if (dst->family == AF_UNSPEC) {
+ memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc));
+ if (!tipc_sk_type_connectionless(sk))
+ res = -EINVAL;
+ goto exit;
+ } else if (dst->family != AF_TIPC) {
res = -EINVAL;
+ }
+ if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME)
+ res = -EINVAL;
+ if (res)
+ goto exit;
+
+ /* DGRAM/RDM connect(), just save the destaddr */
+ if (tipc_sk_type_connectionless(sk)) {
+ memcpy(&tsk->peer, dest, destlen);
goto exit;
}
@@ -2144,43 +2533,40 @@ static int tipc_shutdown(struct socket *sock, int how)
static void tipc_sk_timeout(unsigned long data)
{
struct tipc_sock *tsk = (struct tipc_sock *)data;
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 peer_node = tsk_peer_node(tsk);
+ u32 own_node = tsk_own_node(tsk);
+ u32 own_port = tsk->portid;
struct sock *sk = &tsk->sk;
+ struct net *net = sock_net(sk);
struct sk_buff *skb = NULL;
- u32 peer_port, peer_node;
- u32 own_node = tsk_own_node(tsk);
bh_lock_sock(sk);
- if (!tipc_sk_connected(sk)) {
- bh_unlock_sock(sk);
+ if (!tipc_sk_connected(sk))
+ goto exit;
+
+ /* Try again later if socket is busy */
+ if (sock_owned_by_user(sk)) {
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20);
goto exit;
}
- peer_port = tsk_peer_port(tsk);
- peer_node = tsk_peer_node(tsk);
if (tsk->probe_unacked) {
- if (!sock_owned_by_user(sk)) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
- tsk_peer_port(tsk));
- sk->sk_state_change(sk);
- } else {
- /* Try again later */
- sk_reset_timer(sk, &sk->sk_timer, (HZ / 20));
- }
-
- bh_unlock_sock(sk);
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, peer_node, peer_port);
+ sk->sk_state_change(sk);
goto exit;
}
-
- skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE,
- INT_H_SIZE, 0, peer_node, own_node,
- peer_port, tsk->portid, TIPC_OK);
+ /* Send new probe */
+ skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0,
+ peer_node, own_node, peer_port, own_port,
+ TIPC_OK);
tsk->probe_unacked = true;
- sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL);
+ sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV);
+exit:
bh_unlock_sock(sk);
if (skb)
- tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
-exit:
+ tipc_node_xmit_skb(net, skb, peer_node, own_port);
sock_put(sk);
}
@@ -2345,6 +2731,56 @@ void tipc_sk_rht_destroy(struct net *net)
rhashtable_destroy(&tn->sk_rht);
}
+static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
+{
+ struct net *net = sock_net(&tsk->sk);
+ u32 domain = addr_domain(net, mreq->scope);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_msg *hdr = &tsk->phdr;
+ struct tipc_name_seq seq;
+ int rc;
+
+ if (mreq->type < TIPC_RESERVED_TYPES)
+ return -EACCES;
+ if (grp)
+ return -EACCES;
+ grp = tipc_group_create(net, tsk->portid, mreq);
+ if (!grp)
+ return -ENOMEM;
+ tsk->group = grp;
+ msg_set_lookup_scope(hdr, mreq->scope);
+ msg_set_nametype(hdr, mreq->type);
+ msg_set_dest_droppable(hdr, true);
+ seq.type = mreq->type;
+ seq.lower = mreq->instance;
+ seq.upper = seq.lower;
+ tipc_nametbl_build_group(net, grp, mreq->type, domain);
+ rc = tipc_sk_publish(tsk, mreq->scope, &seq);
+ if (rc)
+ tipc_group_delete(net, grp);
+
+ /* Eliminate any risk that a broadcast overtakes the sent JOIN */
+ tsk->mc_method.rcast = true;
+ tsk->mc_method.mandatory = true;
+ return rc;
+}
+
+static int tipc_sk_leave(struct tipc_sock *tsk)
+{
+ struct net *net = sock_net(&tsk->sk);
+ struct tipc_group *grp = tsk->group;
+ struct tipc_name_seq seq;
+ int scope;
+
+ if (!grp)
+ return -EINVAL;
+ tipc_group_self(grp, &seq, &scope);
+ tipc_group_delete(net, grp);
+ tsk->group = NULL;
+ tipc_sk_withdraw(tsk, scope, &seq);
+ return 0;
+}
+
/**
* tipc_setsockopt - set socket option
* @sock: socket structure
@@ -2363,6 +2799,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
+ struct tipc_group_req mreq;
u32 value = 0;
int res = 0;
@@ -2378,9 +2815,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
case TIPC_CONN_TIMEOUT:
if (ol < sizeof(value))
return -EINVAL;
- res = get_user(value, (u32 __user *)ov);
- if (res)
- return res;
+ if (get_user(value, (u32 __user *)ov))
+ return -EFAULT;
+ break;
+ case TIPC_GROUP_JOIN:
+ if (ol < sizeof(mreq))
+ return -EINVAL;
+ if (copy_from_user(&mreq, ov, sizeof(mreq)))
+ return -EFAULT;
break;
default:
if (ov || ol)
@@ -2413,6 +2855,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
tsk->mc_method.rcast = true;
tsk->mc_method.mandatory = true;
break;
+ case TIPC_GROUP_JOIN:
+ res = tipc_sk_join(tsk, &mreq);
+ break;
+ case TIPC_GROUP_LEAVE:
+ res = tipc_sk_leave(tsk);
+ break;
default:
res = -EINVAL;
}
@@ -2440,7 +2888,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- int len;
+ struct tipc_name_seq seq;
+ int len, scope;
u32 value;
int res;
@@ -2474,6 +2923,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
case TIPC_SOCK_RECVQ_DEPTH:
value = skb_queue_len(&sk->sk_receive_queue);
break;
+ case TIPC_GROUP_JOIN:
+ seq.type = 0;
+ if (tsk->group)
+ tipc_group_self(tsk->group, &seq, &scope);
+ value = seq.type;
+ break;
default:
res = -EINVAL;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7f46bab4ce5c..a9ee634f3c42 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol,
*/
case SOCK_RAW:
sock->type = SOCK_DGRAM;
+ /* fall through */
case SOCK_DGRAM:
sock->ops = &unix_dgram_ops;
break;
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index a24369d175fd..970f96489fe7 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -15,6 +15,16 @@ config VSOCKETS
To compile this driver as a module, choose M here: the module
will be called vsock. If unsure, say N.
+config VSOCKETS_DIAG
+ tristate "Virtual Sockets monitoring interface"
+ depends on VSOCKETS
+ default y
+ help
+ Support for PF_VSOCK sockets monitoring interface used by the ss tool.
+ If unsure, say Y.
+
+ Enable this module so userspace applications can query open sockets.
+
config VMWARE_VMCI_VSOCKETS
tristate "VMware VMCI transport for Virtual Sockets"
depends on VSOCKETS && VMWARE_VMCI
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index e63d574234a9..64afc06805da 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,4 +1,5 @@
obj-$(CONFIG_VSOCKETS) += vsock.o
+obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
@@ -6,6 +7,8 @@ obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
+vsock_diag-y += diag.o
+
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dfc8c51e4d74..98359c19522f 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,7 +36,7 @@
* not support simultaneous connects (two "client" sockets connecting).
*
* - "Server" sockets are referred to as listener sockets throughout this
- * implementation because they are in the VSOCK_SS_LISTEN state. When a
+ * implementation because they are in the TCP_LISTEN state. When a
* connection request is received (the second kind of socket mentioned above),
* we create a new socket and refer to it as a pending socket. These pending
* sockets are placed on the pending connection list of the listener socket.
@@ -82,6 +82,15 @@
* argument, we must ensure the reference count is increased to ensure the
* socket isn't freed before the function is run; the deferred function will
* then drop the reference.
+ *
+ * - sk->sk_state uses the TCP state constants because they are widely used by
+ * other address families and exposed to userspace tools like ss(8):
+ *
+ * TCP_CLOSE - unconnected
+ * TCP_SYN_SENT - connecting
+ * TCP_ESTABLISHED - connected
+ * TCP_CLOSING - disconnecting
+ * TCP_LISTEN - listening
*/
#include <linux/types.h>
@@ -153,7 +162,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
* vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function
* mods with VSOCK_HASH_SIZE to ensure this.
*/
-#define VSOCK_HASH_SIZE 251
#define MAX_PORT_RETRIES 24
#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE)
@@ -168,9 +176,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
#define vsock_connected_sockets_vsk(vsk) \
vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
-static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
-static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
-static DEFINE_SPINLOCK(vsock_table_lock);
+struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+EXPORT_SYMBOL_GPL(vsock_bind_table);
+struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+EXPORT_SYMBOL_GPL(vsock_connected_table);
+DEFINE_SPINLOCK(vsock_table_lock);
+EXPORT_SYMBOL_GPL(vsock_table_lock);
/* Autobind this socket to the local address if necessary. */
static int vsock_auto_bind(struct vsock_sock *vsk)
@@ -248,16 +259,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
return NULL;
}
-static bool __vsock_in_bound_table(struct vsock_sock *vsk)
-{
- return !list_empty(&vsk->bound_table);
-}
-
-static bool __vsock_in_connected_table(struct vsock_sock *vsk)
-{
- return !list_empty(&vsk->connected_table);
-}
-
static void vsock_insert_unbound(struct vsock_sock *vsk)
{
spin_lock_bh(&vsock_table_lock);
@@ -485,7 +486,7 @@ void vsock_pending_work(struct work_struct *work)
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
- sk->sk_state = SS_FREE;
+ sk->sk_state = TCP_CLOSE;
out:
release_sock(sk);
@@ -625,7 +626,6 @@ struct sock *__vsock_create(struct net *net,
sk->sk_destruct = vsock_sk_destruct;
sk->sk_backlog_rcv = vsock_queue_rcv_skb;
- sk->sk_state = 0;
sock_reset_flag(sk, SOCK_DONE);
INIT_LIST_HEAD(&vsk->bound_table);
@@ -899,7 +899,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
/* Listening sockets that have connections in their accept
* queue can be read.
*/
- if (sk->sk_state == VSOCK_SS_LISTEN
+ if (sk->sk_state == TCP_LISTEN
&& !vsock_is_accept_queue_empty(sk))
mask |= POLLIN | POLLRDNORM;
@@ -928,7 +928,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
}
/* Connected sockets that can produce data can be written. */
- if (sk->sk_state == SS_CONNECTED) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
bool space_avail_now = false;
int ret = transport->notify_poll_out(
@@ -950,7 +950,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
* POLLOUT|POLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown.
*/
- if (sk->sk_state == SS_UNCONNECTED) {
+ if (sk->sk_state == TCP_CLOSE) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
mask |= POLLOUT | POLLWRNORM;
@@ -1120,9 +1120,9 @@ static void vsock_connect_timeout(struct work_struct *work)
sk = sk_vsock(vsk);
lock_sock(sk);
- if (sk->sk_state == SS_CONNECTING &&
+ if (sk->sk_state == TCP_SYN_SENT &&
(sk->sk_shutdown != SHUTDOWN_MASK)) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
cancel = 1;
@@ -1168,7 +1168,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
err = -EALREADY;
break;
default:
- if ((sk->sk_state == VSOCK_SS_LISTEN) ||
+ if ((sk->sk_state == TCP_LISTEN) ||
vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
err = -EINVAL;
goto out;
@@ -1191,7 +1191,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
- sk->sk_state = SS_CONNECTING;
+ sk->sk_state = TCP_SYN_SENT;
err = transport->connect(vsk);
if (err < 0)
@@ -1211,7 +1211,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
timeout = vsk->connect_timeout;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+ while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
if (flags & O_NONBLOCK) {
/* If we're not going to block, we schedule a timeout
* function to generate a timeout on the connection
@@ -1234,13 +1234,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
@@ -1251,7 +1251,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (sk->sk_err) {
err = -sk->sk_err;
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
} else {
err = 0;
@@ -1284,7 +1284,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
goto out;
}
- if (listener->sk_state != VSOCK_SS_LISTEN) {
+ if (listener->sk_state != TCP_LISTEN) {
err = -EINVAL;
goto out;
}
@@ -1374,7 +1374,7 @@ static int vsock_listen(struct socket *sock, int backlog)
}
sk->sk_max_ack_backlog = backlog;
- sk->sk_state = VSOCK_SS_LISTEN;
+ sk->sk_state = TCP_LISTEN;
err = 0;
@@ -1554,7 +1554,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/* Callers should not provide a destination with stream sockets. */
if (msg->msg_namelen) {
- err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+ err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out;
}
@@ -1565,7 +1565,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}
- if (sk->sk_state != SS_CONNECTED ||
+ if (sk->sk_state != TCP_ESTABLISHED ||
!vsock_addr_bound(&vsk->local_addr)) {
err = -ENOTCONN;
goto out;
@@ -1689,7 +1689,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
lock_sock(sk);
- if (sk->sk_state != SS_CONNECTED) {
+ if (sk->sk_state != TCP_ESTABLISHED) {
/* Recvmsg is supposed to return 0 if a peer performs an
* orderly shutdown. Differentiate between that case and when a
* peer has not connected or a local shutdown occured with the
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
new file mode 100644
index 000000000000..31b567652250
--- /dev/null
+++ b/net/vmw_vsock/diag.c
@@ -0,0 +1,186 @@
+/*
+ * vsock sock_diag(7) module
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/vm_sockets_diag.h>
+#include <net/af_vsock.h>
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ u32 portid, u32 seq, u32 flags)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+ struct vsock_diag_msg *rep;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+ flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rep = nlmsg_data(nlh);
+ rep->vdiag_family = AF_VSOCK;
+
+ /* Lock order dictates that sk_lock is acquired before
+ * vsock_table_lock, so we cannot lock here. Simply don't take
+ * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is
+ * held.
+ */
+ rep->vdiag_type = sk->sk_type;
+ rep->vdiag_state = sk->sk_state;
+ rep->vdiag_shutdown = sk->sk_shutdown;
+ rep->vdiag_src_cid = vsk->local_addr.svm_cid;
+ rep->vdiag_src_port = vsk->local_addr.svm_port;
+ rep->vdiag_dst_cid = vsk->remote_addr.svm_cid;
+ rep->vdiag_dst_port = vsk->remote_addr.svm_port;
+ rep->vdiag_ino = sock_i_ino(sk);
+
+ sock_diag_save_cookie(sk, rep->vdiag_cookie);
+
+ return 0;
+}
+
+static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct vsock_diag_req *req;
+ struct vsock_sock *vsk;
+ unsigned int bucket;
+ unsigned int last_i;
+ unsigned int table;
+ struct net *net;
+ unsigned int i;
+
+ req = nlmsg_data(cb->nlh);
+ net = sock_net(skb->sk);
+
+ /* State saved between calls: */
+ table = cb->args[0];
+ bucket = cb->args[1];
+ i = last_i = cb->args[2];
+
+ /* TODO VMCI pending sockets? */
+
+ spin_lock_bh(&vsock_table_lock);
+
+ /* Bind table (locally created sockets) */
+ if (table == 0) {
+ while (bucket < ARRAY_SIZE(vsock_bind_table)) {
+ struct list_head *head = &vsock_bind_table[bucket];
+
+ i = 0;
+ list_for_each_entry(vsk, head, bound_table) {
+ struct sock *sk = sk_vsock(vsk);
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (i < last_i)
+ goto next_bind;
+ if (!(req->vdiag_states & (1 << sk->sk_state)))
+ goto next_bind;
+ if (sk_diag_fill(sk, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next_bind:
+ i++;
+ }
+ last_i = 0;
+ bucket++;
+ }
+
+ table++;
+ bucket = 0;
+ }
+
+ /* Connected table (accepted connections) */
+ while (bucket < ARRAY_SIZE(vsock_connected_table)) {
+ struct list_head *head = &vsock_connected_table[bucket];
+
+ i = 0;
+ list_for_each_entry(vsk, head, connected_table) {
+ struct sock *sk = sk_vsock(vsk);
+
+ /* Skip sockets we've already seen above */
+ if (__vsock_in_bound_table(vsk))
+ continue;
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (i < last_i)
+ goto next_connected;
+ if (!(req->vdiag_states & (1 << sk->sk_state)))
+ goto next_connected;
+ if (sk_diag_fill(sk, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next_connected:
+ i++;
+ }
+ last_i = 0;
+ bucket++;
+ }
+
+done:
+ spin_unlock_bh(&vsock_table_lock);
+
+ cb->args[0] = table;
+ cb->args[1] = bucket;
+ cb->args[2] = i;
+
+ return skb->len;
+}
+
+static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct vsock_diag_req);
+ struct net *net = sock_net(skb->sk);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = vsock_diag_dump,
+ };
+ return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler vsock_diag_handler = {
+ .family = AF_VSOCK,
+ .dump = vsock_diag_handler_dump,
+};
+
+static int __init vsock_diag_init(void)
+{
+ return sock_diag_register(&vsock_diag_handler);
+}
+
+static void __exit vsock_diag_exit(void)
+{
+ sock_diag_unregister(&vsock_diag_handler);
+}
+
+module_init(vsock_diag_init);
+module_exit(vsock_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
+ 40 /* AF_VSOCK */);
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index e21991fe883a..5583df708b8c 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -312,7 +312,7 @@ static void hvs_close_connection(struct vmbus_channel *chan)
lock_sock(sk);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
@@ -349,9 +349,8 @@ static void hvs_open_connection(struct vmbus_channel *chan)
return;
lock_sock(sk);
-
- if ((conn_from_host && sk->sk_state != VSOCK_SS_LISTEN) ||
- (!conn_from_host && sk->sk_state != SS_CONNECTING))
+ if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
+ (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
goto out;
if (conn_from_host) {
@@ -363,7 +362,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
if (!new)
goto out;
- new->sk_state = SS_CONNECTING;
+ new->sk_state = TCP_SYN_SENT;
vnew = vsock_sk(new);
hvs_new = vnew->trans;
hvs_new->chan = chan;
@@ -390,7 +389,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
vmbus_set_chn_rescind_callback(chan, hvs_close_connection);
if (conn_from_host) {
- new->sk_state = SS_CONNECTED;
+ new->sk_state = TCP_ESTABLISHED;
sk->sk_ack_backlog++;
hvs_addr_init(&vnew->local_addr, if_type);
@@ -403,7 +402,7 @@ static void hvs_open_connection(struct vmbus_channel *chan)
vsock_enqueue_accept(sk, new);
} else {
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsock_sk(sk));
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 403d86e80162..8e03bd3f3668 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
lock_sock(sk);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
release_sock(sk);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index edba7ab97563..3ae3a33da70b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
if (vsk->close_work_scheduled &&
@@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
{
struct sock *sk = &vsk->sk;
- if (!(sk->sk_state == SS_CONNECTED ||
- sk->sk_state == SS_DISCONNECTING))
+ if (!(sk->sk_state == TCP_ESTABLISHED ||
+ sk->sk_state == TCP_CLOSING))
return true;
/* Already received SHUTDOWN from peer, reply with RST */
@@ -801,7 +801,7 @@ virtio_transport_recv_connecting(struct sock *sk,
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_RESPONSE:
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsk);
sk->sk_state_change(sk);
@@ -821,7 +821,7 @@ virtio_transport_recv_connecting(struct sock *sk,
destroy:
virtio_transport_reset(vsk, pkt);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk->sk_error_report(sk);
return err;
@@ -857,7 +857,7 @@ virtio_transport_recv_connected(struct sock *sk,
vsk->peer_shutdown |= SEND_SHUTDOWN;
if (vsk->peer_shutdown == SHUTDOWN_MASK &&
vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
if (le32_to_cpu(pkt->hdr.flags))
sk->sk_state_change(sk);
break;
@@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
lock_sock_nested(child, SINGLE_DEPTH_NESTING);
- child->sk_state = SS_CONNECTED;
+ child->sk_state = TCP_ESTABLISHED;
vchild = vsock_sk(child);
vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
@@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
sk->sk_write_space(sk);
switch (sk->sk_state) {
- case VSOCK_SS_LISTEN:
+ case TCP_LISTEN:
virtio_transport_recv_listen(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
virtio_transport_recv_connecting(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
- case SS_CONNECTED:
+ case TCP_ESTABLISHED:
virtio_transport_recv_connected(sk, pkt);
break;
- case SS_DISCONNECTING:
+ case TCP_CLOSING:
virtio_transport_recv_disconnecting(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 10ae7823a19d..391775e3575c 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -21,7 +21,6 @@
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
-#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/net.h>
@@ -743,7 +742,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
/* The local context ID may be out of date, update it. */
vsk->local_addr.svm_cid = dst.svm_cid;
- if (sk->sk_state == SS_CONNECTED)
+ if (sk->sk_state == TCP_ESTABLISHED)
vmci_trans(vsk)->notify_ops->handle_notify_pkt(
sk, pkt, true, &dst, &src,
&bh_process_pkt);
@@ -801,7 +800,9 @@ static void vmci_transport_handle_detach(struct sock *sk)
* left in our consume queue.
*/
if (vsock_stream_has_data(vsk) <= 0) {
- if (sk->sk_state == SS_CONNECTING) {
+ sk->sk_state = TCP_CLOSE;
+
+ if (sk->sk_state == TCP_SYN_SENT) {
/* The peer may detach from a queue pair while
* we are still in the connecting state, i.e.,
* if the peer VM is killed after attaching to
@@ -810,12 +811,10 @@ static void vmci_transport_handle_detach(struct sock *sk)
* event like a reset.
*/
- sk->sk_state = SS_UNCONNECTED;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
return;
}
- sk->sk_state = SS_UNCONNECTED;
}
sk->sk_state_change(sk);
}
@@ -883,17 +882,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
switch (sk->sk_state) {
- case VSOCK_SS_LISTEN:
+ case TCP_LISTEN:
vmci_transport_recv_listen(sk, pkt);
break;
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
/* Processing of pending connections for servers goes through
* the listening socket, so see vmci_transport_recv_listen()
* for that path.
*/
vmci_transport_recv_connecting_client(sk, pkt);
break;
- case SS_CONNECTED:
+ case TCP_ESTABLISHED:
vmci_transport_recv_connected(sk, pkt);
break;
default:
@@ -942,7 +941,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
switch (pending->sk_state) {
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
err = vmci_transport_recv_connecting_server(sk,
pending,
pkt);
@@ -1072,7 +1071,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_add_pending(sk, pending);
sk->sk_ack_backlog++;
- pending->sk_state = SS_CONNECTING;
+ pending->sk_state = TCP_SYN_SENT;
vmci_trans(vpending)->produce_size =
vmci_trans(vpending)->consume_size = qp_size;
vmci_trans(vpending)->queue_pair_size = qp_size;
@@ -1197,11 +1196,11 @@ vmci_transport_recv_connecting_server(struct sock *listener,
* the socket will be valid until it is removed from the queue.
*
* If we fail sending the attach below, we remove the socket from the
- * connected list and move the socket to SS_UNCONNECTED before
+ * connected list and move the socket to TCP_CLOSE before
* releasing the lock, so a pending slow path processing of an incoming
* packet will not see the socket in the connected state in that case.
*/
- pending->sk_state = SS_CONNECTED;
+ pending->sk_state = TCP_ESTABLISHED;
vsock_insert_connected(vpending);
@@ -1232,7 +1231,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
destroy:
pending->sk_err = skerr;
- pending->sk_state = SS_UNCONNECTED;
+ pending->sk_state = TCP_CLOSE;
/* As long as we drop our reference, all necessary cleanup will handle
* when the cleanup function drops its reference and our destruct
* implementation is called. Note that since the listen handler will
@@ -1270,7 +1269,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
* accounting (it can already be found since it's in the bound
* table).
*/
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsk);
sk->sk_state_change(sk);
@@ -1338,7 +1337,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
destroy:
vmci_transport_send_reset(sk, pkt);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk->sk_error_report(sk);
return err;
@@ -1526,7 +1525,7 @@ static int vmci_transport_recv_connected(struct sock *sk,
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
break;
@@ -1790,7 +1789,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
err = vmci_transport_send_conn_request(
sk, vmci_trans(vsk)->queue_pair_size);
if (err < 0) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
return err;
}
} else {
@@ -1800,7 +1799,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
sk, vmci_trans(vsk)->queue_pair_size,
supported_proto_versions);
if (err < 0) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
return err;
}
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 1406db4d97d1..41fb427f150a 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
* queue. Ask for notifications when there is something to
* read.
*/
- if (sk->sk_state == SS_CONNECTED) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
if (!send_waiting_read(sk, 1))
return -1;
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index f3a0afc46208..0cc84f2bb05e 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
* queue. Ask for notifications when there is something to
* read.
*/
- if (sk->sk_state == SS_CONNECTED)
+ if (sk->sk_state == TCP_ESTABLISHED)
vsock_block_update_write_window(sk);
*data_ready_now = false;
}
diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore
index c33451b896d9..61cbc304a3d3 100644
--- a/net/wireless/.gitignore
+++ b/net/wireless/.gitignore
@@ -1 +1,2 @@
-regdb.c
+shipped-certs.c
+extra-certs.c
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 6c606120abfe..da91bb547db3 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -19,6 +19,7 @@ config WEXT_PRIV
config CFG80211
tristate "cfg80211 - wireless configuration API"
depends on RFKILL || !RFKILL
+ select FW_LOADER
---help---
cfg80211 is the Linux wireless LAN (802.11) configuration API.
Enable this if you have a wireless device.
@@ -82,6 +83,36 @@ config CFG80211_CERTIFICATION_ONUS
you are a wireless researcher and are working in a controlled
and approved environment by your local regulatory agency.
+config CFG80211_REQUIRE_SIGNED_REGDB
+ bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS
+ default y
+ select SYSTEM_DATA_VERIFICATION
+ help
+ Require that in addition to the "regulatory.db" file a
+ "regulatory.db.p7s" can be loaded with a valid PKCS#7
+ signature for the regulatory.db file made by one of the
+ keys in the certs/ directory.
+
+config CFG80211_USE_KERNEL_REGDB_KEYS
+ bool "allow regdb keys shipped with the kernel" if CFG80211_CERTIFICATION_ONUS
+ default y
+ depends on CFG80211_REQUIRE_SIGNED_REGDB
+ help
+ Allow the regulatory database to be signed by one of the keys for
+ which certificates are part of the kernel sources
+ (in net/wireless/certs/).
+
+ This is currently only Seth Forshee's key, who is the regulatory
+ database maintainer.
+
+config CFG80211_EXTRA_REGDB_KEYDIR
+ string "additional regdb key directory" if CFG80211_CERTIFICATION_ONUS
+ depends on CFG80211_REQUIRE_SIGNED_REGDB
+ help
+ If selected, point to a directory with DER-encoded X.509
+ certificates like in the kernel sources (net/wireless/certs/)
+ that shall be accepted for a signed regulatory database.
+
config CFG80211_REG_CELLULAR_HINTS
bool "cfg80211 regulatory support for cellular base station hints"
depends on CFG80211_CERTIFICATION_ONUS
@@ -139,35 +170,14 @@ config CFG80211_DEBUGFS
If unsure, say N.
-config CFG80211_INTERNAL_REGDB
- bool "use statically compiled regulatory rules database" if EXPERT
- default n
- depends on CFG80211
- ---help---
- This option generates an internal data structure representing
- the wireless regulatory rules described in net/wireless/db.txt
- and includes code to query that database. This is an alternative
- to using CRDA for defining regulatory rules for the kernel.
-
- Using this option requires some parsing of the db.txt at build time,
- the parser will be upkept with the latest wireless-regdb updates but
- older wireless-regdb formats will be ignored. The parser may later
- be replaced to avoid issues with conflicts on versions of
- wireless-regdb.
-
- For details see:
-
- http://wireless.kernel.org/en/developers/Regulatory
-
- Most distributions have a CRDA package. So if unsure, say N.
-
config CFG80211_CRDA_SUPPORT
- bool "support CRDA" if CFG80211_INTERNAL_REGDB
+ bool "support CRDA" if EXPERT
default y
depends on CFG80211
help
You should enable this option unless you know for sure you have no
- need for it, for example when using internal regdb (above.)
+ need for it, for example when using internal regdb (above) or the
+ database loaded as a firmware file.
If unsure, say Y.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index d06e5015751a..e585f3f71f77 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -14,11 +14,27 @@ cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o
cfg80211-$(CONFIG_OF) += of.o
cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o
cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o
-cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o
CFLAGS_trace.o := -I$(src)
-$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk
- @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@
+cfg80211-$(CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS) += shipped-certs.o
+ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
+cfg80211-y += extra-certs.o
+endif
-clean-files := regdb.c
+$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509)
+ @$(kecho) " GEN $@"
+ @echo '#include "reg.h"' > $@
+ @echo 'const u8 shipped_regdb_certs[] = {' >> $@
+ @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done
+ @echo '};' >> $@
+ @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@
+
+$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \
+ $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509)
+ @$(kecho) " GEN $@"
+ @echo '#include "reg.h"' > $@
+ @echo 'const u8 extra_regdb_certs[] = {' >> $@
+ @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done
+ @echo '};' >> $@
+ @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@
diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509
new file mode 100644
index 000000000000..c6f8f9d6b988
--- /dev/null
+++ b/net/wireless/certs/sforshee.x509
Binary files differ
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index b8aa5a7d5c77..eb824270f6e3 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -464,7 +464,7 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
struct ieee80211_channel *chan)
{
int width;
- u32 cf_offset, freq;
+ u32 freq;
if (chandef->chan->center_freq == chan->center_freq)
return true;
@@ -473,8 +473,6 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef,
if (width <= 20)
return false;
- cf_offset = width / 2 - 10;
-
for (freq = chandef->center_freq1 - width / 2 + 10;
freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) {
if (chan->center_freq == freq)
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7b33e8c366bc..fdde0d98fde1 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1384,7 +1384,7 @@ out_fail_sysfs:
out_fail_pernet:
return err;
}
-subsys_initcall(cfg80211_init);
+fs_initcall(cfg80211_init);
static void __exit cfg80211_exit(void)
{
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 6e809325af3b..35165f42c2a8 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -216,6 +216,7 @@ enum cfg80211_event_type {
EVENT_DISCONNECTED,
EVENT_IBSS_JOINED,
EVENT_STOPPED,
+ EVENT_PORT_AUTHORIZED,
};
struct cfg80211_event {
@@ -235,6 +236,9 @@ struct cfg80211_event {
u8 bssid[ETH_ALEN];
struct ieee80211_channel *channel;
} ij;
+ struct {
+ u8 bssid[ETH_ALEN];
+ } pa;
};
};
@@ -385,6 +389,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev,
bool wextev);
void __cfg80211_roamed(struct wireless_dev *wdev,
struct cfg80211_roam_info *info);
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid);
int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
void cfg80211_autodisconnect_wk(struct work_struct *work);
diff --git a/net/wireless/db.txt b/net/wireless/db.txt
deleted file mode 100644
index a2fc3a09ccdc..000000000000
--- a/net/wireless/db.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# This file is a placeholder to prevent accidental build breakage if someone
-# enables CONFIG_CFG80211_INTERNAL_REGDB. Almost no one actually needs to
-# enable that build option.
-#
-# You should be using CRDA instead. It is even better if you use the CRDA
-# package provided by your distribution, since they will probably keep it
-# up-to-date on your behalf.
-#
-# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will
-# need to replace this file with one containing appropriately formatted
-# regulatory rules that cover the regulatory domains you will be using. Your
-# best option is to extract the db.txt file from the wireless-regdb git
-# repository:
-#
-# git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git
-#
diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk
deleted file mode 100644
index baf2426b555a..000000000000
--- a/net/wireless/genregdb.awk
+++ /dev/null
@@ -1,158 +0,0 @@
-#!/usr/bin/awk -f
-#
-# genregdb.awk -- generate regdb.c from db.txt
-#
-# Actually, it reads from stdin (presumed to be db.txt) and writes
-# to stdout (presumed to be regdb.c), but close enough...
-#
-# Copyright 2009 John W. Linville <linville@tuxdriver.com>
-#
-# Permission to use, copy, modify, and/or distribute this software for any
-# purpose with or without fee is hereby granted, provided that the above
-# copyright notice and this permission notice appear in all copies.
-#
-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
-# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
-# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
-# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
-
-BEGIN {
- active = 0
- rules = 0;
- print "/*"
- print " * DO NOT EDIT -- file generated from data in db.txt"
- print " */"
- print ""
- print "#include <linux/nl80211.h>"
- print "#include <net/cfg80211.h>"
- print "#include \"regdb.h\""
- print ""
- regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n"
-}
-
-function parse_country_head() {
- country=$2
- sub(/:/, "", country)
- printf "static const struct ieee80211_regdomain regdom_%s = {\n", country
- printf "\t.alpha2 = \"%s\",\n", country
- if ($NF ~ /DFS-ETSI/)
- printf "\t.dfs_region = NL80211_DFS_ETSI,\n"
- else if ($NF ~ /DFS-FCC/)
- printf "\t.dfs_region = NL80211_DFS_FCC,\n"
- else if ($NF ~ /DFS-JP/)
- printf "\t.dfs_region = NL80211_DFS_JP,\n"
- printf "\t.reg_rules = {\n"
- active = 1
- regdb = regdb "\t&regdom_" country ",\n"
-}
-
-function parse_reg_rule()
-{
- flag_starts_at = 7
-
- start = $1
- sub(/\(/, "", start)
- end = $3
- bw = $5
- sub(/\),/, "", bw)
- gain = 0
- power = $6
- # power might be in mW...
- units = $7
- dfs_cac = 0
-
- sub(/\(/, "", power)
- sub(/\),/, "", power)
- sub(/\),/, "", units)
- sub(/\)/, "", units)
-
- if (units == "mW") {
- flag_starts_at = 8
- power = 10 * log(power)/log(10)
- if ($8 ~ /[[:digit:]]/) {
- flag_starts_at = 9
- dfs_cac = $8
- }
- } else {
- if ($7 ~ /[[:digit:]]/) {
- flag_starts_at = 8
- dfs_cac = $7
- }
- }
- sub(/\(/, "", dfs_cac)
- sub(/\),/, "", dfs_cac)
- flagstr = ""
- for (i=flag_starts_at; i<=NF; i++)
- flagstr = flagstr $i
- split(flagstr, flagarray, ",")
- flags = ""
- for (arg in flagarray) {
- if (flagarray[arg] == "NO-OFDM") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | "
- } else if (flagarray[arg] == "NO-CCK") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | "
- } else if (flagarray[arg] == "NO-INDOOR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | "
- } else if (flagarray[arg] == "NO-OUTDOOR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | "
- } else if (flagarray[arg] == "DFS") {
- flags = flags "\n\t\t\tNL80211_RRF_DFS | "
- } else if (flagarray[arg] == "PTP-ONLY") {
- flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | "
- } else if (flagarray[arg] == "PTMP-ONLY") {
- flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | "
- } else if (flagarray[arg] == "PASSIVE-SCAN") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "NO-IBSS") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "NO-IR") {
- flags = flags "\n\t\t\tNL80211_RRF_NO_IR | "
- } else if (flagarray[arg] == "AUTO-BW") {
- flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | "
- }
-
- }
- flags = flags "0"
- printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags
- rules++
-}
-
-function print_tail_country()
-{
- active = 0
- printf "\t},\n"
- printf "\t.n_reg_rules = %d\n", rules
- printf "};\n\n"
- rules = 0;
-}
-
-/^[ \t]*#/ {
- # Ignore
-}
-
-!active && /^[ \t]*$/ {
- # Ignore
-}
-
-!active && /country/ {
- parse_country_head()
-}
-
-active && /^[ \t]*\(/ {
- parse_reg_rule()
-}
-
-active && /^[ \t]*$/ {
- print_tail_country()
-}
-
-END {
- if (active)
- print_tail_country()
- print regdb "};"
- print ""
- print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);"
-}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d396cb61a280..fce2cbe6a193 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2130,6 +2130,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
case NL80211_CHAN_HT40MINUS:
cfg80211_chandef_create(chandef, chandef->chan,
chantype);
+ /* user input for center_freq is incorrect */
+ if (info->attrs[NL80211_ATTR_CENTER_FREQ1] &&
+ chandef->center_freq1 != nla_get_u32(
+ info->attrs[NL80211_ATTR_CENTER_FREQ1]))
+ return -EINVAL;
+ /* center_freq2 must be zero */
+ if (info->attrs[NL80211_ATTR_CENTER_FREQ2] &&
+ nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2]))
+ return -EINVAL;
break;
default:
return -EINVAL;
@@ -5677,6 +5686,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
}
}
+static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
+{
+ return reg_reload_regdb();
+}
+
static int nl80211_get_mesh_config(struct sk_buff *skb,
struct genl_info *info)
{
@@ -6618,6 +6632,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev)
return regulatory_pre_cac_allowed(wdev->wiphy);
}
+static int
+nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev,
+ void *request, struct nlattr **attrs,
+ bool is_sched_scan)
+{
+ u8 *mac_addr, *mac_addr_mask;
+ u32 *flags;
+ enum nl80211_feature_flags randomness_flag;
+
+ if (!attrs[NL80211_ATTR_SCAN_FLAGS])
+ return 0;
+
+ if (is_sched_scan) {
+ struct cfg80211_sched_scan_request *req = request;
+
+ randomness_flag = wdev ?
+ NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR :
+ NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
+ flags = &req->flags;
+ mac_addr = req->mac_addr;
+ mac_addr_mask = req->mac_addr_mask;
+ } else {
+ struct cfg80211_scan_request *req = request;
+
+ randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+ flags = &req->flags;
+ mac_addr = req->mac_addr;
+ mac_addr_mask = req->mac_addr_mask;
+ }
+
+ *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]);
+
+ if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
+ !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN))
+ return -EOPNOTSUPP;
+
+ if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+ int err;
+
+ if (!(wiphy->features & randomness_flag) ||
+ (wdev && wdev->current_bss))
+ return -EOPNOTSUPP;
+
+ err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask);
+ if (err)
+ return err;
+ }
+
+ if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION))
+ return -EOPNOTSUPP;
+
+ if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) &&
+ !wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -6823,34 +6908,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]);
}
- if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) {
- request->flags = nla_get_u32(
- info->attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- if (!(wiphy->features &
- NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (wdev->current_bss) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- err = nl80211_parse_random_mac(info->attrs,
- request->mac_addr,
- request->mac_addr_mask);
- if (err)
- goto out_free;
- }
- }
+ err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs,
+ false);
+ if (err)
+ goto out_free;
request->no_cck =
nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);
@@ -7298,37 +7359,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
request->ie_len);
}
- if (attrs[NL80211_ATTR_SCAN_FLAGS]) {
- request->flags = nla_get_u32(
- attrs[NL80211_ATTR_SCAN_FLAGS]);
- if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) &&
- !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
- u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR;
-
- if (!wdev) /* must be net-detect */
- flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR;
-
- if (!(wiphy->features & flg)) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- if (wdev && wdev->current_bss) {
- err = -EOPNOTSUPP;
- goto out_free;
- }
-
- err = nl80211_parse_random_mac(attrs, request->mac_addr,
- request->mac_addr_mask);
- if (err)
- goto out_free;
- }
- }
+ err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
+ if (err)
+ goto out_free;
if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
request->delay =
@@ -8932,8 +8965,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_USE_MFP]) {
connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
+ if (connect.mfp == NL80211_MFP_OPTIONAL &&
+ !wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_MFP_OPTIONAL))
+ return -EOPNOTSUPP;
+
if (connect.mfp != NL80211_MFP_REQUIRED &&
- connect.mfp != NL80211_MFP_NO)
+ connect.mfp != NL80211_MFP_NO &&
+ connect.mfp != NL80211_MFP_OPTIONAL)
return -EINVAL;
} else {
connect.mfp = NL80211_MFP_NO;
@@ -12685,6 +12724,12 @@ static const struct genl_ops nl80211_ops[] = {
.flags = GENL_ADMIN_PERM,
},
{
+ .cmd = NL80211_CMD_RELOAD_REGDB,
+ .doit = nl80211_reload_regdb,
+ .policy = nl80211_policy,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
.cmd = NL80211_CMD_GET_MESH_CONFIG,
.doit = nl80211_get_mesh_config,
.policy = nl80211_policy,
@@ -13812,9 +13857,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
info->req_ie)) ||
(info->resp_ie &&
nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len,
- info->resp_ie)) ||
- (info->authorized &&
- nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED)))
+ info->resp_ie)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -13828,6 +13871,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
nlmsg_free(msg);
}
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, const u8 *bssid)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED);
+ if (!hdr) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ NL80211_MCGRP_MLME, GFP_KERNEL);
+ return;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ nlmsg_free(msg);
+}
+
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
const u8 *ie, size_t ie_len, bool from_ap)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index b96933322077..bf9e772a30b9 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -58,6 +58,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev,
void nl80211_send_roamed(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
struct cfg80211_roam_info *info, gfp_t gfp);
+void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev,
+ struct net_device *netdev, const u8 *bssid);
void nl80211_send_disconnected(struct cfg80211_registered_device *rdev,
struct net_device *netdev, u16 reason,
const u8 *ie, size_t ie_len, bool from_ap);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 6e94f6934a0e..3871998059de 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -53,12 +53,13 @@
#include <linux/ctype.h>
#include <linux/nl80211.h>
#include <linux/platform_device.h>
+#include <linux/verification.h>
#include <linux/moduleparam.h>
+#include <linux/firmware.h>
#include <net/cfg80211.h>
#include "core.h"
#include "reg.h"
#include "rdev-ops.h"
-#include "regdb.h"
#include "nl80211.h"
/*
@@ -100,7 +101,7 @@ static struct regulatory_request core_request_world = {
static struct regulatory_request __rcu *last_request =
(void __force __rcu *)&core_request_world;
-/* To trigger userspace events */
+/* To trigger userspace events and load firmware */
static struct platform_device *reg_pdev;
/*
@@ -443,7 +444,6 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
return regd;
}
-#ifdef CONFIG_CFG80211_INTERNAL_REGDB
struct reg_regdb_apply_request {
struct list_head list;
const struct ieee80211_regdomain *regdom;
@@ -475,55 +475,26 @@ static void reg_regdb_apply(struct work_struct *work)
static DECLARE_WORK(reg_regdb_work, reg_regdb_apply);
-static int reg_query_builtin(const char *alpha2)
+static int reg_schedule_apply(const struct ieee80211_regdomain *regdom)
{
- const struct ieee80211_regdomain *regdom = NULL;
struct reg_regdb_apply_request *request;
- unsigned int i;
-
- for (i = 0; i < reg_regdb_size; i++) {
- if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) {
- regdom = reg_regdb[i];
- break;
- }
- }
-
- if (!regdom)
- return -ENODATA;
request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL);
- if (!request)
- return -ENOMEM;
-
- request->regdom = reg_copy_regd(regdom);
- if (IS_ERR_OR_NULL(request->regdom)) {
- kfree(request);
+ if (!request) {
+ kfree(regdom);
return -ENOMEM;
}
+ request->regdom = regdom;
+
mutex_lock(&reg_regdb_apply_mutex);
list_add_tail(&request->list, &reg_regdb_apply_list);
mutex_unlock(&reg_regdb_apply_mutex);
schedule_work(&reg_regdb_work);
-
return 0;
}
-/* Feel free to add any other sanity checks here */
-static void reg_regdb_size_check(void)
-{
- /* We should ideally BUILD_BUG_ON() but then random builds would fail */
- WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it...");
-}
-#else
-static inline void reg_regdb_size_check(void) {}
-static inline int reg_query_builtin(const char *alpha2)
-{
- return -ENODATA;
-}
-#endif /* CONFIG_CFG80211_INTERNAL_REGDB */
-
#ifdef CONFIG_CFG80211_CRDA_SUPPORT
/* Max number of consecutive attempts to communicate with CRDA */
#define REG_MAX_CRDA_TIMEOUTS 10
@@ -599,10 +570,402 @@ static inline int call_crda(const char *alpha2)
}
#endif /* CONFIG_CFG80211_CRDA_SUPPORT */
+/* code to directly load a firmware database through request_firmware */
+static const struct fwdb_header *regdb;
+
+struct fwdb_country {
+ u8 alpha2[2];
+ __be16 coll_ptr;
+ /* this struct cannot be extended */
+} __packed __aligned(4);
+
+struct fwdb_collection {
+ u8 len;
+ u8 n_rules;
+ u8 dfs_region;
+ /* no optional data yet */
+ /* aligned to 2, then followed by __be16 array of rule pointers */
+} __packed __aligned(4);
+
+enum fwdb_flags {
+ FWDB_FLAG_NO_OFDM = BIT(0),
+ FWDB_FLAG_NO_OUTDOOR = BIT(1),
+ FWDB_FLAG_DFS = BIT(2),
+ FWDB_FLAG_NO_IR = BIT(3),
+ FWDB_FLAG_AUTO_BW = BIT(4),
+};
+
+struct fwdb_rule {
+ u8 len;
+ u8 flags;
+ __be16 max_eirp;
+ __be32 start, end, max_bw;
+ /* start of optional data */
+ __be16 cac_timeout;
+} __packed __aligned(4);
+
+#define FWDB_MAGIC 0x52474442
+#define FWDB_VERSION 20
+
+struct fwdb_header {
+ __be32 magic;
+ __be32 version;
+ struct fwdb_country country[];
+} __packed __aligned(4);
+
+static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr)
+{
+ struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2));
+
+ if ((u8 *)rule + sizeof(rule->len) > data + size)
+ return false;
+
+ /* mandatory fields */
+ if (rule->len < offsetofend(struct fwdb_rule, max_bw))
+ return false;
+
+ return true;
+}
+
+static bool valid_country(const u8 *data, unsigned int size,
+ const struct fwdb_country *country)
+{
+ unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+ struct fwdb_collection *coll = (void *)(data + ptr);
+ __be16 *rules_ptr;
+ unsigned int i;
+
+ /* make sure we can read len/n_rules */
+ if ((u8 *)coll + offsetofend(typeof(*coll), n_rules) > data + size)
+ return false;
+
+ /* make sure base struct and all rules fit */
+ if ((u8 *)coll + ALIGN(coll->len, 2) +
+ (coll->n_rules * 2) > data + size)
+ return false;
+
+ /* mandatory fields must exist */
+ if (coll->len < offsetofend(struct fwdb_collection, dfs_region))
+ return false;
+
+ rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+
+ for (i = 0; i < coll->n_rules; i++) {
+ u16 rule_ptr = be16_to_cpu(rules_ptr[i]);
+
+ if (!valid_rule(data, size, rule_ptr))
+ return false;
+ }
+
+ return true;
+}
+
+#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB
+static struct key *builtin_regdb_keys;
+
+static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
+{
+ const u8 *end = p + buflen;
+ size_t plen;
+ key_ref_t key;
+
+ while (p < end) {
+ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+ * than 256 bytes in size.
+ */
+ if (end - p < 4)
+ goto dodgy_cert;
+ if (p[0] != 0x30 &&
+ p[1] != 0x82)
+ goto dodgy_cert;
+ plen = (p[2] << 8) | p[3];
+ plen += 4;
+ if (plen > end - p)
+ goto dodgy_cert;
+
+ key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
+ "asymmetric", NULL, p, plen,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA |
+ KEY_ALLOC_BUILT_IN |
+ KEY_ALLOC_BYPASS_RESTRICTION);
+ if (IS_ERR(key)) {
+ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+ PTR_ERR(key));
+ } else {
+ pr_notice("Loaded X.509 cert '%s'\n",
+ key_ref_to_ptr(key)->description);
+ key_ref_put(key);
+ }
+ p += plen;
+ }
+
+ return;
+
+dodgy_cert:
+ pr_err("Problem parsing in-kernel X.509 certificate list\n");
+}
+
+static int __init load_builtin_regdb_keys(void)
+{
+ builtin_regdb_keys =
+ keyring_alloc(".builtin_regdb_keys",
+ KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
+ KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+ if (IS_ERR(builtin_regdb_keys))
+ return PTR_ERR(builtin_regdb_keys);
+
+ pr_notice("Loading compiled-in X.509 certificates for regulatory database\n");
+
+#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS
+ load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len);
+#endif
+#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR
+ if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0')
+ load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len);
+#endif
+
+ return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+ const struct firmware *sig;
+ bool result;
+
+ if (request_firmware(&sig, "regulatory.db.p7s", &reg_pdev->dev))
+ return false;
+
+ result = verify_pkcs7_signature(data, size, sig->data, sig->size,
+ builtin_regdb_keys,
+ VERIFYING_UNSPECIFIED_SIGNATURE,
+ NULL, NULL) == 0;
+
+ release_firmware(sig);
+
+ return result;
+}
+
+static void free_regdb_keyring(void)
+{
+ key_put(builtin_regdb_keys);
+}
+#else
+static int load_builtin_regdb_keys(void)
+{
+ return 0;
+}
+
+static bool regdb_has_valid_signature(const u8 *data, unsigned int size)
+{
+ return true;
+}
+
+static void free_regdb_keyring(void)
+{
+}
+#endif /* CONFIG_CFG80211_REQUIRE_SIGNED_REGDB */
+
+static bool valid_regdb(const u8 *data, unsigned int size)
+{
+ const struct fwdb_header *hdr = (void *)data;
+ const struct fwdb_country *country;
+
+ if (size < sizeof(*hdr))
+ return false;
+
+ if (hdr->magic != cpu_to_be32(FWDB_MAGIC))
+ return false;
+
+ if (hdr->version != cpu_to_be32(FWDB_VERSION))
+ return false;
+
+ if (!regdb_has_valid_signature(data, size))
+ return false;
+
+ country = &hdr->country[0];
+ while ((u8 *)(country + 1) <= data + size) {
+ if (!country->coll_ptr)
+ break;
+ if (!valid_country(data, size, country))
+ return false;
+ country++;
+ }
+
+ return true;
+}
+
+static int regdb_query_country(const struct fwdb_header *db,
+ const struct fwdb_country *country)
+{
+ unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
+ struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
+ struct ieee80211_regdomain *regdom;
+ unsigned int size_of_regd;
+ unsigned int i;
+
+ size_of_regd =
+ sizeof(struct ieee80211_regdomain) +
+ coll->n_rules * sizeof(struct ieee80211_reg_rule);
+
+ regdom = kzalloc(size_of_regd, GFP_KERNEL);
+ if (!regdom)
+ return -ENOMEM;
+
+ regdom->n_reg_rules = coll->n_rules;
+ regdom->alpha2[0] = country->alpha2[0];
+ regdom->alpha2[1] = country->alpha2[1];
+ regdom->dfs_region = coll->dfs_region;
+
+ for (i = 0; i < regdom->n_reg_rules; i++) {
+ __be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2));
+ unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2;
+ struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr);
+ struct ieee80211_reg_rule *rrule = &regdom->reg_rules[i];
+
+ rrule->freq_range.start_freq_khz = be32_to_cpu(rule->start);
+ rrule->freq_range.end_freq_khz = be32_to_cpu(rule->end);
+ rrule->freq_range.max_bandwidth_khz = be32_to_cpu(rule->max_bw);
+
+ rrule->power_rule.max_antenna_gain = 0;
+ rrule->power_rule.max_eirp = be16_to_cpu(rule->max_eirp);
+
+ rrule->flags = 0;
+ if (rule->flags & FWDB_FLAG_NO_OFDM)
+ rrule->flags |= NL80211_RRF_NO_OFDM;
+ if (rule->flags & FWDB_FLAG_NO_OUTDOOR)
+ rrule->flags |= NL80211_RRF_NO_OUTDOOR;
+ if (rule->flags & FWDB_FLAG_DFS)
+ rrule->flags |= NL80211_RRF_DFS;
+ if (rule->flags & FWDB_FLAG_NO_IR)
+ rrule->flags |= NL80211_RRF_NO_IR;
+ if (rule->flags & FWDB_FLAG_AUTO_BW)
+ rrule->flags |= NL80211_RRF_AUTO_BW;
+
+ rrule->dfs_cac_ms = 0;
+
+ /* handle optional data */
+ if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout))
+ rrule->dfs_cac_ms =
+ 1000 * be16_to_cpu(rule->cac_timeout);
+ }
+
+ return reg_schedule_apply(regdom);
+}
+
+static int query_regdb(const char *alpha2)
+{
+ const struct fwdb_header *hdr = regdb;
+ const struct fwdb_country *country;
+
+ ASSERT_RTNL();
+
+ if (IS_ERR(regdb))
+ return PTR_ERR(regdb);
+
+ country = &hdr->country[0];
+ while (country->coll_ptr) {
+ if (alpha2_equal(alpha2, country->alpha2))
+ return regdb_query_country(regdb, country);
+ country++;
+ }
+
+ return -ENODATA;
+}
+
+static void regdb_fw_cb(const struct firmware *fw, void *context)
+{
+ int set_error = 0;
+ bool restore = true;
+ void *db;
+
+ if (!fw) {
+ pr_info("failed to load regulatory.db\n");
+ set_error = -ENODATA;
+ } else if (!valid_regdb(fw->data, fw->size)) {
+ pr_info("loaded regulatory.db is malformed or signature is missing/invalid\n");
+ set_error = -EINVAL;
+ }
+
+ rtnl_lock();
+ if (WARN_ON(regdb && !IS_ERR(regdb))) {
+ /* just restore and free new db */
+ } else if (set_error) {
+ regdb = ERR_PTR(set_error);
+ } else if (fw) {
+ db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+ if (db) {
+ regdb = db;
+ restore = context && query_regdb(context);
+ } else {
+ restore = true;
+ }
+ }
+
+ if (restore)
+ restore_regulatory_settings(true);
+
+ rtnl_unlock();
+
+ kfree(context);
+
+ release_firmware(fw);
+}
+
+static int query_regdb_file(const char *alpha2)
+{
+ ASSERT_RTNL();
+
+ if (regdb)
+ return query_regdb(alpha2);
+
+ alpha2 = kmemdup(alpha2, 2, GFP_KERNEL);
+ if (!alpha2)
+ return -ENOMEM;
+
+ return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
+ &reg_pdev->dev, GFP_KERNEL,
+ (void *)alpha2, regdb_fw_cb);
+}
+
+int reg_reload_regdb(void)
+{
+ const struct firmware *fw;
+ void *db;
+ int err;
+
+ err = request_firmware(&fw, "regulatory.db", &reg_pdev->dev);
+ if (err)
+ return err;
+
+ if (!valid_regdb(fw->data, fw->size)) {
+ err = -ENODATA;
+ goto out;
+ }
+
+ db = kmemdup(fw->data, fw->size, GFP_KERNEL);
+ if (!db) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ rtnl_lock();
+ if (!IS_ERR_OR_NULL(regdb))
+ kfree(regdb);
+ regdb = db;
+ rtnl_unlock();
+
+ out:
+ release_firmware(fw);
+ return err;
+}
+
static bool reg_query_database(struct regulatory_request *request)
{
- /* query internal regulatory database (if it exists) */
- if (reg_query_builtin(request->alpha2) == 0)
+ if (query_regdb_file(request->alpha2) == 0)
return true;
if (call_crda(request->alpha2) == 0)
@@ -3285,6 +3648,10 @@ int __init regulatory_init(void)
{
int err = 0;
+ err = load_builtin_regdb_keys();
+ if (err)
+ return err;
+
reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
if (IS_ERR(reg_pdev))
return PTR_ERR(reg_pdev);
@@ -3293,8 +3660,6 @@ int __init regulatory_init(void)
spin_lock_init(&reg_pending_beacons_lock);
spin_lock_init(&reg_indoor_lock);
- reg_regdb_size_check();
-
rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom);
user_alpha2[0] = '9';
@@ -3360,4 +3725,9 @@ void regulatory_exit(void)
list_del(&reg_request->list);
kfree(reg_request);
}
+
+ if (!IS_ERR_OR_NULL(regdb))
+ kfree(regdb);
+
+ free_regdb_keyring();
}
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index ca7fedf2e7a1..9ceeb5f3a7cb 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,5 +1,8 @@
#ifndef __NET_WIRELESS_REG_H
#define __NET_WIRELESS_REG_H
+
+#include <net/cfg80211.h>
+
/*
* Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
*
@@ -179,4 +182,15 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy,
* @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1
*/
bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
+
+/**
+ * reg_reload_regdb - reload the regulatory.db firmware file
+ */
+int reg_reload_regdb(void);
+
+extern const u8 shipped_regdb_certs[];
+extern unsigned int shipped_regdb_certs_len;
+extern const u8 extra_regdb_certs[];
+extern unsigned int extra_regdb_certs_len;
+
#endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h
deleted file mode 100644
index 3279cfcefb0c..000000000000
--- a/net/wireless/regdb.h
+++ /dev/null
@@ -1,23 +0,0 @@
-#ifndef __REGDB_H__
-#define __REGDB_H__
-
-/*
- * Copyright 2009 John W. Linville <linville@tuxdriver.com>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-extern const struct ieee80211_regdomain *reg_regdb[];
-extern int reg_regdb_size;
-
-#endif /* __REGDB_H__ */
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 0a49b88070d0..f38ed490e42b 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -960,7 +960,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
ev->rm.resp_ie_len = info->resp_ie_len;
memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len);
ev->rm.bss = info->bss;
- ev->rm.authorized = info->authorized;
spin_lock_irqsave(&wdev->event_lock, flags);
list_add_tail(&ev->list, &wdev->event_list);
@@ -969,6 +968,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
}
EXPORT_SYMBOL(cfg80211_roamed);
+void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid)
+{
+ ASSERT_WDEV_LOCK(wdev);
+
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+ return;
+
+ if (WARN_ON(!wdev->current_bss) ||
+ WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid)))
+ return;
+
+ nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev,
+ bssid);
+}
+
+void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
+ gfp_t gfp)
+{
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct cfg80211_event *ev;
+ unsigned long flags;
+
+ if (WARN_ON(!bssid))
+ return;
+
+ ev = kzalloc(sizeof(*ev), gfp);
+ if (!ev)
+ return;
+
+ ev->type = EVENT_PORT_AUTHORIZED;
+ memcpy(ev->pa.bssid, bssid, ETH_ALEN);
+
+ /*
+ * Use the wdev event list so that if there are pending
+ * connected/roamed events, they will be reported first.
+ */
+ spin_lock_irqsave(&wdev->event_lock, flags);
+ list_add_tail(&ev->list, &wdev->event_list);
+ spin_unlock_irqrestore(&wdev->event_lock, flags);
+ queue_work(cfg80211_wq, &rdev->event_work);
+}
+EXPORT_SYMBOL(cfg80211_port_authorized);
+
void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
size_t ie_len, u16 reason, bool from_ap)
{
diff --git a/net/wireless/util.c b/net/wireless/util.c
index bcb1284c3415..ff21c314a609 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -157,32 +157,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
case NL80211_BAND_2GHZ:
want = 7;
for (i = 0; i < sband->n_bitrates; i++) {
- if (sband->bitrates[i].bitrate == 10) {
+ switch (sband->bitrates[i].bitrate) {
+ case 10:
+ case 20:
+ case 55:
+ case 110:
sband->bitrates[i].flags |=
IEEE80211_RATE_MANDATORY_B |
IEEE80211_RATE_MANDATORY_G;
want--;
- }
-
- if (sband->bitrates[i].bitrate == 20 ||
- sband->bitrates[i].bitrate == 55 ||
- sband->bitrates[i].bitrate == 110 ||
- sband->bitrates[i].bitrate == 60 ||
- sband->bitrates[i].bitrate == 120 ||
- sband->bitrates[i].bitrate == 240) {
+ break;
+ case 60:
+ case 120:
+ case 240:
sband->bitrates[i].flags |=
IEEE80211_RATE_MANDATORY_G;
want--;
- }
-
- if (sband->bitrates[i].bitrate != 10 &&
- sband->bitrates[i].bitrate != 20 &&
- sband->bitrates[i].bitrate != 55 &&
- sband->bitrates[i].bitrate != 110)
+ /* fall through */
+ default:
sband->bitrates[i].flags |=
IEEE80211_RATE_ERP_G;
+ break;
+ }
}
- WARN_ON(want != 0 && want != 3 && want != 6);
+ WARN_ON(want != 0 && want != 3);
break;
case NL80211_BAND_60GHZ:
/* check for mandatory HT MCS 1..4 */
@@ -529,121 +527,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
}
EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr);
-int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
- enum nl80211_iftype iftype,
- const u8 *bssid, bool qos)
-{
- struct ieee80211_hdr hdr;
- u16 hdrlen, ethertype;
- __le16 fc;
- const u8 *encaps_data;
- int encaps_len, skip_header_bytes;
- int nh_pos, h_pos;
- int head_need;
-
- if (unlikely(skb->len < ETH_HLEN))
- return -EINVAL;
-
- nh_pos = skb_network_header(skb) - skb->data;
- h_pos = skb_transport_header(skb) - skb->data;
-
- /* convert Ethernet header to proper 802.11 header (based on
- * operation mode) */
- ethertype = (skb->data[12] << 8) | skb->data[13];
- fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA);
-
- switch (iftype) {
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_AP_VLAN:
- case NL80211_IFTYPE_P2P_GO:
- fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
- /* DA BSSID SA */
- memcpy(hdr.addr1, skb->data, ETH_ALEN);
- memcpy(hdr.addr2, addr, ETH_ALEN);
- memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN);
- hdrlen = 24;
- break;
- case NL80211_IFTYPE_STATION:
- case NL80211_IFTYPE_P2P_CLIENT:
- fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
- /* BSSID SA DA */
- memcpy(hdr.addr1, bssid, ETH_ALEN);
- memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
- memcpy(hdr.addr3, skb->data, ETH_ALEN);
- hdrlen = 24;
- break;
- case NL80211_IFTYPE_OCB:
- case NL80211_IFTYPE_ADHOC:
- /* DA SA BSSID */
- memcpy(hdr.addr1, skb->data, ETH_ALEN);
- memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
- memcpy(hdr.addr3, bssid, ETH_ALEN);
- hdrlen = 24;
- break;
- default:
- return -EOPNOTSUPP;
- }
-
- if (qos) {
- fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
- hdrlen += 2;
- }
-
- hdr.frame_control = fc;
- hdr.duration_id = 0;
- hdr.seq_ctrl = 0;
-
- skip_header_bytes = ETH_HLEN;
- if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) {
- encaps_data = bridge_tunnel_header;
- encaps_len = sizeof(bridge_tunnel_header);
- skip_header_bytes -= 2;
- } else if (ethertype >= ETH_P_802_3_MIN) {
- encaps_data = rfc1042_header;
- encaps_len = sizeof(rfc1042_header);
- skip_header_bytes -= 2;
- } else {
- encaps_data = NULL;
- encaps_len = 0;
- }
-
- skb_pull(skb, skip_header_bytes);
- nh_pos -= skip_header_bytes;
- h_pos -= skip_header_bytes;
-
- head_need = hdrlen + encaps_len - skb_headroom(skb);
-
- if (head_need > 0 || skb_cloned(skb)) {
- head_need = max(head_need, 0);
- if (head_need)
- skb_orphan(skb);
-
- if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC))
- return -ENOMEM;
- }
-
- if (encaps_data) {
- memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
- nh_pos += encaps_len;
- h_pos += encaps_len;
- }
-
- memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
-
- nh_pos += hdrlen;
- h_pos += hdrlen;
-
- /* Update skb pointers to various headers since this modified frame
- * is going to go through Linux networking code that may potentially
- * need things like pointer to IP header. */
- skb_reset_mac_header(skb);
- skb_set_network_header(skb, nh_pos);
- skb_set_transport_header(skb, h_pos);
-
- return 0;
-}
-EXPORT_SYMBOL(ieee80211_data_from_8023);
-
static void
__frame_add_frag(struct sk_buff *skb, struct page *page,
void *ptr, int len, int size)
@@ -963,6 +846,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev)
case EVENT_STOPPED:
__cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev);
break;
+ case EVENT_PORT_AUTHORIZED:
+ __cfg80211_port_authorized(wdev, ev->pa.bssid);
+ break;
}
wdev_unlock(wdev);
@@ -1367,13 +1253,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len,
}
EXPORT_SYMBOL(cfg80211_get_p2p_attr);
-static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id)
+static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext)
{
int i;
- for (i = 0; i < n_ids; i++)
- if (ids[i] == id)
+ /* Make sure array values are legal */
+ if (WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION))
+ return false;
+
+ i = 0;
+ while (i < n_ids) {
+ if (ids[i] == WLAN_EID_EXTENSION) {
+ if (id_ext && (ids[i + 1] == id))
+ return true;
+
+ i += 2;
+ continue;
+ }
+
+ if (ids[i] == id && !id_ext)
return true;
+
+ i++;
+ }
return false;
}
@@ -1403,14 +1305,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen,
{
size_t pos = offset;
- while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) {
+ while (pos < ielen) {
+ u8 ext = 0;
+
+ if (ies[pos] == WLAN_EID_EXTENSION)
+ ext = 2;
+ if ((pos + ext) >= ielen)
+ break;
+
+ if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext],
+ ies[pos] == WLAN_EID_EXTENSION))
+ break;
+
if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) {
pos = skip_ie(ies, ielen, pos);
- while (pos < ielen &&
- !ieee80211_id_in_list(after_ric, n_after_ric,
- ies[pos]))
- pos = skip_ie(ies, ielen, pos);
+ while (pos < ielen) {
+ if (ies[pos] == WLAN_EID_EXTENSION)
+ ext = 2;
+ else
+ ext = 0;
+
+ if ((pos + ext) >= ielen)
+ break;
+
+ if (!ieee80211_id_in_list(after_ric,
+ n_after_ric,
+ ies[pos + ext],
+ ext == 2))
+ pos = skip_ie(ies, ielen, pos);
+ }
} else {
pos = skip_ie(ies, ielen, pos);
}
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index ac095936552d..ea87143314f3 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -374,9 +374,11 @@ static void __x25_destroy_socket(struct sock *);
/*
* handler for deferred kills.
*/
-static void x25_destroy_timer(unsigned long data)
+static void x25_destroy_timer(struct timer_list *t)
{
- x25_destroy_socket_from_timer((struct sock *)data);
+ struct sock *sk = from_timer(sk, t, sk_timer);
+
+ x25_destroy_socket_from_timer(sk);
}
/*
@@ -413,8 +415,7 @@ static void __x25_destroy_socket(struct sock *sk)
if (sk_has_allocations(sk)) {
/* Defer: outstanding buffers */
sk->sk_timer.expires = jiffies + 10 * HZ;
- sk->sk_timer.function = x25_destroy_timer;
- sk->sk_timer.data = (unsigned long)sk;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_destroy_timer;
add_timer(&sk->sk_timer);
} else {
/* drop last reference so sock_put will free */
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 997ff7b2509b..ad1734d36ed7 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -103,7 +103,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities,
*vc_fac_mask |= X25_MASK_REVERSE;
break;
}
-
+ /*fall through */
case X25_FAC_THROUGHPUT:
facilities->throughput = p[1];
*vc_fac_mask |= X25_MASK_THROUGHPUT;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 7ac50098a375..3c12cae32001 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -345,6 +345,7 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp
case X25_RESET_REQUEST:
x25_write_internal(sk, X25_RESET_CONFIRMATION);
+ /* fall through */
case X25_RESET_CONFIRMATION: {
x25_stop_timer(sk);
x25->condition = 0x00;
diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c
index 5c5db1a36399..1dfba3c23459 100644
--- a/net/x25/x25_timer.c
+++ b/net/x25/x25_timer.c
@@ -26,18 +26,17 @@
#include <net/tcp_states.h>
#include <net/x25.h>
-static void x25_heartbeat_expiry(unsigned long);
-static void x25_timer_expiry(unsigned long);
+static void x25_heartbeat_expiry(struct timer_list *t);
+static void x25_timer_expiry(struct timer_list *t);
void x25_init_timers(struct sock *sk)
{
struct x25_sock *x25 = x25_sk(sk);
- setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk);
+ timer_setup(&x25->timer, x25_timer_expiry, 0);
/* initialized by sock_init_data */
- sk->sk_timer.data = (unsigned long)sk;
- sk->sk_timer.function = &x25_heartbeat_expiry;
+ sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_heartbeat_expiry;
}
void x25_start_heartbeat(struct sock *sk)
@@ -93,9 +92,9 @@ unsigned long x25_display_timer(struct sock *sk)
return x25->timer.expires - jiffies;
}
-static void x25_heartbeat_expiry(unsigned long param)
+static void x25_heartbeat_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct sock *sk = from_timer(sk, t, sk_timer);
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */
@@ -160,9 +159,10 @@ static inline void x25_do_timer_expiry(struct sock * sk)
}
}
-static void x25_timer_expiry(unsigned long param)
+static void x25_timer_expiry(struct timer_list *t)
{
- struct sock *sk = (struct sock *)param;
+ struct x25_sock *x25 = from_timer(x25, t, timer);
+ struct sock *sk = &x25->sk;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f06253969972..4838329bb43a 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -57,7 +57,7 @@ static __read_mostly seqcount_t xfrm_policy_hash_generation;
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
-static void xfrm_policy_queue_process(unsigned long arg);
+static void xfrm_policy_queue_process(struct timer_list *t);
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
@@ -179,9 +179,9 @@ static inline unsigned long make_jiffies(long secs)
return secs*HZ;
}
-static void xfrm_policy_timer(unsigned long data)
+static void xfrm_policy_timer(struct timer_list *t)
{
- struct xfrm_policy *xp = (struct xfrm_policy *)data;
+ struct xfrm_policy *xp = from_timer(xp, t, timer);
unsigned long now = get_seconds();
long next = LONG_MAX;
int warn = 0;
@@ -267,10 +267,9 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
rwlock_init(&policy->lock);
refcount_set(&policy->refcnt, 1);
skb_queue_head_init(&policy->polq.hold_queue);
- setup_timer(&policy->timer, xfrm_policy_timer,
- (unsigned long)policy);
- setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
- (unsigned long)policy);
+ timer_setup(&policy->timer, xfrm_policy_timer, 0);
+ timer_setup(&policy->polq.hold_timer,
+ xfrm_policy_queue_process, 0);
}
return policy;
}
@@ -1852,12 +1851,12 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
return xdst;
}
-static void xfrm_policy_queue_process(unsigned long arg)
+static void xfrm_policy_queue_process(struct timer_list *t)
{
struct sk_buff *skb;
struct sock *sk;
struct dst_entry *dst;
- struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+ struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer);
struct net *net = xp_net(pol);
struct xfrm_policy_queue *pq = &pol->polq;
struct flowi fl;
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index cf17c7932a6e..ea2b9e6135f3 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example
hostprogs-y += load_sock_ops
hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
+hostprogs-y += xdp_redirect_cpu
hostprogs-y += xdp_monitor
hostprogs-y += syscall_tp
@@ -84,6 +85,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
+xdp_redirect_cpu-objs := bpf_load.o $(LIBBPF) xdp_redirect_cpu_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
@@ -127,8 +129,10 @@ always += tcp_bufs_kern.o
always += tcp_cong_kern.o
always += tcp_iw_kern.o
always += tcp_clamp_kern.o
+always += tcp_basertt_kern.o
always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
+always += xdp_redirect_cpu_kern.o
always += xdp_monitor_kern.o
always += syscall_tp_kern.o
@@ -169,6 +173,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
HOSTLOADLIBES_test_map_in_map += -lelf
HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
+HOSTLOADLIBES_xdp_redirect_cpu += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
@@ -177,6 +182,12 @@ HOSTLOADLIBES_syscall_tp += -lelf
LLC ?= llc
CLANG ?= clang
+# Detect that we're cross compiling and use the cross compiler
+ifdef CROSS_COMPILE
+HOSTCC = $(CROSS_COMPILE)gcc
+CLANG_ARCH_ARGS = -target $(ARCH)
+endif
+
# Trick to allow make to be run from this directory
all:
$(MAKE) -C ../../ $(CURDIR)/
@@ -224,9 +235,9 @@ $(obj)/tracex5_kern.o: $(obj)/syscall_nrs.h
$(obj)/%.o: $(src)/%.c
$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \
-I$(srctree)/tools/testing/selftests/bpf/ \
- -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \
- -Wno-compare-distinct-pointer-types \
+ -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
+ -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \
-Wno-gnu-variable-sized-type-not-at-end \
-Wno-address-of-packed-member -Wno-tautological-compare \
- -Wno-unknown-warning-option \
+ -Wno-unknown-warning-option $(CLANG_ARCH_ARGS) \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
diff --git a/samples/bpf/README.rst b/samples/bpf/README.rst
index 79f9a58f1872..5f27e4faca50 100644
--- a/samples/bpf/README.rst
+++ b/samples/bpf/README.rst
@@ -64,3 +64,13 @@ It is also possible to point make to the newly compiled 'llc' or
'clang' command via redefining LLC or CLANG on the make command line::
make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
+
+Cross compiling samples
+-----------------------
+In order to cross-compile, say for arm64 targets, export CROSS_COMPILE and ARCH
+environment variables before calling make. This will direct make to build
+samples for the cross target.
+
+export ARCH=arm64
+export CROSS_COMPILE="aarch64-linux-gnu-"
+make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 6aa50098dfb8..18b1c8dd0391 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -221,6 +221,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps,
int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
map_fd[i] = bpf_create_map_in_map_node(maps[i].def.type,
+ maps[i].name,
maps[i].def.key_size,
inner_map_fd,
maps[i].def.max_entries,
@@ -228,6 +229,7 @@ static int load_maps(struct bpf_map_data *maps, int nr_maps,
numa_node);
} else {
map_fd[i] = bpf_create_map_node(maps[i].def.type,
+ maps[i].name,
maps[i].def.key_size,
maps[i].def.value_size,
maps[i].def.max_entries,
diff --git a/samples/bpf/cgroup_helpers.c b/samples/bpf/cgroup_helpers.c
index 9d1be9426401..88bdcf4b1670 100644
--- a/samples/bpf/cgroup_helpers.c
+++ b/samples/bpf/cgroup_helpers.c
@@ -56,7 +56,7 @@ int setup_cgroup_environment(void)
return 1;
}
- if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
+ if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
log_err("mount cgroup2");
return 1;
}
@@ -163,7 +163,7 @@ int create_and_get_cgroup(char *path)
format_cgroup_path(cgroup_path, path);
if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
- log_err("mkdiring cgroup");
+ log_err("mkdiring cgroup %s .. %s", path, cgroup_path);
return 0;
}
diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 098c857f1eda..2b2ffb97018b 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -266,7 +266,7 @@ int stress_hash_map_lookup(struct pt_regs *ctx)
return 0;
}
-SEC("kprobe/sys_getpgrp")
+SEC("kprobe/sys_getppid")
int stress_array_map_lookup(struct pt_regs *ctx)
{
u32 key = 1, i;
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index f388254896f6..519d9af4b04a 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -137,6 +137,7 @@ static void do_test_lru(enum test_type test, int cpu)
inner_lru_map_fds[cpu] =
bpf_create_map_node(BPF_MAP_TYPE_LRU_HASH,
+ test_map_names[INNER_LRU_HASH_PREALLOC],
sizeof(uint32_t),
sizeof(long),
inner_lru_hash_size, 0,
@@ -282,7 +283,7 @@ static void test_array_lookup(int cpu)
start_time = time_get_ns();
for (i = 0; i < max_cnt; i++)
- syscall(__NR_getpgrp, 0);
+ syscall(__NR_getppid, 0);
printf("%d:array_lookup %lld lookups per sec\n",
cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
}
diff --git a/samples/bpf/tcp_basertt_kern.c b/samples/bpf/tcp_basertt_kern.c
new file mode 100644
index 000000000000..4bf4fc597db9
--- /dev/null
+++ b/samples/bpf/tcp_basertt_kern.c
@@ -0,0 +1,78 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * BPF program to set base_rtt to 80us when host is running TCP-NV and
+ * both hosts are in the same datacenter (as determined by IPv6 prefix).
+ *
+ * Use load_sock_ops to load this BPF program.
+ */
+
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <linux/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define DEBUG 1
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sockops")
+int bpf_basertt(struct bpf_sock_ops *skops)
+{
+ char cong[20];
+ char nv[] = "nv";
+ int rv = 0, n;
+ int op;
+
+ op = (int) skops->op;
+
+#ifdef DEBUG
+ bpf_printk("BPF command: %d\n", op);
+#endif
+
+ /* Check if both hosts are in the same datacenter. For this
+ * example they are if the 1st 5.5 bytes in the IPv6 address
+ * are the same.
+ */
+ if (skops->family == AF_INET6 &&
+ skops->local_ip6[0] == skops->remote_ip6[0] &&
+ (bpf_ntohl(skops->local_ip6[1]) & 0xfff00000) ==
+ (bpf_ntohl(skops->remote_ip6[1]) & 0xfff00000)) {
+ switch (op) {
+ case BPF_SOCK_OPS_BASE_RTT:
+ n = bpf_getsockopt(skops, SOL_TCP, TCP_CONGESTION,
+ cong, sizeof(cong));
+ if (!n && !__builtin_memcmp(cong, nv, sizeof(nv)+1)) {
+ /* Set base_rtt to 80us */
+ rv = 80;
+ } else if (n) {
+ rv = n;
+ } else {
+ rv = -1;
+ }
+ break;
+ default:
+ rv = -1;
+ }
+ } else {
+ rv = -1;
+ }
+#ifdef DEBUG
+ bpf_printk("Returning %d\n", rv);
+#endif
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tcp_bbf.readme b/samples/bpf/tcp_bbf.readme
new file mode 100644
index 000000000000..831fb601e3c9
--- /dev/null
+++ b/samples/bpf/tcp_bbf.readme
@@ -0,0 +1,26 @@
+This file describes how to run the tcp_*_kern.o tcp_bpf (or socket_ops)
+programs. These programs attach to a cgroupv2. The following commands create
+a cgroupv2 and attach a bash shell to the group.
+
+ mkdir -p /tmp/cgroupv2
+ mount -t cgroup2 none /tmp/cgroupv2
+ mkdir -p /tmp/cgroupv2/foo
+ bash
+ echo $$ >> /tmp/cgroupv2/foo/cgroup.procs
+
+Anything that runs under this shell belongs to the foo cgroupv2 To load
+(attach) one of the tcp_*_kern.o programs:
+
+ ./load_sock_ops -l /tmp/cgroupv2/foo tcp_basertt_kern.o
+
+If the "-l" flag is used, the load_sock_ops program will continue to run
+printing the BPF log buffer. The tcp_*_kern.o programs use special print
+functions to print logging information (if enabled by the ifdef).
+
+If using netperf/netserver to create traffic, you need to run them under the
+cgroupv2 to which the BPF programs are attached (i.e. under bash shell
+attached to the cgroupv2).
+
+To remove (unattach) a socket_ops BPF program from a cgroupv2:
+
+ ./load_sock_ops -r /tmp/cgroupv2/foo
diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c
index 3049b1f26267..3e8232cc04a8 100644
--- a/samples/bpf/test_cgrp2_attach2.c
+++ b/samples/bpf/test_cgrp2_attach2.c
@@ -30,7 +30,7 @@
#define FOO "/foo"
#define BAR "/foo/bar/"
-#define PING_CMD "ping -c1 -w1 127.0.0.1"
+#define PING_CMD "ping -c1 -w1 127.0.0.1 > /dev/null"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
@@ -55,8 +55,7 @@ static int prog_load(int verdict)
return ret;
}
-
-int main(int argc, char **argv)
+static int test_foo_bar(void)
{
int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
@@ -189,8 +188,223 @@ out:
close(bar);
cleanup_cgroup_environment();
if (!rc)
- printf("PASS\n");
+ printf("### override:PASS\n");
else
- printf("FAIL\n");
+ printf("### override:FAIL\n");
return rc;
}
+
+static int map_fd = -1;
+
+static int prog_load_cnt(int verdict, int val)
+{
+ if (map_fd < 0)
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (map_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
+ struct bpf_insn prog[] = {
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ int ret;
+
+ ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+ if (ret < 0) {
+ log_err("Loading program");
+ printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
+ return 0;
+ }
+ return ret;
+}
+
+
+static int test_multiprog(void)
+{
+ __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
+ int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
+ int drop_prog, allow_prog[6] = {}, rc = 0;
+ unsigned long long value;
+ int i = 0;
+
+ for (i = 0; i < 6; i++) {
+ allow_prog[i] = prog_load_cnt(1, 1 << i);
+ if (!allow_prog[i])
+ goto err;
+ }
+ drop_prog = prog_load_cnt(0, 1);
+ if (!drop_prog)
+ goto err;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cg1 = create_and_get_cgroup("/cg1");
+ if (!cg1)
+ goto err;
+ cg2 = create_and_get_cgroup("/cg1/cg2");
+ if (!cg2)
+ goto err;
+ cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
+ if (!cg3)
+ goto err;
+ cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
+ if (!cg4)
+ goto err;
+ cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
+ if (!cg5)
+ goto err;
+
+ if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
+ goto err;
+
+ if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ log_err("Attaching prog to cg1");
+ goto err;
+ }
+ if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ log_err("Unexpected success attaching the same prog to cg1");
+ goto err;
+ }
+ if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, 2)) {
+ log_err("Attaching prog2 to cg1");
+ goto err;
+ }
+ if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, 1)) {
+ log_err("Attaching prog to cg2");
+ goto err;
+ }
+ if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, 2)) {
+ log_err("Attaching prog to cg3");
+ goto err;
+ }
+ if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, 1)) {
+ log_err("Attaching prog to cg4");
+ goto err;
+ }
+ if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
+ log_err("Attaching prog to cg5");
+ goto err;
+ }
+ assert(system(PING_CMD) == 0);
+ assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+ assert(value == 1 + 2 + 8 + 32);
+
+ /* query the number of effective progs in cg5 */
+ assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+ NULL, NULL, &prog_cnt) == 0);
+ assert(prog_cnt == 4);
+ /* retrieve prog_ids of effective progs in cg5 */
+ assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+ &attach_flags, prog_ids, &prog_cnt) == 0);
+ assert(prog_cnt == 4);
+ assert(attach_flags == 0);
+ saved_prog_id = prog_ids[0];
+ /* check enospc handling */
+ prog_ids[0] = 0;
+ prog_cnt = 2;
+ assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+ &attach_flags, prog_ids, &prog_cnt) == -1 &&
+ errno == ENOSPC);
+ assert(prog_cnt == 4);
+ /* check that prog_ids are returned even when buffer is too small */
+ assert(prog_ids[0] == saved_prog_id);
+ /* retrieve prog_id of single attached prog in cg5 */
+ prog_ids[0] = 0;
+ assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
+ NULL, prog_ids, &prog_cnt) == 0);
+ assert(prog_cnt == 1);
+ assert(prog_ids[0] == saved_prog_id);
+
+ /* detach bottom program and ping again */
+ if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Detaching prog from cg5");
+ goto err;
+ }
+ value = 0;
+ assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+ assert(system(PING_CMD) == 0);
+ assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+ assert(value == 1 + 2 + 8 + 16);
+
+ /* detach 3rd from bottom program and ping again */
+ errno = 0;
+ if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Unexpected success on detach from cg3");
+ goto err;
+ }
+ if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Detaching from cg3");
+ goto err;
+ }
+ value = 0;
+ assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+ assert(system(PING_CMD) == 0);
+ assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+ assert(value == 1 + 2 + 16);
+
+ /* detach 2nd from bottom program and ping again */
+ if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
+ log_err("Detaching prog from cg4");
+ goto err;
+ }
+ value = 0;
+ assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
+ assert(system(PING_CMD) == 0);
+ assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
+ assert(value == 1 + 2 + 4);
+
+ prog_cnt = 4;
+ assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
+ &attach_flags, prog_ids, &prog_cnt) == 0);
+ assert(prog_cnt == 3);
+ assert(attach_flags == 0);
+ assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
+ NULL, prog_ids, &prog_cnt) == 0);
+ assert(prog_cnt == 0);
+ goto out;
+err:
+ rc = 1;
+
+out:
+ for (i = 0; i < 6; i++)
+ if (allow_prog[i] > 0)
+ close(allow_prog[i]);
+ close(cg1);
+ close(cg2);
+ close(cg3);
+ close(cg4);
+ close(cg5);
+ cleanup_cgroup_environment();
+ if (!rc)
+ printf("### multi:PASS\n");
+ else
+ printf("### multi:FAIL\n");
+ return rc;
+}
+
+int main(int argc, char **argv)
+{
+ int rc = 0;
+
+ rc = test_foo_bar();
+ if (rc)
+ return rc;
+
+ return test_multiprog();
+}
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index 41b6115a32eb..a77a583d94d4 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -37,10 +37,14 @@ struct bpf_map_def SEC("maps") stackmap = {
SEC("perf_event")
int bpf_prog1(struct bpf_perf_event_data *ctx)
{
+ char time_fmt1[] = "Time Enabled: %llu, Time Running: %llu";
+ char time_fmt2[] = "Get Time Failed, ErrCode: %d";
char fmt[] = "CPU-%d period %lld ip %llx";
u32 cpu = bpf_get_smp_processor_id();
+ struct bpf_perf_event_value value_buf;
struct key_t key;
u64 *val, one = 1;
+ int ret;
if (ctx->sample_period < 10000)
/* ignore warmup */
@@ -54,6 +58,12 @@ int bpf_prog1(struct bpf_perf_event_data *ctx)
return 0;
}
+ ret = bpf_perf_prog_read_value(ctx, (void *)&value_buf, sizeof(struct bpf_perf_event_value));
+ if (!ret)
+ bpf_trace_printk(time_fmt1, sizeof(time_fmt1), value_buf.enabled, value_buf.running);
+ else
+ bpf_trace_printk(time_fmt2, sizeof(time_fmt2), ret);
+
val = bpf_map_lookup_elem(&counts, &key);
if (val)
(*val)++;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index 7bd827b84a67..bf4f1b6d9a52 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -127,6 +127,9 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr)
int *pmu_fd = malloc(nr_cpus * sizeof(int));
int i, error = 0;
+ /* system wide perf event, no need to inherit */
+ attr->inherit = 0;
+
/* open perf_event on all cpus */
for (i = 0; i < nr_cpus; i++) {
pmu_fd[i] = sys_perf_event_open(attr, -1, i, -1, 0);
@@ -154,6 +157,11 @@ static void test_perf_event_task(struct perf_event_attr *attr)
{
int pmu_fd;
+ /* per task perf event, enable inherit so the "dd ..." command can be traced properly.
+ * Enabling inherit will cause bpf_perf_prog_read_time helper failure.
+ */
+ attr->inherit = 1;
+
/* open task bound event */
pmu_fd = sys_perf_event_open(attr, 0, -1, -1, 0);
if (pmu_fd < 0) {
@@ -175,14 +183,12 @@ static void test_bpf_perf_event(void)
.freq = 1,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
- .inherit = 1,
};
struct perf_event_attr attr_type_sw = {
.sample_freq = SAMPLE_FREQ,
.freq = 1,
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
- .inherit = 1,
};
struct perf_event_attr attr_hw_cache_l1d = {
.sample_freq = SAMPLE_FREQ,
@@ -192,7 +198,6 @@ static void test_bpf_perf_event(void)
PERF_COUNT_HW_CACHE_L1D |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
- .inherit = 1,
};
struct perf_event_attr attr_hw_cache_branch_miss = {
.sample_freq = SAMPLE_FREQ,
@@ -202,7 +207,6 @@ static void test_bpf_perf_event(void)
PERF_COUNT_HW_CACHE_BPU |
(PERF_COUNT_HW_CACHE_OP_READ << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
- .inherit = 1,
};
struct perf_event_attr attr_type_raw = {
.sample_freq = SAMPLE_FREQ,
@@ -210,7 +214,6 @@ static void test_bpf_perf_event(void)
.type = PERF_TYPE_RAW,
/* Intel Instruction Retired */
.config = 0xc0,
- .inherit = 1,
};
printf("Test HW_CPU_CYCLES\n");
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
index e7d180305974..46c557afac73 100644
--- a/samples/bpf/tracex6_kern.c
+++ b/samples/bpf/tracex6_kern.c
@@ -15,6 +15,12 @@ struct bpf_map_def SEC("maps") values = {
.value_size = sizeof(u64),
.max_entries = 64,
};
+struct bpf_map_def SEC("maps") values2 = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct bpf_perf_event_value),
+ .max_entries = 64,
+};
SEC("kprobe/htab_map_get_next_key")
int bpf_prog1(struct pt_regs *ctx)
@@ -37,5 +43,25 @@ int bpf_prog1(struct pt_regs *ctx)
return 0;
}
+SEC("kprobe/htab_map_lookup_elem")
+int bpf_prog2(struct pt_regs *ctx)
+{
+ u32 key = bpf_get_smp_processor_id();
+ struct bpf_perf_event_value *val, buf;
+ int error;
+
+ error = bpf_perf_event_read_value(&counters, key, &buf, sizeof(buf));
+ if (error)
+ return 0;
+
+ val = bpf_map_lookup_elem(&values2, &key);
+ if (val)
+ *val = buf;
+ else
+ bpf_map_update_elem(&values2, &key, &buf, BPF_NOEXIST);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index a05a99a0752f..3341a96fc046 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -22,6 +22,7 @@
static void check_on_cpu(int cpu, struct perf_event_attr *attr)
{
+ struct bpf_perf_event_value value2;
int pmu_fd, error = 0;
cpu_set_t set;
__u64 value;
@@ -46,8 +47,18 @@ static void check_on_cpu(int cpu, struct perf_event_attr *attr)
fprintf(stderr, "Value missing for CPU %d\n", cpu);
error = 1;
goto on_exit;
+ } else {
+ fprintf(stderr, "CPU %d: %llu\n", cpu, value);
+ }
+ /* The above bpf_map_lookup_elem should trigger the second kprobe */
+ if (bpf_map_lookup_elem(map_fd[2], &cpu, &value2)) {
+ fprintf(stderr, "Value2 missing for CPU %d\n", cpu);
+ error = 1;
+ goto on_exit;
+ } else {
+ fprintf(stderr, "CPU %d: counter: %llu, enabled: %llu, running: %llu\n", cpu,
+ value2.counter, value2.enabled, value2.running);
}
- fprintf(stderr, "CPU %d: %llu\n", cpu, value);
on_exit:
assert(bpf_map_delete_elem(map_fd[0], &cpu) == 0 || error);
diff --git a/samples/bpf/xdp_monitor_kern.c b/samples/bpf/xdp_monitor_kern.c
index 74f3fd8ed729..2fe2f761a0d0 100644
--- a/samples/bpf/xdp_monitor_kern.c
+++ b/samples/bpf/xdp_monitor_kern.c
@@ -13,23 +13,27 @@ struct bpf_map_def SEC("maps") redirect_err_cnt = {
/* TODO: have entries for all possible errno's */
};
+#define XDP_UNKNOWN XDP_REDIRECT + 1
+struct bpf_map_def SEC("maps") exception_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u64),
+ .max_entries = XDP_UNKNOWN + 1,
+};
+
/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
* Code in: kernel/include/trace/events/xdp.h
*/
struct xdp_redirect_ctx {
- unsigned short common_type; // offset:0; size:2; signed:0;
- unsigned char common_flags; // offset:2; size:1; signed:0;
- unsigned char common_preempt_count;// offset:3; size:1; signed:0;
- int common_pid; // offset:4; size:4; signed:1;
-
- int prog_id; // offset:8; size:4; signed:1;
- u32 act; // offset:12 size:4; signed:0;
- int ifindex; // offset:16 size:4; signed:1;
- int err; // offset:20 size:4; signed:1;
- int to_ifindex; // offset:24 size:4; signed:1;
- u32 map_id; // offset:28 size:4; signed:0;
- int map_index; // offset:32 size:4; signed:1;
-}; // offset:36
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int prog_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12 size:4; signed:0;
+ int ifindex; // offset:16 size:4; signed:1;
+ int err; // offset:20 size:4; signed:1;
+ int to_ifindex; // offset:24 size:4; signed:1;
+ u32 map_id; // offset:28 size:4; signed:0;
+ int map_index; // offset:32 size:4; signed:1;
+}; // offset:36
enum {
XDP_REDIRECT_SUCCESS = 0,
@@ -48,7 +52,7 @@ int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
cnt = bpf_map_lookup_elem(&redirect_err_cnt, &key);
if (!cnt)
- return 0;
+ return 1;
*cnt += 1;
return 0; /* Indicate event was filtered (no further processing)*/
@@ -86,3 +90,31 @@ int trace_xdp_redirect_map(struct xdp_redirect_ctx *ctx)
{
return xdp_redirect_collect_stat(ctx);
}
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct xdp_exception_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int prog_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int ifindex; // offset:16; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_exception")
+int trace_xdp_exception(struct xdp_exception_ctx *ctx)
+{
+ u64 *cnt;;
+ u32 key;
+
+ key = ctx->act;
+ if (key > XDP_REDIRECT)
+ key = XDP_UNKNOWN;
+
+ cnt = bpf_map_lookup_elem(&exception_cnt, &key);
+ if (!cnt)
+ return 1;
+ *cnt += 1;
+
+ return 0;
+}
diff --git a/samples/bpf/xdp_monitor_user.c b/samples/bpf/xdp_monitor_user.c
index b51b4f5e3257..eaba165b3549 100644
--- a/samples/bpf/xdp_monitor_user.c
+++ b/samples/bpf/xdp_monitor_user.c
@@ -20,6 +20,7 @@ static const char *__doc_err_only__=
#include <unistd.h>
#include <locale.h>
+#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>
@@ -61,7 +62,7 @@ static void usage(char *argv[])
}
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
-__u64 gettime(void)
+static __u64 gettime(void)
{
struct timespec t;
int res;
@@ -89,6 +90,23 @@ static const char *err2str(int err)
return redir_names[err];
return NULL;
}
+/* enum xdp_action */
+#define XDP_UNKNOWN XDP_REDIRECT + 1
+#define XDP_ACTION_MAX (XDP_UNKNOWN + 1)
+static const char *xdp_action_names[XDP_ACTION_MAX] = {
+ [XDP_ABORTED] = "XDP_ABORTED",
+ [XDP_DROP] = "XDP_DROP",
+ [XDP_PASS] = "XDP_PASS",
+ [XDP_TX] = "XDP_TX",
+ [XDP_REDIRECT] = "XDP_REDIRECT",
+ [XDP_UNKNOWN] = "XDP_UNKNOWN",
+};
+static const char *action2str(int action)
+{
+ if (action < XDP_ACTION_MAX)
+ return xdp_action_names[action];
+ return NULL;
+}
struct record {
__u64 counter;
@@ -97,6 +115,7 @@ struct record {
struct stats_record {
struct record xdp_redir[REDIR_RES_MAX];
+ struct record xdp_exception[XDP_ACTION_MAX];
};
static void stats_print_headers(bool err_only)
@@ -104,39 +123,72 @@ static void stats_print_headers(bool err_only)
if (err_only)
printf("\n%s\n", __doc_err_only__);
- printf("%-14s %-10s %-18s %-9s\n",
- "XDP_REDIRECT", "pps ", "pps-human-readable", "measure-period");
+ printf("%-14s %-11s %-10s %-18s %-9s\n",
+ "ACTION", "result", "pps ", "pps-human-readable", "measure-period");
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double) period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static double calc_pps(struct record *r, struct record *p, double period)
+{
+ __u64 packets = 0;
+ double pps = 0;
+
+ if (period > 0) {
+ packets = r->counter - p->counter;
+ pps = packets / period;
+ }
+ return pps;
}
static void stats_print(struct stats_record *rec,
struct stats_record *prev,
bool err_only)
{
+ double period = 0, pps = 0;
+ struct record *r, *p;
int i = 0;
+ char *fmt = "%-14s %-11s %-10.0f %'-18.0f %f\n";
+
+ /* tracepoint: xdp:xdp_redirect_* */
if (err_only)
i = REDIR_ERROR;
for (; i < REDIR_RES_MAX; i++) {
- struct record *r = &rec->xdp_redir[i];
- struct record *p = &prev->xdp_redir[i];
- __u64 period = 0;
- __u64 packets = 0;
- double pps = 0;
- double period_ = 0;
+ r = &rec->xdp_redir[i];
+ p = &prev->xdp_redir[i];
if (p->timestamp) {
- packets = r->counter - p->counter;
- period = r->timestamp - p->timestamp;
- if (period > 0) {
- period_ = ((double) period / NANOSEC_PER_SEC);
- pps = packets / period_;
- }
+ period = calc_period(r, p);
+ pps = calc_pps(r, p, period);
}
+ printf(fmt, "XDP_REDIRECT", err2str(i), pps, pps, period);
+ }
- printf("%-14s %-10.0f %'-18.0f %f\n",
- err2str(i), pps, pps, period_);
+ /* tracepoint: xdp:xdp_exception */
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ r = &rec->xdp_exception[i];
+ p = &prev->xdp_exception[i];
+ if (p->timestamp) {
+ period = calc_period(r, p);
+ pps = calc_pps(r, p, period);
+ }
+ if (pps > 0)
+ printf(fmt, action2str(i), "Exception",
+ pps, pps, period);
}
+ printf("\n");
}
static __u64 get_key32_value64_percpu(int fd, __u32 key)
@@ -160,25 +212,33 @@ static __u64 get_key32_value64_percpu(int fd, __u32 key)
return sum;
}
-static bool stats_collect(int fd, struct stats_record *rec)
+static bool stats_collect(struct stats_record *rec)
{
+ int fd;
int i;
/* TODO: Detect if someone unloaded the perf event_fd's, as
* this can happen by someone running perf-record -e
*/
+ fd = map_data[0].fd; /* map0: redirect_err_cnt */
for (i = 0; i < REDIR_RES_MAX; i++) {
rec->xdp_redir[i].timestamp = gettime();
rec->xdp_redir[i].counter = get_key32_value64_percpu(fd, i);
}
+
+ fd = map_data[1].fd; /* map1: exception_cnt */
+ for (i = 0; i < XDP_ACTION_MAX; i++) {
+ rec->xdp_exception[i].timestamp = gettime();
+ rec->xdp_exception[i].counter = get_key32_value64_percpu(fd, i);
+ }
+
return true;
}
static void stats_poll(int interval, bool err_only)
{
struct stats_record rec, prev;
- int map_fd;
memset(&rec, 0, sizeof(rec));
@@ -190,23 +250,24 @@ static void stats_poll(int interval, bool err_only)
printf("\n%s", __doc__);
/* TODO Need more advanced stats on error types */
- if (verbose)
- printf(" - Stats map: %s\n", map_data[0].name);
- map_fd = map_data[0].fd;
-
- stats_print_headers(err_only);
+ if (verbose) {
+ printf(" - Stats map0: %s\n", map_data[0].name);
+ printf(" - Stats map1: %s\n", map_data[1].name);
+ printf("\n");
+ }
fflush(stdout);
while (1) {
memcpy(&prev, &rec, sizeof(rec));
- stats_collect(map_fd, &rec);
+ stats_collect(&rec);
+ stats_print_headers(err_only);
stats_print(&rec, &prev, err_only);
fflush(stdout);
sleep(interval);
}
}
-void print_bpf_prog_info(void)
+static void print_bpf_prog_info(void)
{
int i;
@@ -235,6 +296,7 @@ void print_bpf_prog_info(void)
int main(int argc, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
int longindex = 0, opt;
int ret = EXIT_SUCCESS;
char bpf_obj_file[256];
@@ -265,13 +327,18 @@ int main(int argc, char **argv)
}
}
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return EXIT_FAILURE;
+ }
+
if (load_bpf_file(bpf_obj_file)) {
printf("ERROR - bpf_log_buf: %s", bpf_log_buf);
- return 1;
+ return EXIT_FAILURE;
}
if (!prog_fd[0]) {
printf("ERROR - load_bpf_file: %s\n", strerror(errno));
- return 1;
+ return EXIT_FAILURE;
}
if (debug) {
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
new file mode 100644
index 000000000000..303e9e7161f3
--- /dev/null
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -0,0 +1,609 @@
+/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
+ *
+ * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/if_vlan.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/ipv6.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include <uapi/linux/udp.h>
+
+#include <uapi/linux/bpf.h>
+#include "bpf_helpers.h"
+
+#define MAX_CPUS 12 /* WARNING - sync with _user.c */
+
+/* Special map type that can XDP_REDIRECT frames to another CPU */
+struct bpf_map_def SEC("maps") cpu_map = {
+ .type = BPF_MAP_TYPE_CPUMAP,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = MAX_CPUS,
+};
+
+/* Common stats data record to keep userspace more simple */
+struct datarec {
+ __u64 processed;
+ __u64 dropped;
+ __u64 issue;
+};
+
+/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
+ * feedback. Redirect TX errors can be caught via a tracepoint.
+ */
+struct bpf_map_def SEC("maps") rx_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+/* Used by trace point */
+struct bpf_map_def SEC("maps") redirect_err_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 2,
+ /* TODO: have entries for all possible errno's */
+};
+
+/* Used by trace point */
+struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = MAX_CPUS,
+};
+
+/* Used by trace point */
+struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+/* Set of maps controlling available CPU, and for iterating through
+ * selectable redirect CPUs.
+ */
+struct bpf_map_def SEC("maps") cpus_available = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = MAX_CPUS,
+};
+struct bpf_map_def SEC("maps") cpus_count = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1,
+};
+struct bpf_map_def SEC("maps") cpus_iterator = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(u32),
+ .max_entries = 1,
+};
+
+/* Used by trace point */
+struct bpf_map_def SEC("maps") exception_cnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(u32),
+ .value_size = sizeof(struct datarec),
+ .max_entries = 1,
+};
+
+/* Helper parse functions */
+
+/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
+ *
+ * Returns false on error and non-supported ether-type
+ */
+struct vlan_hdr {
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+static __always_inline
+bool parse_eth(struct ethhdr *eth, void *data_end,
+ u16 *eth_proto, u64 *l3_offset)
+{
+ u16 eth_type;
+ u64 offset;
+
+ offset = sizeof(*eth);
+ if ((void *)eth + offset > data_end)
+ return false;
+
+ eth_type = eth->h_proto;
+
+ /* Skip non 802.3 Ethertypes */
+ if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
+ return false;
+
+ /* Handle VLAN tagged packet */
+ if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
+ struct vlan_hdr *vlan_hdr;
+
+ vlan_hdr = (void *)eth + offset;
+ offset += sizeof(*vlan_hdr);
+ if ((void *)eth + offset > data_end)
+ return false;
+ eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+ }
+ /* TODO: Handle double VLAN tagged packet */
+
+ *eth_proto = ntohs(eth_type);
+ *l3_offset = offset;
+ return true;
+}
+
+static __always_inline
+u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off;
+ struct udphdr *udph;
+ u16 dport;
+
+ if (iph + 1 > data_end)
+ return 0;
+ if (!(iph->protocol == IPPROTO_UDP))
+ return 0;
+
+ udph = (void *)(iph + 1);
+ if (udph + 1 > data_end)
+ return 0;
+
+ dport = ntohs(udph->dest);
+ return dport;
+}
+
+static __always_inline
+int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct iphdr *iph = data + nh_off;
+
+ if (iph + 1 > data_end)
+ return 0;
+ return iph->protocol;
+}
+
+static __always_inline
+int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ipv6hdr *ip6h = data + nh_off;
+
+ if (ip6h + 1 > data_end)
+ return 0;
+ return ip6h->nexthdr;
+}
+
+SEC("xdp_cpu_map0")
+int xdp_prognum0_no_touch(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct datarec *rec;
+ u32 *cpu_selected;
+ u32 cpu_dest;
+ u32 key = 0;
+
+ /* Only use first entry in cpus_available */
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ /* Count RX packet in map */
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_ABORTED;
+ rec->processed++;
+
+ if (cpu_dest >= MAX_CPUS) {
+ rec->issue++;
+ return XDP_ABORTED;
+ }
+
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp_cpu_map1_touch_data")
+int xdp_prognum1_touch_data(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ u32 *cpu_selected;
+ u32 cpu_dest;
+ u16 eth_type;
+ u32 key = 0;
+
+ /* Only use first entry in cpus_available */
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ /* Validate packet length is minimum Eth header size */
+ if (eth + 1 > data_end)
+ return XDP_ABORTED;
+
+ /* Count RX packet in map */
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_ABORTED;
+ rec->processed++;
+
+ /* Read packet data, and use it (drop non 802.3 Ethertypes) */
+ eth_type = eth->h_proto;
+ if (ntohs(eth_type) < ETH_P_802_3_MIN) {
+ rec->dropped++;
+ return XDP_DROP;
+ }
+
+ if (cpu_dest >= MAX_CPUS) {
+ rec->issue++;
+ return XDP_ABORTED;
+ }
+
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp_cpu_map2_round_robin")
+int xdp_prognum2_round_robin(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ struct datarec *rec;
+ u32 cpu_dest;
+ u32 *cpu_lookup;
+ u32 key0 = 0;
+
+ u32 *cpu_selected;
+ u32 *cpu_iterator;
+ u32 *cpu_max;
+ u32 cpu_idx;
+
+ cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+ if (!cpu_max)
+ return XDP_ABORTED;
+
+ cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
+ if (!cpu_iterator)
+ return XDP_ABORTED;
+ cpu_idx = *cpu_iterator;
+
+ *cpu_iterator += 1;
+ if (*cpu_iterator == *cpu_max)
+ *cpu_iterator = 0;
+
+ cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_selected)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_selected;
+
+ /* Count RX packet in map */
+ rec = bpf_map_lookup_elem(&rx_cnt, &key0);
+ if (!rec)
+ return XDP_ABORTED;
+ rec->processed++;
+
+ if (cpu_dest >= MAX_CPUS) {
+ rec->issue++;
+ return XDP_ABORTED;
+ }
+
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp_cpu_map3_proto_separate")
+int xdp_prognum3_proto_separate(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u8 ip_proto = IPPROTO_UDP;
+ struct datarec *rec;
+ u16 eth_proto = 0;
+ u64 l3_offset = 0;
+ u32 cpu_dest = 0;
+ u32 cpu_idx = 0;
+ u32 *cpu_lookup;
+ u32 key = 0;
+
+ /* Count RX packet in map */
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_ABORTED;
+ rec->processed++;
+
+ if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+ return XDP_PASS; /* Just skip */
+
+ /* Extract L4 protocol */
+ switch (eth_proto) {
+ case ETH_P_IP:
+ ip_proto = get_proto_ipv4(ctx, l3_offset);
+ break;
+ case ETH_P_IPV6:
+ ip_proto = get_proto_ipv6(ctx, l3_offset);
+ break;
+ case ETH_P_ARP:
+ cpu_idx = 0; /* ARP packet handled on separate CPU */
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ /* Choose CPU based on L4 protocol */
+ switch (ip_proto) {
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ cpu_idx = 2;
+ break;
+ case IPPROTO_TCP:
+ cpu_idx = 0;
+ break;
+ case IPPROTO_UDP:
+ cpu_idx = 1;
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_lookup)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_lookup;
+
+ if (cpu_dest >= MAX_CPUS) {
+ rec->issue++;
+ return XDP_ABORTED;
+ }
+
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp_cpu_map4_ddos_filter_pktgen")
+int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ u8 ip_proto = IPPROTO_UDP;
+ struct datarec *rec;
+ u16 eth_proto = 0;
+ u64 l3_offset = 0;
+ u32 cpu_dest = 0;
+ u32 cpu_idx = 0;
+ u16 dest_port;
+ u32 *cpu_lookup;
+ u32 key = 0;
+
+ /* Count RX packet in map */
+ rec = bpf_map_lookup_elem(&rx_cnt, &key);
+ if (!rec)
+ return XDP_ABORTED;
+ rec->processed++;
+
+ if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+ return XDP_PASS; /* Just skip */
+
+ /* Extract L4 protocol */
+ switch (eth_proto) {
+ case ETH_P_IP:
+ ip_proto = get_proto_ipv4(ctx, l3_offset);
+ break;
+ case ETH_P_IPV6:
+ ip_proto = get_proto_ipv6(ctx, l3_offset);
+ break;
+ case ETH_P_ARP:
+ cpu_idx = 0; /* ARP packet handled on separate CPU */
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ /* Choose CPU based on L4 protocol */
+ switch (ip_proto) {
+ case IPPROTO_ICMP:
+ case IPPROTO_ICMPV6:
+ cpu_idx = 2;
+ break;
+ case IPPROTO_TCP:
+ cpu_idx = 0;
+ break;
+ case IPPROTO_UDP:
+ cpu_idx = 1;
+ /* DDoS filter UDP port 9 (pktgen) */
+ dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
+ if (dest_port == 9) {
+ if (rec)
+ rec->dropped++;
+ return XDP_DROP;
+ }
+ break;
+ default:
+ cpu_idx = 0;
+ }
+
+ cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+ if (!cpu_lookup)
+ return XDP_ABORTED;
+ cpu_dest = *cpu_lookup;
+
+ if (cpu_dest >= MAX_CPUS) {
+ rec->issue++;
+ return XDP_ABORTED;
+ }
+
+ return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+
+char _license[] SEC("license") = "GPL";
+
+/*** Trace point code ***/
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct xdp_redirect_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int prog_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12 size:4; signed:0;
+ int ifindex; // offset:16 size:4; signed:1;
+ int err; // offset:20 size:4; signed:1;
+ int to_ifindex; // offset:24 size:4; signed:1;
+ u32 map_id; // offset:28 size:4; signed:0;
+ int map_index; // offset:32 size:4; signed:1;
+}; // offset:36
+
+enum {
+ XDP_REDIRECT_SUCCESS = 0,
+ XDP_REDIRECT_ERROR = 1
+};
+
+static __always_inline
+int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
+{
+ u32 key = XDP_REDIRECT_ERROR;
+ struct datarec *rec;
+ int err = ctx->err;
+
+ if (!err)
+ key = XDP_REDIRECT_SUCCESS;
+
+ rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
+ if (!rec)
+ return 0;
+ rec->dropped += 1;
+
+ return 0; /* Indicate event was filtered (no further processing)*/
+ /*
+ * Returning 1 here would allow e.g. a perf-record tracepoint
+ * to see and record these events, but it doesn't work well
+ * in-practice as stopping perf-record also unload this
+ * bpf_prog. Plus, there is additional overhead of doing so.
+ */
+}
+
+SEC("tracepoint/xdp/xdp_redirect_err")
+int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
+{
+ return xdp_redirect_collect_stat(ctx);
+}
+
+SEC("tracepoint/xdp/xdp_redirect_map_err")
+int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
+{
+ return xdp_redirect_collect_stat(ctx);
+}
+
+/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct xdp_exception_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int prog_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int ifindex; // offset:16; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_exception")
+int trace_xdp_exception(struct xdp_exception_ctx *ctx)
+{
+ struct datarec *rec;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&exception_cnt, &key);
+ if (!rec)
+ return 1;
+ rec->dropped += 1;
+
+ return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct cpumap_enqueue_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int to_cpu; // offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_enqueue")
+int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
+{
+ u32 to_cpu = ctx->to_cpu;
+ struct datarec *rec;
+
+ if (to_cpu >= MAX_CPUS)
+ return 1;
+
+ rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
+ if (!rec)
+ return 0;
+ rec->processed += ctx->processed;
+ rec->dropped += ctx->drops;
+
+ /* Record bulk events, then userspace can calc average bulk size */
+ if (ctx->processed > 0)
+ rec->issue += 1;
+
+ /* Inception: It's possible to detect overload situations, via
+ * this tracepoint. This can be used for creating a feedback
+ * loop to XDP, which can take appropriate actions to mitigate
+ * this overload situation.
+ */
+ return 0;
+}
+
+/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
+ * Code in: kernel/include/trace/events/xdp.h
+ */
+struct cpumap_kthread_ctx {
+ u64 __pad; // First 8 bytes are not accessible by bpf code
+ int map_id; // offset:8; size:4; signed:1;
+ u32 act; // offset:12; size:4; signed:0;
+ int cpu; // offset:16; size:4; signed:1;
+ unsigned int drops; // offset:20; size:4; signed:0;
+ unsigned int processed; // offset:24; size:4; signed:0;
+ int sched; // offset:28; size:4; signed:1;
+};
+
+SEC("tracepoint/xdp/xdp_cpumap_kthread")
+int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
+{
+ struct datarec *rec;
+ u32 key = 0;
+
+ rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
+ if (!rec)
+ return 0;
+ rec->processed += ctx->processed;
+ rec->dropped += ctx->drops;
+
+ /* Count times kthread yielded CPU via schedule call */
+ if (ctx->sched)
+ rec->issue++;
+
+ return 0;
+}
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
new file mode 100644
index 000000000000..35fec9fecb57
--- /dev/null
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -0,0 +1,697 @@
+/* GPLv2 Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+static const char *__doc__ =
+ " XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <locale.h>
+#include <sys/resource.h>
+#include <getopt.h>
+#include <net/if.h>
+#include <time.h>
+
+#include <arpa/inet.h>
+#include <linux/if_link.h>
+
+#define MAX_CPUS 12 /* WARNING - sync with _kern.c */
+
+/* How many xdp_progs are defined in _kern.c */
+#define MAX_PROG 5
+
+/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
+ * use bpf/libbpf.h), but cannot as (currently) needed for XDP
+ * attaching to a device via set_link_xdp_fd()
+ */
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#include "bpf_util.h"
+
+static int ifindex = -1;
+static char ifname_buf[IF_NAMESIZE];
+static char *ifname;
+
+static __u32 xdp_flags;
+
+/* Exit return codes */
+#define EXIT_OK 0
+#define EXIT_FAIL 1
+#define EXIT_FAIL_OPTION 2
+#define EXIT_FAIL_XDP 3
+#define EXIT_FAIL_BPF 4
+#define EXIT_FAIL_MEM 5
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"dev", required_argument, NULL, 'd' },
+ {"skb-mode", no_argument, NULL, 'S' },
+ {"debug", no_argument, NULL, 'D' },
+ {"sec", required_argument, NULL, 's' },
+ {"prognum", required_argument, NULL, 'p' },
+ {"qsize", required_argument, NULL, 'q' },
+ {"cpu", required_argument, NULL, 'c' },
+ {"stress-mode", no_argument, NULL, 'x' },
+ {"no-separators", no_argument, NULL, 'z' },
+ {0, 0, NULL, 0 }
+};
+
+static void int_exit(int sig)
+{
+ fprintf(stderr,
+ "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
+ ifindex, ifname);
+ if (ifindex > -1)
+ set_link_xdp_fd(ifindex, -1, xdp_flags);
+ exit(EXIT_OK);
+}
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf("\nDOCUMENTATION:\n%s\n", __doc__);
+ printf("\n");
+ printf(" Usage: %s (options-see-below)\n", argv[0]);
+ printf(" Listing options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)",
+ *long_options[i].flag);
+ else
+ printf(" short-option: -%c",
+ long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+/* gettime returns the current time of day in nanoseconds.
+ * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
+ * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE)
+ */
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(void)
+{
+ struct timespec t;
+ int res;
+
+ res = clock_gettime(CLOCK_MONOTONIC, &t);
+ if (res < 0) {
+ fprintf(stderr, "Error with gettimeofday! (%i)\n", res);
+ exit(EXIT_FAIL);
+ }
+ return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+/* Common stats data record shared with _kern.c */
+struct datarec {
+ __u64 processed;
+ __u64 dropped;
+ __u64 issue;
+};
+struct record {
+ __u64 timestamp;
+ struct datarec total;
+ struct datarec *cpu;
+};
+struct stats_record {
+ struct record rx_cnt;
+ struct record redir_err;
+ struct record kthread;
+ struct record exception;
+ struct record enq[MAX_CPUS];
+};
+
+static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
+{
+ /* For percpu maps, userspace gets a value per possible CPU */
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec values[nr_cpus];
+ __u64 sum_processed = 0;
+ __u64 sum_dropped = 0;
+ __u64 sum_issue = 0;
+ int i;
+
+ if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
+ fprintf(stderr,
+ "ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
+ return false;
+ }
+ /* Get time as close as possible to reading map contents */
+ rec->timestamp = gettime();
+
+ /* Record and sum values from each CPU */
+ for (i = 0; i < nr_cpus; i++) {
+ rec->cpu[i].processed = values[i].processed;
+ sum_processed += values[i].processed;
+ rec->cpu[i].dropped = values[i].dropped;
+ sum_dropped += values[i].dropped;
+ rec->cpu[i].issue = values[i].issue;
+ sum_issue += values[i].issue;
+ }
+ rec->total.processed = sum_processed;
+ rec->total.dropped = sum_dropped;
+ rec->total.issue = sum_issue;
+ return true;
+}
+
+static struct datarec *alloc_record_per_cpu(void)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct datarec *array;
+ size_t size;
+
+ size = sizeof(struct datarec) * nr_cpus;
+ array = malloc(size);
+ memset(array, 0, size);
+ if (!array) {
+ fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
+ exit(EXIT_FAIL_MEM);
+ }
+ return array;
+}
+
+static struct stats_record *alloc_stats_record(void)
+{
+ struct stats_record *rec;
+ int i;
+
+ rec = malloc(sizeof(*rec));
+ memset(rec, 0, sizeof(*rec));
+ if (!rec) {
+ fprintf(stderr, "Mem alloc error\n");
+ exit(EXIT_FAIL_MEM);
+ }
+ rec->rx_cnt.cpu = alloc_record_per_cpu();
+ rec->redir_err.cpu = alloc_record_per_cpu();
+ rec->kthread.cpu = alloc_record_per_cpu();
+ rec->exception.cpu = alloc_record_per_cpu();
+ for (i = 0; i < MAX_CPUS; i++)
+ rec->enq[i].cpu = alloc_record_per_cpu();
+
+ return rec;
+}
+
+static void free_stats_record(struct stats_record *r)
+{
+ int i;
+
+ for (i = 0; i < MAX_CPUS; i++)
+ free(r->enq[i].cpu);
+ free(r->exception.cpu);
+ free(r->kthread.cpu);
+ free(r->redir_err.cpu);
+ free(r->rx_cnt.cpu);
+ free(r);
+}
+
+static double calc_period(struct record *r, struct record *p)
+{
+ double period_ = 0;
+ __u64 period = 0;
+
+ period = r->timestamp - p->timestamp;
+ if (period > 0)
+ period_ = ((double) period / NANOSEC_PER_SEC);
+
+ return period_;
+}
+
+static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->processed - p->processed;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->dropped - p->dropped;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static __u64 calc_errs_pps(struct datarec *r,
+ struct datarec *p, double period_)
+{
+ __u64 packets = 0;
+ __u64 pps = 0;
+
+ if (period_ > 0) {
+ packets = r->issue - p->issue;
+ pps = packets / period_;
+ }
+ return pps;
+}
+
+static void stats_print(struct stats_record *stats_rec,
+ struct stats_record *stats_prev,
+ int prog_num)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ double pps = 0, drop = 0, err = 0;
+ struct record *rec, *prev;
+ int to_cpu;
+ double t;
+ int i;
+
+ /* Header */
+ printf("Running XDP/eBPF prog_num:%d\n", prog_num);
+ printf("%-15s %-7s %-14s %-11s %-9s\n",
+ "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
+
+ /* XDP rx_cnt */
+ {
+ char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
+ char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
+ char *errstr = "";
+
+ rec = &stats_rec->rx_cnt;
+ prev = &stats_prev->rx_cnt;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0)
+ errstr = "cpu-dest/err";
+ if (pps > 0)
+ printf(fmt_rx, "XDP-RX",
+ i, pps, drop, err, errstr);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ printf(fm2_rx, "XDP-RX", "total", pps, drop);
+ }
+
+ /* cpumap enqueue stats */
+ for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
+ char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
+ char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
+ char *errstr = "";
+
+ rec = &stats_rec->enq[to_cpu];
+ prev = &stats_prev->enq[to_cpu];
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0) {
+ errstr = "bulk-average";
+ err = pps / err; /* calc average bulk size */
+ }
+ if (pps > 0)
+ printf(fmt, "cpumap-enqueue",
+ i, to_cpu, pps, drop, err, errstr);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ if (pps > 0) {
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ if (err > 0) {
+ errstr = "bulk-average";
+ err = pps / err; /* calc average bulk size */
+ }
+ printf(fm2, "cpumap-enqueue",
+ "sum", to_cpu, pps, drop, err, errstr);
+ }
+ }
+
+ /* cpumap kthread stats */
+ {
+ char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
+ char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
+ char *e_str = "";
+
+ rec = &stats_rec->kthread;
+ prev = &stats_prev->kthread;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ err = calc_errs_pps(r, p, t);
+ if (err > 0)
+ e_str = "sched";
+ if (pps > 0)
+ printf(fmt_k, "cpumap_kthread",
+ i, pps, drop, err, e_str);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ err = calc_errs_pps(&rec->total, &prev->total, t);
+ if (err > 0)
+ e_str = "sched-sum";
+ printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
+ }
+
+ /* XDP redirect err tracepoints (very unlikely) */
+ {
+ char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
+ char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
+
+ rec = &stats_rec->redir_err;
+ prev = &stats_prev->redir_err;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ if (pps > 0)
+ printf(fmt_err, "redirect_err", i, pps, drop);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ printf(fm2_err, "redirect_err", "total", pps, drop);
+ }
+
+ /* XDP general exception tracepoints */
+ {
+ char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
+ char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";
+
+ rec = &stats_rec->exception;
+ prev = &stats_prev->exception;
+ t = calc_period(rec, prev);
+ for (i = 0; i < nr_cpus; i++) {
+ struct datarec *r = &rec->cpu[i];
+ struct datarec *p = &prev->cpu[i];
+
+ pps = calc_pps(r, p, t);
+ drop = calc_drop_pps(r, p, t);
+ if (pps > 0)
+ printf(fmt_err, "xdp_exception", i, pps, drop);
+ }
+ pps = calc_pps(&rec->total, &prev->total, t);
+ drop = calc_drop_pps(&rec->total, &prev->total, t);
+ printf(fm2_err, "xdp_exception", "total", pps, drop);
+ }
+
+ printf("\n");
+ fflush(stdout);
+}
+
+static void stats_collect(struct stats_record *rec)
+{
+ int fd, i;
+
+ fd = map_fd[1]; /* map: rx_cnt */
+ map_collect_percpu(fd, 0, &rec->rx_cnt);
+
+ fd = map_fd[2]; /* map: redirect_err_cnt */
+ map_collect_percpu(fd, 1, &rec->redir_err);
+
+ fd = map_fd[3]; /* map: cpumap_enqueue_cnt */
+ for (i = 0; i < MAX_CPUS; i++)
+ map_collect_percpu(fd, i, &rec->enq[i]);
+
+ fd = map_fd[4]; /* map: cpumap_kthread_cnt */
+ map_collect_percpu(fd, 0, &rec->kthread);
+
+ fd = map_fd[8]; /* map: exception_cnt */
+ map_collect_percpu(fd, 0, &rec->exception);
+}
+
+
+/* Pointer swap trick */
+static inline void swap(struct stats_record **a, struct stats_record **b)
+{
+ struct stats_record *tmp;
+
+ tmp = *a;
+ *a = *b;
+ *b = tmp;
+}
+
+static int create_cpu_entry(__u32 cpu, __u32 queue_size,
+ __u32 avail_idx, bool new)
+{
+ __u32 curr_cpus_count = 0;
+ __u32 key = 0;
+ int ret;
+
+ /* Add a CPU entry to cpumap, as this allocate a cpu entry in
+ * the kernel for the cpu.
+ */
+ ret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0);
+ if (ret) {
+ fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
+ exit(EXIT_FAIL_BPF);
+ }
+
+ /* Inform bpf_prog's that a new CPU is available to select
+ * from via some control maps.
+ */
+ /* map_fd[5] = cpus_available */
+ ret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0);
+ if (ret) {
+ fprintf(stderr, "Add to avail CPUs failed\n");
+ exit(EXIT_FAIL_BPF);
+ }
+
+ /* When not replacing/updating existing entry, bump the count */
+ /* map_fd[6] = cpus_count */
+ ret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count);
+ if (ret) {
+ fprintf(stderr, "Failed reading curr cpus_count\n");
+ exit(EXIT_FAIL_BPF);
+ }
+ if (new) {
+ curr_cpus_count++;
+ ret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0);
+ if (ret) {
+ fprintf(stderr, "Failed write curr cpus_count\n");
+ exit(EXIT_FAIL_BPF);
+ }
+ }
+ /* map_fd[7] = cpus_iterator */
+ printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
+ new ? "Add-new":"Replace", cpu, avail_idx,
+ queue_size, curr_cpus_count);
+
+ return 0;
+}
+
+/* CPUs are zero-indexed. Thus, add a special sentinel default value
+ * in map cpus_available to mark CPU index'es not configured
+ */
+static void mark_cpus_unavailable(void)
+{
+ __u32 invalid_cpu = MAX_CPUS;
+ int ret, i;
+
+ for (i = 0; i < MAX_CPUS; i++) {
+ /* map_fd[5] = cpus_available */
+ ret = bpf_map_update_elem(map_fd[5], &i, &invalid_cpu, 0);
+ if (ret) {
+ fprintf(stderr, "Failed marking CPU unavailable\n");
+ exit(EXIT_FAIL_BPF);
+ }
+ }
+}
+
+/* Stress cpumap management code by concurrently changing underlying cpumap */
+static void stress_cpumap(void)
+{
+ /* Changing qsize will cause kernel to free and alloc a new
+ * bpf_cpu_map_entry, with an associated/complicated tear-down
+ * procedure.
+ */
+ create_cpu_entry(1, 1024, 0, false);
+ create_cpu_entry(1, 128, 0, false);
+ create_cpu_entry(1, 16000, 0, false);
+}
+
+static void stats_poll(int interval, bool use_separators, int prog_num,
+ bool stress_mode)
+{
+ struct stats_record *record, *prev;
+
+ record = alloc_stats_record();
+ prev = alloc_stats_record();
+ stats_collect(record);
+
+ /* Trick to pretty printf with thousands separators use %' */
+ if (use_separators)
+ setlocale(LC_NUMERIC, "en_US");
+
+ while (1) {
+ swap(&prev, &record);
+ stats_collect(record);
+ stats_print(record, prev, prog_num);
+ sleep(interval);
+ if (stress_mode)
+ stress_cpumap();
+ }
+
+ free_stats_record(record);
+ free_stats_record(prev);
+}
+
+int main(int argc, char **argv)
+{
+ struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
+ bool use_separators = true;
+ bool stress_mode = false;
+ char filename[256];
+ bool debug = false;
+ int added_cpus = 0;
+ int longindex = 0;
+ int interval = 2;
+ int prog_num = 0;
+ int add_cpu = -1;
+ __u32 qsize;
+ int opt;
+
+ /* Notice: choosing he queue size is very important with the
+ * ixgbe driver, because it's driver page recycling trick is
+ * dependend on pages being returned quickly. The number of
+ * out-standing packets in the system must be less-than 2x
+ * RX-ring size.
+ */
+ qsize = 128+64;
+
+ snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
+ if (load_bpf_file(filename)) {
+ fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
+ return EXIT_FAIL;
+ }
+
+ if (!prog_fd[0]) {
+ fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
+ return EXIT_FAIL;
+ }
+
+ mark_cpus_unavailable();
+
+ /* Parse commands line args */
+ while ((opt = getopt_long(argc, argv, "hSd:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'd':
+ if (strlen(optarg) >= IF_NAMESIZE) {
+ fprintf(stderr, "ERR: --dev name too long\n");
+ goto error;
+ }
+ ifname = (char *)&ifname_buf;
+ strncpy(ifname, optarg, IF_NAMESIZE);
+ ifindex = if_nametoindex(ifname);
+ if (ifindex == 0) {
+ fprintf(stderr,
+ "ERR: --dev name unknown err(%d):%s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ break;
+ case 's':
+ interval = atoi(optarg);
+ break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'D':
+ debug = true;
+ break;
+ case 'x':
+ stress_mode = true;
+ break;
+ case 'z':
+ use_separators = false;
+ break;
+ case 'p':
+ /* Selecting eBPF prog to load */
+ prog_num = atoi(optarg);
+ if (prog_num < 0 || prog_num >= MAX_PROG) {
+ fprintf(stderr,
+ "--prognum too large err(%d):%s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ break;
+ case 'c':
+ /* Add multiple CPUs */
+ add_cpu = strtoul(optarg, NULL, 0);
+ if (add_cpu >= MAX_CPUS) {
+ fprintf(stderr,
+ "--cpu nr too large for cpumap err(%d):%s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ create_cpu_entry(add_cpu, qsize, added_cpus, true);
+ added_cpus++;
+ break;
+ case 'q':
+ qsize = atoi(optarg);
+ break;
+ case 'h':
+ error:
+ default:
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ /* Required option */
+ if (ifindex == -1) {
+ fprintf(stderr, "ERR: required option --dev missing\n");
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ /* Required option */
+ if (add_cpu == -1) {
+ fprintf(stderr, "ERR: required option --cpu missing\n");
+ fprintf(stderr, " Specify multiple --cpu option to add more\n");
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+
+ /* Remove XDP program when program is interrupted */
+ signal(SIGINT, int_exit);
+
+ if (set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
+ fprintf(stderr, "link set xdp fd failed\n");
+ return EXIT_FAIL_XDP;
+ }
+
+ if (debug) {
+ printf("Debug-mode reading trace pipe (fix #define DEBUG)\n");
+ read_trace_pipe();
+ }
+
+ stats_poll(interval, use_separators, prog_num, stress_mode);
+ return EXIT_OK;
+}
diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c
index 812657ab5aa3..c6ad9b1585a1 100644
--- a/scripts/mod/devicetable-offsets.c
+++ b/scripts/mod/devicetable-offsets.c
@@ -207,5 +207,12 @@ int main(void)
DEVID_FIELD(fsl_mc_device_id, vendor);
DEVID_FIELD(fsl_mc_device_id, obj_type);
+ DEVID(tb_service_id);
+ DEVID_FIELD(tb_service_id, match_flags);
+ DEVID_FIELD(tb_service_id, protocol_key);
+ DEVID_FIELD(tb_service_id, protocol_id);
+ DEVID_FIELD(tb_service_id, protocol_version);
+ DEVID_FIELD(tb_service_id, protocol_revision);
+
return 0;
}
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index bc25898f6df0..2b9395501d81 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -1305,6 +1305,31 @@ static int do_fsl_mc_entry(const char *filename, void *symval,
}
ADD_TO_DEVTABLE("fslmc", fsl_mc_device_id, do_fsl_mc_entry);
+/* Looks like: tbsvc:kSpNvNrN */
+static int do_tbsvc_entry(const char *filename, void *symval, char *alias)
+{
+ DEF_FIELD(symval, tb_service_id, match_flags);
+ DEF_FIELD_ADDR(symval, tb_service_id, protocol_key);
+ DEF_FIELD(symval, tb_service_id, protocol_id);
+ DEF_FIELD(symval, tb_service_id, protocol_version);
+ DEF_FIELD(symval, tb_service_id, protocol_revision);
+
+ strcpy(alias, "tbsvc:");
+ if (match_flags & TBSVC_MATCH_PROTOCOL_KEY)
+ sprintf(alias + strlen(alias), "k%s", *protocol_key);
+ else
+ strcat(alias + strlen(alias), "k*");
+ ADD(alias, "p", match_flags & TBSVC_MATCH_PROTOCOL_ID, protocol_id);
+ ADD(alias, "v", match_flags & TBSVC_MATCH_PROTOCOL_VERSION,
+ protocol_version);
+ ADD(alias, "r", match_flags & TBSVC_MATCH_PROTOCOL_REVISION,
+ protocol_revision);
+
+ add_wildcard(alias);
+ return 1;
+}
+ADD_TO_DEVTABLE("tbsvc", tb_service_id, do_tbsvc_entry);
+
/* Does namelen bytes of name exactly match the symbol? */
static bool sym_is(const char *name, unsigned namelen, const char *symbol)
{
diff --git a/security/security.c b/security/security.c
index 4bf0f571b4ef..1cd8526cb0b7 100644
--- a/security/security.c
+++ b/security/security.c
@@ -12,6 +12,7 @@
* (at your option) any later version.
*/
+#include <linux/bpf.h>
#include <linux/capability.h>
#include <linux/dcache.h>
#include <linux/module.h>
@@ -1703,3 +1704,34 @@ int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule,
actx);
}
#endif /* CONFIG_AUDIT */
+
+#ifdef CONFIG_BPF_SYSCALL
+int security_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+ return call_int_hook(bpf, 0, cmd, attr, size);
+}
+int security_bpf_map(struct bpf_map *map, fmode_t fmode)
+{
+ return call_int_hook(bpf_map, 0, map, fmode);
+}
+int security_bpf_prog(struct bpf_prog *prog)
+{
+ return call_int_hook(bpf_prog, 0, prog);
+}
+int security_bpf_map_alloc(struct bpf_map *map)
+{
+ return call_int_hook(bpf_map_alloc_security, 0, map);
+}
+int security_bpf_prog_alloc(struct bpf_prog_aux *aux)
+{
+ return call_int_hook(bpf_prog_alloc_security, 0, aux);
+}
+void security_bpf_map_free(struct bpf_map *map)
+{
+ call_void_hook(bpf_map_free_security, map);
+}
+void security_bpf_prog_free(struct bpf_prog_aux *aux)
+{
+ call_void_hook(bpf_prog_free_security, aux);
+}
+#endif /* CONFIG_BPF_SYSCALL */
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index f5d304736852..2e3a627fc0b1 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -85,6 +85,7 @@
#include <linux/export.h>
#include <linux/msg.h>
#include <linux/shm.h>
+#include <linux/bpf.h>
#include "avc.h"
#include "objsec.h"
@@ -1814,6 +1815,10 @@ static inline int file_path_has_perm(const struct cred *cred,
return inode_has_perm(cred, file_inode(file), av, &ad);
}
+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_fd_pass(struct file *file, u32 sid);
+#endif
+
/* Check whether a task can use an open file descriptor to
access an inode in a given way. Check access to the
descriptor itself, and then use dentry_has_perm to
@@ -1844,6 +1849,12 @@ static int file_has_perm(const struct cred *cred,
goto out;
}
+#ifdef CONFIG_BPF_SYSCALL
+ rc = bpf_fd_pass(file, cred_sid(cred));
+ if (rc)
+ return rc;
+#endif
+
/* av is zero if only checking access to the descriptor. */
rc = 0;
if (av)
@@ -2164,6 +2175,12 @@ static int selinux_binder_transfer_file(struct task_struct *from,
return rc;
}
+#ifdef CONFIG_BPF_SYSCALL
+ rc = bpf_fd_pass(file, sid);
+ if (rc)
+ return rc;
+#endif
+
if (unlikely(IS_PRIVATE(d_backing_inode(dentry))))
return 0;
@@ -6252,6 +6269,139 @@ static void selinux_ib_free_security(void *ib_sec)
}
#endif
+#ifdef CONFIG_BPF_SYSCALL
+static int selinux_bpf(int cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+ u32 sid = current_sid();
+ int ret;
+
+ switch (cmd) {
+ case BPF_MAP_CREATE:
+ ret = avc_has_perm(sid, sid, SECCLASS_BPF, BPF__MAP_CREATE,
+ NULL);
+ break;
+ case BPF_PROG_LOAD:
+ ret = avc_has_perm(sid, sid, SECCLASS_BPF, BPF__PROG_LOAD,
+ NULL);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+static u32 bpf_map_fmode_to_av(fmode_t fmode)
+{
+ u32 av = 0;
+
+ if (fmode & FMODE_READ)
+ av |= BPF__MAP_READ;
+ if (fmode & FMODE_WRITE)
+ av |= BPF__MAP_WRITE;
+ return av;
+}
+
+/* This function will check the file pass through unix socket or binder to see
+ * if it is a bpf related object. And apply correspinding checks on the bpf
+ * object based on the type. The bpf maps and programs, not like other files and
+ * socket, are using a shared anonymous inode inside the kernel as their inode.
+ * So checking that inode cannot identify if the process have privilege to
+ * access the bpf object and that's why we have to add this additional check in
+ * selinux_file_receive and selinux_binder_transfer_files.
+ */
+static int bpf_fd_pass(struct file *file, u32 sid)
+{
+ struct bpf_security_struct *bpfsec;
+ struct bpf_prog *prog;
+ struct bpf_map *map;
+ int ret;
+
+ if (file->f_op == &bpf_map_fops) {
+ map = file->private_data;
+ bpfsec = map->security;
+ ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF,
+ bpf_map_fmode_to_av(file->f_mode), NULL);
+ if (ret)
+ return ret;
+ } else if (file->f_op == &bpf_prog_fops) {
+ prog = file->private_data;
+ bpfsec = prog->aux->security;
+ ret = avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF,
+ BPF__PROG_RUN, NULL);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int selinux_bpf_map(struct bpf_map *map, fmode_t fmode)
+{
+ u32 sid = current_sid();
+ struct bpf_security_struct *bpfsec;
+
+ bpfsec = map->security;
+ return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF,
+ bpf_map_fmode_to_av(fmode), NULL);
+}
+
+static int selinux_bpf_prog(struct bpf_prog *prog)
+{
+ u32 sid = current_sid();
+ struct bpf_security_struct *bpfsec;
+
+ bpfsec = prog->aux->security;
+ return avc_has_perm(sid, bpfsec->sid, SECCLASS_BPF,
+ BPF__PROG_RUN, NULL);
+}
+
+static int selinux_bpf_map_alloc(struct bpf_map *map)
+{
+ struct bpf_security_struct *bpfsec;
+
+ bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL);
+ if (!bpfsec)
+ return -ENOMEM;
+
+ bpfsec->sid = current_sid();
+ map->security = bpfsec;
+
+ return 0;
+}
+
+static void selinux_bpf_map_free(struct bpf_map *map)
+{
+ struct bpf_security_struct *bpfsec = map->security;
+
+ map->security = NULL;
+ kfree(bpfsec);
+}
+
+static int selinux_bpf_prog_alloc(struct bpf_prog_aux *aux)
+{
+ struct bpf_security_struct *bpfsec;
+
+ bpfsec = kzalloc(sizeof(*bpfsec), GFP_KERNEL);
+ if (!bpfsec)
+ return -ENOMEM;
+
+ bpfsec->sid = current_sid();
+ aux->security = bpfsec;
+
+ return 0;
+}
+
+static void selinux_bpf_prog_free(struct bpf_prog_aux *aux)
+{
+ struct bpf_security_struct *bpfsec = aux->security;
+
+ aux->security = NULL;
+ kfree(bpfsec);
+}
+#endif
+
static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(binder_set_context_mgr, selinux_binder_set_context_mgr),
LSM_HOOK_INIT(binder_transaction, selinux_binder_transaction),
@@ -6471,6 +6621,16 @@ static struct security_hook_list selinux_hooks[] __lsm_ro_after_init = {
LSM_HOOK_INIT(audit_rule_match, selinux_audit_rule_match),
LSM_HOOK_INIT(audit_rule_free, selinux_audit_rule_free),
#endif
+
+#ifdef CONFIG_BPF_SYSCALL
+ LSM_HOOK_INIT(bpf, selinux_bpf),
+ LSM_HOOK_INIT(bpf_map, selinux_bpf_map),
+ LSM_HOOK_INIT(bpf_prog, selinux_bpf_prog),
+ LSM_HOOK_INIT(bpf_map_alloc_security, selinux_bpf_map_alloc),
+ LSM_HOOK_INIT(bpf_prog_alloc_security, selinux_bpf_prog_alloc),
+ LSM_HOOK_INIT(bpf_map_free_security, selinux_bpf_map_free),
+ LSM_HOOK_INIT(bpf_prog_free_security, selinux_bpf_prog_free),
+#endif
};
static __init int selinux_init(void)
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index 35ffb29a69cb..0a7023b5f000 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -237,6 +237,8 @@ struct security_class_mapping secclass_map[] = {
{ "access", NULL } },
{ "infiniband_endport",
{ "manage_subnet", NULL } },
+ { "bpf",
+ {"map_create", "map_read", "map_write", "prog_load", "prog_run"} },
{ NULL }
};
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 1649cd18eb0b..3d54468ce334 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -150,6 +150,10 @@ struct pkey_security_struct {
u32 sid; /* SID of pkey */
};
+struct bpf_security_struct {
+ u32 sid; /*SID of bpf obj creater*/
+};
+
extern unsigned int selinux_checkreqprot;
#endif /* _SELINUX_OBJSEC_H_ */
diff --git a/tools/Makefile b/tools/Makefile
index 9dfede37c8ff..df6fcb293fbc 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -19,7 +19,7 @@ help:
@echo ' kvm_stat - top-like utility for displaying kvm statistics'
@echo ' leds - LEDs tools'
@echo ' liblockdep - user-space wrapper for kernel locking-validator'
- @echo ' net - misc networking tools'
+ @echo ' bpf - misc BPF tools'
@echo ' perf - Linux performance measurement and analysis tool'
@echo ' selftests - various kernel selftests'
@echo ' spi - spi tools'
@@ -57,7 +57,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-cgroup firewire hv guest spi usb virtio vm net iio gpio objtool leds: FORCE
+cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds: FORCE
$(call descend,$@)
liblockdep: FORCE
@@ -91,7 +91,7 @@ kvm_stat: FORCE
all: acpi cgroup cpupower gpio hv firewire liblockdep \
perf selftests spi turbostat usb \
- virtio vm net x86_energy_perf_policy \
+ virtio vm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat
acpi_install:
@@ -100,7 +100,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install net_install objtool_install:
+cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install:
$(call descend,$(@:_install=),install)
liblockdep_install:
@@ -124,7 +124,7 @@ kvm_stat_install:
install: acpi_install cgroup_install cpupower_install gpio_install \
hv_install firewire_install iio_install liblockdep_install \
perf_install selftests_install turbostat_install usb_install \
- virtio_install vm_install net_install x86_energy_perf_policy_install \
+ virtio_install vm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install
acpi_clean:
@@ -133,7 +133,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean objtool_clean leds_clean:
+cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean:
$(call descend,$(@:_clean=),clean)
liblockdep_clean:
@@ -169,7 +169,7 @@ build_clean:
clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \
- vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
+ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
gpio_clean objtool_clean leds_clean
diff --git a/tools/net/Makefile b/tools/bpf/Makefile
index ddf888010652..325a35e1c28e 100644
--- a/tools/net/Makefile
+++ b/tools/bpf/Makefile
@@ -3,6 +3,7 @@ prefix = /usr
CC = gcc
LEX = flex
YACC = bison
+MAKE = make
CFLAGS += -Wall -O2
CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
@@ -13,7 +14,7 @@ CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
%.lex.c: %.l
$(LEX) -o $@ $<
-all : bpf_jit_disasm bpf_dbg bpf_asm
+all: bpf_jit_disasm bpf_dbg bpf_asm bpftool
bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
@@ -26,10 +27,21 @@ bpf_asm : LDLIBS =
bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
bpf_exp.lex.o : bpf_exp.yacc.c
-clean :
+clean: bpftool_clean
rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.*
-install :
+install: bpftool_install
install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
install bpf_dbg $(prefix)/bin/bpf_dbg
install bpf_asm $(prefix)/bin/bpf_asm
+
+bpftool:
+ $(MAKE) -C bpftool
+
+bpftool_install:
+ $(MAKE) -C bpftool install
+
+bpftool_clean:
+ $(MAKE) -C bpftool clean
+
+.PHONY: bpftool FORCE
diff --git a/tools/net/bpf_asm.c b/tools/bpf/bpf_asm.c
index c15aef097b04..c15aef097b04 100644
--- a/tools/net/bpf_asm.c
+++ b/tools/bpf/bpf_asm.c
diff --git a/tools/net/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index 4f254bcc4423..4f254bcc4423 100644
--- a/tools/net/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
diff --git a/tools/net/bpf_exp.l b/tools/bpf/bpf_exp.l
index bd83149e7be0..bd83149e7be0 100644
--- a/tools/net/bpf_exp.l
+++ b/tools/bpf/bpf_exp.l
diff --git a/tools/net/bpf_exp.y b/tools/bpf/bpf_exp.y
index 56ba1de50784..56ba1de50784 100644
--- a/tools/net/bpf_exp.y
+++ b/tools/bpf/bpf_exp.y
diff --git a/tools/net/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
index 422d9abd666a..422d9abd666a 100644
--- a/tools/net/bpf_jit_disasm.c
+++ b/tools/bpf/bpf_jit_disasm.c
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
new file mode 100644
index 000000000000..bde77d7c4390
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -0,0 +1,34 @@
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+prefix?=$(HOME)
+endif
+mandir ?= $(prefix)/share/man
+man8dir = $(mandir)/man8
+
+MAN8_RST = $(wildcard *.rst)
+
+_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
+DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
+
+man: man8
+man8: $(DOC_MAN8)
+
+$(OUTPUT)%.8: %.rst
+ rst2man $< > $@
+
+clean:
+ $(call QUIET_CLEAN, Documentation) $(RM) $(DOC_MAN8)
+
+install: man
+ $(call QUIET_INSTALL, Documentation-man) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man8dir); \
+ $(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir);
+
+.PHONY: man man8 clean install
+.DEFAULT_GOAL := man
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
new file mode 100644
index 000000000000..ff63e89e4b6c
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -0,0 +1,110 @@
+================
+bpftool-map
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF maps
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** **map** *COMMAND*
+
+ *COMMANDS* :=
+ { show | dump | update | lookup | getnext | delete | pin | help }
+
+MAP COMMANDS
+=============
+
+| **bpftool** map show [*MAP*]
+| **bpftool** map dump *MAP*
+| **bpftool** map update *MAP* key *BYTES* value *VALUE* [*UPDATE_FLAGS*]
+| **bpftool** map lookup *MAP* key *BYTES*
+| **bpftool** map getnext *MAP* [key *BYTES*]
+| **bpftool** map delete *MAP* key *BYTES*
+| **bpftool** map pin *MAP* *FILE*
+| **bpftool** map help
+|
+| *MAP* := { id MAP_ID | pinned FILE }
+| *VALUE* := { BYTES | MAP | PROGRAM }
+| *UPDATE_FLAGS* := { any | exist | noexist }
+
+DESCRIPTION
+===========
+ **bpftool map show** [*MAP*]
+ Show information about loaded maps. If *MAP* is specified
+ show information only about given map, otherwise list all
+ maps currently loaded on the system.
+
+ Output will start with map ID followed by map type and
+ zero or more named attributes (depending on kernel version).
+
+ **bpftool map dump** *MAP*
+ Dump all entries in a given *MAP*.
+
+ **bpftool map update** *MAP* **key** *BYTES* **value** *VALUE* [*UPDATE_FLAGS*]
+ Update map entry for a given *KEY*.
+
+ *UPDATE_FLAGS* can be one of: **any** update existing entry
+ or add if doesn't exit; **exist** update only if entry already
+ exists; **noexist** update only if entry doesn't exist.
+
+ **bpftool map lookup** *MAP* **key** *BYTES*
+ Lookup **key** in the map.
+
+ **bpftool map getnext** *MAP* [**key** *BYTES*]
+ Get next key. If *key* is not specified, get first key.
+
+ **bpftool map delete** *MAP* **key** *BYTES*
+ Remove entry from the map.
+
+ **bpftool map pin** *MAP* *FILE*
+ Pin map *MAP* as *FILE*.
+
+ Note: *FILE* must be located in *bpffs* mount.
+
+ **bpftool map help**
+ Print short help message.
+
+EXAMPLES
+========
+**# bpftool map show**
+::
+
+ 10: hash name some_map flags 0x0
+ key 4B value 8B max_entries 2048 memlock 167936B
+
+**# bpftool map update id 10 key 13 00 07 00 value 02 00 00 00 01 02 03 04**
+
+**# bpftool map lookup id 10 key 0 1 2 3**
+
+::
+
+ key: 00 01 02 03 value: 00 01 02 03 04 05 06 07
+
+
+**# bpftool map dump id 10**
+::
+
+ key: 00 01 02 03 value: 00 01 02 03 04 05 06 07
+ key: 0d 00 07 00 value: 02 00 00 00 01 02 03 04
+ Found 2 elements
+
+**# bpftool map getnext id 10 key 0 1 2 3**
+::
+
+ key:
+ 00 01 02 03
+ next key:
+ 0d 00 07 00
+
+|
+| **# mount -t bpf none /sys/fs/bpf/**
+| **# bpftool map pin id 10 /sys/fs/bpf/map**
+| **# bpftool map del pinned /sys/fs/bpf/map key 13 00 07 00**
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-prog**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
new file mode 100644
index 000000000000..69b3770370c8
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -0,0 +1,82 @@
+================
+bpftool-prog
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF progs
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+| **bpftool** prog show [*PROG*]
+| **bpftool** prog dump xlated *PROG* [{file *FILE* | opcodes }]
+| **bpftool** prog dump jited *PROG* [{file *FILE* | opcodes }]
+| **bpftool** prog pin *PROG* *FILE*
+| **bpftool** prog help
+|
+| *PROG* := { id *PROG_ID* | pinned *FILE* | tag *PROG_TAG* }
+
+DESCRIPTION
+===========
+ **bpftool prog show** [*PROG*]
+ Show information about loaded programs. If *PROG* is
+ specified show information only about given program, otherwise
+ list all programs currently loaded on the system.
+
+ Output will start with program ID followed by program type and
+ zero or more named attributes (depending on kernel version).
+
+ **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** }]
+ Dump eBPF instructions of the program from the kernel.
+ If *FILE* is specified image will be written to a file,
+ otherwise it will be disassembled and printed to stdout.
+
+ **opcodes** controls if raw opcodes will be printed.
+
+ **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }]
+ Dump jited image (host machine code) of the program.
+ If *FILE* is specified image will be written to a file,
+ otherwise it will be disassembled and printed to stdout.
+
+ **opcodes** controls if raw opcodes will be printed.
+
+ **bpftool prog pin** *PROG* *FILE*
+ Pin program *PROG* as *FILE*.
+
+ Note: *FILE* must be located in *bpffs* mount.
+
+ **bpftool prog help**
+ Print short help message.
+
+EXAMPLES
+========
+**# bpftool prog show**
+::
+
+ 10: xdp name some_prog tag 005a3d2123620c8b
+ loaded_at Sep 29/20:11 uid 0
+ xlated 528B jited 370B memlock 4096B map_ids 10
+
+|
+| **# bpftool prog dump xlated id 10 file /tmp/t**
+| **# ls -l /tmp/t**
+| -rw------- 1 root root 560 Jul 22 01:42 /tmp/t
+
+|
+| **# bpftool prog dum jited pinned /sys/fs/bpf/prog**
+
+::
+
+ push %rbp
+ mov %rsp,%rbp
+ sub $0x228,%rsp
+ sub $0x28,%rbp
+ mov %rbx,0x0(%rbp)
+
+
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
new file mode 100644
index 000000000000..45ad8baf1915
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -0,0 +1,36 @@
+================
+BPFTOOL
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF programs and maps
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** *OBJECT* { *COMMAND* | help }
+
+ **bpftool** batch file *FILE*
+
+ **bpftool** version
+
+ *OBJECT* := { **map** | **program** }
+
+ *MAP-COMMANDS* :=
+ { show | dump | update | lookup | getnext | delete | pin | help }
+
+ *PROG-COMMANDS* := { show | dump jited | dump xlated | pin | help }
+
+DESCRIPTION
+===========
+ *bpftool* allows for inspection and simple modification of BPF objects
+ on the system.
+
+ Note that format of the output of all tools is not guaranteed to be
+ stable and should not be depended upon.
+
+SEE ALSO
+========
+ **bpftool-map**\ (8), **bpftool-prog**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
new file mode 100644
index 000000000000..4f339824ca57
--- /dev/null
+++ b/tools/bpf/bpftool/Makefile
@@ -0,0 +1,89 @@
+include ../../scripts/Makefile.include
+
+include ../../scripts/utilities.mak
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+ifneq ($(objtree),)
+#$(info Determined 'objtree' to be $(objtree))
+endif
+
+ifneq ($(OUTPUT),)
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+# Adding $(OUTPUT) as a directory to look for source files,
+# because use generated output files as sources dependency
+# for flex/bison parsers.
+VPATH += $(OUTPUT)
+export VPATH
+endif
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+BPF_DIR = $(srctree)/tools/lib/bpf/
+
+ifneq ($(OUTPUT),)
+ BPF_PATH=$(OUTPUT)
+else
+ BPF_PATH=$(BPF_DIR)
+endif
+
+LIBBPF = $(BPF_PATH)libbpf.a
+
+$(LIBBPF): FORCE
+ $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
+
+$(LIBBPF)-clean:
+ $(call QUIET_CLEAN, libbpf)
+ $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
+
+prefix = /usr
+
+CC = gcc
+
+CFLAGS += -O2
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/tools/include/uapi -I$(srctree)/tools/include -I$(srctree)/tools/lib/bpf -I$(srctree)/kernel/bpf/
+LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
+
+include $(wildcard *.d)
+
+all: $(OUTPUT)bpftool
+
+SRCS=$(wildcard *.c)
+OBJS=$(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+
+$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
+ $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+
+$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+$(OUTPUT)%.o: %.c
+ $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+
+clean: $(LIBBPF)-clean
+ $(call QUIET_CLEAN, bpftool)
+ $(Q)rm -rf $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+
+install:
+ install $(OUTPUT)bpftool $(prefix)/sbin/bpftool
+
+doc:
+ $(Q)$(MAKE) -C Documentation/
+
+doc-install:
+ $(Q)$(MAKE) -C Documentation/ install
+
+FORCE:
+
+.PHONY: all clean FORCE
+.DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
new file mode 100644
index 000000000000..df8396a0c400
--- /dev/null
+++ b/tools/bpf/bpftool/common.c
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#include <errno.h>
+#include <libgen.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+static bool is_bpffs(char *path)
+{
+ struct statfs st_fs;
+
+ if (statfs(path, &st_fs) < 0)
+ return false;
+
+ return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
+}
+
+int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
+{
+ enum bpf_obj_type type;
+ int fd;
+
+ fd = bpf_obj_get(path);
+ if (fd < 0) {
+ err("bpf obj get (%s): %s\n", path,
+ errno == EACCES && !is_bpffs(dirname(path)) ?
+ "directory not in bpf file system (bpffs)" :
+ strerror(errno));
+ return -1;
+ }
+
+ type = get_fd_type(fd);
+ if (type < 0) {
+ close(fd);
+ return type;
+ }
+ if (type != exp_type) {
+ err("incorrect object type: %s\n", get_fd_type_name(type));
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+{
+ unsigned int id;
+ char *endptr;
+ int err;
+ int fd;
+
+ if (!is_prefix(*argv, "id")) {
+ err("expected 'id' got %s\n", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+
+ id = strtoul(*argv, &endptr, 0);
+ if (*endptr) {
+ err("can't parse %s as ID\n", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+
+ if (argc != 1)
+ usage();
+
+ fd = get_fd_by_id(id);
+ if (fd < 0) {
+ err("can't get prog by id (%u): %s\n", id, strerror(errno));
+ return -1;
+ }
+
+ err = bpf_obj_pin(fd, *argv);
+ close(fd);
+ if (err) {
+ err("can't pin the object (%s): %s\n", *argv,
+ errno == EACCES && !is_bpffs(dirname(*argv)) ?
+ "directory not in bpf file system (bpffs)" :
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+const char *get_fd_type_name(enum bpf_obj_type type)
+{
+ static const char * const names[] = {
+ [BPF_OBJ_UNKNOWN] = "unknown",
+ [BPF_OBJ_PROG] = "prog",
+ [BPF_OBJ_MAP] = "map",
+ };
+
+ if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
+ return names[BPF_OBJ_UNKNOWN];
+
+ return names[type];
+}
+
+int get_fd_type(int fd)
+{
+ char path[PATH_MAX];
+ char buf[512];
+ ssize_t n;
+
+ snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd);
+
+ n = readlink(path, buf, sizeof(buf));
+ if (n < 0) {
+ err("can't read link type: %s\n", strerror(errno));
+ return -1;
+ }
+ if (n == sizeof(path)) {
+ err("can't read link type: path too long!\n");
+ return -1;
+ }
+
+ if (strstr(buf, "bpf-map"))
+ return BPF_OBJ_MAP;
+ else if (strstr(buf, "bpf-prog"))
+ return BPF_OBJ_PROG;
+
+ return BPF_OBJ_UNKNOWN;
+}
+
+char *get_fdinfo(int fd, const char *key)
+{
+ char path[PATH_MAX];
+ char *line = NULL;
+ size_t line_n = 0;
+ ssize_t n;
+ FILE *fdi;
+
+ snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", getpid(), fd);
+
+ fdi = fopen(path, "r");
+ if (!fdi) {
+ err("can't open fdinfo: %s\n", strerror(errno));
+ return NULL;
+ }
+
+ while ((n = getline(&line, &line_n, fdi))) {
+ char *value;
+ int len;
+
+ if (!strstr(line, key))
+ continue;
+
+ fclose(fdi);
+
+ value = strchr(line, '\t');
+ if (!value || !value[1]) {
+ err("malformed fdinfo!?\n");
+ free(line);
+ return NULL;
+ }
+ value++;
+
+ len = strlen(value);
+ memmove(line, value, len);
+ line[len - 1] = '\0';
+
+ return line;
+ }
+
+ err("key '%s' not found in fdinfo\n", key);
+ free(line);
+ fclose(fdi);
+ return NULL;
+}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
new file mode 100644
index 000000000000..70e480b59e9d
--- /dev/null
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -0,0 +1,87 @@
+/*
+ * Based on:
+ *
+ * Minimal BPF JIT image disassembler
+ *
+ * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for
+ * debugging or verification purposes.
+ *
+ * Copyright 2013 Daniel Borkmann <daniel@iogearbox.net>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+#include <bfd.h>
+#include <dis-asm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+static void get_exec_path(char *tpath, size_t size)
+{
+ ssize_t len;
+ char *path;
+
+ snprintf(tpath, size, "/proc/%d/exe", (int) getpid());
+ tpath[size - 1] = 0;
+
+ path = strdup(tpath);
+ assert(path);
+
+ len = readlink(path, tpath, size - 1);
+ assert(len > 0);
+ tpath[len] = 0;
+
+ free(path);
+}
+
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes)
+{
+ disassembler_ftype disassemble;
+ struct disassemble_info info;
+ int count, i, pc = 0;
+ char tpath[256];
+ bfd *bfdf;
+
+ if (!len)
+ return;
+
+ memset(tpath, 0, sizeof(tpath));
+ get_exec_path(tpath, sizeof(tpath));
+
+ bfdf = bfd_openr(tpath, NULL);
+ assert(bfdf);
+ assert(bfd_check_format(bfdf, bfd_object));
+
+ init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+ info.buffer = image;
+ info.buffer_length = len;
+
+ disassemble_init_for_target(&info);
+
+ disassemble = disassembler(bfdf);
+ assert(disassemble);
+
+ do {
+ printf("%4x:\t", pc);
+
+ count = disassemble(pc, &info);
+
+ if (opcodes) {
+ printf("\n\t");
+ for (i = 0; i < count; ++i)
+ printf("%02x ", (uint8_t) image[pc + i]);
+ }
+ printf("\n");
+
+ pc += count;
+ } while (count > 0 && pc < len);
+
+ bfd_close(bfdf);
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
new file mode 100644
index 000000000000..814d19e1b53f
--- /dev/null
+++ b/tools/bpf/bpftool/main.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#include <bfd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+const char *bin_name;
+static int last_argc;
+static char **last_argv;
+static int (*last_do_help)(int argc, char **argv);
+
+void usage(void)
+{
+ last_do_help(last_argc - 1, last_argv + 1);
+
+ exit(-1);
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %s OBJECT { COMMAND | help }\n"
+ " %s batch file FILE\n"
+ " %s version\n"
+ "\n"
+ " OBJECT := { prog | map }\n",
+ bin_name, bin_name, bin_name);
+
+ return 0;
+}
+
+static int do_version(int argc, char **argv)
+{
+ printf("%s v%d.%d.%d\n", bin_name,
+ LINUX_VERSION_CODE >> 16,
+ LINUX_VERSION_CODE >> 8 & 0xf,
+ LINUX_VERSION_CODE & 0xf);
+ return 0;
+}
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+ int (*help)(int argc, char **argv))
+{
+ unsigned int i;
+
+ last_argc = argc;
+ last_argv = argv;
+ last_do_help = help;
+
+ if (argc < 1 && cmds[0].func)
+ return cmds[0].func(argc, argv);
+
+ for (i = 0; cmds[i].func; i++)
+ if (is_prefix(*argv, cmds[i].cmd))
+ return cmds[i].func(argc - 1, argv + 1);
+
+ help(argc - 1, argv + 1);
+
+ return -1;
+}
+
+bool is_prefix(const char *pfx, const char *str)
+{
+ if (!pfx)
+ return false;
+ if (strlen(str) < strlen(pfx))
+ return false;
+
+ return !memcmp(str, pfx, strlen(pfx));
+}
+
+void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
+{
+ unsigned char *data = arg;
+ unsigned int i;
+
+ for (i = 0; i < n; i++) {
+ const char *pfx = "";
+
+ if (!i)
+ /* nothing */;
+ else if (!(i % 16))
+ fprintf(f, "\n");
+ else if (!(i % 8))
+ fprintf(f, " ");
+ else
+ pfx = sep;
+
+ fprintf(f, "%s%02hhx", i ? pfx : "", data[i]);
+ }
+}
+
+static int do_batch(int argc, char **argv);
+
+static const struct cmd cmds[] = {
+ { "help", do_help },
+ { "batch", do_batch },
+ { "prog", do_prog },
+ { "map", do_map },
+ { "version", do_version },
+ { 0 }
+};
+
+static int do_batch(int argc, char **argv)
+{
+ unsigned int lines = 0;
+ char *n_argv[4096];
+ char buf[65536];
+ int n_argc;
+ FILE *fp;
+ int err;
+
+ if (argc < 2) {
+ err("too few parameters for batch\n");
+ return -1;
+ } else if (!is_prefix(*argv, "file")) {
+ err("expected 'file', got: %s\n", *argv);
+ return -1;
+ } else if (argc > 2) {
+ err("too many parameters for batch\n");
+ return -1;
+ }
+ NEXT_ARG();
+
+ fp = fopen(*argv, "r");
+ if (!fp) {
+ err("Can't open file (%s): %s\n", *argv, strerror(errno));
+ return -1;
+ }
+
+ while (fgets(buf, sizeof(buf), fp)) {
+ if (strlen(buf) == sizeof(buf) - 1) {
+ errno = E2BIG;
+ break;
+ }
+
+ n_argc = 0;
+ n_argv[n_argc] = strtok(buf, " \t\n");
+
+ while (n_argv[n_argc]) {
+ n_argc++;
+ if (n_argc == ARRAY_SIZE(n_argv)) {
+ err("line %d has too many arguments, skip\n",
+ lines);
+ n_argc = 0;
+ break;
+ }
+ n_argv[n_argc] = strtok(NULL, " \t\n");
+ }
+
+ if (!n_argc)
+ continue;
+
+ err = cmd_select(cmds, n_argc, n_argv, do_help);
+ if (err)
+ goto err_close;
+
+ lines++;
+ }
+
+ if (errno && errno != ENOENT) {
+ perror("reading batch file failed");
+ err = -1;
+ } else {
+ info("processed %d lines\n", lines);
+ err = 0;
+ }
+err_close:
+ fclose(fp);
+
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ bin_name = argv[0];
+ NEXT_ARG();
+
+ bfd_init();
+
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
new file mode 100644
index 000000000000..41e6c7d3fcad
--- /dev/null
+++ b/tools/bpf/bpftool/main.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#ifndef __BPF_TOOL_H
+#define __BPF_TOOL_H
+
+/* BFD and kernel.h both define GCC_VERSION, differently */
+#undef GCC_VERSION
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <linux/kernel.h>
+
+#define err(msg...) fprintf(stderr, "Error: " msg)
+#define warn(msg...) fprintf(stderr, "Warning: " msg)
+#define info(msg...) fprintf(stderr, msg)
+
+#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
+
+#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); })
+#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
+#define BAD_ARG() ({ err("what is '%s'?\n", *argv); -1; })
+
+#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
+
+#define HELP_SPEC_PROGRAM \
+ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
+
+enum bpf_obj_type {
+ BPF_OBJ_UNKNOWN,
+ BPF_OBJ_PROG,
+ BPF_OBJ_MAP,
+};
+
+extern const char *bin_name;
+
+bool is_prefix(const char *pfx, const char *str);
+void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
+void usage(void) __attribute__((noreturn));
+
+struct cmd {
+ const char *cmd;
+ int (*func)(int argc, char **argv);
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+ int (*help)(int argc, char **argv));
+
+int get_fd_type(int fd);
+const char *get_fd_type_name(enum bpf_obj_type type);
+char *get_fdinfo(int fd, const char *key);
+int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+
+int do_prog(int argc, char **arg);
+int do_map(int argc, char **arg);
+
+int prog_parse_fd(int *argc, char ***argv);
+
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes);
+
+#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
new file mode 100644
index 000000000000..e1004d825392
--- /dev/null
+++ b/tools/bpf/bpftool/map.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#include <assert.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+static const char * const map_type_name[] = {
+ [BPF_MAP_TYPE_UNSPEC] = "unspec",
+ [BPF_MAP_TYPE_HASH] = "hash",
+ [BPF_MAP_TYPE_ARRAY] = "array",
+ [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
+ [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
+ [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
+ [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
+ [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
+ [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
+ [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
+ [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
+ [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
+ [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
+ [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
+ [BPF_MAP_TYPE_DEVMAP] = "devmap",
+ [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
+};
+
+static unsigned int get_possible_cpus(void)
+{
+ static unsigned int result;
+ char buf[128];
+ long int n;
+ char *ptr;
+ int fd;
+
+ if (result)
+ return result;
+
+ fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
+ if (fd < 0) {
+ err("can't open sysfs possible cpus\n");
+ exit(-1);
+ }
+
+ n = read(fd, buf, sizeof(buf));
+ if (n < 2) {
+ err("can't read sysfs possible cpus\n");
+ exit(-1);
+ }
+ close(fd);
+
+ if (n == sizeof(buf)) {
+ err("read sysfs possible cpus overflow\n");
+ exit(-1);
+ }
+
+ ptr = buf;
+ n = 0;
+ while (*ptr && *ptr != '\n') {
+ unsigned int a, b;
+
+ if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
+ n += b - a + 1;
+
+ ptr = strchr(ptr, '-') + 1;
+ } else if (sscanf(ptr, "%u", &a) == 1) {
+ n++;
+ } else {
+ assert(0);
+ }
+
+ while (isdigit(*ptr))
+ ptr++;
+ if (*ptr == ',')
+ ptr++;
+ }
+
+ result = n;
+
+ return result;
+}
+
+static bool map_is_per_cpu(__u32 type)
+{
+ return type == BPF_MAP_TYPE_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+static bool map_is_map_of_maps(__u32 type)
+{
+ return type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+ type == BPF_MAP_TYPE_HASH_OF_MAPS;
+}
+
+static bool map_is_map_of_progs(__u32 type)
+{
+ return type == BPF_MAP_TYPE_PROG_ARRAY;
+}
+
+static void *alloc_value(struct bpf_map_info *info)
+{
+ if (map_is_per_cpu(info->type))
+ return malloc(info->value_size * get_possible_cpus());
+ else
+ return malloc(info->value_size);
+}
+
+static int map_parse_fd(int *argc, char ***argv)
+{
+ int fd;
+
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ err("can't parse %s as ID\n", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0)
+ err("get map by id (%u): %s\n", id, strerror(errno));
+ return fd;
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ return open_obj_pinned_any(path, BPF_OBJ_MAP);
+ }
+
+ err("expected 'id' or 'pinned', got: '%s'?\n", **argv);
+ return -1;
+}
+
+static int
+map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+{
+ int err;
+ int fd;
+
+ fd = map_parse_fd(argc, argv);
+ if (fd < 0)
+ return -1;
+
+ err = bpf_obj_get_info_by_fd(fd, info, info_len);
+ if (err) {
+ err("can't get map info: %s\n", strerror(errno));
+ close(fd);
+ return err;
+ }
+
+ return fd;
+}
+
+static void print_entry(struct bpf_map_info *info, unsigned char *key,
+ unsigned char *value)
+{
+ if (!map_is_per_cpu(info->type)) {
+ bool single_line, break_names;
+
+ break_names = info->key_size > 16 || info->value_size > 16;
+ single_line = info->key_size + info->value_size <= 24 &&
+ !break_names;
+
+ printf("key:%c", break_names ? '\n' : ' ');
+ fprint_hex(stdout, key, info->key_size, " ");
+
+ printf(single_line ? " " : "\n");
+
+ printf("value:%c", break_names ? '\n' : ' ');
+ fprint_hex(stdout, value, info->value_size, " ");
+
+ printf("\n");
+ } else {
+ unsigned int i, n;
+
+ n = get_possible_cpus();
+
+ printf("key:\n");
+ fprint_hex(stdout, key, info->key_size, " ");
+ printf("\n");
+ for (i = 0; i < n; i++) {
+ printf("value (CPU %02d):%c",
+ i, info->value_size > 16 ? '\n' : ' ');
+ fprint_hex(stdout, value + i * info->value_size,
+ info->value_size, " ");
+ printf("\n");
+ }
+ }
+}
+
+static char **parse_bytes(char **argv, const char *name, unsigned char *val,
+ unsigned int n)
+{
+ unsigned int i = 0;
+ char *endptr;
+
+ while (i < n && argv[i]) {
+ val[i] = strtoul(argv[i], &endptr, 0);
+ if (*endptr) {
+ err("error parsing byte: %s\n", argv[i]);
+ return NULL;
+ }
+ i++;
+ }
+
+ if (i != n) {
+ err("%s expected %d bytes got %d\n", name, n, i);
+ return NULL;
+ }
+
+ return argv + i;
+}
+
+static int parse_elem(char **argv, struct bpf_map_info *info,
+ void *key, void *value, __u32 key_size, __u32 value_size,
+ __u32 *flags, __u32 **value_fd)
+{
+ if (!*argv) {
+ if (!key && !value)
+ return 0;
+ err("did not find %s\n", key ? "key" : "value");
+ return -1;
+ }
+
+ if (is_prefix(*argv, "key")) {
+ if (!key) {
+ if (key_size)
+ err("duplicate key\n");
+ else
+ err("unnecessary key\n");
+ return -1;
+ }
+
+ argv = parse_bytes(argv + 1, "key", key, key_size);
+ if (!argv)
+ return -1;
+
+ return parse_elem(argv, info, NULL, value, key_size, value_size,
+ flags, value_fd);
+ } else if (is_prefix(*argv, "value")) {
+ int fd;
+
+ if (!value) {
+ if (value_size)
+ err("duplicate value\n");
+ else
+ err("unnecessary value\n");
+ return -1;
+ }
+
+ argv++;
+
+ if (map_is_map_of_maps(info->type)) {
+ int argc = 2;
+
+ if (value_size != 4) {
+ err("value smaller than 4B for map in map?\n");
+ return -1;
+ }
+ if (!argv[0] || !argv[1]) {
+ err("not enough value arguments for map in map\n");
+ return -1;
+ }
+
+ fd = map_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ *value_fd = value;
+ **value_fd = fd;
+ } else if (map_is_map_of_progs(info->type)) {
+ int argc = 2;
+
+ if (value_size != 4) {
+ err("value smaller than 4B for map of progs?\n");
+ return -1;
+ }
+ if (!argv[0] || !argv[1]) {
+ err("not enough value arguments for map of progs\n");
+ return -1;
+ }
+
+ fd = prog_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ *value_fd = value;
+ **value_fd = fd;
+ } else {
+ argv = parse_bytes(argv, "value", value, value_size);
+ if (!argv)
+ return -1;
+ }
+
+ return parse_elem(argv, info, key, NULL, key_size, value_size,
+ flags, NULL);
+ } else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") ||
+ is_prefix(*argv, "exist")) {
+ if (!flags) {
+ err("flags specified multiple times: %s\n", *argv);
+ return -1;
+ }
+
+ if (is_prefix(*argv, "any"))
+ *flags = BPF_ANY;
+ else if (is_prefix(*argv, "noexist"))
+ *flags = BPF_NOEXIST;
+ else if (is_prefix(*argv, "exist"))
+ *flags = BPF_EXIST;
+
+ return parse_elem(argv + 1, info, key, value, key_size,
+ value_size, NULL, value_fd);
+ }
+
+ err("expected key or value, got: %s\n", *argv);
+ return -1;
+}
+
+static int show_map_close(int fd, struct bpf_map_info *info)
+{
+ char *memlock;
+
+ memlock = get_fdinfo(fd, "memlock");
+ close(fd);
+
+ printf("%u: ", info->id);
+ if (info->type < ARRAY_SIZE(map_type_name))
+ printf("%s ", map_type_name[info->type]);
+ else
+ printf("type %u ", info->type);
+
+ if (*info->name)
+ printf("name %s ", info->name);
+
+ printf("flags 0x%x\n", info->map_flags);
+ printf("\tkey %uB value %uB max_entries %u",
+ info->key_size, info->value_size, info->max_entries);
+
+ if (memlock)
+ printf(" memlock %sB", memlock);
+ free(memlock);
+
+ printf("\n");
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 id = 0;
+ int err;
+ int fd;
+
+ if (argc == 2) {
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ return show_map_close(fd, &info);
+ }
+
+ if (argc)
+ return BAD_ARG();
+
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ err("can't get next map: %s\n", strerror(errno));
+ if (errno == EINVAL)
+ err("kernel too old?\n");
+ return -1;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ err("can't get map by id (%u): %s\n",
+ id, strerror(errno));
+ return -1;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ err("can't get map info: %s\n", strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ show_map_close(fd, &info);
+ }
+
+ return errno == ENOENT ? 0 : -1;
+}
+
+static int do_dump(int argc, char **argv)
+{
+ void *key, *value, *prev_key;
+ unsigned int num_elems = 0;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int err;
+ int fd;
+
+ if (argc != 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) {
+ err("Dumping maps of maps and program maps not supported\n");
+ close(fd);
+ return -1;
+ }
+
+ key = malloc(info.key_size);
+ value = alloc_value(&info);
+ if (!key || !value) {
+ err("mem alloc failed\n");
+ err = -1;
+ goto exit_free;
+ }
+
+ prev_key = NULL;
+ while (true) {
+ err = bpf_map_get_next_key(fd, prev_key, key);
+ if (err) {
+ if (errno == ENOENT)
+ err = 0;
+ break;
+ }
+
+ if (!bpf_map_lookup_elem(fd, key, value)) {
+ print_entry(&info, key, value);
+ } else {
+ info("can't lookup element with key: ");
+ fprint_hex(stderr, key, info.key_size, " ");
+ fprintf(stderr, "\n");
+ }
+
+ prev_key = key;
+ num_elems++;
+ }
+
+ printf("Found %u element%s\n", num_elems, num_elems != 1 ? "s" : "");
+
+exit_free:
+ free(key);
+ free(value);
+ close(fd);
+
+ return err;
+}
+
+static int do_update(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 *value_fd = NULL;
+ __u32 flags = BPF_ANY;
+ void *key, *value;
+ int fd, err;
+
+ if (argc < 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ key = malloc(info.key_size);
+ value = alloc_value(&info);
+ if (!key || !value) {
+ err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ err = parse_elem(argv, &info, key, value, info.key_size,
+ info.value_size, &flags, &value_fd);
+ if (err)
+ goto exit_free;
+
+ err = bpf_map_update_elem(fd, key, value, flags);
+ if (err) {
+ err("update failed: %s\n", strerror(errno));
+ goto exit_free;
+ }
+
+exit_free:
+ if (value_fd)
+ close(*value_fd);
+ free(key);
+ free(value);
+ close(fd);
+
+ return err;
+}
+
+static int do_lookup(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ void *key, *value;
+ int err;
+ int fd;
+
+ if (argc < 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ key = malloc(info.key_size);
+ value = alloc_value(&info);
+ if (!key || !value) {
+ err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL);
+ if (err)
+ goto exit_free;
+
+ err = bpf_map_lookup_elem(fd, key, value);
+ if (!err) {
+ print_entry(&info, key, value);
+ } else if (errno == ENOENT) {
+ printf("key:\n");
+ fprint_hex(stdout, key, info.key_size, " ");
+ printf("\n\nNot found\n");
+ } else {
+ err("lookup failed: %s\n", strerror(errno));
+ }
+
+exit_free:
+ free(key);
+ free(value);
+ close(fd);
+
+ return err;
+}
+
+static int do_getnext(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ void *key, *nextkey;
+ int err;
+ int fd;
+
+ if (argc < 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ key = malloc(info.key_size);
+ nextkey = malloc(info.key_size);
+ if (!key || !nextkey) {
+ err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ if (argc) {
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0,
+ NULL, NULL);
+ if (err)
+ goto exit_free;
+ } else {
+ free(key);
+ key = NULL;
+ }
+
+ err = bpf_map_get_next_key(fd, key, nextkey);
+ if (err) {
+ err("can't get next key: %s\n", strerror(errno));
+ goto exit_free;
+ }
+
+ if (key) {
+ printf("key:\n");
+ fprint_hex(stdout, key, info.key_size, " ");
+ printf("\n");
+ } else {
+ printf("key: None\n");
+ }
+
+ printf("next key:\n");
+ fprint_hex(stdout, nextkey, info.key_size, " ");
+ printf("\n");
+
+exit_free:
+ free(nextkey);
+ free(key);
+ close(fd);
+
+ return err;
+}
+
+static int do_delete(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ void *key;
+ int err;
+ int fd;
+
+ if (argc < 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ key = malloc(info.key_size);
+ if (!key) {
+ err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL);
+ if (err)
+ goto exit_free;
+
+ err = bpf_map_delete_elem(fd, key);
+ if (err)
+ err("delete failed: %s\n", strerror(errno));
+
+exit_free:
+ free(key);
+ close(fd);
+
+ return err;
+}
+
+static int do_pin(int argc, char **argv)
+{
+ return do_pin_any(argc, argv, bpf_map_get_fd_by_id);
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %s %s show [MAP]\n"
+ " %s %s dump MAP\n"
+ " %s %s update MAP key BYTES value VALUE [UPDATE_FLAGS]\n"
+ " %s %s lookup MAP key BYTES\n"
+ " %s %s getnext MAP [key BYTES]\n"
+ " %s %s delete MAP key BYTES\n"
+ " %s %s pin MAP FILE\n"
+ " %s %s help\n"
+ "\n"
+ " MAP := { id MAP_ID | pinned FILE }\n"
+ " " HELP_SPEC_PROGRAM "\n"
+ " VALUE := { BYTES | MAP | PROG }\n"
+ " UPDATE_FLAGS := { any | exist | noexist }\n"
+ "",
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "help", do_help },
+ { "dump", do_dump },
+ { "update", do_update },
+ { "lookup", do_lookup },
+ { "getnext", do_getnext },
+ { "delete", do_delete },
+ { "pin", do_pin },
+ { 0 }
+};
+
+int do_map(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
new file mode 100644
index 000000000000..7838206a455b
--- /dev/null
+++ b/tools/bpf/bpftool/prog.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (C) 2017 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Author: Jakub Kicinski <kubakici@wp.pl> */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <bpf.h>
+
+#include "main.h"
+#include "disasm.h"
+
+static const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+ [BPF_PROG_TYPE_XDP] = "xdp",
+ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+ [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+ [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+};
+
+static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
+{
+ struct timespec real_time_ts, boot_time_ts;
+ time_t wallclock_secs;
+ struct tm load_tm;
+
+ buf[--size] = '\0';
+
+ if (clock_gettime(CLOCK_REALTIME, &real_time_ts) ||
+ clock_gettime(CLOCK_BOOTTIME, &boot_time_ts)) {
+ perror("Can't read clocks");
+ snprintf(buf, size, "%llu", nsecs / 1000000000);
+ return;
+ }
+
+ wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
+ nsecs / 1000000000;
+
+ if (!localtime_r(&wallclock_secs, &load_tm)) {
+ snprintf(buf, size, "%llu", nsecs / 1000000000);
+ return;
+ }
+
+ strftime(buf, size, "%b %d/%H:%M", &load_tm);
+}
+
+static int prog_fd_by_tag(unsigned char *tag)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ unsigned int id = 0;
+ int err;
+ int fd;
+
+ while (true) {
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ err("%s\n", strerror(errno));
+ return -1;
+ }
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ err("can't get prog by id (%u): %s\n",
+ id, strerror(errno));
+ return -1;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ err("can't get prog info (%u): %s\n",
+ id, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (!memcmp(tag, info.tag, BPF_TAG_SIZE))
+ return fd;
+
+ close(fd);
+ }
+}
+
+int prog_parse_fd(int *argc, char ***argv)
+{
+ int fd;
+
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ err("can't parse %s as ID\n", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0)
+ err("get by id (%u): %s\n", id, strerror(errno));
+ return fd;
+ } else if (is_prefix(**argv, "tag")) {
+ unsigned char tag[BPF_TAG_SIZE];
+
+ NEXT_ARGP();
+
+ if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
+ tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
+ != BPF_TAG_SIZE) {
+ err("can't parse tag\n");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_tag(tag);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ return open_obj_pinned_any(path, BPF_OBJ_PROG);
+ }
+
+ err("expected 'id', 'tag' or 'pinned', got: '%s'?\n", **argv);
+ return -1;
+}
+
+static void show_prog_maps(int fd, u32 num_maps)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ __u32 map_ids[num_maps];
+ unsigned int i;
+ int err;
+
+ info.nr_map_ids = num_maps;
+ info.map_ids = ptr_to_u64(map_ids);
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err || !info.nr_map_ids)
+ return;
+
+ printf(" map_ids ");
+ for (i = 0; i < info.nr_map_ids; i++)
+ printf("%u%s", map_ids[i],
+ i == info.nr_map_ids - 1 ? "" : ",");
+}
+
+static int show_prog(int fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ char *memlock;
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ err("can't get prog info: %s\n", strerror(errno));
+ return -1;
+ }
+
+ printf("%u: ", info.id);
+ if (info.type < ARRAY_SIZE(prog_type_name))
+ printf("%s ", prog_type_name[info.type]);
+ else
+ printf("type %u ", info.type);
+
+ if (*info.name)
+ printf("name %s ", info.name);
+
+ printf("tag ");
+ fprint_hex(stdout, info.tag, BPF_TAG_SIZE, "");
+ printf("\n");
+
+ if (info.load_time) {
+ char buf[32];
+
+ print_boot_time(info.load_time, buf, sizeof(buf));
+
+ /* Piggy back on load_time, since 0 uid is a valid one */
+ printf("\tloaded_at %s uid %u\n", buf, info.created_by_uid);
+ }
+
+ printf("\txlated %uB", info.xlated_prog_len);
+
+ if (info.jited_prog_len)
+ printf(" jited %uB", info.jited_prog_len);
+ else
+ printf(" not jited");
+
+ memlock = get_fdinfo(fd, "memlock");
+ if (memlock)
+ printf(" memlock %sB", memlock);
+ free(memlock);
+
+ if (info.nr_map_ids)
+ show_prog_maps(fd, info.nr_map_ids);
+
+ printf("\n");
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{ __u32 id = 0;
+ int err;
+ int fd;
+
+ if (argc == 2) {
+ fd = prog_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ return show_prog(fd);
+ }
+
+ if (argc)
+ return BAD_ARG();
+
+ while (true) {
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ err("can't get next program: %s\n", strerror(errno));
+ if (errno == EINVAL)
+ err("kernel too old?\n");
+ return -1;
+ }
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ err("can't get prog by id (%u): %s\n",
+ id, strerror(errno));
+ return -1;
+ }
+
+ err = show_prog(fd);
+ close(fd);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void print_insn(struct bpf_verifier_env *env, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+}
+
+static void dump_xlated(void *buf, unsigned int len, bool opcodes)
+{
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ printf("% 4d: ", i);
+ print_bpf_insn(print_insn, NULL, insn + i, true);
+
+ if (opcodes) {
+ printf(" ");
+ fprint_hex(stdout, insn + i, 8, " ");
+ if (double_insn && i < len - 1) {
+ printf(" ");
+ fprint_hex(stdout, insn + i + 1, 8, " ");
+ }
+ printf("\n");
+ }
+ }
+}
+
+static int do_dump(int argc, char **argv)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ unsigned int buf_size;
+ char *filepath = NULL;
+ bool opcodes = false;
+ unsigned char *buf;
+ __u32 *member_len;
+ __u64 *member_ptr;
+ ssize_t n;
+ int err;
+ int fd;
+
+ if (is_prefix(*argv, "jited")) {
+ member_len = &info.jited_prog_len;
+ member_ptr = &info.jited_prog_insns;
+ } else if (is_prefix(*argv, "xlated")) {
+ member_len = &info.xlated_prog_len;
+ member_ptr = &info.xlated_prog_insns;
+ } else {
+ err("expected 'xlated' or 'jited', got: %s\n", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+
+ if (argc < 2)
+ usage();
+
+ fd = prog_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ if (is_prefix(*argv, "file")) {
+ NEXT_ARG();
+ if (!argc) {
+ err("expected file path\n");
+ return -1;
+ }
+
+ filepath = *argv;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "opcodes")) {
+ opcodes = true;
+ NEXT_ARG();
+ }
+
+ if (argc) {
+ usage();
+ return -1;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ err("can't get prog info: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (!*member_len) {
+ info("no instructions returned\n");
+ close(fd);
+ return 0;
+ }
+
+ buf_size = *member_len;
+
+ buf = malloc(buf_size);
+ if (!buf) {
+ err("mem alloc failed\n");
+ close(fd);
+ return -1;
+ }
+
+ memset(&info, 0, sizeof(info));
+
+ *member_ptr = ptr_to_u64(buf);
+ *member_len = buf_size;
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ close(fd);
+ if (err) {
+ err("can't get prog info: %s\n", strerror(errno));
+ goto err_free;
+ }
+
+ if (*member_len > buf_size) {
+ err("too many instructions returned\n");
+ goto err_free;
+ }
+
+ if (filepath) {
+ fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+ if (fd < 0) {
+ err("can't open file %s: %s\n", filepath,
+ strerror(errno));
+ goto err_free;
+ }
+
+ n = write(fd, buf, *member_len);
+ close(fd);
+ if (n != *member_len) {
+ err("error writing output file: %s\n",
+ n < 0 ? strerror(errno) : "short write");
+ goto err_free;
+ }
+ } else {
+ if (member_len == &info.jited_prog_len)
+ disasm_print_insn(buf, *member_len, opcodes);
+ else
+ dump_xlated(buf, *member_len, opcodes);
+ }
+
+ free(buf);
+
+ return 0;
+
+err_free:
+ free(buf);
+ return -1;
+}
+
+static int do_pin(int argc, char **argv)
+{
+ return do_pin_any(argc, argv, bpf_prog_get_fd_by_id);
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %s %s show [PROG]\n"
+ " %s %s dump xlated PROG [{ file FILE | opcodes }]\n"
+ " %s %s dump jited PROG [{ file FILE | opcodes }]\n"
+ " %s %s pin PROG FILE\n"
+ " %s %s help\n"
+ "\n"
+ " " HELP_SPEC_PROGRAM "\n"
+ "",
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "help", do_help },
+ { "dump", do_dump },
+ { "pin", do_pin },
+ { 0 }
+};
+
+int do_prog(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 24b35a1fd4d6..850a5497dcc3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -92,6 +92,7 @@ enum bpf_cmd {
BPF_PROG_GET_FD_BY_ID,
BPF_MAP_GET_FD_BY_ID,
BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
};
enum bpf_map_type {
@@ -111,6 +112,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_HASH_OF_MAPS,
BPF_MAP_TYPE_DEVMAP,
BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
};
enum bpf_prog_type {
@@ -143,11 +145,47 @@ enum bpf_attach_type {
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
-/* If BPF_F_ALLOW_OVERRIDE flag is used in BPF_PROG_ATTACH command
- * to the given target_fd cgroup the descendent cgroup will be able to
- * override effective bpf program that was inherited from this cgroup
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release old program and attach the new one. Attach flags has to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first)
+ * The programs of sub-cgroup are executed first, then programs of
+ * this cgroup and then programs of parent cgroup.
+ * When children program makes decision (like picking TCP CA or sock bind)
+ * parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@@ -175,6 +213,11 @@ enum bpf_attach_type {
/* Specify numa node during map creation */
#define BPF_F_NUMA_NODE (1U << 2)
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+#define BPF_OBJ_NAME_LEN 16U
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -188,6 +231,7 @@ union bpf_attr {
__u32 numa_node; /* numa node (effective only if
* BPF_F_NUMA_NODE is set).
*/
+ char map_name[BPF_OBJ_NAME_LEN];
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@ -210,6 +254,7 @@ union bpf_attr {
__aligned_u64 log_buf; /* user supplied buffer */
__u32 kern_version; /* checked when prog_type=kprobe */
__u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -249,6 +294,15 @@ union bpf_attr {
__u32 info_len;
__aligned_u64 info;
} info;
+
+ struct { /* anonymous struct used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
} __attribute__((aligned(8)));
/* BPF helper function descriptions:
@@ -583,6 +637,12 @@ union bpf_attr {
* @map: pointer to sockmap to update
* @key: key to insert/update sock in map
* @flags: same flags as map update elem
+ *
+ * int bpf_xdp_adjust_meta(xdp_md, delta)
+ * Adjust the xdp_md.data_meta by delta
+ * @xdp_md: pointer to xdp_md
+ * @delta: An positive/negative integer to be added to xdp_md.data_meta
+ * Return: 0 on success or negative on error
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -639,6 +699,9 @@ union bpf_attr {
FN(redirect_map), \
FN(sk_redirect_map), \
FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -716,7 +779,7 @@ struct __sk_buff {
__u32 data_end;
__u32 napi_id;
- /* accessed by BPF_PROG_TYPE_sk_skb types */
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
__u32 family;
__u32 remote_ip4; /* Stored in network byte order */
__u32 local_ip4; /* Stored in network byte order */
@@ -724,6 +787,9 @@ struct __sk_buff {
__u32 local_ip6[4]; /* Stored in network byte order */
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
};
struct bpf_tunnel_key {
@@ -784,6 +850,7 @@ enum xdp_action {
struct xdp_md {
__u32 data;
__u32 data_end;
+ __u32 data_meta;
};
enum sk_action {
@@ -802,6 +869,11 @@ struct bpf_prog_info {
__u32 xlated_prog_len;
__aligned_u64 jited_prog_insns;
__aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
} __attribute__((aligned(8)));
struct bpf_map_info {
@@ -811,6 +883,7 @@ struct bpf_map_info {
__u32 value_size;
__u32 max_entries;
__u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
} __attribute__((aligned(8)));
/* User bpf_sock_ops struct to access socket values and specify request ops
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 1d6907d379c9..5128677e4117 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -46,6 +46,8 @@
# endif
#endif
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
@@ -57,10 +59,11 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
return syscall(__NR_bpf, cmd, attr, size);
}
-int bpf_create_map_node(enum bpf_map_type map_type, int key_size,
- int value_size, int max_entries, __u32 map_flags,
- int node)
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags, int node)
{
+ __u32 name_len = name ? strlen(name) : 0;
union bpf_attr attr;
memset(&attr, '\0', sizeof(attr));
@@ -70,6 +73,8 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size,
attr.value_size = value_size;
attr.max_entries = max_entries;
attr.map_flags = map_flags;
+ memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+
if (node >= 0) {
attr.map_flags |= BPF_F_NUMA_NODE;
attr.numa_node = node;
@@ -81,14 +86,23 @@ int bpf_create_map_node(enum bpf_map_type map_type, int key_size,
int bpf_create_map(enum bpf_map_type map_type, int key_size,
int value_size, int max_entries, __u32 map_flags)
{
- return bpf_create_map_node(map_type, key_size, value_size,
+ return bpf_create_map_node(map_type, NULL, key_size, value_size,
max_entries, map_flags, -1);
}
-int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size,
- int inner_map_fd, int max_entries,
+int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags)
+{
+ return bpf_create_map_node(map_type, name, key_size, value_size,
+ max_entries, map_flags, -1);
+}
+
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
__u32 map_flags, int node)
{
+ __u32 name_len = name ? strlen(name) : 0;
union bpf_attr attr;
memset(&attr, '\0', sizeof(attr));
@@ -99,6 +113,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size,
attr.inner_map_fd = inner_map_fd;
attr.max_entries = max_entries;
attr.map_flags = map_flags;
+ memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+
if (node >= 0) {
attr.map_flags |= BPF_F_NUMA_NODE;
attr.numa_node = node;
@@ -107,19 +123,24 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size,
return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
}
-int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
- int inner_map_fd, int max_entries, __u32 map_flags)
+int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
+ __u32 map_flags)
{
- return bpf_create_map_in_map_node(map_type, key_size, inner_map_fd,
- max_entries, map_flags, -1);
+ return bpf_create_map_in_map_node(map_type, name, key_size,
+ inner_map_fd, max_entries, map_flags,
+ -1);
}
-int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t insns_cnt, const char *license,
- __u32 kern_version, char *log_buf, size_t log_buf_sz)
+int bpf_load_program_name(enum bpf_prog_type type, const char *name,
+ const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz)
{
int fd;
union bpf_attr attr;
+ __u32 name_len = name ? strlen(name) : 0;
bzero(&attr, sizeof(attr));
attr.prog_type = type;
@@ -130,6 +151,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
attr.log_size = 0;
attr.log_level = 0;
attr.kern_version = kern_version;
+ memcpy(attr.prog_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
if (fd >= 0 || !log_buf || !log_buf_sz)
@@ -143,6 +165,15 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz)
+{
+ return bpf_load_program_name(type, NULL, insns, insns_cnt, license,
+ kern_version, log_buf, log_buf_sz);
+}
+
int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
size_t insns_cnt, int strict_alignment,
const char *license, __u32 kern_version,
@@ -260,6 +291,38 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}
+int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.target_fd = target_fd;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+
+ return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+ __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+{
+ union bpf_attr attr;
+ int ret;
+
+ bzero(&attr, sizeof(attr));
+ attr.query.target_fd = target_fd;
+ attr.query.attach_type = type;
+ attr.query.query_flags = query_flags;
+ attr.query.prog_cnt = *prog_cnt;
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+ ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+ if (attach_flags)
+ *attach_flags = attr.query.attach_flags;
+ *prog_cnt = attr.query.prog_cnt;
+ return ret;
+}
+
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
void *data_out, __u32 *size_out, __u32 *retval,
__u32 *duration)
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index b8ea5843c39e..6534889e2b2f 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -24,19 +24,28 @@
#include <linux/bpf.h>
#include <stddef.h>
-int bpf_create_map_node(enum bpf_map_type map_type, int key_size,
- int value_size, int max_entries, __u32 map_flags,
- int node);
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags, int node);
+int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags);
int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
int max_entries, __u32 map_flags);
-int bpf_create_map_in_map_node(enum bpf_map_type map_type, int key_size,
- int inner_map_fd, int max_entries,
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
__u32 map_flags, int node);
-int bpf_create_map_in_map(enum bpf_map_type map_type, int key_size,
- int inner_map_fd, int max_entries, __u32 map_flags);
+int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
+ __u32 map_flags);
/* Recommend log buffer size */
#define BPF_LOG_BUF_SIZE 65536
+int bpf_load_program_name(enum bpf_prog_type type, const char *name,
+ const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz);
int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
size_t insns_cnt, const char *license,
__u32 kern_version, char *log_buf,
@@ -57,6 +66,7 @@ int bpf_obj_get(const char *pathname);
int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
unsigned int flags);
int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type);
int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
void *data_out, __u32 *size_out, __u32 *retval,
__u32 *duration);
@@ -65,5 +75,6 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
int bpf_prog_get_fd_by_id(__u32 id);
int bpf_map_get_fd_by_id(__u32 id);
int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
-
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+ __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 35f6dfcdc565..5aa45f89da93 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -171,6 +171,7 @@ int libbpf_strerror(int err, char *buf, size_t size)
struct bpf_program {
/* Index in elf obj file, for relocation use. */
int idx;
+ char *name;
char *section_name;
struct bpf_insn *insns;
size_t insns_cnt;
@@ -283,6 +284,7 @@ static void bpf_program__exit(struct bpf_program *prog)
prog->clear_priv = NULL;
bpf_program__unload(prog);
+ zfree(&prog->name);
zfree(&prog->section_name);
zfree(&prog->insns);
zfree(&prog->reloc_desc);
@@ -293,26 +295,27 @@ static void bpf_program__exit(struct bpf_program *prog)
}
static int
-bpf_program__init(void *data, size_t size, char *name, int idx,
- struct bpf_program *prog)
+bpf_program__init(void *data, size_t size, char *section_name, int idx,
+ struct bpf_program *prog)
{
if (size < sizeof(struct bpf_insn)) {
- pr_warning("corrupted section '%s'\n", name);
+ pr_warning("corrupted section '%s'\n", section_name);
return -EINVAL;
}
bzero(prog, sizeof(*prog));
- prog->section_name = strdup(name);
+ prog->section_name = strdup(section_name);
if (!prog->section_name) {
- pr_warning("failed to alloc name for prog %s\n",
- name);
+ pr_warning("failed to alloc name for prog under section %s\n",
+ section_name);
goto errout;
}
prog->insns = malloc(size);
if (!prog->insns) {
- pr_warning("failed to alloc insns for %s\n", name);
+ pr_warning("failed to alloc insns for prog under section %s\n",
+ section_name);
goto errout;
}
prog->insns_cnt = size / sizeof(struct bpf_insn);
@@ -331,12 +334,12 @@ errout:
static int
bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
- char *name, int idx)
+ char *section_name, int idx)
{
struct bpf_program prog, *progs;
int nr_progs, err;
- err = bpf_program__init(data, size, name, idx, &prog);
+ err = bpf_program__init(data, size, section_name, idx, &prog);
if (err)
return err;
@@ -350,8 +353,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
* is still valid, so don't need special treat for
* bpf_close_object().
*/
- pr_warning("failed to alloc a new program '%s'\n",
- name);
+ pr_warning("failed to alloc a new program under section '%s'\n",
+ section_name);
bpf_program__exit(&prog);
return -ENOMEM;
}
@@ -364,6 +367,54 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
return 0;
}
+static int
+bpf_object__init_prog_names(struct bpf_object *obj)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ struct bpf_program *prog;
+ size_t pi, si;
+
+ for (pi = 0; pi < obj->nr_programs; pi++) {
+ char *name = NULL;
+
+ prog = &obj->programs[pi];
+
+ for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
+ si++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, si, &sym))
+ continue;
+ if (sym.st_shndx != prog->idx)
+ continue;
+
+ name = elf_strptr(obj->efile.elf,
+ obj->efile.strtabidx,
+ sym.st_name);
+ if (!name) {
+ pr_warning("failed to get sym name string for prog %s\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__LIBELF;
+ }
+ }
+
+ if (!name) {
+ pr_warning("failed to find sym for prog %s\n",
+ prog->section_name);
+ return -EINVAL;
+ }
+
+ prog->name = strdup(name);
+ if (!prog->name) {
+ pr_warning("failed to allocate memory for prog sym %s\n",
+ name);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
static struct bpf_object *bpf_object__new(const char *path,
void *obj_buf,
size_t obj_buf_sz)
@@ -528,31 +579,6 @@ bpf_object__init_kversion(struct bpf_object *obj,
return 0;
}
-static int
-bpf_object__validate_maps(struct bpf_object *obj)
-{
- int i;
-
- /*
- * If there's only 1 map, the only error case should have been
- * catched in bpf_object__init_maps().
- */
- if (!obj->maps || !obj->nr_maps || (obj->nr_maps == 1))
- return 0;
-
- for (i = 1; i < obj->nr_maps; i++) {
- const struct bpf_map *a = &obj->maps[i - 1];
- const struct bpf_map *b = &obj->maps[i];
-
- if (b->offset - a->offset < sizeof(struct bpf_map_def)) {
- pr_warning("corrupted map section in %s: map \"%s\" too small\n",
- obj->path, a->name);
- return -EINVAL;
- }
- }
- return 0;
-}
-
static int compare_bpf_map(const void *_a, const void *_b)
{
const struct bpf_map *a = _a;
@@ -564,7 +590,7 @@ static int compare_bpf_map(const void *_a, const void *_b)
static int
bpf_object__init_maps(struct bpf_object *obj)
{
- int i, map_idx, nr_maps = 0;
+ int i, map_idx, map_def_sz, nr_maps = 0;
Elf_Scn *scn;
Elf_Data *data;
Elf_Data *symbols = obj->efile.symbols;
@@ -607,6 +633,15 @@ bpf_object__init_maps(struct bpf_object *obj)
if (!nr_maps)
return 0;
+ /* Assume equally sized map definitions */
+ map_def_sz = data->d_size / nr_maps;
+ if (!data->d_size || (data->d_size % nr_maps) != 0) {
+ pr_warning("unable to determine map definition size "
+ "section %s, %d maps in %zd bytes\n",
+ obj->path, nr_maps, data->d_size);
+ return -EINVAL;
+ }
+
obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
if (!obj->maps) {
pr_warning("alloc maps for object failed\n");
@@ -639,7 +674,7 @@ bpf_object__init_maps(struct bpf_object *obj)
obj->efile.strtabidx,
sym.st_name);
obj->maps[map_idx].offset = sym.st_value;
- if (sym.st_value + sizeof(struct bpf_map_def) > data->d_size) {
+ if (sym.st_value + map_def_sz > data->d_size) {
pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
obj->path, map_name);
return -EINVAL;
@@ -653,12 +688,40 @@ bpf_object__init_maps(struct bpf_object *obj)
pr_debug("map %d is \"%s\"\n", map_idx,
obj->maps[map_idx].name);
def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
- obj->maps[map_idx].def = *def;
+ /*
+ * If the definition of the map in the object file fits in
+ * bpf_map_def, copy it. Any extra fields in our version
+ * of bpf_map_def will default to zero as a result of the
+ * calloc above.
+ */
+ if (map_def_sz <= sizeof(struct bpf_map_def)) {
+ memcpy(&obj->maps[map_idx].def, def, map_def_sz);
+ } else {
+ /*
+ * Here the map structure being read is bigger than what
+ * we expect, truncate if the excess bits are all zero.
+ * If they are not zero, reject this map as
+ * incompatible.
+ */
+ char *b;
+ for (b = ((char *)def) + sizeof(struct bpf_map_def);
+ b < ((char *)def) + map_def_sz; b++) {
+ if (*b != 0) {
+ pr_warning("maps section in %s: \"%s\" "
+ "has unrecognized, non-zero "
+ "options\n",
+ obj->path, map_name);
+ return -EINVAL;
+ }
+ }
+ memcpy(&obj->maps[map_idx].def, def,
+ sizeof(struct bpf_map_def));
+ }
map_idx++;
}
qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map);
- return bpf_object__validate_maps(obj);
+ return 0;
}
static int bpf_object__elf_collect(struct bpf_object *obj)
@@ -766,8 +829,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
pr_warning("Corrupted ELF file: index of strtab invalid\n");
return LIBBPF_ERRNO__FORMAT;
}
- if (obj->efile.maps_shndx >= 0)
+ if (obj->efile.maps_shndx >= 0) {
err = bpf_object__init_maps(obj);
+ if (err)
+ goto out;
+ }
+ err = bpf_object__init_prog_names(obj);
out:
return err;
}
@@ -870,11 +937,12 @@ bpf_object__create_maps(struct bpf_object *obj)
struct bpf_map_def *def = &obj->maps[i].def;
int *pfd = &obj->maps[i].fd;
- *pfd = bpf_create_map(def->type,
- def->key_size,
- def->value_size,
- def->max_entries,
- 0);
+ *pfd = bpf_create_map_name(def->type,
+ obj->maps[i].name,
+ def->key_size,
+ def->value_size,
+ def->max_entries,
+ def->map_flags);
if (*pfd < 0) {
size_t j;
int err = *pfd;
@@ -982,7 +1050,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
}
static int
-load_program(enum bpf_prog_type type, struct bpf_insn *insns,
+load_program(enum bpf_prog_type type, const char *name, struct bpf_insn *insns,
int insns_cnt, char *license, u32 kern_version, int *pfd)
{
int ret;
@@ -995,8 +1063,8 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns,
if (!log_buf)
pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
- ret = bpf_load_program(type, insns, insns_cnt, license,
- kern_version, log_buf, BPF_LOG_BUF_SIZE);
+ ret = bpf_load_program_name(type, name, insns, insns_cnt, license,
+ kern_version, log_buf, BPF_LOG_BUF_SIZE);
if (ret >= 0) {
*pfd = ret;
@@ -1021,9 +1089,9 @@ load_program(enum bpf_prog_type type, struct bpf_insn *insns,
if (type != BPF_PROG_TYPE_KPROBE) {
int fd;
- fd = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
- insns_cnt, license, kern_version,
- NULL, 0);
+ fd = bpf_load_program_name(BPF_PROG_TYPE_KPROBE, name,
+ insns, insns_cnt, license,
+ kern_version, NULL, 0);
if (fd >= 0) {
close(fd);
ret = -LIBBPF_ERRNO__PROGTYPE;
@@ -1067,8 +1135,8 @@ bpf_program__load(struct bpf_program *prog,
pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
prog->section_name, prog->instances.nr);
}
- err = load_program(prog->type, prog->insns, prog->insns_cnt,
- license, kern_version, &fd);
+ err = load_program(prog->type, prog->name, prog->insns,
+ prog->insns_cnt, license, kern_version, &fd);
if (!err)
prog->instances.fds[0] = fd;
goto out;
@@ -1096,7 +1164,8 @@ bpf_program__load(struct bpf_program *prog,
continue;
}
- err = load_program(prog->type, result.new_insn_ptr,
+ err = load_program(prog->type, prog->name,
+ result.new_insn_ptr,
result.new_insn_cnt,
license, kern_version, &fd);
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 7959086eb9c9..6e20003109e0 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -207,6 +207,7 @@ struct bpf_map_def {
unsigned int key_size;
unsigned int value_size;
unsigned int max_entries;
+ unsigned int map_flags;
};
/*
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f4b23d697448..2e7880ea0add 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -12,12 +12,13 @@ CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../i
LDLIBS += -lcap -lelf
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_align
+ test_align test_verifier_log
TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
- test_pkt_md_access.o test_xdp_redirect.o sockmap_parse_prog.o sockmap_verdict_prog.o
+ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
+ sockmap_verdict_prog.o
-TEST_PROGS := test_kmod.sh test_xdp_redirect.sh
+TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh
include ../lib.mk
@@ -34,8 +35,20 @@ $(BPFOBJ): force
$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
CLANG ?= clang
+LLC ?= llc
+
+PROBE := $(shell llc -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+# Let newer LLVM versions transparently probe the kernel for availability
+# of full BPF instruction set.
+ifeq ($(PROBE),)
+ CPU ?= probe
+else
+ CPU ?= generic
+endif
%.o: %.c
$(CLANG) -I. -I./include/uapi -I../../../include/uapi \
- -Wno-compare-distinct-pointer-types \
- -O2 -target bpf -c $< -o $@
+ -Wno-compare-distinct-pointer-types \
+ -O2 -target bpf -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
index b2e02bdcd098..abfa4c5c8527 100644
--- a/tools/testing/selftests/bpf/bpf_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -62,14 +62,25 @@ static unsigned long long (*bpf_get_prandom_u32)(void) =
(void *) BPF_FUNC_get_prandom_u32;
static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
(void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_meta;
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) =
(void *) BPF_FUNC_setsockopt;
+static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
+ int optlen) =
+ (void *) BPF_FUNC_getsockopt;
static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value,
unsigned long long flags) =
(void *) BPF_FUNC_sock_map_update;
+static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
+ void *buf, unsigned int buf_size) =
+ (void *) BPF_FUNC_perf_event_read_value;
+static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_perf_prog_read_value;
/* llvm builtin functions that eBPF C program may use to
@@ -109,7 +120,47 @@ static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
static int (*bpf_skb_change_head)(void *, int len, int flags) =
(void *) BPF_FUNC_skb_change_head;
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+#if defined(__TARGET_ARCH_x86)
+ #define bpf_target_x86
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_s930x)
+ #define bpf_target_s930x
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+ #define bpf_target_arm64
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+ #define bpf_target_mips
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+ #define bpf_target_powerpc
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+ #define bpf_target_sparc
+ #define bpf_target_defined
+#else
+ #undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+#ifndef bpf_target_defined
#if defined(__x86_64__)
+ #define bpf_target_x86
+#elif defined(__s390x__)
+ #define bpf_target_s930x
+#elif defined(__aarch64__)
+ #define bpf_target_arm64
+#elif defined(__mips__)
+ #define bpf_target_mips
+#elif defined(__powerpc__)
+ #define bpf_target_powerpc
+#elif defined(__sparc__)
+ #define bpf_target_sparc
+#endif
+#endif
+
+#if defined(bpf_target_x86)
#define PT_REGS_PARM1(x) ((x)->di)
#define PT_REGS_PARM2(x) ((x)->si)
@@ -122,7 +173,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->ip)
-#elif defined(__s390x__)
+#elif defined(bpf_target_s390x)
#define PT_REGS_PARM1(x) ((x)->gprs[2])
#define PT_REGS_PARM2(x) ((x)->gprs[3])
@@ -135,7 +186,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
#define PT_REGS_SP(x) ((x)->gprs[15])
#define PT_REGS_IP(x) ((x)->psw.addr)
-#elif defined(__aarch64__)
+#elif defined(bpf_target_arm64)
#define PT_REGS_PARM1(x) ((x)->regs[0])
#define PT_REGS_PARM2(x) ((x)->regs[1])
@@ -148,7 +199,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->pc)
-#elif defined(__mips__)
+#elif defined(bpf_target_mips)
#define PT_REGS_PARM1(x) ((x)->regs[4])
#define PT_REGS_PARM2(x) ((x)->regs[5])
@@ -161,7 +212,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
#define PT_REGS_SP(x) ((x)->regs[29])
#define PT_REGS_IP(x) ((x)->cp0_epc)
-#elif defined(__powerpc__)
+#elif defined(bpf_target_powerpc)
#define PT_REGS_PARM1(x) ((x)->gpr[3])
#define PT_REGS_PARM2(x) ((x)->gpr[4])
@@ -172,7 +223,7 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->nip)
-#elif defined(__sparc__)
+#elif defined(bpf_target_sparc)
#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
@@ -182,6 +233,8 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
#if defined(__arch64__)
#define PT_REGS_IP(x) ((x)->tpc)
#else
@@ -190,10 +243,10 @@ static int (*bpf_skb_change_head)(void *, int len, int flags) =
#endif
-#ifdef __powerpc__
+#ifdef bpf_target_powerpc
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
-#elif defined(__sparc__)
+#elif bpf_target_sparc
#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
#else
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index e97565243d59..6fedb9fec36b 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -31,6 +31,10 @@ struct tlpm_node {
uint8_t key[];
};
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits);
+
static struct tlpm_node *tlpm_add(struct tlpm_node *list,
const uint8_t *key,
size_t n_bits)
@@ -38,9 +42,17 @@ static struct tlpm_node *tlpm_add(struct tlpm_node *list,
struct tlpm_node *node;
size_t n;
+ n = (n_bits + 7) / 8;
+
+ /* 'overwrite' an equivalent entry if one already exists */
+ node = tlpm_match(list, key, n_bits);
+ if (node && node->n_bits == n_bits) {
+ memcpy(node->key, key, n);
+ return list;
+ }
+
/* add new entry with @key/@n_bits to @list and return new head */
- n = (n_bits + 7) / 8;
node = malloc(sizeof(*node) + n);
assert(node);
@@ -92,6 +104,34 @@ static struct tlpm_node *tlpm_match(struct tlpm_node *list,
return best;
}
+static struct tlpm_node *tlpm_delete(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *best = tlpm_match(list, key, n_bits);
+ struct tlpm_node *node;
+
+ if (!best || best->n_bits != n_bits)
+ return list;
+
+ if (best == list) {
+ node = best->next;
+ free(best);
+ return node;
+ }
+
+ for (node = list; node; node = node->next) {
+ if (node->next == best) {
+ node->next = best->next;
+ free(best);
+ return list;
+ }
+ }
+ /* should never get here */
+ assert(0);
+ return list;
+}
+
static void test_lpm_basic(void)
{
struct tlpm_node *list = NULL, *t1, *t2;
@@ -114,6 +154,13 @@ static void test_lpm_basic(void)
assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
+ list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+
+ list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8);
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
tlpm_clear(list);
}
@@ -158,7 +205,7 @@ static void test_lpm_order(void)
static void test_lpm_map(int keysize)
{
- size_t i, j, n_matches, n_nodes, n_lookups;
+ size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
struct tlpm_node *t, *list = NULL;
struct bpf_lpm_trie_key *key;
uint8_t *data, *value;
@@ -170,6 +217,7 @@ static void test_lpm_map(int keysize)
*/
n_matches = 0;
+ n_matches_after_delete = 0;
n_nodes = 1 << 8;
n_lookups = 1 << 16;
@@ -223,15 +271,54 @@ static void test_lpm_map(int keysize)
}
}
+ /* Remove the first half of the elements in the tlpm and the
+ * corresponding nodes from the bpf-lpm. Then run the same
+ * large number of random lookups in both and make sure they match.
+ * Note: we need to count the number of nodes actually inserted
+ * since there may have been duplicates.
+ */
+ for (i = 0, t = list; t; i++, t = t->next)
+ ;
+ for (j = 0; j < i / 2; ++j) {
+ key->prefixlen = list->n_bits;
+ memcpy(key->data, list->key, keysize);
+ r = bpf_map_delete_elem(map, key);
+ assert(!r);
+ list = tlpm_delete(list, list->key, list->n_bits);
+ assert(list);
+ }
+ for (i = 0; i < n_lookups; ++i) {
+ for (j = 0; j < keysize; ++j)
+ data[j] = rand() & 0xff;
+
+ t = tlpm_match(list, data, 8 * keysize);
+
+ key->prefixlen = 8 * keysize;
+ memcpy(key->data, data, keysize);
+ r = bpf_map_lookup_elem(map, key, value);
+ assert(!r || errno == ENOENT);
+ assert(!t == !!r);
+
+ if (t) {
+ ++n_matches_after_delete;
+ assert(t->n_bits == value[keysize]);
+ for (j = 0; j < t->n_bits; ++j)
+ assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+ (value[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
close(map);
tlpm_clear(list);
/* With 255 random nodes in the map, we are pretty likely to match
* something on every lookup. For statistics, use this:
*
- * printf(" nodes: %zu\n"
- * "lookups: %zu\n"
- * "matches: %zu\n", n_nodes, n_lookups, n_matches);
+ * printf(" nodes: %zu\n"
+ * " lookups: %zu\n"
+ * " matches: %zu\n"
+ * "matches(delete): %zu\n",
+ * n_nodes, n_lookups, n_matches, n_matches_after_delete);
*/
}
@@ -331,6 +418,108 @@ static void test_lpm_ipaddr(void)
close(map_fd_ipv6);
}
+static void test_lpm_delete(void)
+{
+ struct bpf_lpm_trie_key *key;
+ size_t key_size;
+ int map_fd;
+ __u64 value;
+
+ key_size = sizeof(*key) + sizeof(__u32);
+ key = alloca(key_size);
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ key_size, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd >= 0);
+
+ /* Add nodes:
+ * 192.168.0.0/16 (1)
+ * 192.168.0.0/24 (2)
+ * 192.168.128.0/24 (3)
+ * 192.168.1.0/24 (4)
+ *
+ * (1)
+ * / \
+ * (IM) (3)
+ * / \
+ * (2) (4)
+ */
+ value = 1;
+ key->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 2;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 3;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 4;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ /* remove non-existent node */
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "10.0.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+ errno == ENOENT);
+
+ /* assert initial lookup */
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 2);
+
+ /* remove leaf node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 1);
+
+ /* remove leaf (and intermediary) node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.1.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 1);
+
+ /* remove root node */
+ key->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.128.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 3);
+
+ /* remove last node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.128.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+ errno == ENOENT);
+
+ close(map_fd);
+}
+
int main(void)
{
struct rlimit limit = { RLIM_INFINITY, RLIM_INFINITY };
@@ -353,6 +542,8 @@ int main(void)
test_lpm_ipaddr();
+ test_lpm_delete();
+
printf("test_lpm: OK\n");
return 0;
}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 50ce52d2013d..057da0cba517 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1043,6 +1043,51 @@ static void test_map_parallel(void)
assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
}
+static void test_map_rdonly(void)
+{
+ int i, fd, key = 0, value = 0;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags | BPF_F_RDONLY);
+ if (fd < 0) {
+ printf("Failed to create map for read only test '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+ errno == EPERM);
+
+ /* Check that key=2 is not found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+}
+
+static void test_map_wronly(void)
+{
+ int i, fd, key = 0, value = 0;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags | BPF_F_WRONLY);
+ if (fd < 0) {
+ printf("Failed to create map for read only test '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0)
+
+ /* Check that key=2 is not found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
+ assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+}
+
static void run_all_tests(void)
{
test_hashmap(0, NULL);
@@ -1060,6 +1105,9 @@ static void run_all_tests(void)
test_map_large();
test_map_parallel();
test_map_stress();
+
+ test_map_rdonly();
+ test_map_wronly();
}
int main(void)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 11ee25cea227..69427531408d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -10,6 +10,7 @@
#include <string.h>
#include <assert.h>
#include <stdlib.h>
+#include <time.h>
#include <linux/types.h>
typedef __u16 __sum16;
@@ -19,6 +20,8 @@ typedef __u16 __sum16;
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
#include <sys/wait.h>
#include <sys/resource.h>
@@ -273,16 +276,26 @@ static void test_bpf_obj_id(void)
const int nr_iters = 2;
const char *file = "./test_obj_id.o";
const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+ const char *expected_prog_name = "test_obj_id";
+ const char *expected_map_name = "test_map_id";
+ const __u64 nsec_per_sec = 1000000000;
struct bpf_object *objs[nr_iters];
int prog_fds[nr_iters], map_fds[nr_iters];
/* +1 to test for the info_len returned by kernel */
struct bpf_prog_info prog_infos[nr_iters + 1];
struct bpf_map_info map_infos[nr_iters + 1];
+ /* Each prog only uses one map. +1 to test nr_map_ids
+ * returned by kernel.
+ */
+ __u32 map_ids[nr_iters + 1];
char jited_insns[128], xlated_insns[128], zeros[128];
__u32 i, next_id, info_len, nr_id_found, duration = 0;
+ struct timespec real_time_ts, boot_time_ts;
int sysctl_fd, jit_enabled = 0, err = 0;
__u64 array_value;
+ uid_t my_uid = getuid();
+ time_t now, load_time;
sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
if (sysctl_fd != -1) {
@@ -307,6 +320,7 @@ static void test_bpf_obj_id(void)
/* Check bpf_obj_get_info_by_fd() */
bzero(zeros, sizeof(zeros));
for (i = 0; i < nr_iters; i++) {
+ now = time(NULL);
err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
&objs[i], &prog_fds[i]);
/* test_obj_id.o is a dumb prog. It should never fail
@@ -316,6 +330,38 @@ static void test_bpf_obj_id(void)
error_cnt++;
assert(!err);
+ /* Insert a magic value to the map */
+ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
+ assert(map_fds[i] >= 0);
+ err = bpf_map_update_elem(map_fds[i], &array_key,
+ &array_magic_value, 0);
+ assert(!err);
+
+ /* Check getting map info */
+ info_len = sizeof(struct bpf_map_info) * 2;
+ bzero(&map_infos[i], info_len);
+ err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
+ &info_len);
+ if (CHECK(err ||
+ map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
+ map_infos[i].key_size != sizeof(__u32) ||
+ map_infos[i].value_size != sizeof(__u64) ||
+ map_infos[i].max_entries != 1 ||
+ map_infos[i].map_flags != 0 ||
+ info_len != sizeof(struct bpf_map_info) ||
+ strcmp((char *)map_infos[i].name, expected_map_name),
+ "get-map-info(fd)",
+ "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+ err, errno,
+ map_infos[i].type, BPF_MAP_TYPE_ARRAY,
+ info_len, sizeof(struct bpf_map_info),
+ map_infos[i].key_size,
+ map_infos[i].value_size,
+ map_infos[i].max_entries,
+ map_infos[i].map_flags,
+ map_infos[i].name, expected_map_name))
+ goto done;
+
/* Check getting prog info */
info_len = sizeof(struct bpf_prog_info) * 2;
bzero(&prog_infos[i], info_len);
@@ -325,8 +371,16 @@ static void test_bpf_obj_id(void)
prog_infos[i].jited_prog_len = sizeof(jited_insns);
prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
+ prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
+ prog_infos[i].nr_map_ids = 2;
+ err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
+ assert(!err);
+ err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
+ assert(!err);
err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
&info_len);
+ load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ + (prog_infos[i].load_time / nsec_per_sec);
if (CHECK(err ||
prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
info_len != sizeof(struct bpf_prog_info) ||
@@ -334,9 +388,14 @@ static void test_bpf_obj_id(void)
(jit_enabled &&
!memcmp(jited_insns, zeros, sizeof(zeros))) ||
!prog_infos[i].xlated_prog_len ||
- !memcmp(xlated_insns, zeros, sizeof(zeros)),
+ !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
+ load_time < now - 60 || load_time > now + 60 ||
+ prog_infos[i].created_by_uid != my_uid ||
+ prog_infos[i].nr_map_ids != 1 ||
+ *(int *)prog_infos[i].map_ids != map_infos[i].id ||
+ strcmp((char *)prog_infos[i].name, expected_prog_name),
"get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d\n",
+ "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
err, errno, i,
prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
info_len, sizeof(struct bpf_prog_info),
@@ -344,36 +403,12 @@ static void test_bpf_obj_id(void)
prog_infos[i].jited_prog_len,
prog_infos[i].xlated_prog_len,
!!memcmp(jited_insns, zeros, sizeof(zeros)),
- !!memcmp(xlated_insns, zeros, sizeof(zeros))))
- goto done;
-
- map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
- assert(map_fds[i] >= 0);
- err = bpf_map_update_elem(map_fds[i], &array_key,
- &array_magic_value, 0);
- assert(!err);
-
- /* Check getting map info */
- info_len = sizeof(struct bpf_map_info) * 2;
- bzero(&map_infos[i], info_len);
- err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
- &info_len);
- if (CHECK(err ||
- map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
- map_infos[i].key_size != sizeof(__u32) ||
- map_infos[i].value_size != sizeof(__u64) ||
- map_infos[i].max_entries != 1 ||
- map_infos[i].map_flags != 0 ||
- info_len != sizeof(struct bpf_map_info),
- "get-map-info(fd)",
- "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X\n",
- err, errno,
- map_infos[i].type, BPF_MAP_TYPE_ARRAY,
- info_len, sizeof(struct bpf_map_info),
- map_infos[i].key_size,
- map_infos[i].value_size,
- map_infos[i].max_entries,
- map_infos[i].map_flags))
+ !!memcmp(xlated_insns, zeros, sizeof(zeros)),
+ load_time, now,
+ prog_infos[i].created_by_uid, my_uid,
+ prog_infos[i].nr_map_ids, 1,
+ *(int *)prog_infos[i].map_ids, map_infos[i].id,
+ prog_infos[i].name, expected_prog_name))
goto done;
}
@@ -382,6 +417,7 @@ static void test_bpf_obj_id(void)
next_id = 0;
while (!bpf_prog_get_next_id(next_id, &next_id)) {
struct bpf_prog_info prog_info = {};
+ __u32 saved_map_id;
int prog_fd;
info_len = sizeof(prog_info);
@@ -404,16 +440,33 @@ static void test_bpf_obj_id(void)
nr_id_found++;
+ /* Negative test:
+ * prog_info.nr_map_ids = 1
+ * prog_info.map_ids = NULL
+ */
+ prog_info.nr_map_ids = 1;
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (CHECK(!err || errno != EFAULT,
+ "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
+ err, errno, EFAULT))
+ break;
+ bzero(&prog_info, sizeof(prog_info));
+ info_len = sizeof(prog_info);
+
+ saved_map_id = *(int *)(prog_infos[i].map_ids);
+ prog_info.map_ids = prog_infos[i].map_ids;
+ prog_info.nr_map_ids = 2;
err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
prog_infos[i].jited_prog_insns = 0;
prog_infos[i].xlated_prog_insns = 0;
CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
- memcmp(&prog_info, &prog_infos[i], info_len),
+ memcmp(&prog_info, &prog_infos[i], info_len) ||
+ *(int *)prog_info.map_ids != saved_map_id,
"get-prog-info(next_id->fd)",
- "err %d errno %d info_len %u(%lu) memcmp %d\n",
+ "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n",
err, errno, info_len, sizeof(struct bpf_prog_info),
- memcmp(&prog_info, &prog_infos[i], info_len));
-
+ memcmp(&prog_info, &prog_infos[i], info_len),
+ *(int *)prog_info.map_ids, saved_map_id);
close(prog_fd);
}
CHECK(nr_id_found != nr_iters,
@@ -495,6 +548,75 @@ static void test_pkt_md_access(void)
bpf_object__close(obj);
}
+static void test_obj_name(void)
+{
+ struct {
+ const char *name;
+ int success;
+ int expected_errno;
+ } tests[] = {
+ { "", 1, 0 },
+ { "_123456789ABCDE", 1, 0 },
+ { "_123456789ABCDEF", 0, EINVAL },
+ { "_123456789ABCD\n", 0, EINVAL },
+ };
+ struct bpf_insn prog[] = {
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 duration = 0;
+ int i;
+
+ for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ size_t name_len = strlen(tests[i].name) + 1;
+ union bpf_attr attr;
+ size_t ncopy;
+ int fd;
+
+ /* test different attr.prog_name during BPF_PROG_LOAD */
+ ncopy = name_len < sizeof(attr.prog_name) ?
+ name_len : sizeof(attr.prog_name);
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+ attr.insn_cnt = 2;
+ attr.insns = ptr_to_u64(prog);
+ attr.license = ptr_to_u64("");
+ memcpy(attr.prog_name, tests[i].name, ncopy);
+
+ fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ CHECK((tests[i].success && fd < 0) ||
+ (!tests[i].success && fd != -1) ||
+ (!tests[i].success && errno != tests[i].expected_errno),
+ "check-bpf-prog-name",
+ "fd %d(%d) errno %d(%d)\n",
+ fd, tests[i].success, errno, tests[i].expected_errno);
+
+ if (fd != -1)
+ close(fd);
+
+ /* test different attr.map_name during BPF_MAP_CREATE */
+ ncopy = name_len < sizeof(attr.map_name) ?
+ name_len : sizeof(attr.map_name);
+ bzero(&attr, sizeof(attr));
+ attr.map_type = BPF_MAP_TYPE_ARRAY;
+ attr.key_size = 4;
+ attr.value_size = 4;
+ attr.max_entries = 1;
+ attr.map_flags = 0;
+ memcpy(attr.map_name, tests[i].name, ncopy);
+ fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+ CHECK((tests[i].success && fd < 0) ||
+ (!tests[i].success && fd != -1) ||
+ (!tests[i].success && errno != tests[i].expected_errno),
+ "check-bpf-map-name",
+ "fd %d(%d) errno %d(%d)\n",
+ fd, tests[i].success, errno, tests[i].expected_errno);
+
+ if (fd != -1)
+ close(fd);
+ }
+}
+
int main(void)
{
struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@ -507,6 +629,7 @@ int main(void)
test_tcp_estats();
test_bpf_obj_id();
test_pkt_md_access();
+ test_obj_name();
printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 64ae21f64489..1b93941bdfea 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -6658,6 +6658,253 @@ static struct bpf_test tests[] = {
.result = REJECT,
},
{
+ "meta access, test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet, off=-8",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test5",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_xdp_adjust_meta),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R3 !read_ok",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test7",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test8",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test9",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test10",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_IMM(BPF_REG_5, 42),
+ BPF_MOV64_IMM(BPF_REG_6, 24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test11",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_IMM(BPF_REG_5, 42),
+ BPF_MOV64_IMM(BPF_REG_6, 24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test12",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
"arithmetic ops make PTR_TO_CTX unusable",
.insns = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
@@ -7003,6 +7250,78 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_XDP,
},
{
+ "bpf_exit with invalid return code. test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x0; 0xffffffff)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x0; 0x3)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x2; 0x0)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 is not a known value (ctx)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test7",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has unknown scalar value",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
"XDP pkt read, pkt_end >= pkt_data', bad access 1",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@ -7198,7 +7517,7 @@ static int create_map_in_map(void)
return inner_map_fd;
}
- outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
sizeof(int), inner_map_fd, 1, 0);
if (outer_map_fd < 0)
printf("Failed to create array of maps '%s'!\n",
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
new file mode 100644
index 000000000000..3cc0b561489e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -0,0 +1,171 @@
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+
+#include <bpf/bpf.h>
+
+#define LOG_SIZE (1 << 20)
+
+#define err(str...) printf("ERROR: " str)
+
+static const struct bpf_insn code_sample[] = {
+ /* We need a few instructions to pass the min log length */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static int load(char *log, size_t log_len, int log_level)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn));
+ attr.insns = ptr_to_u64(code_sample);
+ attr.license = ptr_to_u64("GPL");
+ attr.log_buf = ptr_to_u64(log);
+ attr.log_size = log_len;
+ attr.log_level = log_level;
+
+ return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+static void check_ret(int ret, int exp_errno)
+{
+ if (ret > 0) {
+ close(ret);
+ err("broken sample loaded successfully!?\n");
+ exit(1);
+ }
+
+ if (!ret || errno != exp_errno) {
+ err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n",
+ ret, errno, -1, exp_errno);
+ exit(1);
+ }
+}
+
+static void check_ones(const char *buf, size_t len, const char *msg)
+{
+ while (len--)
+ if (buf[len] != 1) {
+ err("%s", msg);
+ exit(1);
+ }
+}
+
+static void test_log_good(char *log, size_t buf_len, size_t log_len,
+ size_t exp_len, int exp_errno, const char *full_log)
+{
+ size_t len;
+ int ret;
+
+ memset(log, 1, buf_len);
+
+ ret = load(log, log_len, 1);
+ check_ret(ret, exp_errno);
+
+ len = strnlen(log, buf_len);
+ if (len == buf_len) {
+ err("verifier did not NULL terminate the log\n");
+ exit(1);
+ }
+ if (exp_len && len != exp_len) {
+ err("incorrect log length expected:%zd have:%zd\n",
+ exp_len, len);
+ exit(1);
+ }
+
+ if (strchr(log, 1)) {
+ err("verifier leaked a byte through\n");
+ exit(1);
+ }
+
+ check_ones(log + len + 1, buf_len - len - 1,
+ "verifier wrote bytes past NULL termination\n");
+
+ if (memcmp(full_log, log, LOG_SIZE)) {
+ err("log did not match expected output\n");
+ exit(1);
+ }
+}
+
+static void test_log_bad(char *log, size_t log_len, int log_level)
+{
+ int ret;
+
+ ret = load(log, log_len, log_level);
+ check_ret(ret, EINVAL);
+ if (log)
+ check_ones(log, LOG_SIZE,
+ "verifier touched log with bad parameters\n");
+}
+
+int main(int argc, char **argv)
+{
+ char full_log[LOG_SIZE];
+ char log[LOG_SIZE];
+ size_t want_len;
+ int i;
+
+ memset(log, 1, LOG_SIZE);
+
+ /* Test incorrect attr */
+ printf("Test log_level 0...\n");
+ test_log_bad(log, LOG_SIZE, 0);
+
+ printf("Test log_size < 128...\n");
+ test_log_bad(log, 15, 1);
+
+ printf("Test log_buff = NULL...\n");
+ test_log_bad(NULL, LOG_SIZE, 1);
+
+ /* Test with log big enough */
+ printf("Test oversized buffer...\n");
+ test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log);
+
+ want_len = strlen(full_log);
+
+ printf("Test exact buffer...\n");
+ test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log);
+
+ printf("Test undersized buffers...\n");
+ for (i = 0; i < 64; i++) {
+ full_log[want_len - i + 1] = 1;
+ full_log[want_len - i] = 0;
+
+ test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i,
+ ENOSPC, full_log);
+ }
+
+ printf("test_verifier_log: OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/test_xdp_meta.c
new file mode 100644
index 000000000000..8d0182650653
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_meta.c
@@ -0,0 +1,53 @@
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+
+#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
+#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
+#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
+
+SEC("t")
+int ing_cls(struct __sk_buff *ctx)
+{
+ __u8 *data, *data_meta, *data_end;
+ __u32 diff = 0;
+
+ data_meta = ctx_ptr(ctx, data_meta);
+ data_end = ctx_ptr(ctx, data_end);
+ data = ctx_ptr(ctx, data);
+
+ if (data + ETH_ALEN > data_end ||
+ data_meta + round_up(ETH_ALEN, 4) > data)
+ return TC_ACT_SHOT;
+
+ diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
+ diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
+
+ return diff ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("x")
+int ing_xdp(struct xdp_md *ctx)
+{
+ __u8 *data, *data_meta, *data_end;
+ int ret;
+
+ ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+ if (ret < 0)
+ return XDP_DROP;
+
+ data_meta = ctx_ptr(ctx, data_meta);
+ data_end = ctx_ptr(ctx, data_end);
+ data = ctx_ptr(ctx, data);
+
+ if (data + ETH_ALEN > data_end ||
+ data_meta + round_up(ETH_ALEN, 4) > data)
+ return XDP_DROP;
+
+ __builtin_memcpy(data_meta, data, ETH_ALEN);
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
new file mode 100755
index 000000000000..307aa856cee3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_xdp_meta [PASS]";
+ else
+ echo "selftests: test_xdp_meta [FAILED]";
+ fi
+
+ set +e
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdp off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without the ip xdp support"
+ exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2
+
+ip netns exec ns1 tc qdisc add dev veth1 clsact
+ip netns exec ns2 tc qdisc add dev veth2 clsact
+
+ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
+
+ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x
+ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 57b5ff576240..5215493166c9 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -15,6 +15,14 @@ check_err()
fi
}
+# same but inverted -- used when command must fail for test to pass
+check_fail()
+{
+ if [ $1 -eq 0 ]; then
+ ret=1
+ fi
+}
+
kci_add_dummy()
{
ip link add name "$devdummy" type dummy
@@ -29,6 +37,26 @@ kci_del_dummy()
check_err $?
}
+kci_test_netconf()
+{
+ dev="$1"
+ r=$ret
+
+ ip netconf show dev "$dev" > /dev/null
+ check_err $?
+
+ for f in 4 6; do
+ ip -$f netconf show dev "$dev" > /dev/null
+ check_err $?
+ done
+
+ if [ $ret -ne 0 ] ;then
+ echo "FAIL: ip netconf show $dev"
+ test $r -eq 0 && ret=0
+ return 1
+ fi
+}
+
# add a bridge with vlans on top
kci_test_bridge()
{
@@ -55,6 +83,11 @@ kci_test_bridge()
check_err $?
ip r s t all > /dev/null
check_err $?
+
+ for name in "$devbr" "$vlandev" "$devdummy" ; do
+ kci_test_netconf "$name"
+ done
+
ip -6 addr del dev "$vlandev" dead:42::1234/64
check_err $?
@@ -92,6 +125,9 @@ kci_test_gre()
check_err $?
ip addr > /dev/null
check_err $?
+
+ kci_test_netconf "$gredev"
+
ip addr del dev "$devdummy" 10.23.7.11/24
check_err $?
@@ -235,6 +271,237 @@ kci_test_addrlabel()
echo "PASS: ipv6 addrlabel"
}
+kci_test_ifalias()
+{
+ ret=0
+ namewant=$(uuidgen)
+ syspathname="/sys/class/net/$devdummy/ifalias"
+
+ ip link set dev "$devdummy" alias "$namewant"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: cannot set interface alias of $devdummy to $namewant"
+ return 1
+ fi
+
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_err $?
+
+ if [ -r "$syspathname" ] ; then
+ read namehave < "$syspathname"
+ if [ "$namewant" != "$namehave" ]; then
+ echo "FAIL: did set ifalias $namewant but got $namehave"
+ return 1
+ fi
+
+ namewant=$(uuidgen)
+ echo "$namewant" > "$syspathname"
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_err $?
+
+ # sysfs interface allows to delete alias again
+ echo "" > "$syspathname"
+
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_fail $?
+
+ for i in $(seq 1 100); do
+ uuidgen > "$syspathname" &
+ done
+
+ wait
+
+ # re-add the alias -- kernel should free mem when dummy dev is removed
+ ip link set dev "$devdummy" alias "$namewant"
+ check_err $?
+ fi
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: set interface alias $devdummy to $namewant"
+ return 1
+ fi
+
+ echo "PASS: set ifalias $namewant for $devdummy"
+}
+
+kci_test_vrf()
+{
+ vrfname="test-vrf"
+ ret=0
+
+ ip link show type vrf 2>/dev/null
+ if [ $? -ne 0 ]; then
+ echo "SKIP: vrf: iproute2 too old"
+ return 0
+ fi
+
+ ip link add "$vrfname" type vrf table 10
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: can't add vrf interface, skipping test"
+ return 0
+ fi
+
+ ip -br link show type vrf | grep -q "$vrfname"
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: created vrf device not found"
+ return 1
+ fi
+
+ ip link set dev "$vrfname" up
+ check_err $?
+
+ ip link set dev "$devdummy" master "$vrfname"
+ check_err $?
+ ip link del dev "$vrfname"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: vrf"
+ return 1
+ fi
+
+ echo "PASS: vrf"
+}
+
+kci_test_encap_vxlan()
+{
+ ret=0
+ vxlan="test-vxlan0"
+ vlan="test-vlan0"
+ testns="$1"
+
+ ip netns exec "$testns" ip link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
+ dev "$devdummy" dstport 4789 2>/dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: can't add vxlan interface, skipping test"
+ return 0
+ fi
+ check_err $?
+
+ ip netns exec "$testns" ip addr add 10.2.11.49/24 dev "$vxlan"
+ check_err $?
+
+ ip netns exec "$testns" ip link set up dev "$vxlan"
+ check_err $?
+
+ ip netns exec "$testns" ip link add link "$vxlan" name "$vlan" type vlan id 1
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$vxlan"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: vxlan"
+ return 1
+ fi
+ echo "PASS: vxlan"
+}
+
+kci_test_encap_fou()
+{
+ ret=0
+ name="test-fou"
+ testns="$1"
+
+ ip fou help 2>&1 |grep -q 'Usage: ip fou'
+ if [ $? -ne 0 ];then
+ echo "SKIP: fou: iproute2 too old"
+ return 1
+ fi
+
+ ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null
+ if [ $? -ne 0 ];then
+ echo "FAIL: can't add fou port 7777, skipping test"
+ return 1
+ fi
+
+ ip netns exec "$testns" ip fou add port 8888 ipproto 4
+ check_err $?
+
+ ip netns exec "$testns" ip fou del port 9999 2>/dev/null
+ check_fail $?
+
+ ip netns exec "$testns" ip fou del port 7777
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: fou"
+ return 1
+ fi
+
+ echo "PASS: fou"
+}
+
+# test various encap methods, use netns to avoid unwanted interference
+kci_test_encap()
+{
+ testns="testns"
+ ret=0
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP encap tests: cannot add net namespace $testns"
+ return 1
+ fi
+
+ ip netns exec "$testns" ip link set lo up
+ check_err $?
+
+ ip netns exec "$testns" ip link add name "$devdummy" type dummy
+ check_err $?
+ ip netns exec "$testns" ip link set "$devdummy" up
+ check_err $?
+
+ kci_test_encap_vxlan "$testns"
+ kci_test_encap_fou "$testns"
+
+ ip netns del "$testns"
+}
+
+kci_test_macsec()
+{
+ msname="test_macsec0"
+ ret=0
+
+ ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+ if [ $? -ne 0 ]; then
+ echo "SKIP: macsec: iproute2 too old"
+ return 0
+ fi
+
+ ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+
+ ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+ check_err $?
+
+ ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+ check_err $?
+
+ ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
+ check_err $?
+
+ ip macsec show > /dev/null
+ check_err $?
+
+ ip link del dev "$msname"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: macsec"
+ return 1
+ fi
+
+ echo "PASS: macsec"
+}
+
kci_test_rtnl()
{
kci_add_dummy
@@ -249,6 +516,10 @@ kci_test_rtnl()
kci_test_gre
kci_test_bridge
kci_test_addrlabel
+ kci_test_ifalias
+ kci_test_vrf
+ kci_test_encap
+ kci_test_macsec
kci_del_dummy
}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
new file mode 100644
index 000000000000..e2187b6e0b7a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -0,0 +1,469 @@
+[
+ {
+ "id": "e89a",
+ "name": "Add valid pass action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pass index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "a02c",
+ "name": "Add valid pipe action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pipe index 6",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pipe.*index 6 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "feef",
+ "name": "Add valid reclassify action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action reclassify index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify.*index 5 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "8a7a",
+ "name": "Add valid drop action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 30",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 30 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "9a52",
+ "name": "Add valid continue action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action continue index 432",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action continue.*index 432 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "d700",
+ "name": "Add invalid action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pump index 386",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action.*index 386 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "9215",
+ "name": "Add action with duplicate index",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pipe index 15"
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 15",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 15 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "798e",
+ "name": "Add action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 4294967296",
+ "expExitCode": "255",
+ "verifyCmd": "actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 4294967296 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "22be",
+ "name": "Add action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "ac2a",
+ "name": "List actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 101",
+ "$TC actions add action reclassify index 102",
+ "$TC actions add action reclassify index 103",
+ "$TC actions add action reclassify index 104",
+ "$TC actions add action reclassify index 105"
+ ],
+ "cmdUnderTest": "$TC actions list action gact",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify",
+ "matchCount": "5",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "3edf",
+ "name": "Flush gact actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ "$TC actions add action reclassify index 101",
+ "$TC actions add action reclassify index 102",
+ "$TC actions add action reclassify index 103",
+ "$TC actions add action reclassify index 104",
+ "$TC actions add action reclassify index 105"
+ ],
+ "cmdUnderTest": "$TC actions flush action gact",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "63ec",
+ "name": "Delete pass action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pass index 1"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "46be",
+ "name": "Delete pipe action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pipe index 9"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pipe.*index 9 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "2e08",
+ "name": "Delete reclassify action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 65536"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 65536",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify.*index 65536 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "99c4",
+ "name": "Delete drop action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 16"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 16 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "fb6b",
+ "name": "Delete continue action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action continue index 32"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 32",
+ "expExitCode": "0",
+ "verifyCmd": "actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action continue.*index 32 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "0eb3",
+ "name": "Delete non-existent action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 2",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "f02c",
+ "name": "Replace gact action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 10",
+ "$TC actions add action drop index 12"
+ ],
+ "cmdUnderTest": "$TC actions replace action ok index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action gact",
+ "matchPattern": "action order [0-9]*: gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "525f",
+ "name": "Get gact action by index",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 3900800700"
+ ],
+ "cmdUnderTest": "$TC actions get action gact index 3900800700",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action gact index 3900800700",
+ "matchPattern": "index 3900800700",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
new file mode 100644
index 000000000000..9f34f0753969
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -0,0 +1,52 @@
+[
+ {
+ "id": "a568",
+ "name": "Add action with ife type",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife encode type 0xDEAD index 1"
+ ],
+ "cmdUnderTest": "$TC actions get action ife index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "type 0xDEAD",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "b983",
+ "name": "Add action without ife type",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife encode index 1"
+ ],
+ "cmdUnderTest": "$TC actions get action ife index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "type 0xED3E",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
new file mode 100644
index 000000000000..0fcccf18399b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -0,0 +1,223 @@
+[
+ {
+ "id": "5124",
+ "name": "Add mirred mirror to egress action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 dev lo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "6fb4",
+ "name": "Add mirred redirect to egress action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress redirect index 2 dev lo action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "ba38",
+ "name": "Get mirred actions",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress mirror index 1 dev lo",
+ "$TC actions add action mirred egress redirect index 2 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions show action mirred",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "[Mirror|Redirect] to device lo",
+ "matchCount": "2",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "d7c0",
+ "name": "Add invalid mirred direction",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred inbound mirror index 20 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e213",
+ "name": "Add invalid mirred action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress remirror index 20 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress.*to device lo\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2d89",
+ "name": "Add mirred action with invalid device",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 20 dev eltoh",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device eltoh\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "300b",
+ "name": "Add mirred action with duplicate index",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress redirect index 15 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 15 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "a70e",
+ "name": "Delete mirred mirror action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress mirror index 5 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions del action mirred index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 5 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "3fb3",
+ "name": "Delete mirred redirect action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress redirect index 5 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions del action mirred index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 5 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
new file mode 100644
index 000000000000..0e602a3f9393
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -0,0 +1,527 @@
+[
+ {
+ "id": "49aa",
+ "name": "Add valid basic police action",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 1Kbit burst 10Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "3abe",
+ "name": "Add police action with duplicate index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 4Mbit burst 120k index 9"
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 8kbit burst 24k index 9",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "49fa",
+ "name": "Add valid police action with mtu",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 1k index 98",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 98",
+ "matchPattern": "action order [0-9]*: police 0x62 rate 90Kbit burst 10Kb mtu 1Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "7943",
+ "name": "Add valid police action with peakrate",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100kbit index 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x3 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "055e",
+ "name": "Add police action with peakrate and no mtu",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 5kbit burst 6kb peakrate 10kbit index 9",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x9 rate 5Kb burst 10Kb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f057",
+ "name": "Add police action with valid overhead",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1mbit burst 100k overhead 64 index 64",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 64",
+ "matchPattern": "action order [0-9]*: police 0x40 rate 1Mbit burst 100Kb mtu 2Kb action reclassify overhead 64b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "7ffb",
+ "name": "Add police action with ethernet linklayer type",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer ethernet index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions show action police",
+ "matchPattern": "action order [0-9]*: police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "3dda",
+ "name": "Add police action with atm linklayer type",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer atm index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions show action police",
+ "matchPattern": "action order [0-9]*: police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "551b",
+ "name": "Add police actions with conform-exceed control continue/drop",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed continue/drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action continue/drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "0c70",
+ "name": "Add police actions with conform-exceed control pass/reclassify",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/reclassify index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x4 rate 3Mbit burst 250Kb mtu 2Kb action pass/reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "d946",
+ "name": "Add police actions with conform-exceed control pass/pipe",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/pipe index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "336e",
+ "name": "Delete police action",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 5mbit burst 2m index 12"
+ ],
+ "cmdUnderTest": "$TC actions delete action police index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0xc rate 5Mb burst 2Mb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "77fa",
+ "name": "Get single police action from many actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions get action police index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 4",
+ "matchPattern": "action order [0-9]*: police 0x4 rate 4Mbit burst 400Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "aa43",
+ "name": "Get single police action without specifying index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1"
+ ],
+ "cmdUnderTest": "$TC actions get action police",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action police",
+ "matchPattern": "action order [0-9]*: police",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "858b",
+ "name": "List police actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions list action police",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x[1-8] rate [1-8]Mbit burst [1-8]00Kb",
+ "matchCount": "8",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "1c3a",
+ "name": "Flush police actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions flush action police",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police",
+ "matchCount": "0",
+ "teardown": [
+ ""
+ ]
+ },
+ {
+ "id": "7326",
+ "name": "Add police action with control continue",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "34fa",
+ "name": "Add police action with control drop",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "8dd5",
+ "name": "Add police action with control ok",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m ok index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "b9d1",
+ "name": "Add police action with control reclassify",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "c534",
+ "name": "Add police action with control pipe",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
new file mode 100644
index 000000000000..e89a7aa4012d
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -0,0 +1,130 @@
+[
+ {
+ "id": "b078",
+ "name": "Add simple action",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"A triumph\" index 60",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <A triumph>.*index 60 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "6d4c",
+ "name": "Add simple action with duplicate index",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Aruba\" index 4"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"Jamaica\" index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Jamaica>.*ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "2542",
+ "name": "List simple actions",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Rock\"",
+ "$TC actions add action simple sdata \"Paper\"",
+ "$TC actions add action simple sdata \"Scissors\" index 98"
+ ],
+ "cmdUnderTest": "$TC actions list action simple",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
+ "matchCount": "3",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "ea67",
+ "name": "Delete simple action",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Blinkenlights\" index 1"
+ ],
+ "cmdUnderTest": "$TC actions delete action simple index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Blinkenlights>.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "8ff1",
+ "name": "Flush simple actions",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Kirk\"",
+ "$TC actions add action simple sdata \"Spock\" index 50",
+ "$TC actions add action simple sdata \"McCoy\" index 9"
+ ],
+ "cmdUnderTest": "$TC actions flush action simple",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
+ "matchCount": "0",
+ "teardown": [
+ ""
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
new file mode 100644
index 000000000000..99635ea4722e
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -0,0 +1,320 @@
+[
+ {
+ "id": "6236",
+ "name": "Add skbedit action with valid mark",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "407b",
+ "name": "Add skbedit action with invalid mark",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 666777888999",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "081d",
+ "name": "Add skbedit action with priority",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit prio 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit priority :99",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "cc37",
+ "name": "Add skbedit action with invalid priority",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit prio foo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit priority",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "3c95",
+ "name": "Add skbedit action with queue_mapping",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 909",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "985c",
+ "name": "Add skbedit action with invalid queue_mapping",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 67000",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "224f",
+ "name": "Add skbedit action with ptype host",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype host",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype host",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "d1a3",
+ "name": "Add skbedit action with ptype otherhost",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype otherhost",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype otherhost",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "b9c6",
+ "name": "Add skbedit action with invalid ptype",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype openair",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype openair",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "5172",
+ "name": "List skbedit actions",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbedit ptype otherhost",
+ "$TC actions add action skbedit ptype broadcast",
+ "$TC actions add action skbedit mark 59",
+ "$TC actions add action skbedit mark 409"
+ ],
+ "cmdUnderTest": "$TC actions list action skbedit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit",
+ "matchCount": "4",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "a6d6",
+ "name": "Add skbedit action with index",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "index 4040404040",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "38f3",
+ "name": "Delete skbedit action",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbedit mark 42 index 9009"
+ ],
+ "cmdUnderTest": "$TC actions del action skbedit index 9009",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark 42",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "ce97",
+ "name": "Flush skbedit actions",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ "$TC actions add action skbedit mark 500",
+ "$TC actions add action skbedit mark 501",
+ "$TC actions add action skbedit mark 502",
+ "$TC actions add action skbedit mark 503",
+ "$TC actions add action skbedit mark 504",
+ "$TC actions add action skbedit mark 505",
+ "$TC actions add action skbedit mark 506"
+ ],
+ "cmdUnderTest": "$TC actions flush action skbedit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
new file mode 100644
index 000000000000..e34075059c26
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -0,0 +1,372 @@
+[
+ {
+ "id": "7d50",
+ "name": "Add skbmod action to set destination mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set dmac 11:22:33:44:55:66\\s+index 5",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "9b29",
+ "name": "Add skbmod action to set source mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 77:88:99:AA:BB:CC index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 7",
+ "matchPattern": "action order [0-9]*: skbmod pipe set smac 77:88:99:aa:bb:cc\\s+index 7",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "1724",
+ "name": "Add skbmod action with invalid mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:44:55:44:55",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set smac 00:44:55:44:55",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "3cf1",
+ "name": "Add skbmod action with valid etype",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFE",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "a749",
+ "name": "Add skbmod action with invalid etype",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefef",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFEF",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "bfe6",
+ "name": "Add skbmod action to swap mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod swap mac",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod pipe swap mac",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "839b",
+ "name": "Add skbmod action with control pipe",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod swap mac pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe swap mac",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "c167",
+ "name": "Add skbmod action with control reclassify",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xbeef reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod reclassify set etype 0xBEEF",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "0c2f",
+ "name": "Add skbmod action with control drop",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0x0001 drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod drop set etype 0x1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "d113",
+ "name": "Add skbmod action with control continue",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0x1 continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod continue set etype 0x1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "7242",
+ "name": "Add skbmod action with control pass",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:00:00:00:00:01 pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pass set smac 00:00:00:00:00:01",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "58cb",
+ "name": "List skbmod actions",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions ls action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod",
+ "matchCount": "5",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "9aa8",
+ "name": "Get a single skbmod action from a list",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions ls action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 4",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x0031",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "e93a",
+ "name": "Delete an skbmod action",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x1111 index 909"
+ ],
+ "cmdUnderTest": "$TC actions del action skbmod index 909",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x1111\\s+index 909",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "40c2",
+ "name": "Flush skbmod actions",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions flush action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json
deleted file mode 100644
index 6973bdc5b5bf..000000000000
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tests.json
+++ /dev/null
@@ -1,1165 +0,0 @@
-[
- {
- "id": "e89a",
- "name": "Add valid pass action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action pass index 8",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action pass.*index 8 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "a02c",
- "name": "Add valid pipe action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action pipe index 6",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action pipe.*index 6 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "feef",
- "name": "Add valid reclassify action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action reclassify index 5",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action reclassify.*index 5 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "8a7a",
- "name": "Add valid drop action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action drop index 30",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action drop.*index 30 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "9a52",
- "name": "Add valid continue action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action continue index 432",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action continue.*index 432 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "d700",
- "name": "Add invalid action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action pump index 386",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action.*index 386 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "9215",
- "name": "Add action with duplicate index",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action pipe index 15"
- ],
- "cmdUnderTest": "$TC actions add action drop index 15",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action drop.*index 15 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "798e",
- "name": "Add action with index exceeding 32-bit maximum",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action drop index 4294967296",
- "expExitCode": "255",
- "verifyCmd": "actions list action gact",
- "matchPattern": "action order [0-9]*: gact action drop.*index 4294967296 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "22be",
- "name": "Add action with index at 32-bit maximum",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action drop index 4294967295",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action drop.*index 4294967295 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "ac2a",
- "name": "List actions",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action reclassify index 101",
- "$TC actions add action reclassify index 102",
- "$TC actions add action reclassify index 103",
- "$TC actions add action reclassify index 104",
- "$TC actions add action reclassify index 105"
- ],
- "cmdUnderTest": "$TC actions list action gact",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action reclassify",
- "matchCount": "5",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "63ec",
- "name": "Delete pass action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action pass index 1"
- ],
- "cmdUnderTest": "$TC actions del action gact index 1",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action pass.*index 1 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "46be",
- "name": "Delete pipe action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action pipe index 9"
- ],
- "cmdUnderTest": "$TC actions del action gact index 9",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action pipe.*index 9 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "2e08",
- "name": "Delete reclassify action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action reclassify index 65536"
- ],
- "cmdUnderTest": "$TC actions del action gact index 65536",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action reclassify.*index 65536 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "99c4",
- "name": "Delete drop action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action drop index 16"
- ],
- "cmdUnderTest": "$TC actions del action gact index 16",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action drop.*index 16 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "fb6b",
- "name": "Delete continue action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action continue index 32"
- ],
- "cmdUnderTest": "$TC actions del action gact index 32",
- "expExitCode": "0",
- "verifyCmd": "actions list action gact",
- "matchPattern": "action order [0-9]*: gact action continue.*index 32 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "0eb3",
- "name": "Delete non-existent action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions del action gact index 2",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order [0-9]*: gact action",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "5124",
- "name": "Add mirred mirror to egress action",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 dev lo",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 1 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "6fb4",
- "name": "Add mirred redirect to egress action",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action mirred egress redirect index 2 dev lo action pipe",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "ba38",
- "name": "Get mirred actions",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ],
- "$TC actions add action mirred egress mirror index 1 dev lo",
- "$TC actions add action mirred egress redirect index 2 dev lo"
- ],
- "cmdUnderTest": "$TC actions show action mirred",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "[Mirror|Redirect] to device lo",
- "matchCount": "2",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "d7c0",
- "name": "Add invalid mirred direction",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action mirred inbound mirror index 20 dev lo",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 20 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "e213",
- "name": "Add invalid mirred action",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action mirred egress remirror index 20 dev lo",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "action order [0-9]*: mirred \\(Egress.*to device lo\\).*index 20 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "2d89",
- "name": "Add mirred action with invalid device",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action mirred egress mirror index 20 dev eltoh",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "action order [0-9]*: mirred \\(.*to device eltoh\\).*index 20 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "300b",
- "name": "Add mirred action with duplicate index",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ],
- "$TC actions add action mirred egress redirect index 15 dev lo"
- ],
- "cmdUnderTest": "$TC actions add action mirred egress mirror index 15 dev lo",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 15 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "a70e",
- "name": "Delete mirred mirror action",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ],
- "$TC actions add action mirred egress mirror index 5 dev lo"
- ],
- "cmdUnderTest": "$TC actions del action mirred index 5",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 5 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "3fb3",
- "name": "Delete mirred redirect action",
- "category": [
- "actions",
- "mirred"
- ],
- "setup": [
- [
- "$TC actions flush action mirred",
- 0,
- 1,
- 255
- ],
- "$TC actions add action mirred egress redirect index 5 dev lo"
- ],
- "cmdUnderTest": "$TC actions del action mirred index 5",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action mirred",
- "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 5 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action mirred"
- ]
- },
- {
- "id": "b078",
- "name": "Add simple action",
- "category": [
- "actions",
- "simple"
- ],
- "setup": [
- [
- "$TC actions flush action simple",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action simple sdata \"A triumph\" index 60",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action simple",
- "matchPattern": "action order [0-9]*: Simple <A triumph>.*index 60 ref",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action simple"
- ]
- },
- {
- "id": "6d4c",
- "name": "Add simple action with duplicate index",
- "category": [
- "actions",
- "simple"
- ],
- "setup": [
- [
- "$TC actions flush action simple",
- 0,
- 1,
- 255
- ],
- "$TC actions add action simple sdata \"Aruba\" index 4"
- ],
- "cmdUnderTest": "$TC actions add action simple sdata \"Jamaica\" index 4",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action simple",
- "matchPattern": "action order [0-9]*: Simple <Jamaica>.*ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action simple"
- ]
- },
- {
- "id": "2542",
- "name": "List simple actions",
- "category": [
- "actions",
- "simple"
- ],
- "setup": [
- [
- "$TC actions flush action simple",
- 0,
- 1,
- 255
- ],
- "$TC actions add action simple sdata \"Rock\"",
- "$TC actions add action simple sdata \"Paper\"",
- "$TC actions add action simple sdata \"Scissors\" index 98"
- ],
- "cmdUnderTest": "$TC actions list action simple",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action simple",
- "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
- "matchCount": "3",
- "teardown": [
- "$TC actions flush action simple"
- ]
- },
- {
- "id": "ea67",
- "name": "Delete simple action",
- "category": [
- "actions",
- "simple"
- ],
- "setup": [
- [
- "$TC actions flush action simple",
- 0,
- 1,
- 255
- ],
- "$TC actions add action simple sdata \"Blinkenlights\" index 1"
- ],
- "cmdUnderTest": "$TC actions delete action simple index 1",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action simple",
- "matchPattern": "action order [0-9]*: Simple <Blinkenlights>.*index 1 ref",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action simple"
- ]
- },
- {
- "id": "8ff1",
- "name": "Flush simple actions",
- "category": [
- "actions",
- "simple"
- ],
- "setup": [
- [
- "$TC actions flush action simple",
- 0,
- 1,
- 255
- ],
- "$TC actions add action simple sdata \"Kirk\"",
- "$TC actions add action simple sdata \"Spock\" index 50",
- "$TC actions add action simple sdata \"McCoy\" index 9"
- ],
- "cmdUnderTest": "$TC actions flush action simple",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action simple",
- "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
- "matchCount": "0",
- "teardown": [
- ""
- ]
- },
- {
- "id": "6236",
- "name": "Add skbedit action with valid mark",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit mark 1",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit mark 1",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "407b",
- "name": "Add skbedit action with invalid mark",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit mark 666777888999",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit mark",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "081d",
- "name": "Add skbedit action with priority",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit prio 99",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit priority :99",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "cc37",
- "name": "Add skbedit action with invalid priority",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit prio foo",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit priority",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "3c95",
- "name": "Add skbedit action with queue_mapping",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit queue_mapping 909",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "985c",
- "name": "Add skbedit action with invalid queue_mapping",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit queue_mapping 67000",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit queue_mapping",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "224f",
- "name": "Add skbedit action with ptype host",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit ptype host",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit ptype host",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "d1a3",
- "name": "Add skbedit action with ptype otherhost",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit ptype otherhost",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit ptype otherhost",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "b9c6",
- "name": "Add skbedit action with invalid ptype",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit ptype openair",
- "expExitCode": "255",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit ptype openair",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "5172",
- "name": "List skbedit actions",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ],
- "$TC actions add action skbedit ptype otherhost",
- "$TC actions add action skbedit ptype broadcast",
- "$TC actions add action skbedit mark 59",
- "$TC actions add action skbedit mark 409"
- ],
- "cmdUnderTest": "$TC actions list action skbedit",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit",
- "matchCount": "4",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "a6d6",
- "name": "Add skbedit action with index",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ]
- ],
- "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "index 4040404040",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "38f3",
- "name": "Delete skbedit action",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- [
- "$TC actions flush action skbedit",
- 0,
- 1,
- 255
- ],
- "$TC actions add action skbedit mark 42 index 9009"
- ],
- "cmdUnderTest": "$TC actions del action skbedit index 9009",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit mark 42",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "ce97",
- "name": "Flush skbedit actions",
- "category": [
- "actions",
- "skbedit"
- ],
- "setup": [
- "$TC actions add action skbedit mark 500",
- "$TC actions add action skbedit mark 501",
- "$TC actions add action skbedit mark 502",
- "$TC actions add action skbedit mark 503",
- "$TC actions add action skbedit mark 504",
- "$TC actions add action skbedit mark 505",
- "$TC actions add action skbedit mark 506"
- ],
- "cmdUnderTest": "$TC actions flush action skbedit",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action skbedit",
- "matchPattern": "action order [0-9]*: skbedit",
- "matchCount": "0",
- "teardown": [
- "$TC actions flush action skbedit"
- ]
- },
- {
- "id": "f02c",
- "name": "Replace gact action",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action drop index 10",
- "$TC actions add action drop index 12"
- ],
- "cmdUnderTest": "$TC actions replace action ok index 12",
- "expExitCode": "0",
- "verifyCmd": "$TC actions ls action gact",
- "matchPattern": "action order [0-9]*: gact action pass",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "525f",
- "name": "Get gact action by index",
- "category": [
- "actions",
- "gact"
- ],
- "setup": [
- [
- "$TC actions flush action gact",
- 0,
- 1,
- 255
- ],
- "$TC actions add action drop index 3900800700"
- ],
- "cmdUnderTest": "$TC actions get action gact index 3900800700",
- "expExitCode": "0",
- "verifyCmd": "$TC actions get action gact index 3900800700",
- "matchPattern": "index 3900800700",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action gact"
- ]
- },
- {
- "id": "a568",
- "name": "Add action with ife type",
- "category": [
- "actions",
- "ife"
- ],
- "setup": [
- [
- "$TC actions flush action ife",
- 0,
- 1,
- 255
- ],
- "$TC actions add action ife encode type 0xDEAD index 1"
- ],
- "cmdUnderTest": "$TC actions get action ife index 1",
- "expExitCode": "0",
- "verifyCmd": "$TC actions get action ife index 1",
- "matchPattern": "type 0xDEAD",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action ife"
- ]
- },
- {
- "id": "b983",
- "name": "Add action without ife type",
- "category": [
- "actions",
- "ife"
- ],
- "setup": [
- [
- "$TC actions flush action ife",
- 0,
- 1,
- 255
- ],
- "$TC actions add action ife encode index 1"
- ],
- "cmdUnderTest": "$TC actions get action ife index 1",
- "expExitCode": "0",
- "verifyCmd": "$TC actions get action ife index 1",
- "matchPattern": "type 0xED3E",
- "matchCount": "1",
- "teardown": [
- "$TC actions flush action ife"
- ]
- }
-] \ No newline at end of file
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index cd61b7844c0d..d2391df2cf7d 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -49,7 +49,7 @@ def exec_cmd(command, nsonly=True):
stderr=subprocess.PIPE)
(rawout, serr) = proc.communicate()
- if proc.returncode != 0:
+ if proc.returncode != 0 and len(serr) > 0:
foutput = serr.decode("utf-8")
else:
foutput = rawout.decode("utf-8")
@@ -203,7 +203,7 @@ def set_args(parser):
help='Run tests only from the specified category, or if no category is specified, list known categories.')
parser.add_argument('-f', '--file', type=str,
help='Run tests from the specified file')
- parser.add_argument('-l', '--list', type=str, nargs='?', const="", metavar='CATEGORY',
+ parser.add_argument('-l', '--list', type=str, nargs='?', const="++", metavar='CATEGORY',
help='List all test cases, or those only within the specified category')
parser.add_argument('-s', '--show', type=str, nargs=1, metavar='ID', dest='showID',
help='Display the test case with specified id')
@@ -357,10 +357,10 @@ def set_operation_mode(args):
testcases = get_categorized_testlist(alltests, ucat)
if args.list:
- if (len(args.list) == 0):
+ if (args.list == "++"):
list_test_cases(alltests)
exit(0)
- elif(len(args.list > 0)):
+ elif(len(args.list) > 0):
if (args.list not in ucat):
print("Unknown category " + args.list)
print("Available categories:")
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore
new file mode 100644
index 000000000000..dc5f11faf530
--- /dev/null
+++ b/tools/testing/vsock/.gitignore
@@ -0,0 +1,2 @@
+*.d
+vsock_diag_test
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
new file mode 100644
index 000000000000..66ba0924194d
--- /dev/null
+++ b/tools/testing/vsock/Makefile
@@ -0,0 +1,9 @@
+all: test
+test: vsock_diag_test
+vsock_diag_test: vsock_diag_test.o timeout.o control.o
+
+CFLAGS += -g -O2 -Werror -Wall -I. -I../../include/uapi -I../../include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
+.PHONY: all test clean
+clean:
+ ${RM} *.o *.d vsock_diag_test
+-include *.d
diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README
new file mode 100644
index 000000000000..2cc6d7302db6
--- /dev/null
+++ b/tools/testing/vsock/README
@@ -0,0 +1,36 @@
+AF_VSOCK test suite
+-------------------
+These tests exercise net/vmw_vsock/ host<->guest sockets for VMware, KVM, and
+Hyper-V.
+
+The following tests are available:
+
+ * vsock_diag_test - vsock_diag.ko module for listing open sockets
+
+The following prerequisite steps are not automated and must be performed prior
+to running tests:
+
+1. Build the kernel and these tests.
+2. Install the kernel and tests on the host.
+3. Install the kernel and tests inside the guest.
+4. Boot the guest and ensure that the AF_VSOCK transport is enabled.
+
+Invoke test binaries in both directions as follows:
+
+ # host=server, guest=client
+ (host)# $TEST_BINARY --mode=server \
+ --control-port=1234 \
+ --peer-cid=3
+ (guest)# $TEST_BINARY --mode=client \
+ --control-host=$HOST_IP \
+ --control-port=1234 \
+ --peer-cid=2
+
+ # host=client, guest=server
+ (guest)# $TEST_BINARY --mode=server \
+ --control-port=1234 \
+ --peer-cid=2
+ (host)# $TEST_BINARY --mode=client \
+ --control-port=$GUEST_IP \
+ --control-port=1234 \
+ --peer-cid=3
diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c
new file mode 100644
index 000000000000..90fd47f0e422
--- /dev/null
+++ b/tools/testing/vsock/control.c
@@ -0,0 +1,219 @@
+/* Control socket for client/server test execution
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/* The client and server may need to coordinate to avoid race conditions like
+ * the client attempting to connect to a socket that the server is not
+ * listening on yet. The control socket offers a communications channel for
+ * such coordination tasks.
+ *
+ * If the client calls control_expectln("LISTENING"), then it will block until
+ * the server calls control_writeln("LISTENING"). This provides a simple
+ * mechanism for coordinating between the client and the server.
+ */
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "timeout.h"
+#include "control.h"
+
+static int control_fd = -1;
+
+/* Open the control socket, either in server or client mode */
+void control_init(const char *control_host,
+ const char *control_port,
+ bool server)
+{
+ struct addrinfo hints = {
+ .ai_socktype = SOCK_STREAM,
+ };
+ struct addrinfo *result = NULL;
+ struct addrinfo *ai;
+ int ret;
+
+ ret = getaddrinfo(control_host, control_port, &hints, &result);
+ if (ret != 0) {
+ fprintf(stderr, "%s\n", gai_strerror(ret));
+ exit(EXIT_FAILURE);
+ }
+
+ for (ai = result; ai; ai = ai->ai_next) {
+ int fd;
+ int val = 1;
+
+ fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (fd < 0)
+ continue;
+
+ if (!server) {
+ if (connect(fd, ai->ai_addr, ai->ai_addrlen) < 0)
+ goto next;
+ control_fd = fd;
+ printf("Control socket connected to %s:%s.\n",
+ control_host, control_port);
+ break;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+ &val, sizeof(val)) < 0) {
+ perror("setsockopt");
+ exit(EXIT_FAILURE);
+ }
+
+ if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0)
+ goto next;
+ if (listen(fd, 1) < 0)
+ goto next;
+
+ printf("Control socket listening on %s:%s\n",
+ control_host, control_port);
+ fflush(stdout);
+
+ control_fd = accept(fd, NULL, 0);
+ close(fd);
+
+ if (control_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+ printf("Control socket connection accepted...\n");
+ break;
+
+next:
+ close(fd);
+ }
+
+ if (control_fd < 0) {
+ fprintf(stderr, "Control socket initialization failed. Invalid address %s:%s?\n",
+ control_host, control_port);
+ exit(EXIT_FAILURE);
+ }
+
+ freeaddrinfo(result);
+}
+
+/* Free resources */
+void control_cleanup(void)
+{
+ close(control_fd);
+ control_fd = -1;
+}
+
+/* Write a line to the control socket */
+void control_writeln(const char *str)
+{
+ ssize_t len = strlen(str);
+ ssize_t ret;
+
+ timeout_begin(TIMEOUT);
+
+ do {
+ ret = send(control_fd, str, len, MSG_MORE);
+ timeout_check("send");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != len) {
+ perror("send");
+ exit(EXIT_FAILURE);
+ }
+
+ do {
+ ret = send(control_fd, "\n", 1, 0);
+ timeout_check("send");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != 1) {
+ perror("send");
+ exit(EXIT_FAILURE);
+ }
+
+ timeout_end();
+}
+
+/* Return the next line from the control socket (without the trailing newline).
+ *
+ * The program terminates if a timeout occurs.
+ *
+ * The caller must free() the returned string.
+ */
+char *control_readln(void)
+{
+ char *buf = NULL;
+ size_t idx = 0;
+ size_t buflen = 0;
+
+ timeout_begin(TIMEOUT);
+
+ for (;;) {
+ ssize_t ret;
+
+ if (idx >= buflen) {
+ char *new_buf;
+
+ new_buf = realloc(buf, buflen + 80);
+ if (!new_buf) {
+ perror("realloc");
+ exit(EXIT_FAILURE);
+ }
+
+ buf = new_buf;
+ buflen += 80;
+ }
+
+ do {
+ ret = recv(control_fd, &buf[idx], 1, 0);
+ timeout_check("recv");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret == 0) {
+ fprintf(stderr, "unexpected EOF on control socket\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (ret != 1) {
+ perror("recv");
+ exit(EXIT_FAILURE);
+ }
+
+ if (buf[idx] == '\n') {
+ buf[idx] = '\0';
+ break;
+ }
+
+ idx++;
+ }
+
+ timeout_end();
+
+ return buf;
+}
+
+/* Wait until a given line is received or a timeout occurs */
+void control_expectln(const char *str)
+{
+ char *line;
+
+ line = control_readln();
+ if (strcmp(str, line) != 0) {
+ fprintf(stderr, "expected \"%s\" on control socket, got \"%s\"\n",
+ str, line);
+ exit(EXIT_FAILURE);
+ }
+
+ free(line);
+}
diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h
new file mode 100644
index 000000000000..54a07efd267c
--- /dev/null
+++ b/tools/testing/vsock/control.h
@@ -0,0 +1,13 @@
+#ifndef CONTROL_H
+#define CONTROL_H
+
+#include <stdbool.h>
+
+void control_init(const char *control_host, const char *control_port,
+ bool server);
+void control_cleanup(void);
+void control_writeln(const char *str);
+char *control_readln(void);
+void control_expectln(const char *str);
+
+#endif /* CONTROL_H */
diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c
new file mode 100644
index 000000000000..c49b3003b2db
--- /dev/null
+++ b/tools/testing/vsock/timeout.c
@@ -0,0 +1,64 @@
+/* Timeout API for single-threaded programs that use blocking
+ * syscalls (read/write/send/recv/connect/accept).
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/* Use the following pattern:
+ *
+ * timeout_begin(TIMEOUT);
+ * do {
+ * ret = accept(...);
+ * timeout_check("accept");
+ * } while (ret < 0 && ret == EINTR);
+ * timeout_end();
+ */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdio.h>
+#include "timeout.h"
+
+static volatile bool timeout;
+
+/* SIGALRM handler function. Do not use sleep(2), alarm(2), or
+ * setitimer(2) while using this API - they may interfere with each
+ * other.
+ */
+void sigalrm(int signo)
+{
+ timeout = true;
+}
+
+/* Start a timeout. Call timeout_check() to verify that the timeout hasn't
+ * expired. timeout_end() must be called to stop the timeout. Timeouts cannot
+ * be nested.
+ */
+void timeout_begin(unsigned int seconds)
+{
+ alarm(seconds);
+}
+
+/* Exit with an error message if the timeout has expired */
+void timeout_check(const char *operation)
+{
+ if (timeout) {
+ fprintf(stderr, "%s timed out\n", operation);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* Stop a timeout */
+void timeout_end(void)
+{
+ alarm(0);
+ timeout = false;
+}
diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h
new file mode 100644
index 000000000000..77db9ce9860a
--- /dev/null
+++ b/tools/testing/vsock/timeout.h
@@ -0,0 +1,14 @@
+#ifndef TIMEOUT_H
+#define TIMEOUT_H
+
+enum {
+ /* Default timeout */
+ TIMEOUT = 10 /* seconds */
+};
+
+void sigalrm(int signo);
+void timeout_begin(unsigned int seconds);
+void timeout_check(const char *operation);
+void timeout_end(void);
+
+#endif /* TIMEOUT_H */
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c
new file mode 100644
index 000000000000..e896a4af52f4
--- /dev/null
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -0,0 +1,681 @@
+/*
+ * vsock_diag_test - vsock_diag.ko test suite
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/list.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/sock_diag.h>
+#include <netinet/tcp.h>
+
+#include "../../../include/uapi/linux/vm_sockets.h"
+#include "../../../include/uapi/linux/vm_sockets_diag.h"
+
+#include "timeout.h"
+#include "control.h"
+
+enum test_mode {
+ TEST_MODE_UNSET,
+ TEST_MODE_CLIENT,
+ TEST_MODE_SERVER
+};
+
+/* Per-socket status */
+struct vsock_stat {
+ struct list_head list;
+ struct vsock_diag_msg msg;
+};
+
+static const char *sock_type_str(int type)
+{
+ switch (type) {
+ case SOCK_DGRAM:
+ return "DGRAM";
+ case SOCK_STREAM:
+ return "STREAM";
+ default:
+ return "INVALID TYPE";
+ }
+}
+
+static const char *sock_state_str(int state)
+{
+ switch (state) {
+ case TCP_CLOSE:
+ return "UNCONNECTED";
+ case TCP_SYN_SENT:
+ return "CONNECTING";
+ case TCP_ESTABLISHED:
+ return "CONNECTED";
+ case TCP_CLOSING:
+ return "DISCONNECTING";
+ case TCP_LISTEN:
+ return "LISTEN";
+ default:
+ return "INVALID STATE";
+ }
+}
+
+static const char *sock_shutdown_str(int shutdown)
+{
+ switch (shutdown) {
+ case 1:
+ return "RCV_SHUTDOWN";
+ case 2:
+ return "SEND_SHUTDOWN";
+ case 3:
+ return "RCV_SHUTDOWN | SEND_SHUTDOWN";
+ default:
+ return "0";
+ }
+}
+
+static void print_vsock_addr(FILE *fp, unsigned int cid, unsigned int port)
+{
+ if (cid == VMADDR_CID_ANY)
+ fprintf(fp, "*:");
+ else
+ fprintf(fp, "%u:", cid);
+
+ if (port == VMADDR_PORT_ANY)
+ fprintf(fp, "*");
+ else
+ fprintf(fp, "%u", port);
+}
+
+static void print_vsock_stat(FILE *fp, struct vsock_stat *st)
+{
+ print_vsock_addr(fp, st->msg.vdiag_src_cid, st->msg.vdiag_src_port);
+ fprintf(fp, " ");
+ print_vsock_addr(fp, st->msg.vdiag_dst_cid, st->msg.vdiag_dst_port);
+ fprintf(fp, " %s %s %s %u\n",
+ sock_type_str(st->msg.vdiag_type),
+ sock_state_str(st->msg.vdiag_state),
+ sock_shutdown_str(st->msg.vdiag_shutdown),
+ st->msg.vdiag_ino);
+}
+
+static void print_vsock_stats(FILE *fp, struct list_head *head)
+{
+ struct vsock_stat *st;
+
+ list_for_each_entry(st, head, list)
+ print_vsock_stat(fp, st);
+}
+
+static struct vsock_stat *find_vsock_stat(struct list_head *head, int fd)
+{
+ struct vsock_stat *st;
+ struct stat stat;
+
+ if (fstat(fd, &stat) < 0) {
+ perror("fstat");
+ exit(EXIT_FAILURE);
+ }
+
+ list_for_each_entry(st, head, list)
+ if (st->msg.vdiag_ino == stat.st_ino)
+ return st;
+
+ fprintf(stderr, "cannot find fd %d\n", fd);
+ exit(EXIT_FAILURE);
+}
+
+static void check_no_sockets(struct list_head *head)
+{
+ if (!list_empty(head)) {
+ fprintf(stderr, "expected no sockets\n");
+ print_vsock_stats(stderr, head);
+ exit(1);
+ }
+}
+
+static void check_num_sockets(struct list_head *head, int expected)
+{
+ struct list_head *node;
+ int n = 0;
+
+ list_for_each(node, head)
+ n++;
+
+ if (n != expected) {
+ fprintf(stderr, "expected %d sockets, found %d\n",
+ expected, n);
+ print_vsock_stats(stderr, head);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void check_socket_state(struct vsock_stat *st, __u8 state)
+{
+ if (st->msg.vdiag_state != state) {
+ fprintf(stderr, "expected socket state %#x, got %#x\n",
+ state, st->msg.vdiag_state);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void send_req(int fd)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK,
+ };
+ struct {
+ struct nlmsghdr nlh;
+ struct vsock_diag_req vreq;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+ },
+ .vreq = {
+ .sdiag_family = AF_VSOCK,
+ .vdiag_states = ~(__u32)0,
+ },
+ };
+ struct iovec iov = {
+ .iov_base = &req,
+ .iov_len = sizeof(req),
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ for (;;) {
+ if (sendmsg(fd, &msg, 0) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ perror("sendmsg");
+ exit(EXIT_FAILURE);
+ }
+
+ return;
+ }
+}
+
+static ssize_t recv_resp(int fd, void *buf, size_t len)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK,
+ };
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = len,
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ ssize_t ret;
+
+ do {
+ ret = recvmsg(fd, &msg, 0);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0) {
+ perror("recvmsg");
+ exit(EXIT_FAILURE);
+ }
+
+ return ret;
+}
+
+static void add_vsock_stat(struct list_head *sockets,
+ const struct vsock_diag_msg *resp)
+{
+ struct vsock_stat *st;
+
+ st = malloc(sizeof(*st));
+ if (!st) {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+
+ st->msg = *resp;
+ list_add_tail(&st->list, sockets);
+}
+
+/*
+ * Read vsock stats into a list.
+ */
+static void read_vsock_stat(struct list_head *sockets)
+{
+ long buf[8192 / sizeof(long)];
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ send_req(fd);
+
+ for (;;) {
+ const struct nlmsghdr *h;
+ ssize_t ret;
+
+ ret = recv_resp(fd, buf, sizeof(buf));
+ if (ret == 0)
+ goto done;
+ if (ret < sizeof(*h)) {
+ fprintf(stderr, "short read of %zd bytes\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ h = (struct nlmsghdr *)buf;
+
+ while (NLMSG_OK(h, ret)) {
+ if (h->nlmsg_type == NLMSG_DONE)
+ goto done;
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ const struct nlmsgerr *err = NLMSG_DATA(h);
+
+ if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+ fprintf(stderr, "NLMSG_ERROR\n");
+ else {
+ errno = -err->error;
+ perror("NLMSG_ERROR");
+ }
+
+ exit(EXIT_FAILURE);
+ }
+
+ if (h->nlmsg_type != SOCK_DIAG_BY_FAMILY) {
+ fprintf(stderr, "unexpected nlmsg_type %#x\n",
+ h->nlmsg_type);
+ exit(EXIT_FAILURE);
+ }
+ if (h->nlmsg_len <
+ NLMSG_LENGTH(sizeof(struct vsock_diag_msg))) {
+ fprintf(stderr, "short vsock_diag_msg\n");
+ exit(EXIT_FAILURE);
+ }
+
+ add_vsock_stat(sockets, NLMSG_DATA(h));
+
+ h = NLMSG_NEXT(h, ret);
+ }
+ }
+
+done:
+ close(fd);
+}
+
+static void free_sock_stat(struct list_head *sockets)
+{
+ struct vsock_stat *st;
+ struct vsock_stat *next;
+
+ list_for_each_entry_safe(st, next, sockets, list)
+ free(st);
+}
+
+static void test_no_sockets(unsigned int peer_cid)
+{
+ LIST_HEAD(sockets);
+
+ read_vsock_stat(&sockets);
+
+ check_no_sockets(&sockets);
+
+ free_sock_stat(&sockets);
+}
+
+static void test_listen_socket_server(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = VMADDR_CID_ANY,
+ },
+ };
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+ int fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 1);
+ st = find_vsock_stat(&sockets, fd);
+ check_socket_state(st, TCP_LISTEN);
+
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static void test_connect_client(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = peer_cid,
+ },
+ };
+ int fd;
+ int ret;
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+
+ control_expectln("LISTENING");
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = connect(fd, &addr.sa, sizeof(addr.svm));
+ timeout_check("connect");
+ } while (ret < 0 && errno == EINTR);
+ timeout_end();
+
+ if (ret < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 1);
+ st = find_vsock_stat(&sockets, fd);
+ check_socket_state(st, TCP_ESTABLISHED);
+
+ control_expectln("DONE");
+ control_writeln("DONE");
+
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static void test_connect_server(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = VMADDR_CID_ANY,
+ },
+ };
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } clientaddr;
+ socklen_t clientaddr_len = sizeof(clientaddr.svm);
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+ int fd;
+ int client_fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("LISTENING");
+
+ timeout_begin(TIMEOUT);
+ do {
+ client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
+ timeout_check("accept");
+ } while (client_fd < 0 && errno == EINTR);
+ timeout_end();
+
+ if (client_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+ if (clientaddr.sa.sa_family != AF_VSOCK) {
+ fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n",
+ clientaddr.sa.sa_family);
+ exit(EXIT_FAILURE);
+ }
+ if (clientaddr.svm.svm_cid != peer_cid) {
+ fprintf(stderr, "expected peer CID %u from accept(2), got %u\n",
+ peer_cid, clientaddr.svm.svm_cid);
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 2);
+ find_vsock_stat(&sockets, fd);
+ st = find_vsock_stat(&sockets, client_fd);
+ check_socket_state(st, TCP_ESTABLISHED);
+
+ control_writeln("DONE");
+ control_expectln("DONE");
+
+ close(client_fd);
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static struct {
+ const char *name;
+ void (*run_client)(unsigned int peer_cid);
+ void (*run_server)(unsigned int peer_cid);
+} test_cases[] = {
+ {
+ .name = "No sockets",
+ .run_server = test_no_sockets,
+ },
+ {
+ .name = "Listen socket",
+ .run_server = test_listen_socket_server,
+ },
+ {
+ .name = "Connect",
+ .run_client = test_connect_client,
+ .run_server = test_connect_server,
+ },
+ {},
+};
+
+static void init_signals(void)
+{
+ struct sigaction act = {
+ .sa_handler = sigalrm,
+ };
+
+ sigaction(SIGALRM, &act, NULL);
+ signal(SIGPIPE, SIG_IGN);
+}
+
+static unsigned int parse_cid(const char *str)
+{
+ char *endptr = NULL;
+ unsigned long int n;
+
+ errno = 0;
+ n = strtoul(str, &endptr, 10);
+ if (errno || *endptr != '\0') {
+ fprintf(stderr, "malformed CID \"%s\"\n", str);
+ exit(EXIT_FAILURE);
+ }
+ return n;
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+ {
+ .name = "control-host",
+ .has_arg = required_argument,
+ .val = 'H',
+ },
+ {
+ .name = "control-port",
+ .has_arg = required_argument,
+ .val = 'P',
+ },
+ {
+ .name = "mode",
+ .has_arg = required_argument,
+ .val = 'm',
+ },
+ {
+ .name = "peer-cid",
+ .has_arg = required_argument,
+ .val = 'p',
+ },
+ {
+ .name = "help",
+ .has_arg = no_argument,
+ .val = '?',
+ },
+ {},
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n"
+ "\n"
+ " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n"
+ " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
+ "\n"
+ "Run vsock_diag.ko tests. Must be launched in both\n"
+ "guest and host. One side must use --mode=client and\n"
+ "the other side must use --mode=server.\n"
+ "\n"
+ "A TCP control socket connection is used to coordinate tests\n"
+ "between the client and the server. The server requires a\n"
+ "listen address and the client requires an address to\n"
+ "connect to.\n"
+ "\n"
+ "The CID of the other side must be given with --peer-cid=<cid>.\n");
+ exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+ const char *control_host = NULL;
+ const char *control_port = NULL;
+ int mode = TEST_MODE_UNSET;
+ unsigned int peer_cid = VMADDR_CID_ANY;
+ int i;
+
+ init_signals();
+
+ for (;;) {
+ int opt = getopt_long(argc, argv, optstring, longopts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'H':
+ control_host = optarg;
+ break;
+ case 'm':
+ if (strcmp(optarg, "client") == 0)
+ mode = TEST_MODE_CLIENT;
+ else if (strcmp(optarg, "server") == 0)
+ mode = TEST_MODE_SERVER;
+ else {
+ fprintf(stderr, "--mode must be \"client\" or \"server\"\n");
+ return EXIT_FAILURE;
+ }
+ break;
+ case 'p':
+ peer_cid = parse_cid(optarg);
+ break;
+ case 'P':
+ control_port = optarg;
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ }
+
+ if (!control_port)
+ usage();
+ if (mode == TEST_MODE_UNSET)
+ usage();
+ if (peer_cid == VMADDR_CID_ANY)
+ usage();
+
+ if (!control_host) {
+ if (mode != TEST_MODE_SERVER)
+ usage();
+ control_host = "0.0.0.0";
+ }
+
+ control_init(control_host, control_port, mode == TEST_MODE_SERVER);
+
+ for (i = 0; test_cases[i].name; i++) {
+ void (*run)(unsigned int peer_cid);
+
+ printf("%s...", test_cases[i].name);
+ fflush(stdout);
+
+ if (mode == TEST_MODE_CLIENT)
+ run = test_cases[i].run_client;
+ else
+ run = test_cases[i].run_server;
+
+ if (run)
+ run(peer_cid);
+
+ printf("ok\n");
+ }
+
+ control_cleanup();
+ return EXIT_SUCCESS;
+}