aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-phydev8
-rw-r--r--Documentation/devicetree/bindings/net/ethernet.txt1
-rw-r--r--Documentation/devicetree/bindings/net/qca,ar71xx.txt45
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83867.txt14
-rw-r--r--Documentation/devicetree/bindings/ptp/ptp-qoriq.txt1
-rw-r--r--Documentation/networking/ip-sysctl.txt20
-rw-r--r--Documentation/networking/sfp-phylink.rst5
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi6
-rw-r--r--arch/mips/boot/dts/qca/ar9331.dtsi26
-rw-r--r--arch/mips/boot/dts/qca/ar9331_dpt_module.dts8
-rw-r--r--drivers/i2c/i2c-core-acpi.c3
-rw-r--r--drivers/infiniband/hw/qedr/main.c25
-rw-r--r--drivers/infiniband/hw/qedr/qedr.h2
-rw-r--r--drivers/media/dvb-frontends/tua6100.c22
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c11
-rw-r--r--drivers/net/dsa/sja1105/sja1105_static_config.c2
-rw-r--r--drivers/net/ethernet/atheros/Kconfig10
-rw-r--r--drivers/net/ethernet/atheros/Makefile1
-rw-r--r--drivers/net/ethernet/atheros/ag71xx.c1898
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c41
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c52
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c79
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_regs.h4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h25
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c7
-rw-r--r--drivers/net/ethernet/freescale/enetc/Kconfig10
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c216
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.h18
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ethtool.c31
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_hw.h25
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c2
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ptp.c5
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_vf.c2
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c1
-rw-r--r--drivers/net/ethernet/freescale/fman/fman_keygen.c3
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c112
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h8
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c57
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c442
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h18
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c95
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h2
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c21
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h63
-rw-r--r--drivers/net/ethernet/intel/ice/ice_adminq_cmd.h49
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c250
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.h11
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_controlq.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb.c35
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb.h12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c230
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.h5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c1027
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hw_autogen.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c479
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h14
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c362
-rw-r--r--drivers/net/ethernet/intel/ice/ice_nvm.c35
-rw-r--r--drivers/net/ethernet/intel/ice/ice_status.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.h7
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h35
-rw-r--r--drivers/net/ethernet/intel/ice/ice_type.h13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c301
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h33
-rw-r--r--drivers/net/ethernet/intel/igb/e1000_82575.c2
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_base.c49
-rw-r--r--drivers/net/ethernet/intel/igc/igc_defines.h18
-rw-r--r--drivers/net/ethernet/intel/igc/igc_hw.h3
-rw-r--r--drivers/net/ethernet/intel/igc/igc_mac.c23
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c22
-rw-r--r--drivers/net/ethernet/marvell/mvneta.c36
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2.h21
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c276
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h15
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c110
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core.c21
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_env.c27
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c135
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_thermal.c46
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/i2c.c76
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/minimal.c18
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h62
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c31
-rw-r--r--drivers/net/ethernet/mscc/Makefile2
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c13
-rw-r--r--drivers/net/ethernet/mscc/ocelot.h3
-rw-r--r--drivers/net/ethernet/mscc/ocelot_police.c227
-rw-r--r--drivers/net/ethernet/mscc/ocelot_police.h22
-rw-r--r--drivers/net/ethernet/mscc/ocelot_tc.c174
-rw-r--r--drivers/net/ethernet/mscc/ocelot_tc.h22
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed.h24
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_cxt.c5
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_debug.c2
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev.c1275
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev_api.h113
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_fcoe.c26
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hsi.h16
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hw.c44
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_init_ops.c9
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_int.c8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iscsi.c35
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.c24
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iwarp.h4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ll2.c406
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c157
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.c65
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_mcp.h16
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_ptp.c3
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_rdma.c75
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_reg_addr.h6
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sriov.c3
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ethtool.c1
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c6
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ptp.c37
-rw-r--r--drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h25
-rw-r--r--drivers/net/ethernet/realtek/r8169.c317
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Kconfig10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/Makefile2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c29
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c15
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.c9
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h21
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc.h4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc_core.c13
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c8
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c850
-rw-r--r--drivers/net/ethernet/ti/cpsw.c7
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac.h5
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c258
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_mdio.c20
-rw-r--r--drivers/net/macvlan.c2
-rw-r--r--drivers/net/phy/Kconfig6
-rw-r--r--drivers/net/phy/Makefile1
-rw-r--r--drivers/net/phy/aquantia_main.c8
-rw-r--r--drivers/net/phy/bcm87xx.c20
-rw-r--r--drivers/net/phy/dp83867.c193
-rw-r--r--drivers/net/phy/lxt.c6
-rw-r--r--drivers/net/phy/nxp-tja11xx.c403
-rw-r--r--drivers/net/phy/phy-core.c4
-rw-r--r--drivers/net/phy/phy.c53
-rw-r--r--drivers/net/phy/phy_device.c104
-rw-r--r--drivers/net/phy/phylink.c221
-rw-r--r--drivers/net/phy/sfp.c35
-rw-r--r--drivers/scsi/qedf/qedf_main.c39
-rw-r--r--drivers/scsi/qedi/qedi_main.c34
-rw-r--r--include/dt-bindings/net/ti-dp83867.h2
-rw-r--r--include/linux/i2c.h6
-rw-r--r--include/linux/if_rmnet.h55
-rw-r--r--include/linux/igmp.h2
-rw-r--r--include/linux/netfilter_ipv6.h52
-rw-r--r--include/linux/phy.h9
-rw-r--r--include/linux/phylink.h57
-rw-r--r--include/linux/platform_data/xilinx-ll-temac.h3
-rw-r--r--include/linux/qed/qed_if.h10
-rw-r--r--include/linux/qed/qed_rdma_if.h2
-rw-r--r--include/linux/rhashtable.h36
-rw-r--r--include/net/devlink.h2
-rw-r--r--include/net/dsa.h2
-rw-r--r--include/net/fib_rules.h1
-rw-r--r--include/net/inet_frag.h37
-rw-r--r--include/net/ip.h39
-rw-r--r--include/net/ip6_fib.h24
-rw-r--r--include/net/ip6_route.h4
-rw-r--r--include/net/ip_fib.h9
-rw-r--r--include/net/ipv6.h44
-rw-r--r--include/net/ipv6_stubs.h5
-rw-r--r--include/net/net_namespace.h2
-rw-r--r--include/net/netfilter/nf_conntrack.h1
-rw-r--r--include/net/netfilter/nf_conntrack_bridge.h20
-rw-r--r--include/net/netfilter/nf_conntrack_core.h3
-rw-r--r--include/net/netns/ieee802154_6lowpan.h2
-rw-r--r--include/net/netns/ipv4.h2
-rw-r--r--include/net/netns/ipv6.h4
-rw-r--r--include/net/netns/nexthop.h18
-rw-r--r--include/net/nexthop.h195
-rw-r--r--include/net/sctp/checksum.h12
-rw-r--r--include/net/tc_act/tc_ctinfo.h28
-rw-r--r--include/net/tcp.h41
-rw-r--r--include/trace/events/neigh.h49
-rw-r--r--include/uapi/linux/ethtool.h2
-rw-r--r--include/uapi/linux/nexthop.h56
-rw-r--r--include/uapi/linux/pkt_cls.h1
-rw-r--r--include/uapi/linux/rtnetlink.h10
-rw-r--r--include/uapi/linux/snmp.h1
-rw-r--r--include/uapi/linux/tc_act/tc_ctinfo.h34
-rw-r--r--include/uapi/linux/unix_diag.h2
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_private.h1
-rw-r--r--net/bridge/netfilter/Kconfig14
-rw-r--r--net/bridge/netfilter/Makefile3
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c433
-rw-r--r--net/core/devlink.c38
-rw-r--r--net/core/flow_offload.c3
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/skbuff.c15
-rw-r--r--net/dsa/dsa2.c89
-rw-r--r--net/dsa/dsa_priv.h17
-rw-r--r--net/dsa/port.c157
-rw-r--r--net/dsa/slave.c99
-rw-r--r--net/ieee802154/6lowpan/reassembly.c38
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/fib_semantics.c27
-rw-r--r--net/ipv4/fib_trie.c71
-rw-r--r--net/ipv4/inet_fragment.c141
-rw-r--r--net/ipv4/ip_fragment.c67
-rw-r--r--net/ipv4/ip_output.c309
-rw-r--r--net/ipv4/nexthop.c1479
-rw-r--r--net/ipv4/proc.c5
-rw-r--r--net/ipv4/sysctl_net_ipv4.c93
-rw-r--r--net/ipv4/tcp.c29
-rw-r--r--net/ipv4/tcp_fastopen.c233
-rw-r--r--net/ipv6/addrconf.c10
-rw-r--r--net/ipv6/addrconf_core.c6
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/ip6_fib.c103
-rw-r--r--net/ipv6/ip6_output.c315
-rw-r--r--net/ipv6/ndisc.c8
-rw-r--r--net/ipv6/netfilter.c123
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c43
-rw-r--r--net/ipv6/proc.c4
-rw-r--r--net/ipv6/reassembly.c42
-rw-r--r--net/ipv6/route.c485
-rw-r--r--net/netfilter/nf_conntrack_proto.c126
-rw-r--r--net/sched/Kconfig17
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_ctinfo.c392
-rw-r--r--net/sctp/offload.c7
-rw-r--r--net/unix/diag.c12
-rw-r--r--net/vmw_vsock/hyperv_transport.c95
-rw-r--r--security/selinux/nlmsgtab.c5
-rw-r--r--tools/testing/selftests/net/.gitignore4
-rw-r--r--tools/testing/selftests/net/Makefile5
-rw-r--r--tools/testing/selftests/net/config2
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh48
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh485
-rw-r--r--tools/testing/selftests/net/ipv6_flowlabel.c229
-rwxr-xr-xtools/testing/selftests/net/ipv6_flowlabel.sh21
-rw-r--r--tools/testing/selftests/net/ipv6_flowlabel_mgr.c199
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh17
-rw-r--r--tools/testing/selftests/net/so_txtime.c296
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh31
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c336
-rwxr-xr-xtools/testing/selftests/net/tcp_fastopen_backup_key.sh55
-rw-r--r--tools/testing/selftests/tc-testing/config1
274 files changed, 17384 insertions, 3470 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-net-phydev b/Documentation/ABI/testing/sysfs-class-net-phydev
index 2a5723343aba..206cbf538b59 100644
--- a/Documentation/ABI/testing/sysfs-class-net-phydev
+++ b/Documentation/ABI/testing/sysfs-class-net-phydev
@@ -41,3 +41,11 @@ Description:
xgmii, moca, qsgmii, trgmii, 1000base-x, 2500base-x, rxaui,
xaui, 10gbase-kr, unknown
+What: /sys/class/mdio_bus/<bus>/<device>/phy_standalone
+Date: May 2019
+KernelVersion: 5.3
+Contact: netdev@vger.kernel.org
+Description:
+ Boolean value indicating whether the PHY device is used in
+ standalone mode, without a net_device associated, by PHYLINK.
+ Attribute created only when this is the case.
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
index e88c3641d613..5475682bf06e 100644
--- a/Documentation/devicetree/bindings/net/ethernet.txt
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -43,6 +43,7 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt.
* "rxaui"
* "xaui"
* "10gbase-kr" (10GBASE-KR, XFI, SFI)
+ * "usxgmii"
- phy-connection-type: the same as "phy-mode" property but described in the
Devicetree Specification;
- phy-handle: phandle, specifies a reference to a node representing a PHY
diff --git a/Documentation/devicetree/bindings/net/qca,ar71xx.txt b/Documentation/devicetree/bindings/net/qca,ar71xx.txt
new file mode 100644
index 000000000000..2a33e71ba72b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qca,ar71xx.txt
@@ -0,0 +1,45 @@
+Required properties:
+- compatible: Should be "qca,<soc>-eth". Currently support compatibles are:
+ qca,ar7100-eth - Atheros AR7100
+ qca,ar7240-eth - Atheros AR7240
+ qca,ar7241-eth - Atheros AR7241
+ qca,ar7242-eth - Atheros AR7242
+ qca,ar9130-eth - Atheros AR9130
+ qca,ar9330-eth - Atheros AR9330
+ qca,ar9340-eth - Atheros AR9340
+ qca,qca9530-eth - Qualcomm Atheros QCA9530
+ qca,qca9550-eth - Qualcomm Atheros QCA9550
+ qca,qca9560-eth - Qualcomm Atheros QCA9560
+
+- reg : Address and length of the register set for the device
+- interrupts : Should contain eth interrupt
+- phy-mode : See ethernet.txt file in the same directory
+- clocks: the clock used by the core
+- clock-names: the names of the clock listed in the clocks property. These are
+ "eth" and "mdio".
+- resets: Should contain phandles to the reset signals
+- reset-names: Should contain the names of reset signal listed in the resets
+ property. These are "mac" and "mdio"
+
+Optional properties:
+- phy-handle : phandle to the PHY device connected to this device.
+- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
+ Use instead of phy-handle.
+
+Optional subnodes:
+- mdio : specifies the mdio bus, used as a container for phy nodes
+ according to phy.txt in the same directory
+
+Example:
+
+ethernet@1a000000 {
+ compatible = "qca,ar9330-eth";
+ reg = <0x1a000000 0x200>;
+ interrupts = <5>;
+ resets = <&rst 13>, <&rst 23>;
+ reset-names = "mac", "mdio";
+ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_MDIO>;
+ clock-names = "eth", "mdio";
+
+ phy-mode = "gmii";
+};
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 9ef9338aaee1..db6aa3f2215b 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -11,6 +11,14 @@ Required properties:
- ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
for applicable values
+Note: If the interface type is PHY_INTERFACE_MODE_RGMII the TX/RX clock delays
+ will be left at their default values, as set by the PHY's pin strapping.
+ The default strapping will use a delay of 2.00 ns. Thus
+ PHY_INTERFACE_MODE_RGMII, by default, does not behave as RGMII with no
+ internal delay, but as PHY_INTERFACE_MODE_RGMII_ID. The device tree
+ should use "rgmii-id" if internal delays are desired as this may be
+ changed in future to cause "rgmii" mode to disable delays.
+
Optional property:
- ti,min-output-impedance - MAC Interface Impedance control to set
the programmable output impedance to
@@ -25,8 +33,10 @@ Optional property:
software needs to take when this pin is
strapped in these modes. See data manual
for details.
- - ti,clk-output-sel - Muxing option for CLK_OUT pin - see dt-bindings/net/ti-dp83867.h
- for applicable values.
+ - ti,clk-output-sel - Muxing option for CLK_OUT pin. See dt-bindings/net/ti-dp83867.h
+ for applicable values. The CLK_OUT pin can also
+ be disabled by this property. When omitted, the
+ PHY's default will be left as is.
Note: ti,min-output-impedance and ti,max-output-impedance are mutually
exclusive. When both properties are present ti,max-output-impedance
diff --git a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
index 454c937076a2..6ec053449278 100644
--- a/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
+++ b/Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@@ -4,6 +4,7 @@ General Properties:
- compatible Should be "fsl,etsec-ptp" for eTSEC
Should be "fsl,fman-ptp-timer" for DPAA FMan
+ Should be "fsl,enetc-ptp" for ENETC
- reg Offset and length of the register set for the device
- interrupts There should be at least two interrupts. Some devices
have as many as four PTP related interrupts.
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 14fe93049d28..a73b3a02e49a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -648,6 +648,26 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER
0 to disable the blackhole detection.
By default, it is set to 1hr.
+tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
+ The list consists of a primary key and an optional backup key. The
+ primary key is used for both creating and validating cookies, while the
+ optional backup key is only used for validating cookies. The purpose of
+ the backup key is to maximize TFO validation when keys are rotated.
+
+ A randomly chosen primary key may be configured by the kernel if
+ the tcp_fastopen sysctl is set to 0x400 (see above), or if the
+ TCP_FASTOPEN setsockopt() optname is set and a key has not been
+ previously configured via sysctl. If keys are configured via
+ setsockopt() by using the TCP_FASTOPEN_KEY optname, then those
+ per-socket keys will be used instead of any keys that are specified via
+ sysctl.
+
+ A key is specified as 4 8-digit hexadecimal integers which are separated
+ by a '-' as: xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx. Leading zeros may be
+ omitted. A primary and a backup key may be specified by separating them
+ by a comma. If only one key is specified, it becomes the primary key and
+ any previously configured backup keys are removed.
+
tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 127. Default value
diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst
index 5bd26cb07244..91446b431b70 100644
--- a/Documentation/networking/sfp-phylink.rst
+++ b/Documentation/networking/sfp-phylink.rst
@@ -98,6 +98,7 @@ this documentation.
4. Add::
struct phylink *phylink;
+ struct phylink_config phylink_config;
to the driver's private data structure. We shall refer to the
driver's private data pointer as ``priv`` below, and the driver's
@@ -223,8 +224,10 @@ this documentation.
.. code-block:: c
struct phylink *phylink;
+ priv->phylink_config.dev = &dev.dev;
+ priv->phylink_config.type = PHYLINK_NETDEV;
- phylink = phylink_create(dev, node, phy_mode, &phylink_ops);
+ phylink = phylink_create(&priv->phylink_config, node, phy_mode, &phylink_ops);
if (IS_ERR(phylink)) {
err = PTR_ERR(phylink);
fail probe;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index b04581249f0b..4cdf84c63320 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -431,6 +431,12 @@
compatible = "fsl,enetc";
reg = <0x000100 0 0 0 0>;
};
+ ethernet@0,4 {
+ compatible = "fsl,enetc-ptp";
+ reg = <0x000400 0 0 0 0>;
+ clocks = <&clockgen 4 0>;
+ little-endian;
+ };
};
};
};
diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi
index 2bae201aa365..63a9f33aa43e 100644
--- a/arch/mips/boot/dts/qca/ar9331.dtsi
+++ b/arch/mips/boot/dts/qca/ar9331.dtsi
@@ -116,6 +116,32 @@
};
};
+ eth0: ethernet@19000000 {
+ compatible = "qca,ar9330-eth";
+ reg = <0x19000000 0x200>;
+ interrupts = <4>;
+
+ resets = <&rst 9>, <&rst 22>;
+ reset-names = "mac", "mdio";
+ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+ clock-names = "eth", "mdio";
+
+ status = "disabled";
+ };
+
+ eth1: ethernet@1a000000 {
+ compatible = "qca,ar9330-eth";
+ reg = <0x1a000000 0x200>;
+ interrupts = <5>;
+
+ resets = <&rst 13>, <&rst 23>;
+ reset-names = "mac", "mdio";
+ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+ clock-names = "eth", "mdio";
+
+ status = "disabled";
+ };
+
usb: usb@1b000100 {
compatible = "chipidea,usb2";
reg = <0x1b000000 0x200>;
diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
index e7af2cf5f4c1..77bab823eb3b 100644
--- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
+++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
@@ -76,3 +76,11 @@
reg = <0>;
};
};
+
+&eth0 {
+ status = "okay";
+};
+
+&eth1 {
+ status = "okay";
+};
diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c
index 272800692088..964687534754 100644
--- a/drivers/i2c/i2c-core-acpi.c
+++ b/drivers/i2c/i2c-core-acpi.c
@@ -337,7 +337,7 @@ static int i2c_acpi_find_match_device(struct device *dev, void *data)
return ACPI_COMPANION(dev) == data;
}
-static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
+struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
{
struct device *dev;
@@ -345,6 +345,7 @@ static struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
i2c_acpi_find_match_adapter);
return dev ? i2c_verify_adapter(dev) : NULL;
}
+EXPORT_SYMBOL_GPL(i2c_acpi_find_adapter_by_handle);
static struct i2c_client *i2c_acpi_find_client_by_adev(struct acpi_device *adev)
{
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 083c2c00a8e9..5ebf3c53b3fb 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -312,7 +312,8 @@ static void qedr_free_mem_sb(struct qedr_dev *dev,
struct qed_sb_info *sb_info, int sb_id)
{
if (sb_info->sb_virt) {
- dev->ops->common->sb_release(dev->cdev, sb_info, sb_id);
+ dev->ops->common->sb_release(dev->cdev, sb_info, sb_id,
+ QED_SB_TYPE_CNQ);
dma_free_coherent(&dev->pdev->dev, sizeof(*sb_info->sb_virt),
(void *)sb_info->sb_virt, sb_info->sb_phys);
}
@@ -504,11 +505,13 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
static void qedr_sync_free_irqs(struct qedr_dev *dev)
{
u32 vector;
+ u16 idx;
int i;
for (i = 0; i < dev->int_info.used_cnt; i++) {
if (dev->int_info.msix_cnt) {
- vector = dev->int_info.msix[i * dev->num_hwfns].vector;
+ idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
+ vector = dev->int_info.msix[idx].vector;
synchronize_irq(vector);
free_irq(vector, &dev->cnq_array[i]);
}
@@ -520,6 +523,7 @@ static void qedr_sync_free_irqs(struct qedr_dev *dev)
static int qedr_req_msix_irqs(struct qedr_dev *dev)
{
int i, rc = 0;
+ u16 idx;
if (dev->num_cnq > dev->int_info.msix_cnt) {
DP_ERR(dev,
@@ -529,7 +533,8 @@ static int qedr_req_msix_irqs(struct qedr_dev *dev)
}
for (i = 0; i < dev->num_cnq; i++) {
- rc = request_irq(dev->int_info.msix[i * dev->num_hwfns].vector,
+ idx = i * dev->num_hwfns + dev->affin_hwfn_idx;
+ rc = request_irq(dev->int_info.msix[idx].vector,
qedr_irq_handler, 0, dev->cnq_array[i].name,
&dev->cnq_array[i]);
if (rc) {
@@ -866,6 +871,16 @@ static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
dev->user_dpm_enabled = dev_info.user_dpm_enabled;
dev->rdma_type = dev_info.rdma_type;
dev->num_hwfns = dev_info.common.num_hwfns;
+
+ if (IS_IWARP(dev) && QEDR_IS_CMT(dev)) {
+ rc = dev->ops->iwarp_set_engine_affin(cdev, false);
+ if (rc) {
+ DP_ERR(dev, "iWARP is disabled over a 100g device Enabling it may impact L2 performance. To enable it run devlink dev param set <dev> name iwarp_cmt value true cmode runtime\n");
+ goto init_err;
+ }
+ }
+ dev->affin_hwfn_idx = dev->ops->common->get_affin_hwfn_idx(cdev);
+
dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev);
dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev);
@@ -926,6 +941,10 @@ static void qedr_remove(struct qedr_dev *dev)
qedr_stop_hw(dev);
qedr_sync_free_irqs(dev);
qedr_free_resources(dev);
+
+ if (IS_IWARP(dev) && QEDR_IS_CMT(dev))
+ dev->ops->iwarp_set_engine_affin(dev->cdev, true);
+
ib_dealloc_device(&dev->ibdev);
}
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 6175d1e98717..a92ca22e5de1 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -157,6 +157,8 @@ struct qedr_dev {
u32 dp_module;
u8 dp_level;
u8 num_hwfns;
+#define QEDR_IS_CMT(dev) ((dev)->num_hwfns > 1)
+ u8 affin_hwfn_idx;
u8 gsi_ll2_handle;
uint wq_multiplier;
diff --git a/drivers/media/dvb-frontends/tua6100.c b/drivers/media/dvb-frontends/tua6100.c
index b233b7be0b84..e6aaf4973aef 100644
--- a/drivers/media/dvb-frontends/tua6100.c
+++ b/drivers/media/dvb-frontends/tua6100.c
@@ -75,8 +75,8 @@ static int tua6100_set_params(struct dvb_frontend *fe)
struct i2c_msg msg1 = { .addr = priv->i2c_address, .flags = 0, .buf = reg1, .len = 4 };
struct i2c_msg msg2 = { .addr = priv->i2c_address, .flags = 0, .buf = reg2, .len = 3 };
-#define _R 4
-#define _P 32
+#define _R_VAL 4
+#define _P_VAL 32
#define _ri 4000000
// setup register 0
@@ -91,14 +91,14 @@ static int tua6100_set_params(struct dvb_frontend *fe)
else
reg1[1] = 0x0c;
- if (_P == 64)
+ if (_P_VAL == 64)
reg1[1] |= 0x40;
if (c->frequency >= 1525000)
reg1[1] |= 0x80;
// register 2
- reg2[1] = (_R >> 8) & 0x03;
- reg2[2] = _R;
+ reg2[1] = (_R_VAL >> 8) & 0x03;
+ reg2[2] = _R_VAL;
if (c->frequency < 1455000)
reg2[1] |= 0x1c;
else if (c->frequency < 1630000)
@@ -110,18 +110,18 @@ static int tua6100_set_params(struct dvb_frontend *fe)
* The N divisor ratio (note: c->frequency is in kHz, but we
* need it in Hz)
*/
- prediv = (c->frequency * _R) / (_ri / 1000);
- div = prediv / _P;
+ prediv = (c->frequency * _R_VAL) / (_ri / 1000);
+ div = prediv / _P_VAL;
reg1[1] |= (div >> 9) & 0x03;
reg1[2] = div >> 1;
reg1[3] = (div << 7);
- priv->frequency = ((div * _P) * (_ri / 1000)) / _R;
+ priv->frequency = ((div * _P_VAL) * (_ri / 1000)) / _R_VAL;
// Finally, calculate and store the value for A
- reg1[3] |= (prediv - (div*_P)) & 0x7f;
+ reg1[3] |= (prediv - (div*_P_VAL)) & 0x7f;
-#undef _R
-#undef _P
+#undef _R_VAL
+#undef _P_VAL
#undef _ri
if (fe->ops.i2c_gate_ctrl)
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 0663b78a2f6c..cfdefd9f1905 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -734,15 +734,16 @@ static int sja1105_adjust_port_config(struct sja1105_private *priv, int port,
return sja1105_clocking_setup_port(priv, port);
}
-static void sja1105_adjust_link(struct dsa_switch *ds, int port,
- struct phy_device *phydev)
+static void sja1105_mac_config(struct dsa_switch *ds, int port,
+ unsigned int link_an_mode,
+ const struct phylink_link_state *state)
{
struct sja1105_private *priv = ds->priv;
- if (!phydev->link)
+ if (!state->link)
sja1105_adjust_port_config(priv, port, 0, false);
else
- sja1105_adjust_port_config(priv, port, phydev->speed, true);
+ sja1105_adjust_port_config(priv, port, state->speed, true);
}
static void sja1105_phylink_validate(struct dsa_switch *ds, int port,
@@ -1515,9 +1516,9 @@ static int sja1105_set_ageing_time(struct dsa_switch *ds,
static const struct dsa_switch_ops sja1105_switch_ops = {
.get_tag_protocol = sja1105_get_tag_protocol,
.setup = sja1105_setup,
- .adjust_link = sja1105_adjust_link,
.set_ageing_time = sja1105_set_ageing_time,
.phylink_validate = sja1105_phylink_validate,
+ .phylink_mac_config = sja1105_mac_config,
.get_strings = sja1105_get_strings,
.get_ethtool_stats = sja1105_get_ethtool_stats,
.get_sset_count = sja1105_get_sset_count,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index b3c992b0abb0..7e90e62da389 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -442,7 +442,7 @@ const char *sja1105_static_config_error_msg[] = {
"vl-forwarding-parameters-table.partspc.",
};
-sja1105_config_valid_t
+static sja1105_config_valid_t
static_config_check_memory_size(const struct sja1105_table *tables)
{
const struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
diff --git a/drivers/net/ethernet/atheros/Kconfig b/drivers/net/ethernet/atheros/Kconfig
index 953ff1f9ac70..0058051ba925 100644
--- a/drivers/net/ethernet/atheros/Kconfig
+++ b/drivers/net/ethernet/atheros/Kconfig
@@ -6,7 +6,7 @@
config NET_VENDOR_ATHEROS
bool "Atheros devices"
default y
- depends on PCI
+ depends on (PCI || ATH79)
---help---
If you have a network (Ethernet) card belonging to this class, say Y.
@@ -17,6 +17,14 @@ config NET_VENDOR_ATHEROS
if NET_VENDOR_ATHEROS
+config AG71XX
+ tristate "Atheros AR7XXX/AR9XXX built-in ethernet mac support"
+ depends on ATH79
+ select PHYLIB
+ help
+ If you wish to compile a kernel for AR7XXX/91XXX and enable
+ ethernet support, then you should always answer Y to this.
+
config ATL2
tristate "Atheros L2 Fast Ethernet support"
depends on PCI
diff --git a/drivers/net/ethernet/atheros/Makefile b/drivers/net/ethernet/atheros/Makefile
index aa3d394b87e6..aca696cb6425 100644
--- a/drivers/net/ethernet/atheros/Makefile
+++ b/drivers/net/ethernet/atheros/Makefile
@@ -3,6 +3,7 @@
# Makefile for the Atheros network device drivers.
#
+obj-$(CONFIG_AG71XX) += ag71xx.o
obj-$(CONFIG_ATL1) += atlx/
obj-$(CONFIG_ATL2) += atlx/
obj-$(CONFIG_ATL1E) += atl1e/
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
new file mode 100644
index 000000000000..72a57c6cd254
--- /dev/null
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -0,0 +1,1898 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Atheros AR71xx built-in ethernet mac driver
+ *
+ * Copyright (C) 2019 Oleksij Rempel <o.rempel@pengutronix.de>
+ *
+ * List of authors contributed to this driver before mainlining:
+ * Alexander Couzens <lynxis@fe80.eu>
+ * Christian Lamparter <chunkeey@gmail.com>
+ * Chuanhong Guo <gch981213@gmail.com>
+ * Daniel F. Dickinson <cshored@thecshore.com>
+ * David Bauer <mail@david-bauer.net>
+ * Felix Fietkau <nbd@nbd.name>
+ * Gabor Juhos <juhosg@freemail.hu>
+ * Hauke Mehrtens <hauke@hauke-m.de>
+ * Johann Neuhauser <johann@it-neuhauser.de>
+ * John Crispin <john@phrozen.org>
+ * Jo-Philipp Wich <jo@mein.io>
+ * Koen Vandeputte <koen.vandeputte@ncentric.com>
+ * Lucian Cristian <lucian.cristian@gmail.com>
+ * Matt Merhar <mattmerhar@protonmail.com>
+ * Milan Krstic <milan.krstic@gmail.com>
+ * Petr Å tetiar <ynezz@true.cz>
+ * Rosen Penev <rosenp@gmail.com>
+ * Stephen Walker <stephendwalker+github@gmail.com>
+ * Vittorio Gambaletta <openwrt@vittgam.net>
+ * Weijie Gao <hackpascal@gmail.com>
+ * Imre Kaloz <kaloz@openwrt.org>
+ */
+
+#include <linux/if_vlan.h>
+#include <linux/mfd/syscon.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/clk.h>
+
+/* For our NAPI weight bigger does *NOT* mean better - it means more
+ * D-cache misses and lots more wasted cycles than we'll ever
+ * possibly gain from saving instructions.
+ */
+#define AG71XX_NAPI_WEIGHT 32
+#define AG71XX_OOM_REFILL (1 + HZ / 10)
+
+#define AG71XX_INT_ERR (AG71XX_INT_RX_BE | AG71XX_INT_TX_BE)
+#define AG71XX_INT_TX (AG71XX_INT_TX_PS)
+#define AG71XX_INT_RX (AG71XX_INT_RX_PR | AG71XX_INT_RX_OF)
+
+#define AG71XX_INT_POLL (AG71XX_INT_RX | AG71XX_INT_TX)
+#define AG71XX_INT_INIT (AG71XX_INT_ERR | AG71XX_INT_POLL)
+
+#define AG71XX_TX_MTU_LEN 1540
+
+#define AG71XX_TX_RING_SPLIT 512
+#define AG71XX_TX_RING_DS_PER_PKT DIV_ROUND_UP(AG71XX_TX_MTU_LEN, \
+ AG71XX_TX_RING_SPLIT)
+#define AG71XX_TX_RING_SIZE_DEFAULT 128
+#define AG71XX_RX_RING_SIZE_DEFAULT 256
+
+#define AG71XX_MDIO_RETRY 1000
+#define AG71XX_MDIO_DELAY 5
+#define AG71XX_MDIO_MAX_CLK 5000000
+
+/* Register offsets */
+#define AG71XX_REG_MAC_CFG1 0x0000
+#define MAC_CFG1_TXE BIT(0) /* Tx Enable */
+#define MAC_CFG1_STX BIT(1) /* Synchronize Tx Enable */
+#define MAC_CFG1_RXE BIT(2) /* Rx Enable */
+#define MAC_CFG1_SRX BIT(3) /* Synchronize Rx Enable */
+#define MAC_CFG1_TFC BIT(4) /* Tx Flow Control Enable */
+#define MAC_CFG1_RFC BIT(5) /* Rx Flow Control Enable */
+#define MAC_CFG1_SR BIT(31) /* Soft Reset */
+#define MAC_CFG1_INIT (MAC_CFG1_RXE | MAC_CFG1_TXE | \
+ MAC_CFG1_SRX | MAC_CFG1_STX)
+
+#define AG71XX_REG_MAC_CFG2 0x0004
+#define MAC_CFG2_FDX BIT(0)
+#define MAC_CFG2_PAD_CRC_EN BIT(2)
+#define MAC_CFG2_LEN_CHECK BIT(4)
+#define MAC_CFG2_IF_1000 BIT(9)
+#define MAC_CFG2_IF_10_100 BIT(8)
+
+#define AG71XX_REG_MAC_MFL 0x0010
+
+#define AG71XX_REG_MII_CFG 0x0020
+#define MII_CFG_CLK_DIV_4 0
+#define MII_CFG_CLK_DIV_6 2
+#define MII_CFG_CLK_DIV_8 3
+#define MII_CFG_CLK_DIV_10 4
+#define MII_CFG_CLK_DIV_14 5
+#define MII_CFG_CLK_DIV_20 6
+#define MII_CFG_CLK_DIV_28 7
+#define MII_CFG_CLK_DIV_34 8
+#define MII_CFG_CLK_DIV_42 9
+#define MII_CFG_CLK_DIV_50 10
+#define MII_CFG_CLK_DIV_58 11
+#define MII_CFG_CLK_DIV_66 12
+#define MII_CFG_CLK_DIV_74 13
+#define MII_CFG_CLK_DIV_82 14
+#define MII_CFG_CLK_DIV_98 15
+#define MII_CFG_RESET BIT(31)
+
+#define AG71XX_REG_MII_CMD 0x0024
+#define MII_CMD_READ BIT(0)
+
+#define AG71XX_REG_MII_ADDR 0x0028
+#define MII_ADDR_SHIFT 8
+
+#define AG71XX_REG_MII_CTRL 0x002c
+#define AG71XX_REG_MII_STATUS 0x0030
+#define AG71XX_REG_MII_IND 0x0034
+#define MII_IND_BUSY BIT(0)
+#define MII_IND_INVALID BIT(2)
+
+#define AG71XX_REG_MAC_IFCTL 0x0038
+#define MAC_IFCTL_SPEED BIT(16)
+
+#define AG71XX_REG_MAC_ADDR1 0x0040
+#define AG71XX_REG_MAC_ADDR2 0x0044
+#define AG71XX_REG_FIFO_CFG0 0x0048
+#define FIFO_CFG0_WTM BIT(0) /* Watermark Module */
+#define FIFO_CFG0_RXS BIT(1) /* Rx System Module */
+#define FIFO_CFG0_RXF BIT(2) /* Rx Fabric Module */
+#define FIFO_CFG0_TXS BIT(3) /* Tx System Module */
+#define FIFO_CFG0_TXF BIT(4) /* Tx Fabric Module */
+#define FIFO_CFG0_ALL (FIFO_CFG0_WTM | FIFO_CFG0_RXS | FIFO_CFG0_RXF \
+ | FIFO_CFG0_TXS | FIFO_CFG0_TXF)
+#define FIFO_CFG0_INIT (FIFO_CFG0_ALL << FIFO_CFG0_ENABLE_SHIFT)
+
+#define FIFO_CFG0_ENABLE_SHIFT 8
+
+#define AG71XX_REG_FIFO_CFG1 0x004c
+#define AG71XX_REG_FIFO_CFG2 0x0050
+#define AG71XX_REG_FIFO_CFG3 0x0054
+#define AG71XX_REG_FIFO_CFG4 0x0058
+#define FIFO_CFG4_DE BIT(0) /* Drop Event */
+#define FIFO_CFG4_DV BIT(1) /* RX_DV Event */
+#define FIFO_CFG4_FC BIT(2) /* False Carrier */
+#define FIFO_CFG4_CE BIT(3) /* Code Error */
+#define FIFO_CFG4_CR BIT(4) /* CRC error */
+#define FIFO_CFG4_LM BIT(5) /* Length Mismatch */
+#define FIFO_CFG4_LO BIT(6) /* Length out of range */
+#define FIFO_CFG4_OK BIT(7) /* Packet is OK */
+#define FIFO_CFG4_MC BIT(8) /* Multicast Packet */
+#define FIFO_CFG4_BC BIT(9) /* Broadcast Packet */
+#define FIFO_CFG4_DR BIT(10) /* Dribble */
+#define FIFO_CFG4_LE BIT(11) /* Long Event */
+#define FIFO_CFG4_CF BIT(12) /* Control Frame */
+#define FIFO_CFG4_PF BIT(13) /* Pause Frame */
+#define FIFO_CFG4_UO BIT(14) /* Unsupported Opcode */
+#define FIFO_CFG4_VT BIT(15) /* VLAN tag detected */
+#define FIFO_CFG4_FT BIT(16) /* Frame Truncated */
+#define FIFO_CFG4_UC BIT(17) /* Unicast Packet */
+#define FIFO_CFG4_INIT (FIFO_CFG4_DE | FIFO_CFG4_DV | FIFO_CFG4_FC | \
+ FIFO_CFG4_CE | FIFO_CFG4_CR | FIFO_CFG4_LM | \
+ FIFO_CFG4_LO | FIFO_CFG4_OK | FIFO_CFG4_MC | \
+ FIFO_CFG4_BC | FIFO_CFG4_DR | FIFO_CFG4_LE | \
+ FIFO_CFG4_CF | FIFO_CFG4_PF | FIFO_CFG4_UO | \
+ FIFO_CFG4_VT)
+
+#define AG71XX_REG_FIFO_CFG5 0x005c
+#define FIFO_CFG5_DE BIT(0) /* Drop Event */
+#define FIFO_CFG5_DV BIT(1) /* RX_DV Event */
+#define FIFO_CFG5_FC BIT(2) /* False Carrier */
+#define FIFO_CFG5_CE BIT(3) /* Code Error */
+#define FIFO_CFG5_LM BIT(4) /* Length Mismatch */
+#define FIFO_CFG5_LO BIT(5) /* Length Out of Range */
+#define FIFO_CFG5_OK BIT(6) /* Packet is OK */
+#define FIFO_CFG5_MC BIT(7) /* Multicast Packet */
+#define FIFO_CFG5_BC BIT(8) /* Broadcast Packet */
+#define FIFO_CFG5_DR BIT(9) /* Dribble */
+#define FIFO_CFG5_CF BIT(10) /* Control Frame */
+#define FIFO_CFG5_PF BIT(11) /* Pause Frame */
+#define FIFO_CFG5_UO BIT(12) /* Unsupported Opcode */
+#define FIFO_CFG5_VT BIT(13) /* VLAN tag detected */
+#define FIFO_CFG5_LE BIT(14) /* Long Event */
+#define FIFO_CFG5_FT BIT(15) /* Frame Truncated */
+#define FIFO_CFG5_16 BIT(16) /* unknown */
+#define FIFO_CFG5_17 BIT(17) /* unknown */
+#define FIFO_CFG5_SF BIT(18) /* Short Frame */
+#define FIFO_CFG5_BM BIT(19) /* Byte Mode */
+#define FIFO_CFG5_INIT (FIFO_CFG5_DE | FIFO_CFG5_DV | FIFO_CFG5_FC | \
+ FIFO_CFG5_CE | FIFO_CFG5_LO | FIFO_CFG5_OK | \
+ FIFO_CFG5_MC | FIFO_CFG5_BC | FIFO_CFG5_DR | \
+ FIFO_CFG5_CF | FIFO_CFG5_PF | FIFO_CFG5_VT | \
+ FIFO_CFG5_LE | FIFO_CFG5_FT | FIFO_CFG5_16 | \
+ FIFO_CFG5_17 | FIFO_CFG5_SF)
+
+#define AG71XX_REG_TX_CTRL 0x0180
+#define TX_CTRL_TXE BIT(0) /* Tx Enable */
+
+#define AG71XX_REG_TX_DESC 0x0184
+#define AG71XX_REG_TX_STATUS 0x0188
+#define TX_STATUS_PS BIT(0) /* Packet Sent */
+#define TX_STATUS_UR BIT(1) /* Tx Underrun */
+#define TX_STATUS_BE BIT(3) /* Bus Error */
+
+#define AG71XX_REG_RX_CTRL 0x018c
+#define RX_CTRL_RXE BIT(0) /* Rx Enable */
+
+#define AG71XX_DMA_RETRY 10
+#define AG71XX_DMA_DELAY 1
+
+#define AG71XX_REG_RX_DESC 0x0190
+#define AG71XX_REG_RX_STATUS 0x0194
+#define RX_STATUS_PR BIT(0) /* Packet Received */
+#define RX_STATUS_OF BIT(2) /* Rx Overflow */
+#define RX_STATUS_BE BIT(3) /* Bus Error */
+
+#define AG71XX_REG_INT_ENABLE 0x0198
+#define AG71XX_REG_INT_STATUS 0x019c
+#define AG71XX_INT_TX_PS BIT(0)
+#define AG71XX_INT_TX_UR BIT(1)
+#define AG71XX_INT_TX_BE BIT(3)
+#define AG71XX_INT_RX_PR BIT(4)
+#define AG71XX_INT_RX_OF BIT(6)
+#define AG71XX_INT_RX_BE BIT(7)
+
+#define AG71XX_REG_FIFO_DEPTH 0x01a8
+#define AG71XX_REG_RX_SM 0x01b0
+#define AG71XX_REG_TX_SM 0x01b4
+
+#define ETH_SWITCH_HEADER_LEN 2
+
+#define AG71XX_DEFAULT_MSG_ENABLE \
+ (NETIF_MSG_DRV \
+ | NETIF_MSG_PROBE \
+ | NETIF_MSG_LINK \
+ | NETIF_MSG_TIMER \
+ | NETIF_MSG_IFDOWN \
+ | NETIF_MSG_IFUP \
+ | NETIF_MSG_RX_ERR \
+ | NETIF_MSG_TX_ERR)
+
+#define DESC_EMPTY BIT(31)
+#define DESC_MORE BIT(24)
+#define DESC_PKTLEN_M 0xfff
+struct ag71xx_desc {
+ u32 data;
+ u32 ctrl;
+ u32 next;
+ u32 pad;
+} __aligned(4);
+
+#define AG71XX_DESC_SIZE roundup(sizeof(struct ag71xx_desc), \
+ L1_CACHE_BYTES)
+
+struct ag71xx_buf {
+ union {
+ struct {
+ struct sk_buff *skb;
+ unsigned int len;
+ } tx;
+ struct {
+ dma_addr_t dma_addr;
+ void *rx_buf;
+ } rx;
+ };
+};
+
+struct ag71xx_ring {
+ /* "Hot" fields in the data path. */
+ unsigned int curr;
+ unsigned int dirty;
+
+ /* "Cold" fields - not used in the data path. */
+ struct ag71xx_buf *buf;
+ u16 order;
+ u16 desc_split;
+ dma_addr_t descs_dma;
+ u8 *descs_cpu;
+};
+
+enum ag71xx_type {
+ AR7100,
+ AR7240,
+ AR9130,
+ AR9330,
+ AR9340,
+ QCA9530,
+ QCA9550,
+};
+
+struct ag71xx_dcfg {
+ u32 max_frame_len;
+ const u32 *fifodata;
+ u16 desc_pktlen_mask;
+ bool tx_hang_workaround;
+ enum ag71xx_type type;
+};
+
+struct ag71xx {
+ /* Critical data related to the per-packet data path are clustered
+ * early in this structure to help improve the D-cache footprint.
+ */
+ struct ag71xx_ring rx_ring ____cacheline_aligned;
+ struct ag71xx_ring tx_ring ____cacheline_aligned;
+
+ u16 rx_buf_size;
+ u8 rx_buf_offset;
+
+ struct net_device *ndev;
+ struct platform_device *pdev;
+ struct napi_struct napi;
+ u32 msg_enable;
+ const struct ag71xx_dcfg *dcfg;
+
+ /* From this point onwards we're not looking at per-packet fields. */
+ void __iomem *mac_base;
+
+ struct ag71xx_desc *stop_desc;
+ dma_addr_t stop_desc_dma;
+
+ int phy_if_mode;
+
+ struct delayed_work restart_work;
+ struct timer_list oom_timer;
+
+ struct reset_control *mac_reset;
+
+ u32 fifodata[3];
+ int mac_idx;
+
+ struct reset_control *mdio_reset;
+ struct mii_bus *mii_bus;
+ struct clk *clk_mdio;
+ struct clk *clk_eth;
+};
+
+static int ag71xx_desc_empty(struct ag71xx_desc *desc)
+{
+ return (desc->ctrl & DESC_EMPTY) != 0;
+}
+
+static struct ag71xx_desc *ag71xx_ring_desc(struct ag71xx_ring *ring, int idx)
+{
+ return (struct ag71xx_desc *)&ring->descs_cpu[idx * AG71XX_DESC_SIZE];
+}
+
+static int ag71xx_ring_size_order(int size)
+{
+ return fls(size - 1);
+}
+
+static bool ag71xx_is(struct ag71xx *ag, enum ag71xx_type type)
+{
+ return ag->dcfg->type == type;
+}
+
+static void ag71xx_wr(struct ag71xx *ag, unsigned int reg, u32 value)
+{
+ iowrite32(value, ag->mac_base + reg);
+ /* flush write */
+ (void)ioread32(ag->mac_base + reg);
+}
+
+static u32 ag71xx_rr(struct ag71xx *ag, unsigned int reg)
+{
+ return ioread32(ag->mac_base + reg);
+}
+
+static void ag71xx_sb(struct ag71xx *ag, unsigned int reg, u32 mask)
+{
+ void __iomem *r;
+
+ r = ag->mac_base + reg;
+ iowrite32(ioread32(r) | mask, r);
+ /* flush write */
+ (void)ioread32(r);
+}
+
+static void ag71xx_cb(struct ag71xx *ag, unsigned int reg, u32 mask)
+{
+ void __iomem *r;
+
+ r = ag->mac_base + reg;
+ iowrite32(ioread32(r) & ~mask, r);
+ /* flush write */
+ (void)ioread32(r);
+}
+
+static void ag71xx_int_enable(struct ag71xx *ag, u32 ints)
+{
+ ag71xx_sb(ag, AG71XX_REG_INT_ENABLE, ints);
+}
+
+static void ag71xx_int_disable(struct ag71xx *ag, u32 ints)
+{
+ ag71xx_cb(ag, AG71XX_REG_INT_ENABLE, ints);
+}
+
+static int ag71xx_mdio_wait_busy(struct ag71xx *ag)
+{
+ struct net_device *ndev = ag->ndev;
+ int i;
+
+ for (i = 0; i < AG71XX_MDIO_RETRY; i++) {
+ u32 busy;
+
+ udelay(AG71XX_MDIO_DELAY);
+
+ busy = ag71xx_rr(ag, AG71XX_REG_MII_IND);
+ if (!busy)
+ return 0;
+
+ udelay(AG71XX_MDIO_DELAY);
+ }
+
+ netif_err(ag, link, ndev, "MDIO operation timed out\n");
+
+ return -ETIMEDOUT;
+}
+
+static int ag71xx_mdio_mii_read(struct mii_bus *bus, int addr, int reg)
+{
+ struct ag71xx *ag = bus->priv;
+ int err, val;
+
+ err = ag71xx_mdio_wait_busy(ag);
+ if (err)
+ return err;
+
+ ag71xx_wr(ag, AG71XX_REG_MII_ADDR,
+ ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff));
+ /* enable read mode */
+ ag71xx_wr(ag, AG71XX_REG_MII_CMD, MII_CMD_READ);
+
+ err = ag71xx_mdio_wait_busy(ag);
+ if (err)
+ return err;
+
+ val = ag71xx_rr(ag, AG71XX_REG_MII_STATUS);
+ /* disable read mode */
+ ag71xx_wr(ag, AG71XX_REG_MII_CMD, 0);
+
+ netif_dbg(ag, link, ag->ndev, "mii_read: addr=%04x, reg=%04x, value=%04x\n",
+ addr, reg, val);
+
+ return val;
+}
+
+static int ag71xx_mdio_mii_write(struct mii_bus *bus, int addr, int reg,
+ u16 val)
+{
+ struct ag71xx *ag = bus->priv;
+
+ netif_dbg(ag, link, ag->ndev, "mii_write: addr=%04x, reg=%04x, value=%04x\n",
+ addr, reg, val);
+
+ ag71xx_wr(ag, AG71XX_REG_MII_ADDR,
+ ((addr & 0x1f) << MII_ADDR_SHIFT) | (reg & 0xff));
+ ag71xx_wr(ag, AG71XX_REG_MII_CTRL, val);
+
+ return ag71xx_mdio_wait_busy(ag);
+}
+
+static const u32 ar71xx_mdio_div_table[] = {
+ 4, 4, 6, 8, 10, 14, 20, 28,
+};
+
+static const u32 ar7240_mdio_div_table[] = {
+ 2, 2, 4, 6, 8, 12, 18, 26, 32, 40, 48, 56, 62, 70, 78, 96,
+};
+
+static const u32 ar933x_mdio_div_table[] = {
+ 4, 4, 6, 8, 10, 14, 20, 28, 34, 42, 50, 58, 66, 74, 82, 98,
+};
+
+static int ag71xx_mdio_get_divider(struct ag71xx *ag, u32 *div)
+{
+ unsigned long ref_clock;
+ const u32 *table;
+ int ndivs, i;
+
+ ref_clock = clk_get_rate(ag->clk_mdio);
+ if (!ref_clock)
+ return -EINVAL;
+
+ if (ag71xx_is(ag, AR9330) || ag71xx_is(ag, AR9340)) {
+ table = ar933x_mdio_div_table;
+ ndivs = ARRAY_SIZE(ar933x_mdio_div_table);
+ } else if (ag71xx_is(ag, AR7240)) {
+ table = ar7240_mdio_div_table;
+ ndivs = ARRAY_SIZE(ar7240_mdio_div_table);
+ } else {
+ table = ar71xx_mdio_div_table;
+ ndivs = ARRAY_SIZE(ar71xx_mdio_div_table);
+ }
+
+ for (i = 0; i < ndivs; i++) {
+ unsigned long t;
+
+ t = ref_clock / table[i];
+ if (t <= AG71XX_MDIO_MAX_CLK) {
+ *div = i;
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static int ag71xx_mdio_reset(struct mii_bus *bus)
+{
+ struct ag71xx *ag = bus->priv;
+ int err;
+ u32 t;
+
+ err = ag71xx_mdio_get_divider(ag, &t);
+ if (err)
+ return err;
+
+ ag71xx_wr(ag, AG71XX_REG_MII_CFG, t | MII_CFG_RESET);
+ usleep_range(100, 200);
+
+ ag71xx_wr(ag, AG71XX_REG_MII_CFG, t);
+ usleep_range(100, 200);
+
+ return 0;
+}
+
+static int ag71xx_mdio_probe(struct ag71xx *ag)
+{
+ struct device *dev = &ag->pdev->dev;
+ struct net_device *ndev = ag->ndev;
+ static struct mii_bus *mii_bus;
+ struct device_node *np;
+ int err;
+
+ np = dev->of_node;
+ ag->mii_bus = NULL;
+
+ ag->clk_mdio = devm_clk_get(dev, "mdio");
+ if (IS_ERR(ag->clk_mdio)) {
+ netif_err(ag, probe, ndev, "Failed to get mdio clk.\n");
+ return PTR_ERR(ag->clk_mdio);
+ }
+
+ err = clk_prepare_enable(ag->clk_mdio);
+ if (err) {
+ netif_err(ag, probe, ndev, "Failed to enable mdio clk.\n");
+ return err;
+ }
+
+ mii_bus = devm_mdiobus_alloc(dev);
+ if (!mii_bus) {
+ err = -ENOMEM;
+ goto mdio_err_put_clk;
+ }
+
+ ag->mdio_reset = of_reset_control_get_exclusive(np, "mdio");
+ if (IS_ERR(ag->mdio_reset)) {
+ netif_err(ag, probe, ndev, "Failed to get reset mdio.\n");
+ return PTR_ERR(ag->mdio_reset);
+ }
+
+ mii_bus->name = "ag71xx_mdio";
+ mii_bus->read = ag71xx_mdio_mii_read;
+ mii_bus->write = ag71xx_mdio_mii_write;
+ mii_bus->reset = ag71xx_mdio_reset;
+ mii_bus->priv = ag;
+ mii_bus->parent = dev;
+ snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s.%d", np->name, ag->mac_idx);
+
+ if (!IS_ERR(ag->mdio_reset)) {
+ reset_control_assert(ag->mdio_reset);
+ msleep(100);
+ reset_control_deassert(ag->mdio_reset);
+ msleep(200);
+ }
+
+ err = of_mdiobus_register(mii_bus, np);
+ if (err)
+ goto mdio_err_put_clk;
+
+ ag->mii_bus = mii_bus;
+
+ return 0;
+
+mdio_err_put_clk:
+ clk_disable_unprepare(ag->clk_mdio);
+ return err;
+}
+
+static void ag71xx_mdio_remove(struct ag71xx *ag)
+{
+ if (ag->mii_bus)
+ mdiobus_unregister(ag->mii_bus);
+ clk_disable_unprepare(ag->clk_mdio);
+}
+
+static void ag71xx_hw_stop(struct ag71xx *ag)
+{
+ /* disable all interrupts and stop the rx/tx engine */
+ ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, 0);
+ ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0);
+ ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0);
+}
+
+static bool ag71xx_check_dma_stuck(struct ag71xx *ag)
+{
+ unsigned long timestamp;
+ u32 rx_sm, tx_sm, rx_fd;
+
+ timestamp = netdev_get_tx_queue(ag->ndev, 0)->trans_start;
+ if (likely(time_before(jiffies, timestamp + HZ / 10)))
+ return false;
+
+ if (!netif_carrier_ok(ag->ndev))
+ return false;
+
+ rx_sm = ag71xx_rr(ag, AG71XX_REG_RX_SM);
+ if ((rx_sm & 0x7) == 0x3 && ((rx_sm >> 4) & 0x7) == 0x6)
+ return true;
+
+ tx_sm = ag71xx_rr(ag, AG71XX_REG_TX_SM);
+ rx_fd = ag71xx_rr(ag, AG71XX_REG_FIFO_DEPTH);
+ if (((tx_sm >> 4) & 0x7) == 0 && ((rx_sm & 0x7) == 0) &&
+ ((rx_sm >> 4) & 0x7) == 0 && rx_fd == 0)
+ return true;
+
+ return false;
+}
+
+static int ag71xx_tx_packets(struct ag71xx *ag, bool flush)
+{
+ struct ag71xx_ring *ring = &ag->tx_ring;
+ int sent = 0, bytes_compl = 0, n = 0;
+ struct net_device *ndev = ag->ndev;
+ int ring_mask, ring_size;
+ bool dma_stuck = false;
+
+ ring_mask = BIT(ring->order) - 1;
+ ring_size = BIT(ring->order);
+
+ netif_dbg(ag, tx_queued, ndev, "processing TX ring\n");
+
+ while (ring->dirty + n != ring->curr) {
+ struct ag71xx_desc *desc;
+ struct sk_buff *skb;
+ unsigned int i;
+
+ i = (ring->dirty + n) & ring_mask;
+ desc = ag71xx_ring_desc(ring, i);
+ skb = ring->buf[i].tx.skb;
+
+ if (!flush && !ag71xx_desc_empty(desc)) {
+ if (ag->dcfg->tx_hang_workaround &&
+ ag71xx_check_dma_stuck(ag)) {
+ schedule_delayed_work(&ag->restart_work,
+ HZ / 2);
+ dma_stuck = true;
+ }
+ break;
+ }
+
+ if (flush)
+ desc->ctrl |= DESC_EMPTY;
+
+ n++;
+ if (!skb)
+ continue;
+
+ dev_kfree_skb_any(skb);
+ ring->buf[i].tx.skb = NULL;
+
+ bytes_compl += ring->buf[i].tx.len;
+
+ sent++;
+ ring->dirty += n;
+
+ while (n > 0) {
+ ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
+ n--;
+ }
+ }
+
+ netif_dbg(ag, tx_done, ndev, "%d packets sent out\n", sent);
+
+ if (!sent)
+ return 0;
+
+ ag->ndev->stats.tx_bytes += bytes_compl;
+ ag->ndev->stats.tx_packets += sent;
+
+ netdev_completed_queue(ag->ndev, sent, bytes_compl);
+ if ((ring->curr - ring->dirty) < (ring_size * 3) / 4)
+ netif_wake_queue(ag->ndev);
+
+ if (!dma_stuck)
+ cancel_delayed_work(&ag->restart_work);
+
+ return sent;
+}
+
+static void ag71xx_dma_wait_stop(struct ag71xx *ag)
+{
+ struct net_device *ndev = ag->ndev;
+ int i;
+
+ for (i = 0; i < AG71XX_DMA_RETRY; i++) {
+ u32 rx, tx;
+
+ mdelay(AG71XX_DMA_DELAY);
+
+ rx = ag71xx_rr(ag, AG71XX_REG_RX_CTRL) & RX_CTRL_RXE;
+ tx = ag71xx_rr(ag, AG71XX_REG_TX_CTRL) & TX_CTRL_TXE;
+ if (!rx && !tx)
+ return;
+ }
+
+ netif_err(ag, hw, ndev, "DMA stop operation timed out\n");
+}
+
+static void ag71xx_dma_reset(struct ag71xx *ag)
+{
+ struct net_device *ndev = ag->ndev;
+ u32 val;
+ int i;
+
+ /* stop RX and TX */
+ ag71xx_wr(ag, AG71XX_REG_RX_CTRL, 0);
+ ag71xx_wr(ag, AG71XX_REG_TX_CTRL, 0);
+
+ /* give the hardware some time to really stop all rx/tx activity
+ * clearing the descriptors too early causes random memory corruption
+ */
+ ag71xx_dma_wait_stop(ag);
+
+ /* clear descriptor addresses */
+ ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->stop_desc_dma);
+ ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->stop_desc_dma);
+
+ /* clear pending RX/TX interrupts */
+ for (i = 0; i < 256; i++) {
+ ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
+ ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
+ }
+
+ /* clear pending errors */
+ ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE | RX_STATUS_OF);
+ ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE | TX_STATUS_UR);
+
+ val = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);
+ if (val)
+ netif_err(ag, hw, ndev, "unable to clear DMA Rx status: %08x\n",
+ val);
+
+ val = ag71xx_rr(ag, AG71XX_REG_TX_STATUS);
+
+ /* mask out reserved bits */
+ val &= ~0xff000000;
+
+ if (val)
+ netif_err(ag, hw, ndev, "unable to clear DMA Tx status: %08x\n",
+ val);
+}
+
+static void ag71xx_hw_setup(struct ag71xx *ag)
+{
+ u32 init = MAC_CFG1_INIT;
+
+ /* setup MAC configuration registers */
+ ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, init);
+
+ ag71xx_sb(ag, AG71XX_REG_MAC_CFG2,
+ MAC_CFG2_PAD_CRC_EN | MAC_CFG2_LEN_CHECK);
+
+ /* setup max frame length to zero */
+ ag71xx_wr(ag, AG71XX_REG_MAC_MFL, 0);
+
+ /* setup FIFO configuration registers */
+ ag71xx_wr(ag, AG71XX_REG_FIFO_CFG0, FIFO_CFG0_INIT);
+ ag71xx_wr(ag, AG71XX_REG_FIFO_CFG1, ag->fifodata[0]);
+ ag71xx_wr(ag, AG71XX_REG_FIFO_CFG2, ag->fifodata[1]);
+ ag71xx_wr(ag, AG71XX_REG_FIFO_CFG4, FIFO_CFG4_INIT);
+ ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, FIFO_CFG5_INIT);
+}
+
+static unsigned int ag71xx_max_frame_len(unsigned int mtu)
+{
+ return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN;
+}
+
+static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac)
+{
+ u32 t;
+
+ t = (((u32)mac[5]) << 24) | (((u32)mac[4]) << 16)
+ | (((u32)mac[3]) << 8) | ((u32)mac[2]);
+
+ ag71xx_wr(ag, AG71XX_REG_MAC_ADDR1, t);
+
+ t = (((u32)mac[1]) << 24) | (((u32)mac[0]) << 16);
+ ag71xx_wr(ag, AG71XX_REG_MAC_ADDR2, t);
+}
+
+static void ag71xx_fast_reset(struct ag71xx *ag)
+{
+ struct net_device *dev = ag->ndev;
+ u32 rx_ds;
+ u32 mii_reg;
+
+ ag71xx_hw_stop(ag);
+
+ mii_reg = ag71xx_rr(ag, AG71XX_REG_MII_CFG);
+ rx_ds = ag71xx_rr(ag, AG71XX_REG_RX_DESC);
+
+ ag71xx_tx_packets(ag, true);
+
+ reset_control_assert(ag->mac_reset);
+ usleep_range(10, 20);
+ reset_control_deassert(ag->mac_reset);
+ usleep_range(10, 20);
+
+ ag71xx_dma_reset(ag);
+ ag71xx_hw_setup(ag);
+ ag->tx_ring.curr = 0;
+ ag->tx_ring.dirty = 0;
+ netdev_reset_queue(ag->ndev);
+
+ /* setup max frame length */
+ ag71xx_wr(ag, AG71XX_REG_MAC_MFL,
+ ag71xx_max_frame_len(ag->ndev->mtu));
+
+ ag71xx_wr(ag, AG71XX_REG_RX_DESC, rx_ds);
+ ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+ ag71xx_wr(ag, AG71XX_REG_MII_CFG, mii_reg);
+
+ ag71xx_hw_set_macaddr(ag, dev->dev_addr);
+}
+
+static void ag71xx_hw_start(struct ag71xx *ag)
+{
+ /* start RX engine */
+ ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE);
+
+ /* enable interrupts */
+ ag71xx_wr(ag, AG71XX_REG_INT_ENABLE, AG71XX_INT_INIT);
+
+ netif_wake_queue(ag->ndev);
+}
+
+static void ag71xx_link_adjust(struct ag71xx *ag, bool update)
+{
+ struct phy_device *phydev = ag->ndev->phydev;
+ u32 cfg2;
+ u32 ifctl;
+ u32 fifo5;
+
+ if (!phydev->link && update) {
+ ag71xx_hw_stop(ag);
+ return;
+ }
+
+ if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130))
+ ag71xx_fast_reset(ag);
+
+ cfg2 = ag71xx_rr(ag, AG71XX_REG_MAC_CFG2);
+ cfg2 &= ~(MAC_CFG2_IF_1000 | MAC_CFG2_IF_10_100 | MAC_CFG2_FDX);
+ cfg2 |= (phydev->duplex) ? MAC_CFG2_FDX : 0;
+
+ ifctl = ag71xx_rr(ag, AG71XX_REG_MAC_IFCTL);
+ ifctl &= ~(MAC_IFCTL_SPEED);
+
+ fifo5 = ag71xx_rr(ag, AG71XX_REG_FIFO_CFG5);
+ fifo5 &= ~FIFO_CFG5_BM;
+
+ switch (phydev->speed) {
+ case SPEED_1000:
+ cfg2 |= MAC_CFG2_IF_1000;
+ fifo5 |= FIFO_CFG5_BM;
+ break;
+ case SPEED_100:
+ cfg2 |= MAC_CFG2_IF_10_100;
+ ifctl |= MAC_IFCTL_SPEED;
+ break;
+ case SPEED_10:
+ cfg2 |= MAC_CFG2_IF_10_100;
+ break;
+ default:
+ WARN(1, "not supported speed %i\n", phydev->speed);
+ return;
+ }
+
+ if (ag->tx_ring.desc_split) {
+ ag->fifodata[2] &= 0xffff;
+ ag->fifodata[2] |= ((2048 - ag->tx_ring.desc_split) / 4) << 16;
+ }
+
+ ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, ag->fifodata[2]);
+
+ ag71xx_wr(ag, AG71XX_REG_MAC_CFG2, cfg2);
+ ag71xx_wr(ag, AG71XX_REG_FIFO_CFG5, fifo5);
+ ag71xx_wr(ag, AG71XX_REG_MAC_IFCTL, ifctl);
+
+ ag71xx_hw_start(ag);
+
+ if (update)
+ phy_print_status(phydev);
+}
+
+static void ag71xx_phy_link_adjust(struct net_device *ndev)
+{
+ struct ag71xx *ag = netdev_priv(ndev);
+
+ ag71xx_link_adjust(ag, true);
+}
+
+static int ag71xx_phy_connect(struct ag71xx *ag)
+{
+ struct device_node *np = ag->pdev->dev.of_node;
+ struct net_device *ndev = ag->ndev;
+ struct device_node *phy_node;
+ struct phy_device *phydev;
+ int ret;
+
+ if (of_phy_is_fixed_link(np)) {
+ ret = of_phy_register_fixed_link(np);
+ if (ret < 0) {
+ netif_err(ag, probe, ndev, "Failed to register fixed PHY link: %d\n",
+ ret);
+ return ret;
+ }
+
+ phy_node = of_node_get(np);
+ } else {
+ phy_node = of_parse_phandle(np, "phy-handle", 0);
+ }
+
+ if (!phy_node) {
+ netif_err(ag, probe, ndev, "Could not find valid phy node\n");
+ return -ENODEV;
+ }
+
+ phydev = of_phy_connect(ag->ndev, phy_node, ag71xx_phy_link_adjust,
+ 0, ag->phy_if_mode);
+
+ of_node_put(phy_node);
+
+ if (!phydev) {
+ netif_err(ag, probe, ndev, "Could not connect to PHY device\n");
+ return -ENODEV;
+ }
+
+ phy_attached_info(phydev);
+
+ return 0;
+}
+
+static void ag71xx_ring_tx_clean(struct ag71xx *ag)
+{
+ struct ag71xx_ring *ring = &ag->tx_ring;
+ int ring_mask = BIT(ring->order) - 1;
+ u32 bytes_compl = 0, pkts_compl = 0;
+ struct net_device *ndev = ag->ndev;
+
+ while (ring->curr != ring->dirty) {
+ struct ag71xx_desc *desc;
+ u32 i = ring->dirty & ring_mask;
+
+ desc = ag71xx_ring_desc(ring, i);
+ if (!ag71xx_desc_empty(desc)) {
+ desc->ctrl = 0;
+ ndev->stats.tx_errors++;
+ }
+
+ if (ring->buf[i].tx.skb) {
+ bytes_compl += ring->buf[i].tx.len;
+ pkts_compl++;
+ dev_kfree_skb_any(ring->buf[i].tx.skb);
+ }
+ ring->buf[i].tx.skb = NULL;
+ ring->dirty++;
+ }
+
+ /* flush descriptors */
+ wmb();
+
+ netdev_completed_queue(ndev, pkts_compl, bytes_compl);
+}
+
+static void ag71xx_ring_tx_init(struct ag71xx *ag)
+{
+ struct ag71xx_ring *ring = &ag->tx_ring;
+ int ring_size = BIT(ring->order);
+ int ring_mask = ring_size - 1;
+ int i;
+
+ for (i = 0; i < ring_size; i++) {
+ struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+ desc->next = (u32)(ring->descs_dma +
+ AG71XX_DESC_SIZE * ((i + 1) & ring_mask));
+
+ desc->ctrl = DESC_EMPTY;
+ ring->buf[i].tx.skb = NULL;
+ }
+
+ /* flush descriptors */
+ wmb();
+
+ ring->curr = 0;
+ ring->dirty = 0;
+ netdev_reset_queue(ag->ndev);
+}
+
+static void ag71xx_ring_rx_clean(struct ag71xx *ag)
+{
+ struct ag71xx_ring *ring = &ag->rx_ring;
+ int ring_size = BIT(ring->order);
+ int i;
+
+ if (!ring->buf)
+ return;
+
+ for (i = 0; i < ring_size; i++)
+ if (ring->buf[i].rx.rx_buf) {
+ dma_unmap_single(&ag->pdev->dev,
+ ring->buf[i].rx.dma_addr,
+ ag->rx_buf_size, DMA_FROM_DEVICE);
+ skb_free_frag(ring->buf[i].rx.rx_buf);
+ }
+}
+
+static int ag71xx_buffer_size(struct ag71xx *ag)
+{
+ return ag->rx_buf_size +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
+static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf,
+ int offset,
+ void *(*alloc)(unsigned int size))
+{
+ struct ag71xx_ring *ring = &ag->rx_ring;
+ struct ag71xx_desc *desc;
+ void *data;
+
+ desc = ag71xx_ring_desc(ring, buf - &ring->buf[0]);
+
+ data = alloc(ag71xx_buffer_size(ag));
+ if (!data)
+ return false;
+
+ buf->rx.rx_buf = data;
+ buf->rx.dma_addr = dma_map_single(&ag->pdev->dev, data, ag->rx_buf_size,
+ DMA_FROM_DEVICE);
+ desc->data = (u32)buf->rx.dma_addr + offset;
+ return true;
+}
+
+static int ag71xx_ring_rx_init(struct ag71xx *ag)
+{
+ struct ag71xx_ring *ring = &ag->rx_ring;
+ struct net_device *ndev = ag->ndev;
+ int ring_mask = BIT(ring->order) - 1;
+ int ring_size = BIT(ring->order);
+ unsigned int i;
+ int ret;
+
+ ret = 0;
+ for (i = 0; i < ring_size; i++) {
+ struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+ desc->next = (u32)(ring->descs_dma +
+ AG71XX_DESC_SIZE * ((i + 1) & ring_mask));
+
+ netif_dbg(ag, rx_status, ndev, "RX desc at %p, next is %08x\n",
+ desc, desc->next);
+ }
+
+ for (i = 0; i < ring_size; i++) {
+ struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+ if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], ag->rx_buf_offset,
+ netdev_alloc_frag)) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ desc->ctrl = DESC_EMPTY;
+ }
+
+ /* flush descriptors */
+ wmb();
+
+ ring->curr = 0;
+ ring->dirty = 0;
+
+ return ret;
+}
+
+static int ag71xx_ring_rx_refill(struct ag71xx *ag)
+{
+ struct ag71xx_ring *ring = &ag->rx_ring;
+ int ring_mask = BIT(ring->order) - 1;
+ int offset = ag->rx_buf_offset;
+ unsigned int count;
+
+ count = 0;
+ for (; ring->curr - ring->dirty > 0; ring->dirty++) {
+ struct ag71xx_desc *desc;
+ unsigned int i;
+
+ i = ring->dirty & ring_mask;
+ desc = ag71xx_ring_desc(ring, i);
+
+ if (!ring->buf[i].rx.rx_buf &&
+ !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset,
+ napi_alloc_frag))
+ break;
+
+ desc->ctrl = DESC_EMPTY;
+ count++;
+ }
+
+ /* flush descriptors */
+ wmb();
+
+ netif_dbg(ag, rx_status, ag->ndev, "%u rx descriptors refilled\n",
+ count);
+
+ return count;
+}
+
+static int ag71xx_rings_init(struct ag71xx *ag)
+{
+ struct ag71xx_ring *tx = &ag->tx_ring;
+ struct ag71xx_ring *rx = &ag->rx_ring;
+ int ring_size, tx_size;
+
+ ring_size = BIT(tx->order) + BIT(rx->order);
+ tx_size = BIT(tx->order);
+
+ tx->buf = kcalloc(ring_size, sizeof(*tx->buf), GFP_KERNEL);
+ if (!tx->buf)
+ return -ENOMEM;
+
+ tx->descs_cpu = dma_alloc_coherent(&ag->pdev->dev,
+ ring_size * AG71XX_DESC_SIZE,
+ &tx->descs_dma, GFP_ATOMIC);
+ if (!tx->descs_cpu) {
+ kfree(tx->buf);
+ tx->buf = NULL;
+ return -ENOMEM;
+ }
+
+ rx->buf = &tx->buf[BIT(tx->order)];
+ rx->descs_cpu = ((void *)tx->descs_cpu) + tx_size * AG71XX_DESC_SIZE;
+ rx->descs_dma = tx->descs_dma + tx_size * AG71XX_DESC_SIZE;
+
+ ag71xx_ring_tx_init(ag);
+ return ag71xx_ring_rx_init(ag);
+}
+
+static void ag71xx_rings_free(struct ag71xx *ag)
+{
+ struct ag71xx_ring *tx = &ag->tx_ring;
+ struct ag71xx_ring *rx = &ag->rx_ring;
+ int ring_size;
+
+ ring_size = BIT(tx->order) + BIT(rx->order);
+
+ if (tx->descs_cpu)
+ dma_free_coherent(&ag->pdev->dev, ring_size * AG71XX_DESC_SIZE,
+ tx->descs_cpu, tx->descs_dma);
+
+ kfree(tx->buf);
+
+ tx->descs_cpu = NULL;
+ rx->descs_cpu = NULL;
+ tx->buf = NULL;
+ rx->buf = NULL;
+}
+
+static void ag71xx_rings_cleanup(struct ag71xx *ag)
+{
+ ag71xx_ring_rx_clean(ag);
+ ag71xx_ring_tx_clean(ag);
+ ag71xx_rings_free(ag);
+
+ netdev_reset_queue(ag->ndev);
+}
+
+static void ag71xx_hw_init(struct ag71xx *ag)
+{
+ ag71xx_hw_stop(ag);
+
+ ag71xx_sb(ag, AG71XX_REG_MAC_CFG1, MAC_CFG1_SR);
+ usleep_range(20, 30);
+
+ reset_control_assert(ag->mac_reset);
+ msleep(100);
+ reset_control_deassert(ag->mac_reset);
+ msleep(200);
+
+ ag71xx_hw_setup(ag);
+
+ ag71xx_dma_reset(ag);
+}
+
+static int ag71xx_hw_enable(struct ag71xx *ag)
+{
+ int ret;
+
+ ret = ag71xx_rings_init(ag);
+ if (ret)
+ return ret;
+
+ napi_enable(&ag->napi);
+ ag71xx_wr(ag, AG71XX_REG_TX_DESC, ag->tx_ring.descs_dma);
+ ag71xx_wr(ag, AG71XX_REG_RX_DESC, ag->rx_ring.descs_dma);
+ netif_start_queue(ag->ndev);
+
+ return 0;
+}
+
+static void ag71xx_hw_disable(struct ag71xx *ag)
+{
+ netif_stop_queue(ag->ndev);
+
+ ag71xx_hw_stop(ag);
+ ag71xx_dma_reset(ag);
+
+ napi_disable(&ag->napi);
+ del_timer_sync(&ag->oom_timer);
+
+ ag71xx_rings_cleanup(ag);
+}
+
+static int ag71xx_open(struct net_device *ndev)
+{
+ struct ag71xx *ag = netdev_priv(ndev);
+ unsigned int max_frame_len;
+ int ret;
+
+ max_frame_len = ag71xx_max_frame_len(ndev->mtu);
+ ag->rx_buf_size =
+ SKB_DATA_ALIGN(max_frame_len + NET_SKB_PAD + NET_IP_ALIGN);
+
+ /* setup max frame length */
+ ag71xx_wr(ag, AG71XX_REG_MAC_MFL, max_frame_len);
+ ag71xx_hw_set_macaddr(ag, ndev->dev_addr);
+
+ ret = ag71xx_hw_enable(ag);
+ if (ret)
+ goto err;
+
+ ret = ag71xx_phy_connect(ag);
+ if (ret)
+ goto err;
+
+ phy_start(ndev->phydev);
+
+ return 0;
+
+err:
+ ag71xx_rings_cleanup(ag);
+ return ret;
+}
+
+static int ag71xx_stop(struct net_device *ndev)
+{
+ struct ag71xx *ag = netdev_priv(ndev);
+
+ phy_stop(ndev->phydev);
+ phy_disconnect(ndev->phydev);
+ ag71xx_hw_disable(ag);
+
+ return 0;
+}
+
+static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len)
+{
+ int i, ring_mask, ndesc, split;
+ struct ag71xx_desc *desc;
+
+ ring_mask = BIT(ring->order) - 1;
+ ndesc = 0;
+ split = ring->desc_split;
+
+ if (!split)
+ split = len;
+
+ while (len > 0) {
+ unsigned int cur_len = len;
+
+ i = (ring->curr + ndesc) & ring_mask;
+ desc = ag71xx_ring_desc(ring, i);
+
+ if (!ag71xx_desc_empty(desc))
+ return -1;
+
+ if (cur_len > split) {
+ cur_len = split;
+
+ /* TX will hang if DMA transfers <= 4 bytes,
+ * make sure next segment is more than 4 bytes long.
+ */
+ if (len <= split + 4)
+ cur_len -= 4;
+ }
+
+ desc->data = addr;
+ addr += cur_len;
+ len -= cur_len;
+
+ if (len > 0)
+ cur_len |= DESC_MORE;
+
+ /* prevent early tx attempt of this descriptor */
+ if (!ndesc)
+ cur_len |= DESC_EMPTY;
+
+ desc->ctrl = cur_len;
+ ndesc++;
+ }
+
+ return ndesc;
+}
+
+static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
+ struct net_device *ndev)
+{
+ int i, n, ring_min, ring_mask, ring_size;
+ struct ag71xx *ag = netdev_priv(ndev);
+ struct ag71xx_ring *ring;
+ struct ag71xx_desc *desc;
+ dma_addr_t dma_addr;
+
+ ring = &ag->tx_ring;
+ ring_mask = BIT(ring->order) - 1;
+ ring_size = BIT(ring->order);
+
+ if (skb->len <= 4) {
+ netif_dbg(ag, tx_err, ndev, "packet len is too small\n");
+ goto err_drop;
+ }
+
+ dma_addr = dma_map_single(&ag->pdev->dev, skb->data, skb->len,
+ DMA_TO_DEVICE);
+
+ i = ring->curr & ring_mask;
+ desc = ag71xx_ring_desc(ring, i);
+
+ /* setup descriptor fields */
+ n = ag71xx_fill_dma_desc(ring, (u32)dma_addr,
+ skb->len & ag->dcfg->desc_pktlen_mask);
+ if (n < 0)
+ goto err_drop_unmap;
+
+ i = (ring->curr + n - 1) & ring_mask;
+ ring->buf[i].tx.len = skb->len;
+ ring->buf[i].tx.skb = skb;
+
+ netdev_sent_queue(ndev, skb->len);
+
+ skb_tx_timestamp(skb);
+
+ desc->ctrl &= ~DESC_EMPTY;
+ ring->curr += n;
+
+ /* flush descriptor */
+ wmb();
+
+ ring_min = 2;
+ if (ring->desc_split)
+ ring_min *= AG71XX_TX_RING_DS_PER_PKT;
+
+ if (ring->curr - ring->dirty >= ring_size - ring_min) {
+ netif_dbg(ag, tx_err, ndev, "tx queue full\n");
+ netif_stop_queue(ndev);
+ }
+
+ netif_dbg(ag, tx_queued, ndev, "packet injected into TX queue\n");
+
+ /* enable TX engine */
+ ag71xx_wr(ag, AG71XX_REG_TX_CTRL, TX_CTRL_TXE);
+
+ return NETDEV_TX_OK;
+
+err_drop_unmap:
+ dma_unmap_single(&ag->pdev->dev, dma_addr, skb->len, DMA_TO_DEVICE);
+
+err_drop:
+ ndev->stats.tx_dropped++;
+
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
+static int ag71xx_do_ioctl(struct net_device *ndev, struct ifreq *ifr, int cmd)
+{
+ if (!ndev->phydev)
+ return -EINVAL;
+
+ return phy_mii_ioctl(ndev->phydev, ifr, cmd);
+}
+
+static void ag71xx_oom_timer_handler(struct timer_list *t)
+{
+ struct ag71xx *ag = from_timer(ag, t, oom_timer);
+
+ napi_schedule(&ag->napi);
+}
+
+static void ag71xx_tx_timeout(struct net_device *ndev)
+{
+ struct ag71xx *ag = netdev_priv(ndev);
+
+ netif_err(ag, tx_err, ndev, "tx timeout\n");
+
+ schedule_delayed_work(&ag->restart_work, 1);
+}
+
+static void ag71xx_restart_work_func(struct work_struct *work)
+{
+ struct ag71xx *ag = container_of(work, struct ag71xx,
+ restart_work.work);
+ struct net_device *ndev = ag->ndev;
+
+ rtnl_lock();
+ ag71xx_hw_disable(ag);
+ ag71xx_hw_enable(ag);
+ if (ndev->phydev->link)
+ ag71xx_link_adjust(ag, false);
+ rtnl_unlock();
+}
+
+static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
+{
+ struct net_device *ndev = ag->ndev;
+ int ring_mask, ring_size, done = 0;
+ unsigned int pktlen_mask, offset;
+ struct sk_buff *next, *skb;
+ struct ag71xx_ring *ring;
+ struct list_head rx_list;
+
+ ring = &ag->rx_ring;
+ pktlen_mask = ag->dcfg->desc_pktlen_mask;
+ offset = ag->rx_buf_offset;
+ ring_mask = BIT(ring->order) - 1;
+ ring_size = BIT(ring->order);
+
+ netif_dbg(ag, rx_status, ndev, "rx packets, limit=%d, curr=%u, dirty=%u\n",
+ limit, ring->curr, ring->dirty);
+
+ INIT_LIST_HEAD(&rx_list);
+
+ while (done < limit) {
+ unsigned int i = ring->curr & ring_mask;
+ struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+ int pktlen;
+ int err = 0;
+
+ if (ag71xx_desc_empty(desc))
+ break;
+
+ if ((ring->dirty + ring_size) == ring->curr) {
+ WARN_ONCE(1, "RX out of ring");
+ break;
+ }
+
+ ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
+
+ pktlen = desc->ctrl & pktlen_mask;
+ pktlen -= ETH_FCS_LEN;
+
+ dma_unmap_single(&ag->pdev->dev, ring->buf[i].rx.dma_addr,
+ ag->rx_buf_size, DMA_FROM_DEVICE);
+
+ ndev->stats.rx_packets++;
+ ndev->stats.rx_bytes += pktlen;
+
+ skb = build_skb(ring->buf[i].rx.rx_buf, ag71xx_buffer_size(ag));
+ if (!skb) {
+ skb_free_frag(ring->buf[i].rx.rx_buf);
+ goto next;
+ }
+
+ skb_reserve(skb, offset);
+ skb_put(skb, pktlen);
+
+ if (err) {
+ ndev->stats.rx_dropped++;
+ kfree_skb(skb);
+ } else {
+ skb->dev = ndev;
+ skb->ip_summed = CHECKSUM_NONE;
+ list_add_tail(&skb->list, &rx_list);
+ }
+
+next:
+ ring->buf[i].rx.rx_buf = NULL;
+ done++;
+
+ ring->curr++;
+ }
+
+ ag71xx_ring_rx_refill(ag);
+
+ list_for_each_entry_safe(skb, next, &rx_list, list)
+ skb->protocol = eth_type_trans(skb, ndev);
+ netif_receive_skb_list(&rx_list);
+
+ netif_dbg(ag, rx_status, ndev, "rx finish, curr=%u, dirty=%u, done=%d\n",
+ ring->curr, ring->dirty, done);
+
+ return done;
+}
+
+static int ag71xx_poll(struct napi_struct *napi, int limit)
+{
+ struct ag71xx *ag = container_of(napi, struct ag71xx, napi);
+ struct ag71xx_ring *rx_ring = &ag->rx_ring;
+ int rx_ring_size = BIT(rx_ring->order);
+ struct net_device *ndev = ag->ndev;
+ int tx_done, rx_done;
+ u32 status;
+
+ tx_done = ag71xx_tx_packets(ag, false);
+
+ netif_dbg(ag, rx_status, ndev, "processing RX ring\n");
+ rx_done = ag71xx_rx_packets(ag, limit);
+
+ if (!rx_ring->buf[rx_ring->dirty % rx_ring_size].rx.rx_buf)
+ goto oom;
+
+ status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);
+ if (unlikely(status & RX_STATUS_OF)) {
+ ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_OF);
+ ndev->stats.rx_fifo_errors++;
+
+ /* restart RX */
+ ag71xx_wr(ag, AG71XX_REG_RX_CTRL, RX_CTRL_RXE);
+ }
+
+ if (rx_done < limit) {
+ if (status & RX_STATUS_PR)
+ goto more;
+
+ status = ag71xx_rr(ag, AG71XX_REG_TX_STATUS);
+ if (status & TX_STATUS_PS)
+ goto more;
+
+ netif_dbg(ag, rx_status, ndev, "disable polling mode, rx=%d, tx=%d,limit=%d\n",
+ rx_done, tx_done, limit);
+
+ napi_complete(napi);
+
+ /* enable interrupts */
+ ag71xx_int_enable(ag, AG71XX_INT_POLL);
+ return rx_done;
+ }
+
+more:
+ netif_dbg(ag, rx_status, ndev, "stay in polling mode, rx=%d, tx=%d, limit=%d\n",
+ rx_done, tx_done, limit);
+ return limit;
+
+oom:
+ netif_err(ag, rx_err, ndev, "out of memory\n");
+
+ mod_timer(&ag->oom_timer, jiffies + AG71XX_OOM_REFILL);
+ napi_complete(napi);
+ return 0;
+}
+
+static irqreturn_t ag71xx_interrupt(int irq, void *dev_id)
+{
+ struct net_device *ndev = dev_id;
+ struct ag71xx *ag;
+ u32 status;
+
+ ag = netdev_priv(ndev);
+ status = ag71xx_rr(ag, AG71XX_REG_INT_STATUS);
+
+ if (unlikely(!status))
+ return IRQ_NONE;
+
+ if (unlikely(status & AG71XX_INT_ERR)) {
+ if (status & AG71XX_INT_TX_BE) {
+ ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_BE);
+ netif_err(ag, intr, ndev, "TX BUS error\n");
+ }
+ if (status & AG71XX_INT_RX_BE) {
+ ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_BE);
+ netif_err(ag, intr, ndev, "RX BUS error\n");
+ }
+ }
+
+ if (likely(status & AG71XX_INT_POLL)) {
+ ag71xx_int_disable(ag, AG71XX_INT_POLL);
+ netif_dbg(ag, intr, ndev, "enable polling mode\n");
+ napi_schedule(&ag->napi);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int ag71xx_change_mtu(struct net_device *ndev, int new_mtu)
+{
+ struct ag71xx *ag = netdev_priv(ndev);
+
+ ndev->mtu = new_mtu;
+ ag71xx_wr(ag, AG71XX_REG_MAC_MFL,
+ ag71xx_max_frame_len(ndev->mtu));
+
+ return 0;
+}
+
+static const struct net_device_ops ag71xx_netdev_ops = {
+ .ndo_open = ag71xx_open,
+ .ndo_stop = ag71xx_stop,
+ .ndo_start_xmit = ag71xx_hard_start_xmit,
+ .ndo_do_ioctl = ag71xx_do_ioctl,
+ .ndo_tx_timeout = ag71xx_tx_timeout,
+ .ndo_change_mtu = ag71xx_change_mtu,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+};
+
+static const u32 ar71xx_addr_ar7100[] = {
+ 0x19000000, 0x1a000000,
+};
+
+static int ag71xx_probe(struct platform_device *pdev)
+{
+ struct device_node *np = pdev->dev.of_node;
+ const struct ag71xx_dcfg *dcfg;
+ struct net_device *ndev;
+ struct resource *res;
+ const void *mac_addr;
+ int tx_size, err, i;
+ struct ag71xx *ag;
+
+ if (!np)
+ return -ENODEV;
+
+ ndev = devm_alloc_etherdev(&pdev->dev, sizeof(*ag));
+ if (!ndev)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -EINVAL;
+
+ dcfg = of_device_get_match_data(&pdev->dev);
+ if (!dcfg)
+ return -EINVAL;
+
+ ag = netdev_priv(ndev);
+ ag->mac_idx = -1;
+ for (i = 0; i < ARRAY_SIZE(ar71xx_addr_ar7100); i++) {
+ if (ar71xx_addr_ar7100[i] == res->start)
+ ag->mac_idx = i;
+ }
+
+ if (ag->mac_idx < 0) {
+ netif_err(ag, probe, ndev, "unknown mac idx\n");
+ return -EINVAL;
+ }
+
+ ag->clk_eth = devm_clk_get(&pdev->dev, "eth");
+ if (IS_ERR(ag->clk_eth)) {
+ netif_err(ag, probe, ndev, "Failed to get eth clk.\n");
+ return PTR_ERR(ag->clk_eth);
+ }
+
+ SET_NETDEV_DEV(ndev, &pdev->dev);
+
+ ag->pdev = pdev;
+ ag->ndev = ndev;
+ ag->dcfg = dcfg;
+ ag->msg_enable = netif_msg_init(-1, AG71XX_DEFAULT_MSG_ENABLE);
+ memcpy(ag->fifodata, dcfg->fifodata, sizeof(ag->fifodata));
+
+ ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac");
+ if (IS_ERR(ag->mac_reset)) {
+ netif_err(ag, probe, ndev, "missing mac reset\n");
+ err = PTR_ERR(ag->mac_reset);
+ goto err_free;
+ }
+
+ ag->mac_base = devm_ioremap_nocache(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!ag->mac_base) {
+ err = -ENOMEM;
+ goto err_free;
+ }
+
+ ndev->irq = platform_get_irq(pdev, 0);
+ err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt,
+ 0x0, dev_name(&pdev->dev), ndev);
+ if (err) {
+ netif_err(ag, probe, ndev, "unable to request IRQ %d\n",
+ ndev->irq);
+ goto err_free;
+ }
+
+ ndev->netdev_ops = &ag71xx_netdev_ops;
+
+ INIT_DELAYED_WORK(&ag->restart_work, ag71xx_restart_work_func);
+ timer_setup(&ag->oom_timer, ag71xx_oom_timer_handler, 0);
+
+ tx_size = AG71XX_TX_RING_SIZE_DEFAULT;
+ ag->rx_ring.order = ag71xx_ring_size_order(AG71XX_RX_RING_SIZE_DEFAULT);
+
+ ndev->min_mtu = 68;
+ ndev->max_mtu = dcfg->max_frame_len - ag71xx_max_frame_len(0);
+
+ ag->rx_buf_offset = NET_SKB_PAD;
+ if (!ag71xx_is(ag, AR7100) && !ag71xx_is(ag, AR9130))
+ ag->rx_buf_offset += NET_IP_ALIGN;
+
+ if (ag71xx_is(ag, AR7100)) {
+ ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT;
+ tx_size *= AG71XX_TX_RING_DS_PER_PKT;
+ }
+ ag->tx_ring.order = ag71xx_ring_size_order(tx_size);
+
+ ag->stop_desc = dmam_alloc_coherent(&pdev->dev,
+ sizeof(struct ag71xx_desc),
+ &ag->stop_desc_dma, GFP_KERNEL);
+ if (!ag->stop_desc)
+ goto err_free;
+
+ ag->stop_desc->data = 0;
+ ag->stop_desc->ctrl = 0;
+ ag->stop_desc->next = (u32)ag->stop_desc_dma;
+
+ mac_addr = of_get_mac_address(np);
+ if (mac_addr)
+ memcpy(ndev->dev_addr, mac_addr, ETH_ALEN);
+ if (!mac_addr || !is_valid_ether_addr(ndev->dev_addr)) {
+ netif_err(ag, probe, ndev, "invalid MAC address, using random address\n");
+ eth_random_addr(ndev->dev_addr);
+ }
+
+ ag->phy_if_mode = of_get_phy_mode(np);
+ if (ag->phy_if_mode < 0) {
+ netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
+ err = ag->phy_if_mode;
+ goto err_free;
+ }
+
+ netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT);
+
+ err = clk_prepare_enable(ag->clk_eth);
+ if (err) {
+ netif_err(ag, probe, ndev, "Failed to enable eth clk.\n");
+ goto err_free;
+ }
+
+ ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0);
+
+ ag71xx_hw_init(ag);
+
+ err = ag71xx_mdio_probe(ag);
+ if (err)
+ goto err_put_clk;
+
+ platform_set_drvdata(pdev, ndev);
+
+ err = register_netdev(ndev);
+ if (err) {
+ netif_err(ag, probe, ndev, "unable to register net device\n");
+ platform_set_drvdata(pdev, NULL);
+ goto err_mdio_remove;
+ }
+
+ netif_info(ag, probe, ndev, "Atheros AG71xx at 0x%08lx, irq %d, mode:%s\n",
+ (unsigned long)ag->mac_base, ndev->irq,
+ phy_modes(ag->phy_if_mode));
+
+ return 0;
+
+err_mdio_remove:
+ ag71xx_mdio_remove(ag);
+err_put_clk:
+ clk_disable_unprepare(ag->clk_eth);
+err_free:
+ free_netdev(ndev);
+ return err;
+}
+
+static int ag71xx_remove(struct platform_device *pdev)
+{
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct ag71xx *ag;
+
+ if (!ndev)
+ return 0;
+
+ ag = netdev_priv(ndev);
+ unregister_netdev(ndev);
+ ag71xx_mdio_remove(ag);
+ clk_disable_unprepare(ag->clk_eth);
+ platform_set_drvdata(pdev, NULL);
+
+ return 0;
+}
+
+static const u32 ar71xx_fifo_ar7100[] = {
+ 0x0fff0000, 0x00001fff, 0x00780fff,
+};
+
+static const u32 ar71xx_fifo_ar9130[] = {
+ 0x0fff0000, 0x00001fff, 0x008001ff,
+};
+
+static const u32 ar71xx_fifo_ar9330[] = {
+ 0x0010ffff, 0x015500aa, 0x01f00140,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar7100 = {
+ .type = AR7100,
+ .fifodata = ar71xx_fifo_ar7100,
+ .max_frame_len = 1540,
+ .desc_pktlen_mask = SZ_4K - 1,
+ .tx_hang_workaround = false,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar7240 = {
+ .type = AR7240,
+ .fifodata = ar71xx_fifo_ar7100,
+ .max_frame_len = 1540,
+ .desc_pktlen_mask = SZ_4K - 1,
+ .tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9130 = {
+ .type = AR9130,
+ .fifodata = ar71xx_fifo_ar9130,
+ .max_frame_len = 1540,
+ .desc_pktlen_mask = SZ_4K - 1,
+ .tx_hang_workaround = false,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9330 = {
+ .type = AR9330,
+ .fifodata = ar71xx_fifo_ar9330,
+ .max_frame_len = 1540,
+ .desc_pktlen_mask = SZ_4K - 1,
+ .tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_ar9340 = {
+ .type = AR9340,
+ .fifodata = ar71xx_fifo_ar9330,
+ .max_frame_len = SZ_16K - 1,
+ .desc_pktlen_mask = SZ_16K - 1,
+ .tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_qca9530 = {
+ .type = QCA9530,
+ .fifodata = ar71xx_fifo_ar9330,
+ .max_frame_len = SZ_16K - 1,
+ .desc_pktlen_mask = SZ_16K - 1,
+ .tx_hang_workaround = true,
+};
+
+static const struct ag71xx_dcfg ag71xx_dcfg_qca9550 = {
+ .type = QCA9550,
+ .fifodata = ar71xx_fifo_ar9330,
+ .max_frame_len = 1540,
+ .desc_pktlen_mask = SZ_16K - 1,
+ .tx_hang_workaround = true,
+};
+
+static const struct of_device_id ag71xx_match[] = {
+ { .compatible = "qca,ar7100-eth", .data = &ag71xx_dcfg_ar7100 },
+ { .compatible = "qca,ar7240-eth", .data = &ag71xx_dcfg_ar7240 },
+ { .compatible = "qca,ar7241-eth", .data = &ag71xx_dcfg_ar7240 },
+ { .compatible = "qca,ar7242-eth", .data = &ag71xx_dcfg_ar7240 },
+ { .compatible = "qca,ar9130-eth", .data = &ag71xx_dcfg_ar9130 },
+ { .compatible = "qca,ar9330-eth", .data = &ag71xx_dcfg_ar9330 },
+ { .compatible = "qca,ar9340-eth", .data = &ag71xx_dcfg_ar9340 },
+ { .compatible = "qca,qca9530-eth", .data = &ag71xx_dcfg_qca9530 },
+ { .compatible = "qca,qca9550-eth", .data = &ag71xx_dcfg_qca9550 },
+ { .compatible = "qca,qca9560-eth", .data = &ag71xx_dcfg_qca9550 },
+ {}
+};
+
+static struct platform_driver ag71xx_driver = {
+ .probe = ag71xx_probe,
+ .remove = ag71xx_remove,
+ .driver = {
+ .name = "ag71xx",
+ .of_match_table = ag71xx_match,
+ }
+};
+
+module_platform_driver(ag71xx_driver);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index a8fe0808823d..7c06e2aebc9e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -280,6 +280,7 @@ struct tp_params {
unsigned short tx_modq[NCHAN]; /* channel to modulation queue map */
u32 vlan_pri_map; /* cached TP_VLAN_PRI_MAP */
+ u32 filter_mask;
u32 ingress_config; /* cached TP_INGRESS_CONFIG */
/* cached TP_OUT_CONFIG compressed error vector
@@ -600,6 +601,7 @@ struct port_info {
u8 vin;
u8 vivld;
u8 smt_idx;
+ u8 rx_cchan;
};
struct dentry;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 4107007b6ec4..6232236d7abc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -248,8 +248,9 @@ static int validate_filter(struct net_device *dev,
u32 fconf, iconf;
/* Check for unconfigured fields being used. */
- fconf = adapter->params.tp.vlan_pri_map;
iconf = adapter->params.tp.ingress_config;
+ fconf = fs->hash ? adapter->params.tp.filter_mask :
+ adapter->params.tp.vlan_pri_map;
if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) ||
unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) ||
@@ -1041,7 +1042,7 @@ static void mk_act_open_req6(struct filter_entry *f, struct sk_buff *skb,
RSS_QUEUE_V(f->fs.iq) |
TX_QUEUE_V(f->fs.nat_mode) |
T5_OPT_2_VALID_F |
- RX_CHANNEL_F |
+ RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
(f->fs.dirsteer << 1)) |
PACE_V((f->fs.maskhash) |
@@ -1081,7 +1082,7 @@ static void mk_act_open_req(struct filter_entry *f, struct sk_buff *skb,
RSS_QUEUE_V(f->fs.iq) |
TX_QUEUE_V(f->fs.nat_mode) |
T5_OPT_2_VALID_F |
- RX_CHANNEL_F |
+ RX_CHANNEL_V(cxgb4_port_e2cchan(f->dev)) |
CONG_CNTRL_V((f->fs.action == FILTER_DROP) |
(f->fs.dirsteer << 1)) |
PACE_V((f->fs.maskhash) |
@@ -1833,24 +1834,38 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
}
}
-int init_hash_filter(struct adapter *adap)
+void init_hash_filter(struct adapter *adap)
{
+ u32 reg;
+
/* On T6, verify the necessary register configs and warn the user in
* case of improper config
*/
if (is_t6(adap->params.chip)) {
- if (TCAM_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_0_A)) != 4)
- goto err;
+ if (is_offload(adap)) {
+ if (!(t4_read_reg(adap, TP_GLOBAL_CONFIG_A)
+ & ACTIVEFILTERCOUNTS_F)) {
+ dev_err(adap->pdev_dev, "Invalid hash filter + ofld config\n");
+ return;
+ }
+ } else {
+ reg = t4_read_reg(adap, LE_DB_RSP_CODE_0_A);
+ if (TCAM_ACTV_HIT_G(reg) != 4) {
+ dev_err(adap->pdev_dev, "Invalid hash filter config\n");
+ return;
+ }
+
+ reg = t4_read_reg(adap, LE_DB_RSP_CODE_1_A);
+ if (HASH_ACTV_HIT_G(reg) != 4) {
+ dev_err(adap->pdev_dev, "Invalid hash filter config\n");
+ return;
+ }
+ }
- if (HASH_ACTV_HIT_G(t4_read_reg(adap, LE_DB_RSP_CODE_1_A)) != 4)
- goto err;
} else {
dev_err(adap->pdev_dev, "Hash filter supported only on T6\n");
- return -EINVAL;
+ return;
}
+
adap->params.hash_filter = 1;
- return 0;
-err:
- dev_warn(adap->pdev_dev, "Invalid hash filter config!\n");
- return -EINVAL;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
index 8db5fca6dcc9..b0751c0611ec 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h
@@ -50,7 +50,7 @@ int delete_filter(struct adapter *adapter, unsigned int fidx);
int writable_filter(struct filter_entry *f);
void clear_all_filters(struct adapter *adapter);
-int init_hash_filter(struct adapter *adap);
+void init_hash_filter(struct adapter *adap);
bool is_filter_exact_match(struct adapter *adap,
struct ch_filter_specification *fs);
#endif /* __CXGB4_FILTER_H */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 715e4edcf4a2..7d7df59f9a70 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1646,6 +1646,18 @@ unsigned int cxgb4_port_chan(const struct net_device *dev)
}
EXPORT_SYMBOL(cxgb4_port_chan);
+/**
+ * cxgb4_port_e2cchan - get the HW c-channel of a port
+ * @dev: the net device for the port
+ *
+ * Return the HW RX c-channel of the given port.
+ */
+unsigned int cxgb4_port_e2cchan(const struct net_device *dev)
+{
+ return netdev2pinfo(dev)->rx_cchan;
+}
+EXPORT_SYMBOL(cxgb4_port_e2cchan);
+
unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo)
{
struct adapter *adap = netdev2adap(dev);
@@ -3905,14 +3917,14 @@ static int adap_init0_phy(struct adapter *adap)
*/
static int adap_init0_config(struct adapter *adapter, int reset)
{
+ char *fw_config_file, fw_config_file_path[256];
+ u32 finiver, finicsum, cfcsum, param, val;
struct fw_caps_config_cmd caps_cmd;
- const struct firmware *cf;
unsigned long mtype = 0, maddr = 0;
- u32 finiver, finicsum, cfcsum;
- int ret;
- int config_issued = 0;
- char *fw_config_file, fw_config_file_path[256];
+ const struct firmware *cf;
char *config_name = NULL;
+ int config_issued = 0;
+ int ret;
/*
* Reset device if necessary.
@@ -4020,6 +4032,24 @@ static int adap_init0_config(struct adapter *adapter, int reset)
goto bye;
}
+ val = 0;
+
+ /* Ofld + Hash filter is supported. Older fw will fail this request and
+ * it is fine.
+ */
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD));
+ ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0,
+ 1, &param, &val);
+
+ /* FW doesn't know about Hash filter + ofld support,
+ * it's not a problem, don't return an error.
+ */
+ if (ret < 0) {
+ dev_warn(adapter->pdev_dev,
+ "Hash filter with ofld is not supported by FW\n");
+ }
+
/*
* Issue a Capability Configuration command to the firmware to get it
* to parse the Configuration File. We don't use t4_fw_config_file()
@@ -4580,6 +4610,13 @@ static int adap_init0(struct adapter *adap)
if (ret < 0)
goto bye;
+ /* hash filter has some mandatory register settings to be tested and for
+ * that it needs to test whether offload is enabled or not, hence
+ * checking and setting it here.
+ */
+ if (caps_cmd.ofldcaps)
+ adap->params.offload = 1;
+
if (caps_cmd.ofldcaps ||
(caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER))) {
/* query offload-related parameters */
@@ -4619,11 +4656,8 @@ static int adap_init0(struct adapter *adap)
adap->params.ofldq_wr_cred = val[5];
if (caps_cmd.niccaps & htons(FW_CAPS_CONFIG_NIC_HASHFILTER)) {
- ret = init_hash_filter(adap);
- if (ret < 0)
- goto bye;
+ init_hash_filter(adap);
} else {
- adap->params.offload = 1;
adap->num_ofld_uld += 1;
}
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 21da34a4ca24..42ae28d651e7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -393,6 +393,7 @@ int cxgb4_immdata_send(struct net_device *dev, unsigned int idx,
int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb);
unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
unsigned int cxgb4_port_chan(const struct net_device *dev);
+unsigned int cxgb4_port_e2cchan(const struct net_device *dev);
unsigned int cxgb4_port_viid(const struct net_device *dev);
unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid);
unsigned int cxgb4_port_idx(const struct net_device *dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 93feb258067b..9dd5ed9a2965 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -6209,6 +6209,37 @@ unsigned int t4_get_mps_bg_map(struct adapter *adapter, int pidx)
}
/**
+ * t4_get_tp_e2c_map - return the E2C channel map associated with a port
+ * @adapter: the adapter
+ * @pidx: the port index
+ */
+static unsigned int t4_get_tp_e2c_map(struct adapter *adapter, int pidx)
+{
+ unsigned int nports;
+ u32 param, val = 0;
+ int ret;
+
+ nports = 1 << NUMPORTS_G(t4_read_reg(adapter, MPS_CMN_CTL_A));
+ if (pidx >= nports) {
+ CH_WARN(adapter, "TP E2C Channel Port Index %d >= Nports %d\n",
+ pidx, nports);
+ return 0;
+ }
+
+ /* FW version >= 1.16.44.0 can determine E2C channel map using
+ * FW_PARAMS_PARAM_DEV_TPCHMAP API.
+ */
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_TPCHMAP));
+ ret = t4_query_params_ns(adapter, adapter->mbox, adapter->pf,
+ 0, 1, &param, &val);
+ if (!ret)
+ return (val >> (8 * pidx)) & 0xff;
+
+ return 0;
+}
+
+/**
* t4_get_tp_ch_map - return TP ingress channels associated with a port
* @adapter: the adapter
* @pidx: the port index
@@ -9368,8 +9399,9 @@ int t4_init_sge_params(struct adapter *adapter)
*/
int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
{
- int chan;
- u32 v;
+ u32 param, val, v;
+ int chan, ret;
+
v = t4_read_reg(adap, TP_TIMER_RESOLUTION_A);
adap->params.tp.tre = TIMERRESOLUTION_G(v);
@@ -9379,11 +9411,47 @@ int t4_init_tp_params(struct adapter *adap, bool sleep_ok)
for (chan = 0; chan < NCHAN; chan++)
adap->params.tp.tx_modq[chan] = chan;
- /* Cache the adapter's Compressed Filter Mode and global Incress
+ /* Cache the adapter's Compressed Filter Mode/Mask and global Ingress
* Configuration.
*/
- t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1,
- TP_VLAN_PRI_MAP_A, sleep_ok);
+ param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_FILTER) |
+ FW_PARAMS_PARAM_Y_V(FW_PARAM_DEV_FILTER_MODE_MASK));
+
+ /* Read current value */
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1,
+ &param, &val);
+ if (ret == 0) {
+ dev_info(adap->pdev_dev,
+ "Current filter mode/mask 0x%x:0x%x\n",
+ FW_PARAMS_PARAM_FILTER_MODE_G(val),
+ FW_PARAMS_PARAM_FILTER_MASK_G(val));
+ adap->params.tp.vlan_pri_map =
+ FW_PARAMS_PARAM_FILTER_MODE_G(val);
+ adap->params.tp.filter_mask =
+ FW_PARAMS_PARAM_FILTER_MASK_G(val);
+ } else {
+ dev_info(adap->pdev_dev,
+ "Failed to read filter mode/mask via fw api, using indirect-reg-read\n");
+
+ /* Incase of older-fw (which doesn't expose the api
+ * FW_PARAM_DEV_FILTER_MODE_MASK) and newer-driver (which uses
+ * the fw api) combination, fall-back to older method of reading
+ * the filter mode from indirect-register
+ */
+ t4_tp_pio_read(adap, &adap->params.tp.vlan_pri_map, 1,
+ TP_VLAN_PRI_MAP_A, sleep_ok);
+
+ /* With the older-fw and newer-driver combination we might run
+ * into an issue when user wants to use hash filter region but
+ * the filter_mask is zero, in this case filter_mask validation
+ * is tough. To avoid that we set the filter_mask same as filter
+ * mode, which will behave exactly as the older way of ignoring
+ * the filter mask validation.
+ */
+ adap->params.tp.filter_mask = adap->params.tp.vlan_pri_map;
+ }
+
t4_tp_pio_read(adap, &adap->params.tp.ingress_config, 1,
TP_INGRESS_CONFIG_A, sleep_ok);
@@ -9594,6 +9662,7 @@ int t4_init_portinfo(struct port_info *pi, int mbox,
pi->tx_chan = port;
pi->lport = port;
pi->rss_size = rss_size;
+ pi->rx_cchan = t4_get_tp_e2c_map(pi->adapter, port);
/* If fw supports returning the VIN as part of FW_VI_CMD,
* save the returned values.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index eb222d40ddbf..a957a6e4d4c4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -1334,6 +1334,10 @@
#define TP_OUT_CONFIG_A 0x7d04
#define TP_GLOBAL_CONFIG_A 0x7d08
+#define ACTIVEFILTERCOUNTS_S 22
+#define ACTIVEFILTERCOUNTS_V(x) ((x) << ACTIVEFILTERCOUNTS_S)
+#define ACTIVEFILTERCOUNTS_F ACTIVEFILTERCOUNTS_V(1U)
+
#define TP_CMM_TCB_BASE_A 0x7d10
#define TP_CMM_MM_BASE_A 0x7d14
#define TP_CMM_TIMER_BASE_A 0x7d18
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index b2a618e72fcf..0be4ce520352 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1221,6 +1221,23 @@ enum fw_params_mnem {
/*
* device parameters
*/
+
+#define FW_PARAMS_PARAM_FILTER_MODE_S 16
+#define FW_PARAMS_PARAM_FILTER_MODE_M 0xffff
+#define FW_PARAMS_PARAM_FILTER_MODE_V(x) \
+ ((x) << FW_PARAMS_PARAM_FILTER_MODE_S)
+#define FW_PARAMS_PARAM_FILTER_MODE_G(x) \
+ (((x) >> FW_PARAMS_PARAM_FILTER_MODE_S) & \
+ FW_PARAMS_PARAM_FILTER_MODE_M)
+
+#define FW_PARAMS_PARAM_FILTER_MASK_S 0
+#define FW_PARAMS_PARAM_FILTER_MASK_M 0xffff
+#define FW_PARAMS_PARAM_FILTER_MASK_V(x) \
+ ((x) << FW_PARAMS_PARAM_FILTER_MASK_S)
+#define FW_PARAMS_PARAM_FILTER_MASK_G(x) \
+ (((x) >> FW_PARAMS_PARAM_FILTER_MASK_S) & \
+ FW_PARAMS_PARAM_FILTER_MASK_M)
+
enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_CCLK = 0x00, /* chip core clock in khz */
FW_PARAMS_PARAM_DEV_PORTVEC = 0x01, /* the port vector */
@@ -1250,12 +1267,15 @@ enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_RI_FR_NSMR_TPTE_WR = 0x1C,
FW_PARAMS_PARAM_DEV_FILTER2_WR = 0x1D,
FW_PARAMS_PARAM_DEV_MPSBGMAP = 0x1E,
+ FW_PARAMS_PARAM_DEV_TPCHMAP = 0x1F,
FW_PARAMS_PARAM_DEV_HMA_SIZE = 0x20,
FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24,
FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27,
+ FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28,
FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29,
FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
+ FW_PARAMS_PARAM_DEV_FILTER = 0x2E,
};
/*
@@ -1347,6 +1367,11 @@ enum fw_params_param_dev_diag {
FW_PARAM_DEV_DIAG_MAXTMPTHRESH = 0x02,
};
+enum fw_params_param_dev_filter {
+ FW_PARAM_DEV_FILTER_VNIC_MODE = 0x00,
+ FW_PARAM_DEV_FILTER_MODE_MASK = 0x01,
+};
+
enum fw_params_param_dev_fwcache {
FW_PARAM_DEV_FWCACHE_FLUSH = 0x00,
FW_PARAM_DEV_FWCACHE_FLUSHINV = 0x01,
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 7d2390e3df77..753957ec72be 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2479,14 +2479,9 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv,
queue.destination.type = DPNI_DEST_DPCON;
queue.destination.priority = 1;
queue.user_context = (u64)(uintptr_t)fq;
- queue.flc.stash_control = 1;
- queue.flc.value &= 0xFFFFFFFFFFFFFFC0;
- /* 01 01 00 - data, annotation, flow context */
- queue.flc.value |= 0x14;
err = dpni_set_queue(priv->mc_io, 0, priv->mc_token,
DPNI_QUEUE_RX, 0, fq->flowid,
- DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST |
- DPNI_QUEUE_OPT_FLC,
+ DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST,
&queue);
if (err) {
dev_err(dev, "dpni_set_queue(RX) failed\n");
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index 8429f5c1d810..ed0d010c7cf2 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -29,3 +29,13 @@ config FSL_ENETC_PTP_CLOCK
packets using the SO_TIMESTAMPING API.
If compiled as module (M), the module name is fsl-enetc-ptp.
+
+config FSL_ENETC_HW_TIMESTAMPING
+ bool "ENETC hardware timestamping support"
+ depends on FSL_ENETC || FSL_ENETC_VF
+ help
+ Enable hardware timestamping support on the Ethernet packets
+ using the SO_TIMESTAMPING API. Because the RX BD ring dynamic
+ allocation has not been supported and it is too expensive to use
+ extended RX BDs if timestamping is not used, this option enables
+ extended RX BDs in order to support hardware timestamping.
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 491475d87736..223709443ea4 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -13,7 +13,8 @@
#define ENETC_MAX_SKB_FRAGS 13
#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1)
-static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb);
+static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+ int active_offloads);
netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
{
@@ -33,7 +34,7 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev)
return NETDEV_TX_BUSY;
}
- count = enetc_map_tx_buffs(tx_ring, skb);
+ count = enetc_map_tx_buffs(tx_ring, skb, priv->active_offloads);
if (unlikely(!count))
goto drop_packet_err;
@@ -105,7 +106,8 @@ static void enetc_free_tx_skb(struct enetc_bdr *tx_ring,
}
}
-static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
+static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+ int active_offloads)
{
struct enetc_tx_swbd *tx_swbd;
struct skb_frag_struct *frag;
@@ -137,7 +139,10 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
count++;
do_vlan = skb_vlan_tag_present(skb);
- do_tstamp = skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP;
+ do_tstamp = (active_offloads & ENETC_F_TX_TSTAMP) &&
+ (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP);
+ tx_swbd->do_tstamp = do_tstamp;
+ tx_swbd->check_wb = tx_swbd->do_tstamp;
if (do_vlan || do_tstamp)
flags |= ENETC_TXBD_FLAGS_EX;
@@ -299,24 +304,70 @@ static int enetc_bd_ready_count(struct enetc_bdr *tx_ring, int ci)
return pi >= ci ? pi - ci : tx_ring->bd_count - ci + pi;
}
+static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd,
+ u64 *tstamp)
+{
+ u32 lo, hi, tstamp_lo;
+
+ lo = enetc_rd(hw, ENETC_SICTR0);
+ hi = enetc_rd(hw, ENETC_SICTR1);
+ tstamp_lo = le32_to_cpu(txbd->wb.tstamp);
+ if (lo <= tstamp_lo)
+ hi -= 1;
+ *tstamp = (u64)hi << 32 | tstamp_lo;
+}
+
+static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
+{
+ struct skb_shared_hwtstamps shhwtstamps;
+
+ if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
+ memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+ shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+ skb_tstamp_tx(skb, &shhwtstamps);
+ }
+}
+
static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
{
struct net_device *ndev = tx_ring->ndev;
int tx_frm_cnt = 0, tx_byte_cnt = 0;
struct enetc_tx_swbd *tx_swbd;
int i, bds_to_clean;
+ bool do_tstamp;
+ u64 tstamp = 0;
i = tx_ring->next_to_clean;
tx_swbd = &tx_ring->tx_swbd[i];
bds_to_clean = enetc_bd_ready_count(tx_ring, i);
+ do_tstamp = false;
+
while (bds_to_clean && tx_frm_cnt < ENETC_DEFAULT_TX_WORK) {
bool is_eof = !!tx_swbd->skb;
+ if (unlikely(tx_swbd->check_wb)) {
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ union enetc_tx_bd *txbd;
+
+ txbd = ENETC_TXBD(*tx_ring, i);
+
+ if (txbd->flags & ENETC_TXBD_FLAGS_W &&
+ tx_swbd->do_tstamp) {
+ enetc_get_tx_tstamp(&priv->si->hw, txbd,
+ &tstamp);
+ do_tstamp = true;
+ }
+ }
+
if (likely(tx_swbd->dma))
enetc_unmap_tx_buff(tx_ring, tx_swbd);
if (is_eof) {
+ if (unlikely(do_tstamp)) {
+ enetc_tstamp_tx(tx_swbd->skb, tstamp);
+ do_tstamp = false;
+ }
napi_consume_skb(tx_swbd->skb, napi_budget);
tx_swbd->skb = NULL;
}
@@ -425,10 +476,38 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
return j;
}
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+static void enetc_get_rx_tstamp(struct net_device *ndev,
+ union enetc_rx_bd *rxbd,
+ struct sk_buff *skb)
+{
+ struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct enetc_hw *hw = &priv->si->hw;
+ u32 lo, hi, tstamp_lo;
+ u64 tstamp;
+
+ if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TSTMP) {
+ lo = enetc_rd(hw, ENETC_SICTR0);
+ hi = enetc_rd(hw, ENETC_SICTR1);
+ tstamp_lo = le32_to_cpu(rxbd->r.tstamp);
+ if (lo <= tstamp_lo)
+ hi -= 1;
+
+ tstamp = (u64)hi << 32 | tstamp_lo;
+ memset(shhwtstamps, 0, sizeof(*shhwtstamps));
+ shhwtstamps->hwtstamp = ns_to_ktime(tstamp);
+ }
+}
+#endif
+
static void enetc_get_offloads(struct enetc_bdr *rx_ring,
union enetc_rx_bd *rxbd, struct sk_buff *skb)
{
- /* TODO: add tstamp, hashing */
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+ struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
+#endif
+ /* TODO: hashing */
if (rx_ring->ndev->features & NETIF_F_RXCSUM) {
u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum);
@@ -442,6 +521,10 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
le16_to_cpu(rxbd->r.vlan_opt));
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+ if (priv->active_offloads & ENETC_F_RX_TSTAMP)
+ enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb);
+#endif
}
static void enetc_process_skb(struct enetc_bdr *rx_ring,
@@ -1074,6 +1157,9 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
enetc_rxbdr_wr(hw, idx, ENETC_RBICIR0, ENETC_RBICIR0_ICEN | 0x1);
rbmr = ENETC_RBMR_EN;
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+ rbmr |= ENETC_RBMR_BDS;
+#endif
if (rx_ring->ndev->features & NETIF_F_HW_VLAN_CTAG_RX)
rbmr |= ENETC_RBMR_VTE;
@@ -1341,6 +1427,62 @@ int enetc_close(struct net_device *ndev)
return 0;
}
+int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct tc_mqprio_qopt *mqprio = type_data;
+ struct enetc_bdr *tx_ring;
+ u8 num_tc;
+ int i;
+
+ if (type != TC_SETUP_QDISC_MQPRIO)
+ return -EOPNOTSUPP;
+
+ mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
+ num_tc = mqprio->num_tc;
+
+ if (!num_tc) {
+ netdev_reset_tc(ndev);
+ netif_set_real_num_tx_queues(ndev, priv->num_tx_rings);
+
+ /* Reset all ring priorities to 0 */
+ for (i = 0; i < priv->num_tx_rings; i++) {
+ tx_ring = priv->tx_ring[i];
+ enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, 0);
+ }
+
+ return 0;
+ }
+
+ /* Check if we have enough BD rings available to accommodate all TCs */
+ if (num_tc > priv->num_tx_rings) {
+ netdev_err(ndev, "Max %d traffic classes supported\n",
+ priv->num_tx_rings);
+ return -EINVAL;
+ }
+
+ /* For the moment, we use only one BD ring per TC.
+ *
+ * Configure num_tc BD rings with increasing priorities.
+ */
+ for (i = 0; i < num_tc; i++) {
+ tx_ring = priv->tx_ring[i];
+ enetc_set_bdr_prio(&priv->si->hw, tx_ring->index, i);
+ }
+
+ /* Reset the number of netdev queues based on the TC count */
+ netif_set_real_num_tx_queues(ndev, num_tc);
+
+ netdev_set_num_tc(ndev, num_tc);
+
+ /* Each TC is associated with one netdev queue */
+ for (i = 0; i < num_tc; i++)
+ netdev_set_tc_queue(ndev, i, 1, i);
+
+ return 0;
+}
+
struct net_device_stats *enetc_get_stats(struct net_device *ndev)
{
struct enetc_ndev_priv *priv = netdev_priv(ndev);
@@ -1396,6 +1538,70 @@ int enetc_set_features(struct net_device *ndev,
return 0;
}
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+static int enetc_hwtstamp_set(struct net_device *ndev, struct ifreq *ifr)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct hwtstamp_config config;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ switch (config.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ priv->active_offloads &= ~ENETC_F_TX_TSTAMP;
+ break;
+ case HWTSTAMP_TX_ON:
+ priv->active_offloads |= ENETC_F_TX_TSTAMP;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ priv->active_offloads &= ~ENETC_F_RX_TSTAMP;
+ break;
+ default:
+ priv->active_offloads |= ENETC_F_RX_TSTAMP;
+ config.rx_filter = HWTSTAMP_FILTER_ALL;
+ }
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+
+static int enetc_hwtstamp_get(struct net_device *ndev, struct ifreq *ifr)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct hwtstamp_config config;
+
+ config.flags = 0;
+
+ if (priv->active_offloads & ENETC_F_TX_TSTAMP)
+ config.tx_type = HWTSTAMP_TX_ON;
+ else
+ config.tx_type = HWTSTAMP_TX_OFF;
+
+ config.rx_filter = (priv->active_offloads & ENETC_F_RX_TSTAMP) ?
+ HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE;
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+#endif
+
+int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
+{
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+ if (cmd == SIOCSHWTSTAMP)
+ return enetc_hwtstamp_set(ndev, rq);
+ if (cmd == SIOCGHWTSTAMP)
+ return enetc_hwtstamp_get(ndev, rq);
+#endif
+ return -EINVAL;
+}
+
int enetc_alloc_msix(struct enetc_ndev_priv *priv)
{
struct pci_dev *pdev = priv->si->pdev;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index b274135c5103..541b4e2073fe 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -21,7 +21,9 @@ struct enetc_tx_swbd {
struct sk_buff *skb;
dma_addr_t dma;
u16 len;
- u16 is_dma_page;
+ u8 is_dma_page:1;
+ u8 check_wb:1;
+ u8 do_tstamp:1;
};
#define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE
@@ -167,6 +169,12 @@ struct enetc_cls_rule {
#define ENETC_MAX_BDR_INT 2 /* fixed to max # of available cpus */
+/* TODO: more hardware offloads */
+enum enetc_active_offloads {
+ ENETC_F_RX_TSTAMP = BIT(0),
+ ENETC_F_TX_TSTAMP = BIT(1),
+};
+
struct enetc_ndev_priv {
struct net_device *ndev;
struct device *dev; /* dma-mapping device */
@@ -178,6 +186,7 @@ struct enetc_ndev_priv {
u16 rx_bd_count, tx_bd_count;
u16 msg_enable;
+ int active_offloads;
struct enetc_bdr *tx_ring[16];
struct enetc_bdr *rx_ring[16];
@@ -200,6 +209,9 @@ struct enetc_msg_cmd_set_primary_mac {
#define ENETC_CBDR_TIMEOUT 1000 /* usecs */
+/* PTP driver exports */
+extern int enetc_phc_index;
+
/* SI common */
int enetc_pci_probe(struct pci_dev *pdev, const char *name, int sizeof_priv);
void enetc_pci_remove(struct pci_dev *pdev);
@@ -216,6 +228,10 @@ netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev);
struct net_device_stats *enetc_get_stats(struct net_device *ndev);
int enetc_set_features(struct net_device *ndev,
netdev_features_t features);
+int enetc_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd);
+int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+ void *type_data);
+
/* ethtool */
void enetc_set_ethtool_ops(struct net_device *ndev);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index b9519b6ad727..fcb52efec075 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -555,6 +555,35 @@ static void enetc_get_ringparam(struct net_device *ndev,
}
}
+static int enetc_get_ts_info(struct net_device *ndev,
+ struct ethtool_ts_info *info)
+{
+ int *phc_idx;
+
+ phc_idx = symbol_get(enetc_phc_index);
+ if (phc_idx) {
+ info->phc_index = *phc_idx;
+ symbol_put(enetc_phc_index);
+ } else {
+ info->phc_index = -1;
+ }
+
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+ info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->tx_types = (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON);
+ info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_ALL);
+#else
+ info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE;
+#endif
+ return 0;
+}
+
static const struct ethtool_ops enetc_pf_ethtool_ops = {
.get_regs_len = enetc_get_reglen,
.get_regs = enetc_get_regs,
@@ -571,6 +600,7 @@ static const struct ethtool_ops enetc_pf_ethtool_ops = {
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
.get_link = ethtool_op_get_link,
+ .get_ts_info = enetc_get_ts_info,
};
static const struct ethtool_ops enetc_vf_ethtool_ops = {
@@ -586,6 +616,7 @@ static const struct ethtool_ops enetc_vf_ethtool_ops = {
.set_rxfh = enetc_set_rxfh,
.get_ringparam = enetc_get_ringparam,
.get_link = ethtool_op_get_link,
+ .get_ts_info = enetc_get_ts_info,
};
void enetc_set_ethtool_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index df8eb8882d92..88276299f447 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -127,7 +127,7 @@ enum enetc_bdr_type {TX, RX};
#define ENETC_TBSR_BUSY BIT(0)
#define ENETC_TBMR_VIH BIT(9)
#define ENETC_TBMR_PRIO_MASK GENMASK(2, 0)
-#define ENETC_TBMR_PRIO_SET(val) val
+#define ENETC_TBMR_SET_PRIO(val) ((val) & ENETC_TBMR_PRIO_MASK)
#define ENETC_TBMR_EN BIT(31)
#define ENETC_TBSR 0x4
#define ENETC_TBBAR0 0x10
@@ -361,6 +361,12 @@ union enetc_tx_bd {
u8 e_flags;
u8 flags;
} ext; /* Tx BD extension */
+ struct {
+ __le32 tstamp;
+ u8 reserved[10];
+ u8 status;
+ u8 flags;
+ } wb; /* writeback descriptor */
};
#define ENETC_TXBD_FLAGS_L4CS BIT(0)
@@ -399,6 +405,9 @@ union enetc_rx_bd {
struct {
__le64 addr;
u8 reserved[8];
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+ u8 reserved1[16];
+#endif
} w;
struct {
__le16 inet_csum;
@@ -413,6 +422,10 @@ union enetc_rx_bd {
};
__le32 lstatus;
};
+#ifdef CONFIG_FSL_ENETC_HW_TIMESTAMPING
+ __le32 tstamp;
+ u8 reserved[12];
+#endif
} r;
};
@@ -531,3 +544,13 @@ static inline void enetc_enable_txvlan(struct enetc_hw *hw, int si_idx,
val = (val & ~ENETC_TBMR_VIH) | (en ? ENETC_TBMR_VIH : 0);
enetc_txbdr_wr(hw, si_idx, ENETC_TBMR, val);
}
+
+static inline void enetc_set_bdr_prio(struct enetc_hw *hw, int bdr_idx,
+ int prio)
+{
+ u32 val = enetc_txbdr_rd(hw, bdr_idx, ENETC_TBMR);
+
+ val &= ~ENETC_TBMR_PRIO_MASK;
+ val |= ENETC_TBMR_SET_PRIO(prio);
+ enetc_txbdr_wr(hw, bdr_idx, ENETC_TBMR, val);
+}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 78287c517095..258b3cb38a6f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -702,6 +702,8 @@ static const struct net_device_ops enetc_ndev_ops = {
.ndo_set_vf_vlan = enetc_pf_set_vf_vlan,
.ndo_set_vf_spoofchk = enetc_pf_set_vf_spoofchk,
.ndo_set_features = enetc_pf_set_features,
+ .ndo_do_ioctl = enetc_ioctl,
+ .ndo_setup_tc = enetc_setup_tc,
};
static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
index 8c1497e7d9c5..2fd2586e42bf 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c
@@ -7,6 +7,9 @@
#include "enetc.h"
+int enetc_phc_index = -1;
+EXPORT_SYMBOL(enetc_phc_index);
+
static struct ptp_clock_info enetc_ptp_caps = {
.owner = THIS_MODULE,
.name = "ENETC PTP clock",
@@ -96,6 +99,7 @@ static int enetc_ptp_probe(struct pci_dev *pdev,
if (err)
goto err_no_clock;
+ enetc_phc_index = ptp_qoriq->phc_index;
pci_set_drvdata(pdev, ptp_qoriq);
return 0;
@@ -119,6 +123,7 @@ static void enetc_ptp_remove(struct pci_dev *pdev)
{
struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev);
+ enetc_phc_index = -1;
ptp_qoriq_free(ptp_qoriq);
kfree(ptp_qoriq);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
index 72c3ea887bcf..ebd21bf4cfa1 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c
@@ -111,6 +111,8 @@ static const struct net_device_ops enetc_ndev_ops = {
.ndo_get_stats = enetc_get_stats,
.ndo_set_mac_address = enetc_vf_set_mac_addr,
.ndo_set_features = enetc_vf_set_features,
+ .ndo_do_ioctl = enetc_ioctl,
+ .ndo_setup_tc = enetc_setup_tc,
};
static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 38f10f7dcbc3..848defa33d3a 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -3473,7 +3473,6 @@ fec_probe(struct platform_device *pdev)
if (ret) {
dev_err(&pdev->dev,
"Failed to enable phy regulator: %d\n", ret);
- clk_disable_unprepare(fep->clk_ipg);
goto failed_regulator;
}
} else {
diff --git a/drivers/net/ethernet/freescale/fman/fman_keygen.c b/drivers/net/ethernet/freescale/fman/fman_keygen.c
index f54da3c684d0..e1bdfed16134 100644
--- a/drivers/net/ethernet/freescale/fman/fman_keygen.c
+++ b/drivers/net/ethernet/freescale/fman/fman_keygen.c
@@ -144,7 +144,8 @@
/* Hash Key extraction fields: */
#define DEFAULT_HASH_KEY_EXTRACT_FIELDS \
(KG_SCH_KN_IPSRC1 | KG_SCH_KN_IPDST1 | \
- KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST)
+ KG_SCH_KN_L4PSRC | KG_SCH_KN_L4PDST | \
+ KG_SCH_KN_IPSEC_SPI)
/* Default values to be used as hash key in case IPv4 or L4 (TCP, UDP)
* don't exist in the frame
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index ad21b0ef1946..a18645e24434 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -343,6 +343,8 @@ struct hnae3_ae_dev {
* Enable/disable hardware strip vlan tag of packets received
* set_gro_en
* Enable/disable HW GRO
+ * add_arfs_entry
+ * Check the 5-tuples of flow, and create flow director rule
*/
struct hnae3_ae_ops {
int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
@@ -492,6 +494,8 @@ struct hnae3_ae_ops {
struct ethtool_rxnfc *cmd, u32 *rule_locs);
int (*restore_fd_rules)(struct hnae3_handle *handle);
void (*enable_fd)(struct hnae3_handle *handle, bool enable);
+ int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id,
+ u16 flow_id, struct flow_keys *fkeys);
int (*dbg_run_cmd)(struct hnae3_handle *handle, char *cmd_buf);
pci_ers_result_t (*handle_hw_ras_error)(struct hnae3_ae_dev *ae_dev);
bool (*get_hw_reset_stat)(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index fc4917ac44be..30354fa33a36 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -252,6 +252,7 @@ static void hns3_dbg_help(struct hnae3_handle *h)
dev_info(&h->pdev->dev, "dump qos buf cfg\n");
dev_info(&h->pdev->dev, "dump mng tbl\n");
dev_info(&h->pdev->dev, "dump reset info\n");
+ dev_info(&h->pdev->dev, "dump m7 info\n");
dev_info(&h->pdev->dev, "dump ncl_config <offset> <length>(in hex)\n");
dev_info(&h->pdev->dev, "dump mac tnl status\n");
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 196a3d780dcf..f6dc3054bcd1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4,6 +4,9 @@
#include <linux/dma-mapping.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
+#ifdef CONFIG_RFS_ACCEL
+#include <linux/cpu_rmap.h>
+#endif
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
@@ -79,23 +82,6 @@ static irqreturn_t hns3_irq_handle(int irq, void *vector)
return IRQ_HANDLED;
}
-/* This callback function is used to set affinity changes to the irq affinity
- * masks when the irq_set_affinity_notifier function is used.
- */
-static void hns3_nic_irq_affinity_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
-{
- struct hns3_enet_tqp_vector *tqp_vectors =
- container_of(notify, struct hns3_enet_tqp_vector,
- affinity_notify);
-
- tqp_vectors->affinity_mask = *mask;
-}
-
-static void hns3_nic_irq_affinity_release(struct kref *ref)
-{
-}
-
static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv)
{
struct hns3_enet_tqp_vector *tqp_vectors;
@@ -107,8 +93,7 @@ static void hns3_nic_uninit_irq(struct hns3_nic_priv *priv)
if (tqp_vectors->irq_init_flag != HNS3_VECTOR_INITED)
continue;
- /* clear the affinity notifier and affinity mask */
- irq_set_affinity_notifier(tqp_vectors->vector_irq, NULL);
+ /* clear the affinity mask */
irq_set_affinity_hint(tqp_vectors->vector_irq, NULL);
/* release the irq resource */
@@ -161,12 +146,6 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv)
return ret;
}
- tqp_vectors->affinity_notify.notify =
- hns3_nic_irq_affinity_notify;
- tqp_vectors->affinity_notify.release =
- hns3_nic_irq_affinity_release;
- irq_set_affinity_notifier(tqp_vectors->vector_irq,
- &tqp_vectors->affinity_notify);
irq_set_affinity_hint(tqp_vectors->vector_irq,
&tqp_vectors->affinity_mask);
@@ -340,6 +319,40 @@ static void hns3_tqp_disable(struct hnae3_queue *tqp)
hns3_write_dev(tqp, HNS3_RING_EN_REG, rcb_reg);
}
+static void hns3_free_rx_cpu_rmap(struct net_device *netdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+ netdev->rx_cpu_rmap = NULL;
+#endif
+}
+
+static int hns3_set_rx_cpu_rmap(struct net_device *netdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct hns3_nic_priv *priv = netdev_priv(netdev);
+ struct hns3_enet_tqp_vector *tqp_vector;
+ int i, ret;
+
+ if (!netdev->rx_cpu_rmap) {
+ netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->vector_num);
+ if (!netdev->rx_cpu_rmap)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < priv->vector_num; i++) {
+ tqp_vector = &priv->tqp_vector[i];
+ ret = irq_cpu_rmap_add(netdev->rx_cpu_rmap,
+ tqp_vector->vector_irq);
+ if (ret) {
+ hns3_free_rx_cpu_rmap(netdev);
+ return ret;
+ }
+ }
+#endif
+ return 0;
+}
+
static int hns3_nic_net_up(struct net_device *netdev)
{
struct hns3_nic_priv *priv = netdev_priv(netdev);
@@ -351,11 +364,16 @@ static int hns3_nic_net_up(struct net_device *netdev)
if (ret)
return ret;
+ /* the device can work without cpu rmap, only aRFS needs it */
+ ret = hns3_set_rx_cpu_rmap(netdev);
+ if (ret)
+ netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret);
+
/* get irq resource for all vectors */
ret = hns3_nic_init_irq(priv);
if (ret) {
netdev_err(netdev, "hns init irq failed! ret=%d\n", ret);
- return ret;
+ goto free_rmap;
}
clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
@@ -384,7 +402,8 @@ out_start_err:
hns3_vector_disable(&priv->tqp_vector[j]);
hns3_nic_uninit_irq(priv);
-
+free_rmap:
+ hns3_free_rx_cpu_rmap(netdev);
return ret;
}
@@ -467,6 +486,8 @@ static void hns3_nic_net_down(struct net_device *netdev)
if (ops->stop)
ops->stop(priv->ae_handle);
+ hns3_free_rx_cpu_rmap(netdev);
+
/* free irq resources */
hns3_nic_uninit_irq(priv);
@@ -1722,6 +1743,32 @@ static void hns3_nic_net_timeout(struct net_device *ndev)
h->ae_algo->ops->reset_event(h->pdev, h);
}
+#ifdef CONFIG_RFS_ACCEL
+static int hns3_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
+ u16 rxq_index, u32 flow_id)
+{
+ struct hnae3_handle *h = hns3_get_handle(dev);
+ struct flow_keys fkeys;
+
+ if (!h->ae_algo->ops->add_arfs_entry)
+ return -EOPNOTSUPP;
+
+ if (skb->encapsulation)
+ return -EPROTONOSUPPORT;
+
+ if (!skb_flow_dissect_flow_keys(skb, &fkeys, 0))
+ return -EPROTONOSUPPORT;
+
+ if ((fkeys.basic.n_proto != htons(ETH_P_IP) &&
+ fkeys.basic.n_proto != htons(ETH_P_IPV6)) ||
+ (fkeys.basic.ip_proto != IPPROTO_TCP &&
+ fkeys.basic.ip_proto != IPPROTO_UDP))
+ return -EPROTONOSUPPORT;
+
+ return h->ae_algo->ops->add_arfs_entry(h, rxq_index, flow_id, &fkeys);
+}
+#endif
+
static const struct net_device_ops hns3_nic_netdev_ops = {
.ndo_open = hns3_nic_net_open,
.ndo_stop = hns3_nic_net_stop,
@@ -1737,6 +1784,10 @@ static const struct net_device_ops hns3_nic_netdev_ops = {
.ndo_vlan_rx_add_vid = hns3_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = hns3_vlan_rx_kill_vid,
.ndo_set_vf_vlan = hns3_ndo_set_vf_vlan,
+#ifdef CONFIG_RFS_ACCEL
+ .ndo_rx_flow_steer = hns3_rx_flow_steer,
+#endif
+
};
bool hns3_is_phys_func(struct pci_dev *pdev)
@@ -2828,6 +2879,7 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring,
return ret;
}
+ skb_record_rx_queue(skb, ring->tqp->tqp_index);
*out_skb = skb;
return 0;
@@ -3331,8 +3383,6 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) {
- irq_set_affinity_notifier(tqp_vector->vector_irq,
- NULL);
irq_set_affinity_hint(tqp_vector->vector_irq, NULL);
free_irq(tqp_vector->vector_irq, tqp_vector);
tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED;
@@ -3851,6 +3901,8 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
hns3_client_stop(handle);
+ hns3_uninit_phy(netdev);
+
if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
netdev_warn(netdev, "already uninitialized\n");
goto out_netdev_free;
@@ -3860,8 +3912,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
hns3_force_clear_all_rx_ring(handle);
- hns3_uninit_phy(netdev);
-
hns3_nic_uninit_vector_data(priv);
ret = hns3_nic_dealloc_vector_data(priv);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index c14480f9b625..408efd55ba48 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -145,7 +145,7 @@ enum hns3_nic_state {
#define HNS3_RXD_TSIND_M (0x7 << HNS3_RXD_TSIND_S)
#define HNS3_RXD_LKBK_B 15
#define HNS3_RXD_GRO_SIZE_S 16
-#define HNS3_RXD_GRO_SIZE_M (0x3ff << HNS3_RXD_GRO_SIZE_S)
+#define HNS3_RXD_GRO_SIZE_M (0x3fff << HNS3_RXD_GRO_SIZE_S)
#define HNS3_TXD_L3T_S 0
#define HNS3_TXD_L3T_M (0x3 << HNS3_TXD_L3T_S)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index fbd904e3077c..e5329053842b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -110,8 +110,7 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring)
hclge_write_dev(hw, HCLGE_NIC_CSQ_BASEADDR_H_REG,
upper_32_bits(dma));
hclge_write_dev(hw, HCLGE_NIC_CSQ_DEPTH_REG,
- (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
- HCLGE_NIC_CMQ_ENABLE);
+ ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S);
hclge_write_dev(hw, HCLGE_NIC_CSQ_HEAD_REG, 0);
hclge_write_dev(hw, HCLGE_NIC_CSQ_TAIL_REG, 0);
} else {
@@ -120,8 +119,7 @@ static void hclge_cmd_config_regs(struct hclge_cmq_ring *ring)
hclge_write_dev(hw, HCLGE_NIC_CRQ_BASEADDR_H_REG,
upper_32_bits(dma));
hclge_write_dev(hw, HCLGE_NIC_CRQ_DEPTH_REG,
- (ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S) |
- HCLGE_NIC_CMQ_ENABLE);
+ ring->desc_num >> HCLGE_NIC_CMQ_DESC_NUM_S);
hclge_write_dev(hw, HCLGE_NIC_CRQ_HEAD_REG, 0);
hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index d79a209b80f6..61cb10dc2b9b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -243,6 +243,9 @@ enum hclge_opcode_type {
/* NCL config command */
HCLGE_OPC_QUERY_NCL_CONFIG = 0x7011,
+ /* M7 stats command */
+ HCLGE_OPC_M7_STATS_BD = 0x7012,
+ HCLGE_OPC_M7_STATS_INFO = 0x7013,
/* SFP command */
HCLGE_OPC_GET_SFP_INFO = 0x7104,
@@ -970,6 +973,11 @@ struct hclge_fd_ad_config_cmd {
u8 rsv2[8];
};
+struct hclge_get_m7_bd_cmd {
+ __le32 bd_num;
+ u8 rsv[20];
+};
+
int hclge_cmd_init(struct hclge_dev *hdev);
static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
{
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index a9ffb57c4607..ed1f533d4082 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -921,6 +921,61 @@ static void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
hdev->rst_stats.reset_cnt);
}
+void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
+{
+ struct hclge_desc *desc_src, *desc_tmp;
+ struct hclge_get_m7_bd_cmd *req;
+ struct hclge_desc desc;
+ u32 bd_num, buf_len;
+ int ret, i;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_M7_STATS_BD, true);
+
+ req = (struct hclge_get_m7_bd_cmd *)desc.data;
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "get firmware statistics bd number failed, ret=%d\n",
+ ret);
+ return;
+ }
+
+ bd_num = le32_to_cpu(req->bd_num);
+
+ buf_len = sizeof(struct hclge_desc) * bd_num;
+ desc_src = kzalloc(buf_len, GFP_KERNEL);
+ if (!desc_src) {
+ dev_err(&hdev->pdev->dev,
+ "allocate desc for get_m7_stats failed\n");
+ return;
+ }
+
+ desc_tmp = desc_src;
+ ret = hclge_dbg_cmd_send(hdev, desc_tmp, 0, bd_num,
+ HCLGE_OPC_M7_STATS_INFO);
+ if (ret) {
+ kfree(desc_src);
+ dev_err(&hdev->pdev->dev,
+ "get firmware statistics failed, ret=%d\n", ret);
+ return;
+ }
+
+ for (i = 0; i < bd_num; i++) {
+ dev_info(&hdev->pdev->dev, "0x%08x 0x%08x 0x%08x\n",
+ le32_to_cpu(desc_tmp->data[0]),
+ le32_to_cpu(desc_tmp->data[1]),
+ le32_to_cpu(desc_tmp->data[2]));
+ dev_info(&hdev->pdev->dev, "0x%08x 0x%08x 0x%08x\n",
+ le32_to_cpu(desc_tmp->data[3]),
+ le32_to_cpu(desc_tmp->data[4]),
+ le32_to_cpu(desc_tmp->data[5]));
+
+ desc_tmp++;
+ }
+
+ kfree(desc_src);
+}
+
/* hclge_dbg_dump_ncl_config: print specified range of NCL_CONFIG file
* @hdev: pointer to struct hclge_dev
* @cmd_buf: string that contains offset and length
@@ -1029,6 +1084,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, char *cmd_buf)
hclge_dbg_dump_reg_cmd(hdev, cmd_buf);
} else if (strncmp(cmd_buf, "dump reset info", 15) == 0) {
hclge_dbg_dump_rst_info(hdev);
+ } else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) {
+ hclge_dbg_get_m7_stats_info(hdev);
} else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) {
hclge_dbg_dump_ncl_config(hdev,
&cmd_buf[sizeof("dump ncl_config")]);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d3b1f8cb1155..0545f3890d1c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -35,6 +35,8 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
u16 *allocated_size, bool is_alloc);
+static void hclge_rfs_filter_expire(struct hclge_dev *hdev);
+static void hclge_clear_arfs_rules(struct hnae3_handle *handle);
static struct hnae3_ae_algo ae_algo;
@@ -1226,8 +1228,10 @@ static int hclge_configure(struct hclge_dev *hdev)
hdev->tm_info.hw_pfc_map = 0;
hdev->wanted_umv_size = cfg.umv_space;
- if (hnae3_dev_fd_supported(hdev))
+ if (hnae3_dev_fd_supported(hdev)) {
hdev->fd_en = true;
+ hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+ }
ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
if (ret) {
@@ -2423,7 +2427,8 @@ static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
static void hclge_reset_task_schedule(struct hclge_dev *hdev)
{
- if (!test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
+ if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
+ !test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
schedule_work(&hdev->rst_service_task);
}
@@ -2508,6 +2513,9 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
static void hclge_update_port_capability(struct hclge_mac *mac)
{
+ /* update fec ability by speed */
+ hclge_convert_setting_fec(mac);
+
/* firmware can not identify back plane type, the media type
* read from configuration can help deal it
*/
@@ -2580,6 +2588,10 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac)
mac->speed_ability = le32_to_cpu(resp->speed_ability);
mac->autoneg = resp->autoneg;
mac->support_autoneg = resp->autoneg_ability;
+ if (!resp->active_fec)
+ mac->fec_mode = 0;
+ else
+ mac->fec_mode = BIT(resp->active_fec);
} else {
mac->speed_type = QUERY_SFP_SPEED;
}
@@ -2645,6 +2657,7 @@ static void hclge_service_timer(struct timer_list *t)
mod_timer(&hdev->service_timer, jiffies + HZ);
hdev->hw_stats.stats_timer++;
+ hdev->fd_arfs_expire_timer++;
hclge_task_schedule(hdev);
}
@@ -2861,6 +2874,10 @@ int hclge_notify_client(struct hclge_dev *hdev,
struct hnae3_client *client = hdev->nic_client;
u16 i;
+ if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) ||
+ !client)
+ return 0;
+
if (!client->ops->reset_notify)
return -EOPNOTSUPP;
@@ -2886,7 +2903,8 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev,
int ret = 0;
u16 i;
- if (!client)
+ if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) ||
+ !client)
return 0;
if (!client->ops->reset_notify)
@@ -3180,6 +3198,8 @@ static int hclge_reset_prepare_down(struct hclge_dev *hdev)
static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
{
+#define HCLGE_RESET_SYNC_TIME 100
+
u32 reg_val;
int ret = 0;
@@ -3188,7 +3208,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
/* There is no mechanism for PF to know if VF has stopped IO
* for now, just wait 100 ms for VF to stop IO
*/
- msleep(100);
+ msleep(HCLGE_RESET_SYNC_TIME);
ret = hclge_func_reset_cmd(hdev, 0);
if (ret) {
dev_err(&hdev->pdev->dev,
@@ -3208,7 +3228,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
/* There is no mechanism for PF to know if VF has stopped IO
* for now, just wait 100 ms for VF to stop IO
*/
- msleep(100);
+ msleep(HCLGE_RESET_SYNC_TIME);
set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
set_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
hdev->rst_stats.flr_rst_cnt++;
@@ -3222,6 +3242,10 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
break;
}
+ /* inform hardware that preparatory work is done */
+ msleep(HCLGE_RESET_SYNC_TIME);
+ hclge_write_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG,
+ HCLGE_NIC_CMQ_ENABLE);
dev_info(&hdev->pdev->dev, "prepare wait ok\n");
return ret;
@@ -3521,6 +3545,10 @@ static void hclge_service_task(struct work_struct *work)
hclge_update_port_info(hdev);
hclge_update_link_status(hdev);
hclge_update_vport_alive(hdev);
+ if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) {
+ hclge_rfs_filter_expire(hdev);
+ hdev->fd_arfs_expire_timer = 0;
+ }
hclge_service_complete(hdev);
}
@@ -4906,14 +4934,18 @@ static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location)
struct hclge_fd_rule *rule = NULL;
struct hlist_node *node2;
+ spin_lock_bh(&hdev->fd_rule_lock);
hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
if (rule->location >= location)
break;
}
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
return rule && rule->location == location;
}
+/* make sure being called after lock up with fd_rule_lock */
static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
struct hclge_fd_rule *new_rule,
u16 location,
@@ -4937,9 +4969,13 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
kfree(rule);
hdev->hclge_fd_rule_num--;
- if (!is_add)
- return 0;
+ if (!is_add) {
+ if (!hdev->hclge_fd_rule_num)
+ hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+ clear_bit(location, hdev->fd_bmap);
+ return 0;
+ }
} else if (!is_add) {
dev_err(&hdev->pdev->dev,
"delete fail, rule %d is inexistent\n",
@@ -4954,7 +4990,9 @@ static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
else
hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list);
+ set_bit(location, hdev->fd_bmap);
hdev->hclge_fd_rule_num++;
+ hdev->fd_active_type = new_rule->rule_type;
return 0;
}
@@ -5112,6 +5150,36 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
return 0;
}
+/* make sure being called after lock up with fd_rule_lock */
+static int hclge_fd_config_rule(struct hclge_dev *hdev,
+ struct hclge_fd_rule *rule)
+{
+ int ret;
+
+ if (!rule) {
+ dev_err(&hdev->pdev->dev,
+ "The flow director rule is NULL\n");
+ return -EINVAL;
+ }
+
+ /* it will never fail here, so needn't to check return value */
+ hclge_fd_update_rule_list(hdev, rule, rule->location, true);
+
+ ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+ if (ret)
+ goto clear_rule;
+
+ ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
+ if (ret)
+ goto clear_rule;
+
+ return 0;
+
+clear_rule:
+ hclge_fd_update_rule_list(hdev, rule, rule->location, false);
+ return ret;
+}
+
static int hclge_add_fd_entry(struct hnae3_handle *handle,
struct ethtool_rxnfc *cmd)
{
@@ -5174,8 +5242,10 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
return -ENOMEM;
ret = hclge_fd_get_tuple(hdev, fs, rule);
- if (ret)
- goto free_rule;
+ if (ret) {
+ kfree(rule);
+ return ret;
+ }
rule->flow_type = fs->flow_type;
@@ -5184,24 +5254,19 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
rule->vf_id = dst_vport_id;
rule->queue_id = q_index;
rule->action = action;
+ rule->rule_type = HCLGE_FD_EP_ACTIVE;
- ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
- if (ret)
- goto free_rule;
+ /* to avoid rule conflict, when user configure rule by ethtool,
+ * we need to clear all arfs rules
+ */
+ hclge_clear_arfs_rules(handle);
- ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
- if (ret)
- goto free_rule;
+ spin_lock_bh(&hdev->fd_rule_lock);
+ ret = hclge_fd_config_rule(hdev, rule);
- ret = hclge_fd_update_rule_list(hdev, rule, fs->location, true);
- if (ret)
- goto free_rule;
+ spin_unlock_bh(&hdev->fd_rule_lock);
return ret;
-
-free_rule:
- kfree(rule);
- return ret;
}
static int hclge_del_fd_entry(struct hnae3_handle *handle,
@@ -5232,8 +5297,12 @@ static int hclge_del_fd_entry(struct hnae3_handle *handle,
if (ret)
return ret;
- return hclge_fd_update_rule_list(hdev, NULL, fs->location,
- false);
+ spin_lock_bh(&hdev->fd_rule_lock);
+ ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false);
+
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+ return ret;
}
static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
@@ -5243,25 +5312,30 @@ static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
struct hclge_dev *hdev = vport->back;
struct hclge_fd_rule *rule;
struct hlist_node *node;
+ u16 location;
if (!hnae3_dev_fd_supported(hdev))
return;
+ spin_lock_bh(&hdev->fd_rule_lock);
+ for_each_set_bit(location, hdev->fd_bmap,
+ hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+ hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location,
+ NULL, false);
+
if (clear_list) {
hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list,
rule_node) {
- hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
- rule->location, NULL, false);
hlist_del(&rule->rule_node);
kfree(rule);
- hdev->hclge_fd_rule_num--;
}
- } else {
- hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list,
- rule_node)
- hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
- rule->location, NULL, false);
+ hdev->fd_active_type = HCLGE_FD_RULE_NONE;
+ hdev->hclge_fd_rule_num = 0;
+ bitmap_zero(hdev->fd_bmap,
+ hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]);
}
+
+ spin_unlock_bh(&hdev->fd_rule_lock);
}
static int hclge_restore_fd_entries(struct hnae3_handle *handle)
@@ -5283,6 +5357,7 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
if (!hdev->fd_en)
return 0;
+ spin_lock_bh(&hdev->fd_rule_lock);
hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
if (!ret)
@@ -5292,11 +5367,18 @@ static int hclge_restore_fd_entries(struct hnae3_handle *handle)
dev_warn(&hdev->pdev->dev,
"Restore rule %d failed, remove it\n",
rule->location);
+ clear_bit(rule->location, hdev->fd_bmap);
hlist_del(&rule->rule_node);
kfree(rule);
hdev->hclge_fd_rule_num--;
}
}
+
+ if (hdev->hclge_fd_rule_num)
+ hdev->fd_active_type = HCLGE_FD_EP_ACTIVE;
+
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
return 0;
}
@@ -5329,13 +5411,18 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
+ spin_lock_bh(&hdev->fd_rule_lock);
+
hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
if (rule->location >= fs->location)
break;
}
- if (!rule || fs->location != rule->location)
+ if (!rule || fs->location != rule->location) {
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
return -ENOENT;
+ }
fs->flow_type = rule->flow_type;
switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
@@ -5474,6 +5561,7 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
break;
default:
+ spin_unlock_bh(&hdev->fd_rule_lock);
return -EOPNOTSUPP;
}
@@ -5505,6 +5593,8 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
fs->ring_cookie |= vf_id;
}
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
return 0;
}
@@ -5522,20 +5612,208 @@ static int hclge_get_all_rules(struct hnae3_handle *handle,
cmd->data = hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1];
+ spin_lock_bh(&hdev->fd_rule_lock);
hlist_for_each_entry_safe(rule, node2,
&hdev->fd_rule_list, rule_node) {
- if (cnt == cmd->rule_cnt)
+ if (cnt == cmd->rule_cnt) {
+ spin_unlock_bh(&hdev->fd_rule_lock);
return -EMSGSIZE;
+ }
rule_locs[cnt] = rule->location;
cnt++;
}
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
cmd->rule_cnt = cnt;
return 0;
}
+static void hclge_fd_get_flow_tuples(const struct flow_keys *fkeys,
+ struct hclge_fd_rule_tuples *tuples)
+{
+ tuples->ether_proto = be16_to_cpu(fkeys->basic.n_proto);
+ tuples->ip_proto = fkeys->basic.ip_proto;
+ tuples->dst_port = be16_to_cpu(fkeys->ports.dst);
+
+ if (fkeys->basic.n_proto == htons(ETH_P_IP)) {
+ tuples->src_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.src);
+ tuples->dst_ip[3] = be32_to_cpu(fkeys->addrs.v4addrs.dst);
+ } else {
+ memcpy(tuples->src_ip,
+ fkeys->addrs.v6addrs.src.in6_u.u6_addr32,
+ sizeof(tuples->src_ip));
+ memcpy(tuples->dst_ip,
+ fkeys->addrs.v6addrs.dst.in6_u.u6_addr32,
+ sizeof(tuples->dst_ip));
+ }
+}
+
+/* traverse all rules, check whether an existed rule has the same tuples */
+static struct hclge_fd_rule *
+hclge_fd_search_flow_keys(struct hclge_dev *hdev,
+ const struct hclge_fd_rule_tuples *tuples)
+{
+ struct hclge_fd_rule *rule = NULL;
+ struct hlist_node *node;
+
+ hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+ if (!memcmp(tuples, &rule->tuples, sizeof(*tuples)))
+ return rule;
+ }
+
+ return NULL;
+}
+
+static void hclge_fd_build_arfs_rule(const struct hclge_fd_rule_tuples *tuples,
+ struct hclge_fd_rule *rule)
+{
+ rule->unused_tuple = BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+ BIT(INNER_VLAN_TAG_FST) | BIT(INNER_IP_TOS) |
+ BIT(INNER_SRC_PORT);
+ rule->action = 0;
+ rule->vf_id = 0;
+ rule->rule_type = HCLGE_FD_ARFS_ACTIVE;
+ if (tuples->ether_proto == ETH_P_IP) {
+ if (tuples->ip_proto == IPPROTO_TCP)
+ rule->flow_type = TCP_V4_FLOW;
+ else
+ rule->flow_type = UDP_V4_FLOW;
+ } else {
+ if (tuples->ip_proto == IPPROTO_TCP)
+ rule->flow_type = TCP_V6_FLOW;
+ else
+ rule->flow_type = UDP_V6_FLOW;
+ }
+ memcpy(&rule->tuples, tuples, sizeof(rule->tuples));
+ memset(&rule->tuples_mask, 0xFF, sizeof(rule->tuples_mask));
+}
+
+static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
+ u16 flow_id, struct flow_keys *fkeys)
+{
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_fd_rule_tuples new_tuples;
+ struct hclge_dev *hdev = vport->back;
+ struct hclge_fd_rule *rule;
+ u16 tmp_queue_id;
+ u16 bit_id;
+ int ret;
+
+ if (!hnae3_dev_fd_supported(hdev))
+ return -EOPNOTSUPP;
+
+ memset(&new_tuples, 0, sizeof(new_tuples));
+ hclge_fd_get_flow_tuples(fkeys, &new_tuples);
+
+ spin_lock_bh(&hdev->fd_rule_lock);
+
+ /* when there is already fd rule existed add by user,
+ * arfs should not work
+ */
+ if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) {
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+ return -EOPNOTSUPP;
+ }
+
+ /* check is there flow director filter existed for this flow,
+ * if not, create a new filter for it;
+ * if filter exist with different queue id, modify the filter;
+ * if filter exist with same queue id, do nothing
+ */
+ rule = hclge_fd_search_flow_keys(hdev, &new_tuples);
+ if (!rule) {
+ bit_id = find_first_zero_bit(hdev->fd_bmap, MAX_FD_FILTER_NUM);
+ if (bit_id >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) {
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+ return -ENOSPC;
+ }
+
+ rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+ if (!rule) {
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+ return -ENOMEM;
+ }
+
+ set_bit(bit_id, hdev->fd_bmap);
+ rule->location = bit_id;
+ rule->flow_id = flow_id;
+ rule->queue_id = queue_id;
+ hclge_fd_build_arfs_rule(&new_tuples, rule);
+ ret = hclge_fd_config_rule(hdev, rule);
+
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+ if (ret)
+ return ret;
+
+ return rule->location;
+ }
+
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+ if (rule->queue_id == queue_id)
+ return rule->location;
+
+ tmp_queue_id = rule->queue_id;
+ rule->queue_id = queue_id;
+ ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
+ if (ret) {
+ rule->queue_id = tmp_queue_id;
+ return ret;
+ }
+
+ return rule->location;
+}
+
+static void hclge_rfs_filter_expire(struct hclge_dev *hdev)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct hnae3_handle *handle = &hdev->vport[0].nic;
+ struct hclge_fd_rule *rule;
+ struct hlist_node *node;
+ HLIST_HEAD(del_list);
+
+ spin_lock_bh(&hdev->fd_rule_lock);
+ if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) {
+ spin_unlock_bh(&hdev->fd_rule_lock);
+ return;
+ }
+ hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+ if (rps_may_expire_flow(handle->netdev, rule->queue_id,
+ rule->flow_id, rule->location)) {
+ hlist_del_init(&rule->rule_node);
+ hlist_add_head(&rule->rule_node, &del_list);
+ hdev->hclge_fd_rule_num--;
+ clear_bit(rule->location, hdev->fd_bmap);
+ }
+ }
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+ hlist_for_each_entry_safe(rule, node, &del_list, rule_node) {
+ hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
+ rule->location, NULL, false);
+ kfree(rule);
+ }
+#endif
+}
+
+static void hclge_clear_arfs_rules(struct hnae3_handle *handle)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct hclge_vport *vport = hclge_get_vport(handle);
+ struct hclge_dev *hdev = vport->back;
+
+ if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE)
+ hclge_del_all_fd_entries(handle, true);
+#endif
+}
+
static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle)
{
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -5565,10 +5843,12 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
{
struct hclge_vport *vport = hclge_get_vport(handle);
struct hclge_dev *hdev = vport->back;
+ bool clear;
hdev->fd_en = enable;
+ clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE ? true : false;
if (!enable)
- hclge_del_all_fd_entries(handle, false);
+ hclge_del_all_fd_entries(handle, clear);
else
hclge_restore_fd_entries(handle);
}
@@ -5838,6 +6118,8 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
set_bit(HCLGE_STATE_DOWN, &hdev->state);
+ hclge_clear_arfs_rules(handle);
+
/* If it is not PF reset, the firmware will disable the MAC,
* so it only need to stop phy here.
*/
@@ -7894,6 +8176,52 @@ static void hclge_info_show(struct hclge_dev *hdev)
dev_info(dev, "PF info end.\n");
}
+static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
+ struct hclge_vport *vport)
+{
+ struct hnae3_client *client = vport->nic.client;
+ struct hclge_dev *hdev = ae_dev->priv;
+ int ret;
+
+ ret = client->ops->init_instance(&vport->nic);
+ if (ret)
+ return ret;
+
+ set_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
+ hnae3_set_client_init_flag(client, ae_dev, 1);
+
+ if (netif_msg_drv(&hdev->vport->nic))
+ hclge_info_show(hdev);
+
+ return 0;
+}
+
+static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev,
+ struct hclge_vport *vport)
+{
+ struct hnae3_client *client = vport->roce.client;
+ struct hclge_dev *hdev = ae_dev->priv;
+ int ret;
+
+ if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client ||
+ !hdev->nic_client)
+ return 0;
+
+ client = hdev->roce_client;
+ ret = hclge_init_roce_base_info(vport);
+ if (ret)
+ return ret;
+
+ ret = client->ops->init_instance(&vport->roce);
+ if (ret)
+ return ret;
+
+ set_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
+ hnae3_set_client_init_flag(client, ae_dev, 1);
+
+ return 0;
+}
+
static int hclge_init_client_instance(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev)
{
@@ -7909,30 +8237,13 @@ static int hclge_init_client_instance(struct hnae3_client *client,
hdev->nic_client = client;
vport->nic.client = client;
- ret = client->ops->init_instance(&vport->nic);
+ ret = hclge_init_nic_client_instance(ae_dev, vport);
if (ret)
goto clear_nic;
- hnae3_set_client_init_flag(client, ae_dev, 1);
-
- if (netif_msg_drv(&hdev->vport->nic))
- hclge_info_show(hdev);
-
- if (hdev->roce_client &&
- hnae3_dev_roce_supported(hdev)) {
- struct hnae3_client *rc = hdev->roce_client;
-
- ret = hclge_init_roce_base_info(vport);
- if (ret)
- goto clear_roce;
-
- ret = rc->ops->init_instance(&vport->roce);
- if (ret)
- goto clear_roce;
-
- hnae3_set_client_init_flag(hdev->roce_client,
- ae_dev, 1);
- }
+ ret = hclge_init_roce_client_instance(ae_dev, vport);
+ if (ret)
+ goto clear_roce;
break;
case HNAE3_CLIENT_UNIC:
@@ -7952,17 +8263,9 @@ static int hclge_init_client_instance(struct hnae3_client *client,
vport->roce.client = client;
}
- if (hdev->roce_client && hdev->nic_client) {
- ret = hclge_init_roce_base_info(vport);
- if (ret)
- goto clear_roce;
-
- ret = client->ops->init_instance(&vport->roce);
- if (ret)
- goto clear_roce;
-
- hnae3_set_client_init_flag(client, ae_dev, 1);
- }
+ ret = hclge_init_roce_client_instance(ae_dev, vport);
+ if (ret)
+ goto clear_roce;
break;
default:
@@ -7992,6 +8295,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client,
for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
vport = &hdev->vport[i];
if (hdev->roce_client) {
+ clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
hdev->roce_client->ops->uninit_instance(&vport->roce,
0);
hdev->roce_client = NULL;
@@ -8000,6 +8304,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client,
if (client->type == HNAE3_CLIENT_ROCE)
return;
if (hdev->nic_client && client->ops->uninit_instance) {
+ clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
client->ops->uninit_instance(&vport->nic, 0);
hdev->nic_client = NULL;
vport->nic.client = NULL;
@@ -8081,6 +8386,7 @@ static void hclge_state_init(struct hclge_dev *hdev)
static void hclge_state_uninit(struct hclge_dev *hdev)
{
set_bit(HCLGE_STATE_DOWN, &hdev->state);
+ set_bit(HCLGE_STATE_REMOVING, &hdev->state);
if (hdev->service_timer.function)
del_timer_sync(&hdev->service_timer);
@@ -8143,6 +8449,7 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
mutex_init(&hdev->vport_lock);
mutex_init(&hdev->vport_cfg_mutex);
+ spin_lock_init(&hdev->fd_rule_lock);
ret = hclge_pci_init(hdev);
if (ret) {
@@ -8908,6 +9215,7 @@ static const struct hnae3_ae_ops hclge_ops = {
.get_fd_all_rules = hclge_get_all_rules,
.restore_fd_rules = hclge_restore_fd_entries,
.enable_fd = hclge_enable_fd,
+ .add_arfs_entry = hclge_add_fd_entry_by_arfs,
.dbg_run_cmd = hclge_dbg_run_cmd,
.handle_hw_ras_error = hclge_handle_hw_ras_error,
.get_hw_reset_stat = hclge_get_hw_reset_stat,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index dd06b11187b0..2b3bc95ccbdf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -201,6 +201,8 @@ enum HCLGE_DEV_STATE {
HCLGE_STATE_DOWN,
HCLGE_STATE_DISABLED,
HCLGE_STATE_REMOVING,
+ HCLGE_STATE_NIC_REGISTERED,
+ HCLGE_STATE_ROCE_REGISTERED,
HCLGE_STATE_SERVICE_INITED,
HCLGE_STATE_SERVICE_SCHED,
HCLGE_STATE_RST_SERVICE_SCHED,
@@ -578,6 +580,16 @@ static const struct key_info tuple_key_info[] = {
#define MAX_KEY_BYTES (MAX_KEY_DWORDS * 4)
#define MAX_META_DATA_LENGTH 32
+/* assigned by firmware, the real filter number for each pf may be less */
+#define MAX_FD_FILTER_NUM 4096
+#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL 5
+
+enum HCLGE_FD_ACTIVE_RULE_TYPE {
+ HCLGE_FD_RULE_NONE,
+ HCLGE_FD_ARFS_ACTIVE,
+ HCLGE_FD_EP_ACTIVE,
+};
+
enum HCLGE_FD_PACKET_TYPE {
NIC_PACKET,
ROCE_PACKET,
@@ -630,6 +642,8 @@ struct hclge_fd_rule {
u16 vf_id;
u16 queue_id;
u16 location;
+ u16 flow_id; /* only used for arfs */
+ enum HCLGE_FD_ACTIVE_RULE_TYPE rule_type;
};
struct hclge_fd_ad_data {
@@ -809,7 +823,11 @@ struct hclge_dev {
struct hclge_fd_cfg fd_cfg;
struct hlist_head fd_rule_list;
+ spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */
u16 hclge_fd_rule_num;
+ u16 fd_arfs_expire_timer;
+ unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)];
+ enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type;
u8 fd_en;
u16 wanted_umv_size;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 0e04e63f2a94..d20f01720719 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -192,12 +192,10 @@ static int hclge_map_unmap_ring_to_vf_vector(struct hclge_vport *vport, bool en,
return ret;
ret = hclge_bind_ring_with_vector(vport, vector_id, en, &ring_chain);
- if (ret)
- return ret;
hclge_free_vector_ring_chain(&ring_chain);
- return 0;
+ return ret;
}
static int hclge_set_vf_promisc_mode(struct hclge_vport *vport,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index 71f356fc2446..e1588c0e8bb9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -98,7 +98,6 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_BASEADDR_H_REG, reg_val);
reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
- reg_val |= HCLGEVF_NIC_CMQ_ENABLE;
hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_DEPTH_REG, reg_val);
hclgevf_write_dev(hw, HCLGEVF_NIC_CSQ_HEAD_REG, 0);
@@ -110,7 +109,6 @@ static void hclgevf_cmd_config_regs(struct hclgevf_cmq_ring *ring)
hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_BASEADDR_H_REG, reg_val);
reg_val = (ring->desc_num >> HCLGEVF_NIC_CMQ_DESC_NUM_S);
- reg_val |= HCLGEVF_NIC_CMQ_ENABLE;
hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_DEPTH_REG, reg_val);
hclgevf_write_dev(hw, HCLGEVF_NIC_CRQ_HEAD_REG, 0);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 5d53467ee2d2..87a619db2780 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1306,6 +1306,10 @@ static int hclgevf_notify_client(struct hclgevf_dev *hdev,
struct hnae3_handle *handle = &hdev->nic;
int ret;
+ if (!test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state) ||
+ !client)
+ return 0;
+
if (!client->ops->reset_notify)
return -EOPNOTSUPP;
@@ -1410,6 +1414,8 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
{
+#define HCLGEVF_RESET_SYNC_TIME 100
+
int ret = 0;
switch (hdev->reset_type) {
@@ -1427,7 +1433,10 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
}
set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
-
+ /* inform hardware that preparatory work is done */
+ msleep(HCLGEVF_RESET_SYNC_TIME);
+ hclgevf_write_dev(&hdev->hw, HCLGEVF_NIC_CSQ_DEPTH_REG,
+ HCLGEVF_NIC_CMQ_ENABLE);
dev_info(&hdev->pdev->dev, "prepare reset(%d) wait done, ret:%d\n",
hdev->reset_type, ret);
@@ -1612,7 +1621,8 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
{
- if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state)) {
+ if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
+ !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) {
set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
schedule_work(&hdev->rst_service_task);
}
@@ -2123,6 +2133,7 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
{
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+ set_bit(HCLGEVF_STATE_REMOVING, &hdev->state);
if (hdev->keep_alive_timer.function)
del_timer_sync(&hdev->keep_alive_timer);
@@ -2249,6 +2260,48 @@ static void hclgevf_info_show(struct hclgevf_dev *hdev)
dev_info(dev, "VF info end.\n");
}
+static int hclgevf_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
+ struct hnae3_client *client)
+{
+ struct hclgevf_dev *hdev = ae_dev->priv;
+ int ret;
+
+ ret = client->ops->init_instance(&hdev->nic);
+ if (ret)
+ return ret;
+
+ set_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+ hnae3_set_client_init_flag(client, ae_dev, 1);
+
+ if (netif_msg_drv(&hdev->nic))
+ hclgevf_info_show(hdev);
+
+ return 0;
+}
+
+static int hclgevf_init_roce_client_instance(struct hnae3_ae_dev *ae_dev,
+ struct hnae3_client *client)
+{
+ struct hclgevf_dev *hdev = ae_dev->priv;
+ int ret;
+
+ if (!hnae3_dev_roce_supported(hdev) || !hdev->roce_client ||
+ !hdev->nic_client)
+ return 0;
+
+ ret = hclgevf_init_roce_base_info(hdev);
+ if (ret)
+ return ret;
+
+ ret = client->ops->init_instance(&hdev->roce);
+ if (ret)
+ return ret;
+
+ hnae3_set_client_init_flag(client, ae_dev, 1);
+
+ return 0;
+}
+
static int hclgevf_init_client_instance(struct hnae3_client *client,
struct hnae3_ae_dev *ae_dev)
{
@@ -2260,28 +2313,15 @@ static int hclgevf_init_client_instance(struct hnae3_client *client,
hdev->nic_client = client;
hdev->nic.client = client;
- ret = client->ops->init_instance(&hdev->nic);
+ ret = hclgevf_init_nic_client_instance(ae_dev, client);
if (ret)
goto clear_nic;
- hnae3_set_client_init_flag(client, ae_dev, 1);
-
- if (netif_msg_drv(&hdev->nic))
- hclgevf_info_show(hdev);
-
- if (hdev->roce_client && hnae3_dev_roce_supported(hdev)) {
- struct hnae3_client *rc = hdev->roce_client;
-
- ret = hclgevf_init_roce_base_info(hdev);
- if (ret)
- goto clear_roce;
- ret = rc->ops->init_instance(&hdev->roce);
- if (ret)
- goto clear_roce;
+ ret = hclgevf_init_roce_client_instance(ae_dev,
+ hdev->roce_client);
+ if (ret)
+ goto clear_roce;
- hnae3_set_client_init_flag(hdev->roce_client, ae_dev,
- 1);
- }
break;
case HNAE3_CLIENT_UNIC:
hdev->nic_client = client;
@@ -2299,17 +2339,10 @@ static int hclgevf_init_client_instance(struct hnae3_client *client,
hdev->roce.client = client;
}
- if (hdev->roce_client && hdev->nic_client) {
- ret = hclgevf_init_roce_base_info(hdev);
- if (ret)
- goto clear_roce;
-
- ret = client->ops->init_instance(&hdev->roce);
- if (ret)
- goto clear_roce;
- }
+ ret = hclgevf_init_roce_client_instance(ae_dev, client);
+ if (ret)
+ goto clear_roce;
- hnae3_set_client_init_flag(client, ae_dev, 1);
break;
default:
return -EINVAL;
@@ -2342,6 +2375,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client,
/* un-init nic/unic, if this was not called by roce client */
if (client->ops->uninit_instance && hdev->nic_client &&
client->type != HNAE3_CLIENT_ROCE) {
+ clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+
client->ops->uninit_instance(&hdev->nic, 0);
hdev->nic_client = NULL;
hdev->nic.client = NULL;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index cc52f54f8c08..a7fbd38c1492 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -130,6 +130,8 @@ enum hclgevf_states {
HCLGEVF_STATE_DOWN,
HCLGEVF_STATE_DISABLED,
HCLGEVF_STATE_IRQ_INITED,
+ HCLGEVF_STATE_REMOVING,
+ HCLGEVF_STATE_NIC_REGISTERED,
/* task states */
HCLGEVF_STATE_SERVICE_SCHED,
HCLGEVF_STATE_RST_SERVICE_SCHED,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index e64bc664f687..cfd3f4232cac 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -724,7 +724,6 @@ static void set_rx_mode(struct work_struct *work)
{
struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work);
struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work);
- struct netdev_hw_addr *ha;
netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n");
@@ -732,9 +731,6 @@ static void set_rx_mode(struct work_struct *work)
__dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
__dev_mc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
-
- netdev_for_each_mc_addr(ha, nic_dev->netdev)
- add_mac_addr(nic_dev->netdev, ha->addr);
}
static void hinic_set_rx_mode(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 0e09bede42a2..b081a1ef6859 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4208,7 +4208,7 @@ void e1000e_up(struct e1000_adapter *adapter)
e1000_configure_msix(adapter);
e1000_irq_enable(adapter);
- netif_start_queue(adapter->netdev);
+ /* Tx queue started by watchdog timer when link is up */
e1000e_trigger_lsc(adapter);
}
@@ -4606,6 +4606,7 @@ int e1000e_open(struct net_device *netdev)
pm_runtime_get_sync(&pdev->dev);
netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
/* allocate transmit descriptors */
err = e1000e_setup_tx_resources(adapter->tx_ring);
@@ -4666,7 +4667,6 @@ int e1000e_open(struct net_device *netdev)
e1000_irq_enable(adapter);
adapter->tx_hang_recheck = false;
- netif_start_queue(netdev);
hw->mac.get_link_status = true;
pm_runtime_put(&pdev->dev);
@@ -5288,6 +5288,7 @@ static void e1000_watchdog_task(struct work_struct *work)
if (phy->ops.cfg_on_link_up)
phy->ops.cfg_on_link_up(hw);
+ netif_wake_queue(netdev);
netif_carrier_on(netdev);
if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -5301,6 +5302,7 @@ static void e1000_watchdog_task(struct work_struct *work)
/* Link status message must follow this format */
pr_info("%s NIC Link is Down\n", adapter->netdev->name);
netif_carrier_off(netdev);
+ netif_stop_queue(netdev);
if (!test_bit(__E1000_DOWN, &adapter->state))
mod_timer(&adapter->phy_info_timer,
round_jiffies(jiffies + 2 * HZ));
@@ -5308,13 +5310,8 @@ static void e1000_watchdog_task(struct work_struct *work)
/* 8000ES2LAN requires a Rx packet buffer work-around
* on link down event; reset the controller to flush
* the Rx packet buffer.
- *
- * If the link is lost the controller stops DMA, but
- * if there is queued Tx work it cannot be done. So
- * reset the controller to flush the Tx packet buffers.
*/
- if ((adapter->flags & FLAG_RX_NEEDS_RESTART) ||
- e1000_desc_unused(tx_ring) + 1 < tx_ring->count)
+ if (adapter->flags & FLAG_RX_NEEDS_RESTART)
adapter->flags |= FLAG_RESTART_NOW;
else
pm_schedule_suspend(netdev->dev.parent,
@@ -5337,6 +5334,14 @@ link_up:
adapter->gotc_old = adapter->stats.gotc;
spin_unlock(&adapter->stats64_lock);
+ /* If the link is lost the controller stops DMA, but
+ * if there is queued Tx work it cannot be done. So
+ * reset the controller to flush the Tx packet buffers.
+ */
+ if (!netif_carrier_ok(netdev) &&
+ (e1000_desc_unused(tx_ring) + 1 < tx_ring->count))
+ adapter->flags |= FLAG_RESTART_NOW;
+
/* If reset is necessary, do it outside of interrupt context. */
if (adapter->flags & FLAG_RESTART_NOW) {
schedule_work(&adapter->reset_task);
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 792e6e42030e..9ee6b55553c0 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -44,15 +44,22 @@
extern const char ice_drv_ver[];
#define ICE_BAR0 0
#define ICE_REQ_DESC_MULTIPLE 32
-#define ICE_MIN_NUM_DESC ICE_REQ_DESC_MULTIPLE
+#define ICE_MIN_NUM_DESC 64
#define ICE_MAX_NUM_DESC 8160
-/* set default number of Rx/Tx descriptors to the minimum between
- * ICE_MAX_NUM_DESC and the number of descriptors to fill up an entire page
+#define ICE_DFLT_MIN_RX_DESC 512
+/* if the default number of Rx descriptors between ICE_MAX_NUM_DESC and the
+ * number of descriptors to fill up an entire page is greater than or equal to
+ * ICE_DFLT_MIN_RX_DESC set it based on page size, otherwise set it to
+ * ICE_DFLT_MIN_RX_DESC
+ */
+#define ICE_DFLT_NUM_RX_DESC \
+ min_t(u16, ICE_MAX_NUM_DESC, \
+ max_t(u16, ALIGN(PAGE_SIZE / sizeof(union ice_32byte_rx_desc), \
+ ICE_REQ_DESC_MULTIPLE), \
+ ICE_DFLT_MIN_RX_DESC))
+/* set default number of Tx descriptors to the minimum between ICE_MAX_NUM_DESC
+ * and the number of descriptors to fill up an entire page
*/
-#define ICE_DFLT_NUM_RX_DESC min_t(u16, ICE_MAX_NUM_DESC, \
- ALIGN(PAGE_SIZE / \
- sizeof(union ice_32byte_rx_desc), \
- ICE_REQ_DESC_MULTIPLE))
#define ICE_DFLT_NUM_TX_DESC min_t(u16, ICE_MAX_NUM_DESC, \
ALIGN(PAGE_SIZE / \
sizeof(struct ice_tx_desc), \
@@ -160,7 +167,7 @@ struct ice_tc_cfg {
struct ice_res_tracker {
u16 num_entries;
- u16 search_hint;
+ u16 end;
u16 list[1];
};
@@ -182,6 +189,7 @@ struct ice_sw {
};
enum ice_state {
+ __ICE_TESTING,
__ICE_DOWN,
__ICE_NEEDS_RESTART,
__ICE_PREPARED_FOR_RESET, /* set by driver when prepared */
@@ -244,8 +252,7 @@ struct ice_vsi {
u32 rx_buf_failed;
u32 rx_page_failed;
int num_q_vectors;
- int sw_base_vector; /* Irq base for OS reserved vectors */
- int hw_base_vector; /* HW (absolute) index of a vector */
+ int base_vector; /* IRQ base for OS reserved vectors */
enum ice_vsi_type type;
u16 vsi_num; /* HW (absolute) index of this VSI */
u16 idx; /* software index in pf->vsi[] */
@@ -277,10 +284,10 @@ struct ice_vsi {
struct list_head tmp_sync_list; /* MAC filters to be synced */
struct list_head tmp_unsync_list; /* MAC filters to be unsynced */
- u8 irqs_ready;
- u8 current_isup; /* Sync 'link up' logging */
- u8 stat_offsets_loaded;
- u8 vlan_ena;
+ u8 irqs_ready:1;
+ u8 current_isup:1; /* Sync 'link up' logging */
+ u8 stat_offsets_loaded:1;
+ u8 vlan_ena:1;
/* queue information */
u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
@@ -330,7 +337,7 @@ enum ice_pf_flags {
ICE_FLAG_DCB_CAPABLE,
ICE_FLAG_DCB_ENA,
ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
- ICE_FLAG_DISABLE_FW_LLDP,
+ ICE_FLAG_ENABLE_FW_LLDP,
ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */
ICE_PF_FLAGS_NBITS /* must be last */
};
@@ -340,10 +347,12 @@ struct ice_pf {
/* OS reserved IRQ details */
struct msix_entry *msix_entries;
- struct ice_res_tracker *sw_irq_tracker;
-
- /* HW reserved Interrupts for this PF */
- struct ice_res_tracker *hw_irq_tracker;
+ struct ice_res_tracker *irq_tracker;
+ /* First MSIX vector used by SR-IOV VFs. Calculated by subtracting the
+ * number of MSIX vectors needed for all SR-IOV VFs from the number of
+ * MSIX vectors allowed on this PF.
+ */
+ u16 sriov_base_vector;
struct ice_vsi **vsi; /* VSIs created by the driver */
struct ice_sw *first_sw; /* first switch created by firmware */
@@ -365,10 +374,8 @@ struct ice_pf {
struct mutex sw_mutex; /* lock for protecting VSI alloc flow */
u32 msg_enable;
u32 hw_csum_rx_error;
- u32 sw_oicr_idx; /* Other interrupt cause SW vector index */
+ u32 oicr_idx; /* Other interrupt cause MSIX vector index */
u32 num_avail_sw_msix; /* remaining MSIX SW vectors left unclaimed */
- u32 hw_oicr_idx; /* Other interrupt cause vector HW index */
- u32 num_avail_hw_msix; /* remaining HW MSIX vectors left unclaimed */
u32 num_lan_msix; /* Total MSIX vectors for base driver */
u16 num_lan_tx; /* num LAN Tx queues setup */
u16 num_lan_rx; /* num LAN Rx queues setup */
@@ -384,7 +391,7 @@ struct ice_pf {
struct ice_hw_port_stats stats;
struct ice_hw_port_stats stats_prev;
struct ice_hw hw;
- u8 stat_prev_loaded; /* has previous stats been loaded */
+ u8 stat_prev_loaded:1; /* has previous stats been loaded */
#ifdef CONFIG_DCB
u16 dcbx_cap;
#endif /* CONFIG_DCB */
@@ -392,6 +399,7 @@ struct ice_pf {
unsigned long tx_timeout_last_recovery;
u32 tx_timeout_recovery_level;
char int_name[ICE_INT_NAME_STR_LEN];
+ u32 sw_int_count;
};
struct ice_netdev_priv {
@@ -409,7 +417,7 @@ ice_irq_dynamic_ena(struct ice_hw *hw, struct ice_vsi *vsi,
struct ice_q_vector *q_vector)
{
u32 vector = (vsi && q_vector) ? q_vector->reg_idx :
- ((struct ice_pf *)hw->back)->hw_oicr_idx;
+ ((struct ice_pf *)hw->back)->oicr_idx;
int itr = ICE_ITR_NONE;
u32 val;
@@ -444,17 +452,22 @@ ice_find_vsi_by_type(struct ice_pf *pf, enum ice_vsi_type type)
return NULL;
}
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
void ice_set_ethtool_ops(struct net_device *netdev);
int ice_up(struct ice_vsi *vsi);
int ice_down(struct ice_vsi *vsi);
+int ice_vsi_cfg(struct ice_vsi *vsi);
+struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi);
int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
-void ice_napi_del(struct ice_vsi *vsi);
#ifdef CONFIG_DCB
int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked);
void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked);
#endif /* CONFIG_DCB */
+int ice_open(struct net_device *netdev);
+int ice_stop(struct net_device *netdev);
#endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 6ef083002f5b..765e3c2ed045 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -35,8 +35,8 @@ struct ice_aqc_get_ver {
/* Queue Shutdown (direct 0x0003) */
struct ice_aqc_q_shutdown {
-#define ICE_AQC_DRIVER_UNLOADING BIT(0)
__le32 driver_unloading;
+#define ICE_AQC_DRIVER_UNLOADING BIT(0)
u8 reserved[12];
};
@@ -120,11 +120,9 @@ struct ice_aqc_manage_mac_read {
#define ICE_AQC_MAN_MAC_WOL_ADDR_VALID BIT(7)
#define ICE_AQC_MAN_MAC_READ_S 4
#define ICE_AQC_MAN_MAC_READ_M (0xF << ICE_AQC_MAN_MAC_READ_S)
- u8 lport_num;
- u8 lport_num_valid;
-#define ICE_AQC_MAN_MAC_PORT_NUM_IS_VALID BIT(0)
+ u8 rsvd[2];
u8 num_addr; /* Used in response */
- u8 reserved[3];
+ u8 rsvd1[3];
__le32 addr_high;
__le32 addr_low;
};
@@ -140,7 +138,7 @@ struct ice_aqc_manage_mac_read_resp {
/* Manage MAC address, write command - direct (0x0108) */
struct ice_aqc_manage_mac_write {
- u8 port_num;
+ u8 rsvd;
u8 flags;
#define ICE_AQC_MAN_MAC_WR_MC_MAG_EN BIT(0)
#define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP BIT(1)
@@ -920,6 +918,8 @@ struct ice_aqc_get_phy_caps_data {
#define ICE_AQC_PHY_EN_LINK BIT(3)
#define ICE_AQC_PHY_AN_MODE BIT(4)
#define ICE_AQC_GET_PHY_EN_MOD_QUAL BIT(5)
+#define ICE_AQC_PHY_EN_AUTO_FEC BIT(7)
+#define ICE_AQC_PHY_CAPS_MASK ICE_M(0xff, 0)
u8 low_power_ctrl;
#define ICE_AQC_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0)
__le16 eee_cap;
@@ -932,6 +932,7 @@ struct ice_aqc_get_phy_caps_data {
#define ICE_AQC_PHY_EEE_EN_40GBASE_KR4 BIT(6)
__le16 eeer_value;
u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */
+ u8 phy_fw_ver[8];
u8 link_fec_options;
#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN BIT(0)
#define ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1)
@@ -940,6 +941,8 @@ struct ice_aqc_get_phy_caps_data {
#define ICE_AQC_PHY_FEC_25G_RS_544_REQ BIT(4)
#define ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6)
#define ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7)
+#define ICE_AQC_PHY_FEC_MASK ICE_M(0xdf, 0)
+ u8 rsvd1; /* Byte 35 reserved */
u8 extended_compliance_code;
#define ICE_MODULE_TYPE_TOTAL_BYTE 3
u8 module_type[ICE_MODULE_TYPE_TOTAL_BYTE];
@@ -954,13 +957,14 @@ struct ice_aqc_get_phy_caps_data {
#define ICE_AQC_MOD_TYPE_BYTE2_SFP_PLUS 0xA0
#define ICE_AQC_MOD_TYPE_BYTE2_QSFP_PLUS 0x86
u8 qualified_module_count;
+ u8 rsvd2[7]; /* Bytes 47:41 reserved */
#define ICE_AQC_QUAL_MOD_COUNT_MAX 16
struct {
u8 v_oui[3];
- u8 rsvd1;
+ u8 rsvd3;
u8 v_part[16];
__le32 v_rev;
- __le64 rsvd8;
+ __le64 rsvd4;
} qual_modules[ICE_AQC_QUAL_MOD_COUNT_MAX];
};
@@ -1062,6 +1066,7 @@ struct ice_aqc_get_link_status_data {
#define ICE_AQ_LINK_25G_KR_FEC_EN BIT(0)
#define ICE_AQ_LINK_25G_RS_528_FEC_EN BIT(1)
#define ICE_AQ_LINK_25G_RS_544_FEC_EN BIT(2)
+#define ICE_AQ_FEC_MASK ICE_M(0x7, 0)
/* Pacing Config */
#define ICE_AQ_CFG_PACING_S 3
#define ICE_AQ_CFG_PACING_M (0xF << ICE_AQ_CFG_PACING_S)
@@ -1112,6 +1117,14 @@ struct ice_aqc_set_event_mask {
u8 reserved1[6];
};
+/* Set MAC Loopback command (direct 0x0620) */
+struct ice_aqc_set_mac_lb {
+ u8 lb_mode;
+#define ICE_AQ_MAC_LB_EN BIT(0)
+#define ICE_AQ_MAC_LB_OSC_CLK BIT(1)
+ u8 reserved[15];
+};
+
/* Set Port Identification LED (direct, 0x06E9) */
struct ice_aqc_set_port_id_led {
u8 lport_num;
@@ -1145,6 +1158,17 @@ struct ice_aqc_nvm {
__le32 addr_low;
};
+/* NVM Checksum Command (direct, 0x0706) */
+struct ice_aqc_nvm_checksum {
+ u8 flags;
+#define ICE_AQC_NVM_CHECKSUM_VERIFY BIT(0)
+#define ICE_AQC_NVM_CHECKSUM_RECALC BIT(1)
+ u8 rsvd;
+ __le16 checksum; /* Used only by response */
+#define ICE_AQC_NVM_CHECKSUM_CORRECT 0xBABA
+ u8 rsvd2[12];
+};
+
/**
* Send to PF command (indirect 0x0801) ID is only used by PF
*
@@ -1249,7 +1273,7 @@ struct ice_aqc_get_cee_dcb_cfg_resp {
};
/* Set Local LLDP MIB (indirect 0x0A08)
- * Used to replace the local MIB of a given LLDP agent. e.g. DCBx
+ * Used to replace the local MIB of a given LLDP agent. e.g. DCBX
*/
struct ice_aqc_lldp_set_local_mib {
u8 type;
@@ -1266,7 +1290,7 @@ struct ice_aqc_lldp_set_local_mib {
};
/* Stop/Start LLDP Agent (direct 0x0A09)
- * Used for stopping/starting specific LLDP agent. e.g. DCBx.
+ * Used for stopping/starting specific LLDP agent. e.g. DCBX.
* The same structure is used for the response, with the command field
* being used as the status field.
*/
@@ -1539,6 +1563,7 @@ struct ice_aq_desc {
struct ice_aqc_query_txsched_res query_sched_res;
struct ice_aqc_query_port_ets port_ets;
struct ice_aqc_nvm nvm;
+ struct ice_aqc_nvm_checksum nvm_checksum;
struct ice_aqc_pf_vf_msg virt;
struct ice_aqc_lldp_get_mib lldp_get_mib;
struct ice_aqc_lldp_set_mib_change lldp_set_event;
@@ -1554,6 +1579,7 @@ struct ice_aq_desc {
struct ice_aqc_add_update_free_vsi_resp add_update_free_vsi_res;
struct ice_aqc_fw_logging fw_logging;
struct ice_aqc_get_clear_fw_log get_clear_fw_log;
+ struct ice_aqc_set_mac_lb set_mac_lb;
struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
struct ice_aqc_set_event_mask set_event_mask;
struct ice_aqc_get_link_status get_link_status;
@@ -1642,10 +1668,12 @@ enum ice_adminq_opc {
ice_aqc_opc_restart_an = 0x0605,
ice_aqc_opc_get_link_status = 0x0607,
ice_aqc_opc_set_event_mask = 0x0613,
+ ice_aqc_opc_set_mac_lb = 0x0620,
ice_aqc_opc_set_port_id_led = 0x06E9,
/* NVM commands */
ice_aqc_opc_nvm_read = 0x0701,
+ ice_aqc_opc_nvm_checksum = 0x0706,
/* PF/VF mailbox commands */
ice_mbx_opc_send_msg_to_pf = 0x0801,
@@ -1671,6 +1699,7 @@ enum ice_adminq_opc {
/* debug commands */
ice_aqc_opc_fw_logging = 0xFF09,
+ ice_aqc_opc_fw_logging_info = 0xFF10,
};
#endif /* _ICE_ADMINQ_CMD_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index da7878529929..2e0731c1e1a3 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -51,9 +51,6 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw)
*/
void ice_dev_onetime_setup(struct ice_hw *hw)
{
- /* configure Rx - set non pxe mode */
- wr32(hw, GLLAN_RCTL_0, 0x1);
-
#define MBX_PF_VT_PFALLOC 0x00231E80
/* set VFs per PF */
wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF));
@@ -307,6 +304,8 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
hw_link_info->an_info = link_data.an_info;
hw_link_info->ext_info = link_data.ext_info;
hw_link_info->max_frame_size = le16_to_cpu(link_data.max_frame_size);
+ hw_link_info->fec_info = link_data.cfg & ICE_AQ_FEC_MASK;
+ hw_link_info->topo_media_conflict = link_data.topo_media_conflict;
hw_link_info->pacing = link_data.cfg & ICE_AQ_CFG_PACING_M;
/* update fc info */
@@ -476,6 +475,49 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw)
ICE_FW_LOG_DESC_SIZE(ICE_AQC_FW_LOG_ID_MAX)
/**
+ * ice_get_fw_log_cfg - get FW logging configuration
+ * @hw: pointer to the HW struct
+ */
+static enum ice_status ice_get_fw_log_cfg(struct ice_hw *hw)
+{
+ struct ice_aqc_fw_logging_data *config;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+ u16 size;
+
+ size = ICE_FW_LOG_DESC_SIZE_MAX;
+ config = devm_kzalloc(ice_hw_to_dev(hw), size, GFP_KERNEL);
+ if (!config)
+ return ICE_ERR_NO_MEMORY;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging_info);
+
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_BUF);
+ desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD);
+
+ status = ice_aq_send_cmd(hw, &desc, config, size, NULL);
+ if (!status) {
+ u16 i;
+
+ /* Save FW logging information into the HW structure */
+ for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) {
+ u16 v, m, flgs;
+
+ v = le16_to_cpu(config->entry[i]);
+ m = (v & ICE_AQC_FW_LOG_ID_M) >> ICE_AQC_FW_LOG_ID_S;
+ flgs = (v & ICE_AQC_FW_LOG_EN_M) >> ICE_AQC_FW_LOG_EN_S;
+
+ if (m < ICE_AQC_FW_LOG_ID_MAX)
+ hw->fw_log.evnts[m].cur = flgs;
+ }
+ }
+
+ devm_kfree(ice_hw_to_dev(hw), config);
+
+ return status;
+}
+
+/**
* ice_cfg_fw_log - configure FW logging
* @hw: pointer to the HW struct
* @enable: enable certain FW logging events if true, disable all if false
@@ -529,6 +571,11 @@ static enum ice_status ice_cfg_fw_log(struct ice_hw *hw, bool enable)
(!hw->fw_log.actv_evnts || !ice_check_sq_alive(hw, &hw->adminq)))
return 0;
+ /* Get current FW log settings */
+ status = ice_get_fw_log_cfg(hw);
+ if (status)
+ return status;
+
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logging);
cmd = &desc.params.fw_logging;
@@ -634,17 +681,17 @@ out:
*/
void ice_output_fw_log(struct ice_hw *hw, struct ice_aq_desc *desc, void *buf)
{
- ice_debug(hw, ICE_DBG_AQ_MSG, "[ FW Log Msg Start ]\n");
- ice_debug_array(hw, ICE_DBG_AQ_MSG, 16, 1, (u8 *)buf,
+ ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg Start ]\n");
+ ice_debug_array(hw, ICE_DBG_FW_LOG, 16, 1, (u8 *)buf,
le16_to_cpu(desc->datalen));
- ice_debug(hw, ICE_DBG_AQ_MSG, "[ FW Log Msg End ]\n");
+ ice_debug(hw, ICE_DBG_FW_LOG, "[ FW Log Msg End ]\n");
}
/**
* ice_get_itr_intrl_gran - determine int/intrl granularity
* @hw: pointer to the HW struct
*
- * Determines the itr/intrl granularities based on the maximum aggregate
+ * Determines the ITR/intrl granularities based on the maximum aggregate
* bandwidth according to the device's configuration during power-on.
*/
static void ice_get_itr_intrl_gran(struct ice_hw *hw)
@@ -815,6 +862,10 @@ err_unroll_cqinit:
/**
* ice_deinit_hw - unroll initialization operations done by ice_init_hw
* @hw: pointer to the hardware structure
+ *
+ * This should be called only during nominal operation, not as a result of
+ * ice_init_hw() failing since ice_init_hw() will take care of unrolling
+ * applicable initializations if it fails for any reason.
*/
void ice_deinit_hw(struct ice_hw *hw)
{
@@ -1447,6 +1498,7 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
struct ice_hw_func_caps *func_p = NULL;
struct ice_hw_dev_caps *dev_p = NULL;
struct ice_hw_common_caps *caps;
+ char const *prefix;
u32 i;
if (!buf)
@@ -1457,9 +1509,11 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
if (opc == ice_aqc_opc_list_dev_caps) {
dev_p = &hw->dev_caps;
caps = &dev_p->common_cap;
+ prefix = "dev cap";
} else if (opc == ice_aqc_opc_list_func_caps) {
func_p = &hw->func_caps;
caps = &func_p->common_cap;
+ prefix = "func cap";
} else {
ice_debug(hw, ICE_DBG_INIT, "wrong opcode\n");
return;
@@ -1475,28 +1529,29 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
case ICE_AQC_CAPS_VALID_FUNCTIONS:
caps->valid_functions = number;
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Valid Functions = %d\n",
+ "%s: valid functions = %d\n", prefix,
caps->valid_functions);
break;
case ICE_AQC_CAPS_SRIOV:
caps->sr_iov_1_1 = (number == 1);
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: SR-IOV = %d\n", caps->sr_iov_1_1);
+ "%s: SR-IOV = %d\n", prefix,
+ caps->sr_iov_1_1);
break;
case ICE_AQC_CAPS_VF:
if (dev_p) {
dev_p->num_vfs_exposed = number;
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: VFs exposed = %d\n",
+ "%s: VFs exposed = %d\n", prefix,
dev_p->num_vfs_exposed);
} else if (func_p) {
func_p->num_allocd_vfs = number;
func_p->vf_base_id = logical_id;
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: VFs allocated = %d\n",
+ "%s: VFs allocated = %d\n", prefix,
func_p->num_allocd_vfs);
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: VF base_id = %d\n",
+ "%s: VF base_id = %d\n", prefix,
func_p->vf_base_id);
}
break;
@@ -1504,69 +1559,69 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
if (dev_p) {
dev_p->num_vsi_allocd_to_host = number;
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Dev.VSI cnt = %d\n",
+ "%s: num VSI alloc to host = %d\n",
+ prefix,
dev_p->num_vsi_allocd_to_host);
} else if (func_p) {
func_p->guar_num_vsi =
ice_get_num_per_func(hw, ICE_MAX_VSI);
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Func.VSI cnt = %d\n",
- number);
+ "%s: num guaranteed VSI (fw) = %d\n",
+ prefix, number);
+ ice_debug(hw, ICE_DBG_INIT,
+ "%s: num guaranteed VSI = %d\n",
+ prefix, func_p->guar_num_vsi);
}
break;
case ICE_AQC_CAPS_RSS:
caps->rss_table_size = number;
caps->rss_table_entry_width = logical_id;
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: RSS table size = %d\n",
+ "%s: RSS table size = %d\n", prefix,
caps->rss_table_size);
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: RSS table width = %d\n",
+ "%s: RSS table width = %d\n", prefix,
caps->rss_table_entry_width);
break;
case ICE_AQC_CAPS_RXQS:
caps->num_rxq = number;
caps->rxq_first_id = phys_id;
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Num Rx Qs = %d\n", caps->num_rxq);
+ "%s: num Rx queues = %d\n", prefix,
+ caps->num_rxq);
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Rx first queue ID = %d\n",
+ "%s: Rx first queue ID = %d\n", prefix,
caps->rxq_first_id);
break;
case ICE_AQC_CAPS_TXQS:
caps->num_txq = number;
caps->txq_first_id = phys_id;
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Num Tx Qs = %d\n", caps->num_txq);
+ "%s: num Tx queues = %d\n", prefix,
+ caps->num_txq);
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Tx first queue ID = %d\n",
+ "%s: Tx first queue ID = %d\n", prefix,
caps->txq_first_id);
break;
case ICE_AQC_CAPS_MSIX:
caps->num_msix_vectors = number;
caps->msix_vector_first_id = phys_id;
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: MSIX vector count = %d\n",
+ "%s: MSIX vector count = %d\n", prefix,
caps->num_msix_vectors);
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: MSIX first vector index = %d\n",
+ "%s: MSIX first vector index = %d\n", prefix,
caps->msix_vector_first_id);
break;
case ICE_AQC_CAPS_MAX_MTU:
caps->max_mtu = number;
- if (dev_p)
- ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Dev.MaxMTU = %d\n",
- caps->max_mtu);
- else if (func_p)
- ice_debug(hw, ICE_DBG_INIT,
- "HW caps: func.MaxMTU = %d\n",
- caps->max_mtu);
+ ice_debug(hw, ICE_DBG_INIT, "%s: max MTU = %d\n",
+ prefix, caps->max_mtu);
break;
default:
ice_debug(hw, ICE_DBG_INIT,
- "HW caps: Unknown capability[%d]: 0x%x\n", i,
- cap);
+ "%s: unknown capability[%d]: 0x%x\n", prefix,
+ i, cap);
break;
}
}
@@ -1947,36 +2002,37 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
*/
enum ice_status ice_update_link_info(struct ice_port_info *pi)
{
- struct ice_aqc_get_phy_caps_data *pcaps;
- struct ice_phy_info *phy_info;
+ struct ice_link_status *li;
enum ice_status status;
- struct ice_hw *hw;
if (!pi)
return ICE_ERR_PARAM;
- hw = pi->hw;
-
- pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps), GFP_KERNEL);
- if (!pcaps)
- return ICE_ERR_NO_MEMORY;
+ li = &pi->phy.link_info;
- phy_info = &pi->phy;
status = ice_aq_get_link_info(pi, true, NULL, NULL);
if (status)
- goto out;
+ return status;
+
+ if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
+ struct ice_aqc_get_phy_caps_data *pcaps;
+ struct ice_hw *hw;
+
+ hw = pi->hw;
+ pcaps = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*pcaps),
+ GFP_KERNEL);
+ if (!pcaps)
+ return ICE_ERR_NO_MEMORY;
- if (phy_info->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) {
status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
pcaps, NULL);
- if (status)
- goto out;
+ if (!status)
+ memcpy(li->module_type, &pcaps->module_type,
+ sizeof(li->module_type));
- memcpy(phy_info->link_info.module_type, &pcaps->module_type,
- sizeof(phy_info->link_info.module_type));
+ devm_kfree(ice_hw_to_dev(hw), pcaps);
}
-out:
- devm_kfree(ice_hw_to_dev(hw), pcaps);
+
return status;
}
@@ -2081,6 +2137,74 @@ out:
}
/**
+ * ice_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data
+ * @caps: PHY ability structure to copy date from
+ * @cfg: PHY configuration structure to copy data to
+ *
+ * Helper function to copy AQC PHY get ability data to PHY set configuration
+ * data structure
+ */
+void
+ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps,
+ struct ice_aqc_set_phy_cfg_data *cfg)
+{
+ if (!caps || !cfg)
+ return;
+
+ cfg->phy_type_low = caps->phy_type_low;
+ cfg->phy_type_high = caps->phy_type_high;
+ cfg->caps = caps->caps;
+ cfg->low_power_ctrl = caps->low_power_ctrl;
+ cfg->eee_cap = caps->eee_cap;
+ cfg->eeer_value = caps->eeer_value;
+ cfg->link_fec_opt = caps->link_fec_options;
+}
+
+/**
+ * ice_cfg_phy_fec - Configure PHY FEC data based on FEC mode
+ * @cfg: PHY configuration data to set FEC mode
+ * @fec: FEC mode to configure
+ *
+ * Caller should copy ice_aqc_get_phy_caps_data.caps ICE_AQC_PHY_EN_AUTO_FEC
+ * (bit 7) and ice_aqc_get_phy_caps_data.link_fec_options to cfg.caps
+ * ICE_AQ_PHY_ENA_AUTO_FEC (bit 7) and cfg.link_fec_options before calling.
+ */
+void
+ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec)
+{
+ switch (fec) {
+ case ICE_FEC_BASER:
+ /* Clear auto FEC and RS bits, and AND BASE-R ability
+ * bits and OR request bits.
+ */
+ cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
+ cfg->link_fec_opt &= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN |
+ ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN;
+ cfg->link_fec_opt |= ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ |
+ ICE_AQC_PHY_FEC_25G_KR_REQ;
+ break;
+ case ICE_FEC_RS:
+ /* Clear auto FEC and BASE-R bits, and AND RS ability
+ * bits and OR request bits.
+ */
+ cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
+ cfg->link_fec_opt &= ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN;
+ cfg->link_fec_opt |= ICE_AQC_PHY_FEC_25G_RS_528_REQ |
+ ICE_AQC_PHY_FEC_25G_RS_544_REQ;
+ break;
+ case ICE_FEC_NONE:
+ /* Clear auto FEC and all FEC option bits. */
+ cfg->caps &= ~ICE_AQC_PHY_EN_AUTO_FEC;
+ cfg->link_fec_opt &= ~ICE_AQC_PHY_FEC_MASK;
+ break;
+ case ICE_FEC_AUTO:
+ /* AND auto FEC bit, and all caps bits. */
+ cfg->caps &= ICE_AQC_PHY_CAPS_MASK;
+ break;
+ }
+}
+
+/**
* ice_get_link_status - get status of the HW network link
* @pi: port information structure
* @link_up: pointer to bool (true/false = linkup/linkdown)
@@ -2169,6 +2293,29 @@ ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
}
/**
+ * ice_aq_set_mac_loopback
+ * @hw: pointer to the HW struct
+ * @ena_lpbk: Enable or Disable loopback
+ * @cd: pointer to command details structure or NULL
+ *
+ * Enable/disable loopback on a given port
+ */
+enum ice_status
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd)
+{
+ struct ice_aqc_set_mac_lb *cmd;
+ struct ice_aq_desc desc;
+
+ cmd = &desc.params.set_mac_lb;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_lb);
+ if (ena_lpbk)
+ cmd->lb_mode = ICE_AQ_MAC_LB_EN;
+
+ return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
+/**
* ice_aq_set_port_id_led
* @pi: pointer to the port information
* @is_orig_mode: is this LED set to original mode (by the net-list)
@@ -2552,7 +2699,7 @@ do_aq:
ice_debug(hw, ICE_DBG_SCHED, "VM%d disable failed %d\n",
vmvf_num, hw->adminq.sq_last_status);
else
- ice_debug(hw, ICE_DBG_SCHED, "disable Q %d failed %d\n",
+ ice_debug(hw, ICE_DBG_SCHED, "disable queue %d failed %d\n",
le16_to_cpu(qg_list[0].q_id[0]),
hw->adminq.sq_last_status);
}
@@ -2924,7 +3071,6 @@ ice_dis_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u8 num_queues,
if (!pi || pi->port_state != ICE_SCHED_PORT_STATE_READY)
return ICE_ERR_CFG;
-
if (!num_queues) {
/* if queue is disabled already yet the disable queue command
* has to be sent to complete the VF reset, then call
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index f1ddebf45231..d1f8353fe6bb 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -9,6 +9,8 @@
#include "ice_switch.h"
#include <linux/avf/virtchnl.h>
+enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw);
+
void
ice_debug_cq(struct ice_hw *hw, u32 mask, void *desc, void *buf, u16 buf_len);
enum ice_status ice_init_hw(struct ice_hw *hw);
@@ -84,7 +86,11 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
enum ice_status
ice_set_fc(struct ice_port_info *pi, u8 *aq_failures,
bool ena_auto_link_update);
-
+void
+ice_cfg_phy_fec(struct ice_aqc_set_phy_cfg_data *cfg, enum ice_fec_mode fec);
+void
+ice_copy_phy_caps_to_cfg(struct ice_aqc_get_phy_caps_data *caps,
+ struct ice_aqc_set_phy_cfg_data *cfg);
enum ice_status
ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
struct ice_sq_cd *cd);
@@ -95,6 +101,9 @@ enum ice_status
ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
struct ice_sq_cd *cd);
enum ice_status
+ice_aq_set_mac_loopback(struct ice_hw *hw, bool ena_lpbk, struct ice_sq_cd *cd);
+
+enum ice_status
ice_aq_set_port_id_led(struct ice_port_info *pi, bool is_orig_mode,
struct ice_sq_cd *cd);
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index cc8cb5fdcdc1..e91ac4df0242 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -439,7 +439,7 @@ do { \
/* free the buffer info list */ \
if ((qi)->ring.cmd_buf) \
devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf); \
- /* free dma head */ \
+ /* free DMA head */ \
devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head); \
} while (0)
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index e0585394d984..44945c2165d8 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -35,7 +35,7 @@ enum ice_ctl_q {
#define ICE_CTL_Q_SQ_CMD_TIMEOUT 250 /* msecs */
struct ice_ctl_q_ring {
- void *dma_head; /* Virtual address to dma head */
+ void *dma_head; /* Virtual address to DMA head */
struct ice_dma_mem desc_buf; /* descriptor ring memory */
void *cmd_buf; /* command buffer memory */
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c
index 8bbf48e04a1c..c2002ded65f6 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@ -82,12 +82,14 @@ ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
* @hw: pointer to the HW struct
* @shutdown_lldp_agent: True if LLDP Agent needs to be Shutdown
* False if LLDP Agent needs to be Stopped
+ * @persist: True if Stop/Shutdown of LLDP Agent needs to be persistent across
+ * reboots
* @cd: pointer to command details structure or NULL
*
* Stop or Shutdown the embedded LLDP Agent (0x0A05)
*/
enum ice_status
-ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent,
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
struct ice_sq_cd *cd)
{
struct ice_aqc_lldp_stop *cmd;
@@ -100,17 +102,22 @@ ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent,
if (shutdown_lldp_agent)
cmd->command |= ICE_AQ_LLDP_AGENT_SHUTDOWN;
+ if (persist)
+ cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_DIS;
+
return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
}
/**
* ice_aq_start_lldp
* @hw: pointer to the HW struct
+ * @persist: True if Start of LLDP Agent needs to be persistent across reboots
* @cd: pointer to command details structure or NULL
*
* Start the embedded LLDP Agent on all ports. (0x0A06)
*/
-enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd)
+enum ice_status
+ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd)
{
struct ice_aqc_lldp_start *cmd;
struct ice_aq_desc desc;
@@ -121,6 +128,9 @@ enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd)
cmd->command = ICE_AQ_LLDP_AGENT_START;
+ if (persist)
+ cmd->command |= ICE_AQ_LLDP_AGENT_PERSIST_ENA;
+
return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
}
@@ -163,7 +173,7 @@ ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size,
*
* Get the DCBX status from the Firmware
*/
-u8 ice_get_dcbx_status(struct ice_hw *hw)
+static u8 ice_get_dcbx_status(struct ice_hw *hw)
{
u32 reg;
@@ -614,7 +624,8 @@ ice_parse_org_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg)
*
* Parse DCB configuration from the LLDPDU
*/
-enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
+static enum ice_status
+ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
{
struct ice_lldp_org_tlv *tlv;
enum ice_status ret = 0;
@@ -658,13 +669,13 @@ enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg)
/**
* ice_aq_get_dcb_cfg
* @hw: pointer to the HW struct
- * @mib_type: mib type for the query
+ * @mib_type: MIB type for the query
* @bridgetype: bridge type for the query (remote)
* @dcbcfg: store for LLDPDU data
*
* Query DCB configuration from the firmware
*/
-static enum ice_status
+enum ice_status
ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
struct ice_dcbx_cfg *dcbcfg)
{
@@ -689,13 +700,13 @@ ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
}
/**
- * ice_aq_start_stop_dcbx - Start/Stop DCBx service in FW
+ * ice_aq_start_stop_dcbx - Start/Stop DCBX service in FW
* @hw: pointer to the HW struct
- * @start_dcbx_agent: True if DCBx Agent needs to be started
- * False if DCBx Agent needs to be stopped
- * @dcbx_agent_status: FW indicates back the DCBx agent status
- * True if DCBx Agent is active
- * False if DCBx Agent is stopped
+ * @start_dcbx_agent: True if DCBX Agent needs to be started
+ * False if DCBX Agent needs to be stopped
+ * @dcbx_agent_status: FW indicates back the DCBX agent status
+ * True if DCBX Agent is active
+ * False if DCBX Agent is stopped
* @cd: pointer to command details structure or NULL
*
* Start/Stop the embedded dcbx Agent. In case that this wrapper function
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h
index e7d4416e3a66..522e1452abe2 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.h
@@ -120,8 +120,9 @@ struct ice_cee_app_prio {
u8 prio_map;
} __packed;
-u8 ice_get_dcbx_status(struct ice_hw *hw);
-enum ice_status ice_lldp_to_dcb_cfg(u8 *lldpmib, struct ice_dcbx_cfg *dcbcfg);
+enum ice_status
+ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype,
+ struct ice_dcbx_cfg *dcbcfg);
enum ice_status ice_get_dcb_cfg(struct ice_port_info *pi);
enum ice_status ice_set_dcb_cfg(struct ice_port_info *pi);
enum ice_status ice_init_dcb(struct ice_hw *hw);
@@ -131,9 +132,10 @@ ice_query_port_ets(struct ice_port_info *pi,
struct ice_sq_cd *cmd_details);
#ifdef CONFIG_DCB
enum ice_status
-ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent,
+ice_aq_stop_lldp(struct ice_hw *hw, bool shutdown_lldp_agent, bool persist,
struct ice_sq_cd *cd);
-enum ice_status ice_aq_start_lldp(struct ice_hw *hw, struct ice_sq_cd *cd);
+enum ice_status
+ice_aq_start_lldp(struct ice_hw *hw, bool persist, struct ice_sq_cd *cd);
enum ice_status
ice_aq_start_stop_dcbx(struct ice_hw *hw, bool start_dcbx_agent,
bool *dcbx_agent_status, struct ice_sq_cd *cd);
@@ -144,6 +146,7 @@ ice_aq_cfg_lldp_mib_change(struct ice_hw *hw, bool ena_update,
static inline enum ice_status
ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
bool __always_unused shutdown_lldp_agent,
+ bool __always_unused persist,
struct ice_sq_cd __always_unused *cd)
{
return 0;
@@ -151,6 +154,7 @@ ice_aq_stop_lldp(struct ice_hw __always_unused *hw,
static inline enum ice_status
ice_aq_start_lldp(struct ice_hw __always_unused *hw,
+ bool __always_unused persist,
struct ice_sq_cd __always_unused *cd)
{
return 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 3e81af1884fc..fe88b127ca42 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -120,12 +120,14 @@ static void ice_pf_dcb_recfg(struct ice_pf *pf)
tc_map = ICE_DFLT_TRAFFIC_CLASS;
ret = ice_vsi_cfg_tc(pf->vsi[v], tc_map);
- if (ret)
+ if (ret) {
dev_err(&pf->pdev->dev,
"Failed to config TC for VSI index: %d\n",
pf->vsi[v]->idx);
- else
- ice_vsi_map_rings_to_vectors(pf->vsi[v]);
+ continue;
+ }
+
+ ice_vsi_map_rings_to_vectors(pf->vsi[v]);
}
}
@@ -133,8 +135,10 @@ static void ice_pf_dcb_recfg(struct ice_pf *pf)
* ice_pf_dcb_cfg - Apply new DCB configuration
* @pf: pointer to the PF struct
* @new_cfg: DCBX config to apply
+ * @locked: is the RTNL held
*/
-static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg)
+static
+int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked)
{
struct ice_dcbx_cfg *old_cfg, *curr_cfg;
struct ice_aqc_port_ets_elem buf = { 0 };
@@ -163,7 +167,8 @@ static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg)
/* avoid race conditions by holding the lock while disabling and
* re-enabling the VSI
*/
- rtnl_lock();
+ if (!locked)
+ rtnl_lock();
ice_pf_dis_all_vsi(pf, true);
memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg));
@@ -192,7 +197,8 @@ static int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg)
out:
ice_pf_ena_all_vsi(pf, true);
- rtnl_unlock();
+ if (!locked)
+ rtnl_unlock();
devm_kfree(&pf->pdev->dev, old_cfg);
return ret;
}
@@ -271,15 +277,16 @@ dcb_error:
prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec));
- ice_pf_dcb_cfg(pf, prev_cfg);
+ ice_pf_dcb_cfg(pf, prev_cfg, false);
devm_kfree(&pf->pdev->dev, prev_cfg);
}
/**
* ice_dcb_init_cfg - set the initial DCB config in SW
- * @pf: pf to apply config to
+ * @pf: PF to apply config to
+ * @locked: Is the RTNL held
*/
-static int ice_dcb_init_cfg(struct ice_pf *pf)
+static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked)
{
struct ice_dcbx_cfg *newcfg;
struct ice_port_info *pi;
@@ -294,7 +301,7 @@ static int ice_dcb_init_cfg(struct ice_pf *pf)
memset(&pi->local_dcbx_cfg, 0, sizeof(*newcfg));
dev_info(&pf->pdev->dev, "Configuring initial DCB values\n");
- if (ice_pf_dcb_cfg(pf, newcfg))
+ if (ice_pf_dcb_cfg(pf, newcfg, locked))
ret = -EINVAL;
devm_kfree(&pf->pdev->dev, newcfg);
@@ -304,9 +311,10 @@ static int ice_dcb_init_cfg(struct ice_pf *pf)
/**
* ice_dcb_sw_default_config - Apply a default DCB config
- * @pf: pf to apply config to
+ * @pf: PF to apply config to
+ * @locked: was this function called with RTNL held
*/
-static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf)
+static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool locked)
{
struct ice_aqc_port_ets_elem buf = { 0 };
struct ice_dcbx_cfg *dcbcfg;
@@ -338,7 +346,7 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf)
dcbcfg->app[0].priority = 3;
dcbcfg->app[0].prot_id = ICE_APP_PROT_ID_FCOE;
- ret = ice_pf_dcb_cfg(pf, dcbcfg);
+ ret = ice_pf_dcb_cfg(pf, dcbcfg, locked);
devm_kfree(&pf->pdev->dev, dcbcfg);
if (ret)
return ret;
@@ -348,9 +356,10 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf)
/**
* ice_init_pf_dcb - initialize DCB for a PF
- * @pf: pf to initiialize DCB for
+ * @pf: PF to initialize DCB for
+ * @locked: Was function called with RTNL held
*/
-int ice_init_pf_dcb(struct ice_pf *pf)
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked)
{
struct device *dev = &pf->pdev->dev;
struct ice_port_info *port_info;
@@ -360,33 +369,10 @@ int ice_init_pf_dcb(struct ice_pf *pf)
port_info = hw->port_info;
- /* check if device is DCB capable */
- if (!hw->func_caps.common_cap.dcb) {
- dev_dbg(dev, "DCB not supported\n");
- return -EOPNOTSUPP;
- }
-
- /* Best effort to put DCBx and LLDP into a good state */
- port_info->dcbx_status = ice_get_dcbx_status(hw);
- if (port_info->dcbx_status != ICE_DCBX_STATUS_DONE &&
- port_info->dcbx_status != ICE_DCBX_STATUS_IN_PROGRESS) {
- bool dcbx_status;
-
- /* Attempt to start LLDP engine. Ignore errors
- * as this will error if it is already started
- */
- ice_aq_start_lldp(hw, NULL);
-
- /* Attempt to start DCBX. Ignore errors as this
- * will error if it is already started
- */
- ice_aq_start_stop_dcbx(hw, true, &dcbx_status, NULL);
- }
-
err = ice_init_dcb(hw);
if (err) {
- /* FW LLDP not in usable state, default to SW DCBx/LLDP */
- dev_info(&pf->pdev->dev, "FW LLDP not in usable state\n");
+ /* FW LLDP is not active, default to SW DCBX/LLDP */
+ dev_info(&pf->pdev->dev, "FW LLDP is not active\n");
hw->port_info->dcbx_status = ICE_DCBX_STATUS_NOT_STARTED;
hw->port_info->is_sw_lldp = true;
}
@@ -398,15 +384,16 @@ int ice_init_pf_dcb(struct ice_pf *pf)
if (port_info->is_sw_lldp) {
sw_default = 1;
dev_info(&pf->pdev->dev, "DCBx/LLDP in SW mode.\n");
+ clear_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags);
+ } else {
+ set_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags);
}
- if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED) {
- sw_default = 1;
+ if (port_info->dcbx_status == ICE_DCBX_STATUS_NOT_STARTED)
dev_info(&pf->pdev->dev, "DCBX not started\n");
- }
if (sw_default) {
- err = ice_dcb_sw_dflt_cfg(pf);
+ err = ice_dcb_sw_dflt_cfg(pf, locked);
if (err) {
dev_err(&pf->pdev->dev,
"Failed to set local DCB config %d\n", err);
@@ -425,7 +412,7 @@ int ice_init_pf_dcb(struct ice_pf *pf)
set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
- err = ice_dcb_init_cfg(pf);
+ err = ice_dcb_init_cfg(pf, locked);
if (err)
goto dcb_init_err;
@@ -515,6 +502,55 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
}
/**
+ * ice_dcb_need_recfg - Check if DCB needs reconfig
+ * @pf: board private structure
+ * @old_cfg: current DCB config
+ * @new_cfg: new DCB config
+ */
+static bool ice_dcb_need_recfg(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
+ struct ice_dcbx_cfg *new_cfg)
+{
+ bool need_reconfig = false;
+
+ /* Check if ETS configuration has changed */
+ if (memcmp(&new_cfg->etscfg, &old_cfg->etscfg,
+ sizeof(new_cfg->etscfg))) {
+ /* If Priority Table has changed reconfig is needed */
+ if (memcmp(&new_cfg->etscfg.prio_table,
+ &old_cfg->etscfg.prio_table,
+ sizeof(new_cfg->etscfg.prio_table))) {
+ need_reconfig = true;
+ dev_dbg(&pf->pdev->dev, "ETS UP2TC changed.\n");
+ }
+
+ if (memcmp(&new_cfg->etscfg.tcbwtable,
+ &old_cfg->etscfg.tcbwtable,
+ sizeof(new_cfg->etscfg.tcbwtable)))
+ dev_dbg(&pf->pdev->dev, "ETS TC BW Table changed.\n");
+
+ if (memcmp(&new_cfg->etscfg.tsatable,
+ &old_cfg->etscfg.tsatable,
+ sizeof(new_cfg->etscfg.tsatable)))
+ dev_dbg(&pf->pdev->dev, "ETS TSA Table changed.\n");
+ }
+
+ /* Check if PFC configuration has changed */
+ if (memcmp(&new_cfg->pfc, &old_cfg->pfc, sizeof(new_cfg->pfc))) {
+ need_reconfig = true;
+ dev_dbg(&pf->pdev->dev, "PFC config change detected.\n");
+ }
+
+ /* Check if APP Table has changed */
+ if (memcmp(&new_cfg->app, &old_cfg->app, sizeof(new_cfg->app))) {
+ need_reconfig = true;
+ dev_dbg(&pf->pdev->dev, "APP Table change detected.\n");
+ }
+
+ dev_dbg(&pf->pdev->dev, "dcb need_reconfig=%d\n", need_reconfig);
+ return need_reconfig;
+}
+
+/**
* ice_dcb_process_lldp_set_mib_change - Process MIB change
* @pf: ptr to ice_pf
* @event: pointer to the admin queue receive event
@@ -523,29 +559,95 @@ void
ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
struct ice_rq_event_info *event)
{
- if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) {
- struct ice_dcbx_cfg *dcbcfg, *prev_cfg;
- int err;
-
- prev_cfg = &pf->hw.port_info->local_dcbx_cfg;
- dcbcfg = devm_kmemdup(&pf->pdev->dev, prev_cfg,
- sizeof(*dcbcfg), GFP_KERNEL);
- if (!dcbcfg)
+ struct ice_aqc_port_ets_elem buf = { 0 };
+ struct ice_aqc_lldp_get_mib *mib;
+ struct ice_dcbx_cfg tmp_dcbx_cfg;
+ bool need_reconfig = false;
+ struct ice_port_info *pi;
+ u8 type;
+ int ret;
+
+ /* Not DCB capable or capability disabled */
+ if (!(test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)))
+ return;
+
+ if (pf->dcbx_cap & DCB_CAP_DCBX_HOST) {
+ dev_dbg(&pf->pdev->dev,
+ "MIB Change Event in HOST mode\n");
+ return;
+ }
+
+ pi = pf->hw.port_info;
+ mib = (struct ice_aqc_lldp_get_mib *)&event->desc.params.raw;
+ /* Ignore if event is not for Nearest Bridge */
+ type = ((mib->type >> ICE_AQ_LLDP_BRID_TYPE_S) &
+ ICE_AQ_LLDP_BRID_TYPE_M);
+ dev_dbg(&pf->pdev->dev, "LLDP event MIB bridge type 0x%x\n", type);
+ if (type != ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID)
+ return;
+
+ /* Check MIB Type and return if event for Remote MIB update */
+ type = mib->type & ICE_AQ_LLDP_MIB_TYPE_M;
+ dev_dbg(&pf->pdev->dev,
+ "LLDP event mib type %s\n", type ? "remote" : "local");
+ if (type == ICE_AQ_LLDP_MIB_REMOTE) {
+ /* Update the remote cached instance and return */
+ ret = ice_aq_get_dcb_cfg(pi->hw, ICE_AQ_LLDP_MIB_REMOTE,
+ ICE_AQ_LLDP_BRID_TYPE_NEAREST_BRID,
+ &pi->remote_dcbx_cfg);
+ if (ret) {
+ dev_err(&pf->pdev->dev, "Failed to get remote DCB config\n");
return;
+ }
+ }
- err = ice_lldp_to_dcb_cfg(event->msg_buf, dcbcfg);
- if (!err)
- ice_pf_dcb_cfg(pf, dcbcfg);
+ /* store the old configuration */
+ tmp_dcbx_cfg = pf->hw.port_info->local_dcbx_cfg;
- devm_kfree(&pf->pdev->dev, dcbcfg);
+ /* Reset the old DCBX configuration data */
+ memset(&pi->local_dcbx_cfg, 0, sizeof(pi->local_dcbx_cfg));
- /* Get updated DCBx data from firmware */
- err = ice_get_dcb_cfg(pf->hw.port_info);
- if (err)
- dev_err(&pf->pdev->dev,
- "Failed to get DCB config\n");
- } else {
+ /* Get updated DCBX data from firmware */
+ ret = ice_get_dcb_cfg(pf->hw.port_info);
+ if (ret) {
+ dev_err(&pf->pdev->dev, "Failed to get DCB config\n");
+ return;
+ }
+
+ /* No change detected in DCBX configs */
+ if (!memcmp(&tmp_dcbx_cfg, &pi->local_dcbx_cfg, sizeof(tmp_dcbx_cfg))) {
dev_dbg(&pf->pdev->dev,
- "MIB Change Event in HOST mode\n");
+ "No change detected in DCBX configuration.\n");
+ return;
+ }
+
+ need_reconfig = ice_dcb_need_recfg(pf, &tmp_dcbx_cfg,
+ &pi->local_dcbx_cfg);
+ if (!need_reconfig)
+ return;
+
+ /* Enable DCB tagging only when more than one TC */
+ if (ice_dcb_get_num_tc(&pi->local_dcbx_cfg) > 1) {
+ dev_dbg(&pf->pdev->dev, "DCB tagging enabled (num TC > 1)\n");
+ set_bit(ICE_FLAG_DCB_ENA, pf->flags);
+ } else {
+ dev_dbg(&pf->pdev->dev, "DCB tagging disabled (num TC = 1)\n");
+ clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
}
+
+ rtnl_lock();
+ ice_pf_dis_all_vsi(pf, true);
+
+ ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL);
+ if (ret) {
+ dev_err(&pf->pdev->dev, "Query Port ETS failed\n");
+ rtnl_unlock();
+ return;
+ }
+
+ /* changes in configuration update VSI */
+ ice_pf_dcb_recfg(pf);
+
+ ice_pf_ena_all_vsi(pf, true);
+ rtnl_unlock();
}
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index ca7b76faa03c..819081053ff5 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -14,7 +14,7 @@ void ice_dcb_rebuild(struct ice_pf *pf);
u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
-int ice_init_pf_dcb(struct ice_pf *pf);
+int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
void ice_update_dcb_stats(struct ice_pf *pf);
int
ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
@@ -40,7 +40,8 @@ static inline u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg __always_unused *dcbcfg)
return 1;
}
-static inline int ice_init_pf_dcb(struct ice_pf *pf)
+static inline int
+ice_init_pf_dcb(struct ice_pf *pf, bool __always_unused locked)
{
dev_dbg(&pf->pdev->dev, "DCB not supported\n");
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 1341fde8d53f..52083a63dee6 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -45,22 +45,40 @@ static int ice_q_stats_len(struct net_device *netdev)
ICE_VSI_STATS_LEN + ice_q_stats_len(n))
static const struct ice_stats ice_gstrings_vsi_stats[] = {
- ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
ICE_VSI_STAT("rx_unicast", eth_stats.rx_unicast),
- ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
+ ICE_VSI_STAT("tx_unicast", eth_stats.tx_unicast),
ICE_VSI_STAT("rx_multicast", eth_stats.rx_multicast),
- ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
+ ICE_VSI_STAT("tx_multicast", eth_stats.tx_multicast),
ICE_VSI_STAT("rx_broadcast", eth_stats.rx_broadcast),
- ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+ ICE_VSI_STAT("tx_broadcast", eth_stats.tx_broadcast),
ICE_VSI_STAT("rx_bytes", eth_stats.rx_bytes),
- ICE_VSI_STAT("rx_discards", eth_stats.rx_discards),
- ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
- ICE_VSI_STAT("tx_linearize", tx_linearize),
+ ICE_VSI_STAT("tx_bytes", eth_stats.tx_bytes),
+ ICE_VSI_STAT("rx_dropped", eth_stats.rx_discards),
ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
+ ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
+ ICE_VSI_STAT("tx_linearize", tx_linearize),
+};
+
+enum ice_ethtool_test_id {
+ ICE_ETH_TEST_REG = 0,
+ ICE_ETH_TEST_EEPROM,
+ ICE_ETH_TEST_INTR,
+ ICE_ETH_TEST_LOOP,
+ ICE_ETH_TEST_LINK,
};
+static const char ice_gstrings_test[][ETH_GSTRING_LEN] = {
+ "Register test (offline)",
+ "EEPROM test (offline)",
+ "Interrupt test (offline)",
+ "Loopback test (offline)",
+ "Link test (on/offline)",
+};
+
+#define ICE_TEST_LEN (sizeof(ice_gstrings_test) / ETH_GSTRING_LEN)
+
/* These PF_STATs might look like duplicates of some NETDEV_STATs,
* but they aren't. This device is capable of supporting multiple
* VSIs/netdevs on a single PF. The NETDEV_STATs are for individual
@@ -71,45 +89,45 @@ static const struct ice_stats ice_gstrings_vsi_stats[] = {
* is queried on the base PF netdev.
*/
static const struct ice_stats ice_gstrings_pf_stats[] = {
- ICE_PF_STAT("port.tx_bytes", stats.eth.tx_bytes),
- ICE_PF_STAT("port.rx_bytes", stats.eth.rx_bytes),
- ICE_PF_STAT("port.tx_unicast", stats.eth.tx_unicast),
- ICE_PF_STAT("port.rx_unicast", stats.eth.rx_unicast),
- ICE_PF_STAT("port.tx_multicast", stats.eth.tx_multicast),
- ICE_PF_STAT("port.rx_multicast", stats.eth.rx_multicast),
- ICE_PF_STAT("port.tx_broadcast", stats.eth.tx_broadcast),
- ICE_PF_STAT("port.rx_broadcast", stats.eth.rx_broadcast),
- ICE_PF_STAT("port.tx_errors", stats.eth.tx_errors),
- ICE_PF_STAT("port.tx_size_64", stats.tx_size_64),
- ICE_PF_STAT("port.rx_size_64", stats.rx_size_64),
- ICE_PF_STAT("port.tx_size_127", stats.tx_size_127),
- ICE_PF_STAT("port.rx_size_127", stats.rx_size_127),
- ICE_PF_STAT("port.tx_size_255", stats.tx_size_255),
- ICE_PF_STAT("port.rx_size_255", stats.rx_size_255),
- ICE_PF_STAT("port.tx_size_511", stats.tx_size_511),
- ICE_PF_STAT("port.rx_size_511", stats.rx_size_511),
- ICE_PF_STAT("port.tx_size_1023", stats.tx_size_1023),
- ICE_PF_STAT("port.rx_size_1023", stats.rx_size_1023),
- ICE_PF_STAT("port.tx_size_1522", stats.tx_size_1522),
- ICE_PF_STAT("port.rx_size_1522", stats.rx_size_1522),
- ICE_PF_STAT("port.tx_size_big", stats.tx_size_big),
- ICE_PF_STAT("port.rx_size_big", stats.rx_size_big),
- ICE_PF_STAT("port.link_xon_tx", stats.link_xon_tx),
- ICE_PF_STAT("port.link_xon_rx", stats.link_xon_rx),
- ICE_PF_STAT("port.link_xoff_tx", stats.link_xoff_tx),
- ICE_PF_STAT("port.link_xoff_rx", stats.link_xoff_rx),
- ICE_PF_STAT("port.tx_dropped_link_down", stats.tx_dropped_link_down),
- ICE_PF_STAT("port.rx_undersize", stats.rx_undersize),
- ICE_PF_STAT("port.rx_fragments", stats.rx_fragments),
- ICE_PF_STAT("port.rx_oversize", stats.rx_oversize),
- ICE_PF_STAT("port.rx_jabber", stats.rx_jabber),
- ICE_PF_STAT("port.rx_csum_bad", hw_csum_rx_error),
- ICE_PF_STAT("port.rx_length_errors", stats.rx_len_errors),
- ICE_PF_STAT("port.rx_dropped", stats.eth.rx_discards),
- ICE_PF_STAT("port.rx_crc_errors", stats.crc_errors),
- ICE_PF_STAT("port.illegal_bytes", stats.illegal_bytes),
- ICE_PF_STAT("port.mac_local_faults", stats.mac_local_faults),
- ICE_PF_STAT("port.mac_remote_faults", stats.mac_remote_faults),
+ ICE_PF_STAT("rx_bytes.nic", stats.eth.rx_bytes),
+ ICE_PF_STAT("tx_bytes.nic", stats.eth.tx_bytes),
+ ICE_PF_STAT("rx_unicast.nic", stats.eth.rx_unicast),
+ ICE_PF_STAT("tx_unicast.nic", stats.eth.tx_unicast),
+ ICE_PF_STAT("rx_multicast.nic", stats.eth.rx_multicast),
+ ICE_PF_STAT("tx_multicast.nic", stats.eth.tx_multicast),
+ ICE_PF_STAT("rx_broadcast.nic", stats.eth.rx_broadcast),
+ ICE_PF_STAT("tx_broadcast.nic", stats.eth.tx_broadcast),
+ ICE_PF_STAT("tx_errors.nic", stats.eth.tx_errors),
+ ICE_PF_STAT("rx_size_64.nic", stats.rx_size_64),
+ ICE_PF_STAT("tx_size_64.nic", stats.tx_size_64),
+ ICE_PF_STAT("rx_size_127.nic", stats.rx_size_127),
+ ICE_PF_STAT("tx_size_127.nic", stats.tx_size_127),
+ ICE_PF_STAT("rx_size_255.nic", stats.rx_size_255),
+ ICE_PF_STAT("tx_size_255.nic", stats.tx_size_255),
+ ICE_PF_STAT("rx_size_511.nic", stats.rx_size_511),
+ ICE_PF_STAT("tx_size_511.nic", stats.tx_size_511),
+ ICE_PF_STAT("rx_size_1023.nic", stats.rx_size_1023),
+ ICE_PF_STAT("tx_size_1023.nic", stats.tx_size_1023),
+ ICE_PF_STAT("rx_size_1522.nic", stats.rx_size_1522),
+ ICE_PF_STAT("tx_size_1522.nic", stats.tx_size_1522),
+ ICE_PF_STAT("rx_size_big.nic", stats.rx_size_big),
+ ICE_PF_STAT("tx_size_big.nic", stats.tx_size_big),
+ ICE_PF_STAT("link_xon_rx.nic", stats.link_xon_rx),
+ ICE_PF_STAT("link_xon_tx.nic", stats.link_xon_tx),
+ ICE_PF_STAT("link_xoff_rx.nic", stats.link_xoff_rx),
+ ICE_PF_STAT("link_xoff_tx.nic", stats.link_xoff_tx),
+ ICE_PF_STAT("tx_dropped_link_down.nic", stats.tx_dropped_link_down),
+ ICE_PF_STAT("rx_undersize.nic", stats.rx_undersize),
+ ICE_PF_STAT("rx_fragments.nic", stats.rx_fragments),
+ ICE_PF_STAT("rx_oversize.nic", stats.rx_oversize),
+ ICE_PF_STAT("rx_jabber.nic", stats.rx_jabber),
+ ICE_PF_STAT("rx_csum_bad.nic", hw_csum_rx_error),
+ ICE_PF_STAT("rx_length_errors.nic", stats.rx_len_errors),
+ ICE_PF_STAT("rx_dropped.nic", stats.eth.rx_discards),
+ ICE_PF_STAT("rx_crc_errors.nic", stats.crc_errors),
+ ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
+ ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults),
+ ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
};
static const u32 ice_regs_dump_list[] = {
@@ -120,6 +138,9 @@ static const u32 ice_regs_dump_list[] = {
QINT_RQCTL(0),
PFINT_OICR_ENA,
QRX_ITR(0),
+ PF0INT_ITR_0(0),
+ PF0INT_ITR_1(0),
+ PF0INT_ITR_2(0),
};
struct ice_priv_flag {
@@ -134,7 +155,7 @@ struct ice_priv_flag {
static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
- ICE_PRIV_FLAG("disable-fw-lldp", ICE_FLAG_DISABLE_FW_LLDP),
+ ICE_PRIV_FLAG("enable-fw-lldp", ICE_FLAG_ENABLE_FW_LLDP),
};
#define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags)
@@ -278,6 +299,571 @@ out:
return ret;
}
+/**
+ * ice_active_vfs - check if there are any active VFs
+ * @pf: board private structure
+ *
+ * Returns true if an active VF is found, otherwise returns false
+ */
+static bool ice_active_vfs(struct ice_pf *pf)
+{
+ struct ice_vf *vf = pf->vf;
+ int i;
+
+ for (i = 0; i < pf->num_alloc_vfs; i++, vf++)
+ if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+ return true;
+ return false;
+}
+
+/**
+ * ice_link_test - perform a link test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_link_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ enum ice_status status;
+ bool link_up = false;
+
+ netdev_info(netdev, "link test\n");
+ status = ice_get_link_status(np->vsi->port_info, &link_up);
+ if (status) {
+ netdev_err(netdev, "link query error, status = %d\n", status);
+ return 1;
+ }
+
+ if (!link_up)
+ return 2;
+
+ return 0;
+}
+
+/**
+ * ice_eeprom_test - perform an EEPROM test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_eeprom_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+ netdev_info(netdev, "EEPROM test\n");
+ return !!(ice_nvm_validate_checksum(&pf->hw));
+}
+
+/**
+ * ice_reg_pattern_test
+ * @hw: pointer to the HW struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ */
+static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
+{
+ struct ice_pf *pf = (struct ice_pf *)hw->back;
+ static const u32 patterns[] = {
+ 0x5A5A5A5A, 0xA5A5A5A5,
+ 0x00000000, 0xFFFFFFFF
+ };
+ u32 val, orig_val;
+ int i;
+
+ orig_val = rd32(hw, reg);
+ for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
+ u32 pattern = patterns[i] & mask;
+
+ wr32(hw, reg, pattern);
+ val = rd32(hw, reg);
+ if (val == pattern)
+ continue;
+ dev_err(&pf->pdev->dev,
+ "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n"
+ , __func__, reg, pattern, val);
+ return 1;
+ }
+
+ wr32(hw, reg, orig_val);
+ val = rd32(hw, reg);
+ if (val != orig_val) {
+ dev_err(&pf->pdev->dev,
+ "%s: reg restore test failed - reg 0x%08x orig 0x%08x val 0x%08x\n"
+ , __func__, reg, orig_val, val);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * ice_reg_test - perform a register test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_reg_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_hw *hw = np->vsi->port_info->hw;
+ u32 int_elements = hw->func_caps.common_cap.num_msix_vectors ?
+ hw->func_caps.common_cap.num_msix_vectors - 1 : 1;
+ struct ice_diag_reg_test_info {
+ u32 address;
+ u32 mask;
+ u32 elem_num;
+ u32 elem_size;
+ } ice_reg_list[] = {
+ {GLINT_ITR(0, 0), 0x00000fff, int_elements,
+ GLINT_ITR(0, 1) - GLINT_ITR(0, 0)},
+ {GLINT_ITR(1, 0), 0x00000fff, int_elements,
+ GLINT_ITR(1, 1) - GLINT_ITR(1, 0)},
+ {GLINT_ITR(0, 0), 0x00000fff, int_elements,
+ GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
+ {GLINT_CTL, 0xffff0001, 1, 0}
+ };
+ int i;
+
+ netdev_dbg(netdev, "Register test\n");
+ for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
+ u32 j;
+
+ for (j = 0; j < ice_reg_list[i].elem_num; ++j) {
+ u32 mask = ice_reg_list[i].mask;
+ u32 reg = ice_reg_list[i].address +
+ (j * ice_reg_list[i].elem_size);
+
+ /* bail on failure (non-zero return) */
+ if (ice_reg_pattern_test(hw, reg, mask))
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * ice_lbtest_prepare_rings - configure Tx/Rx test rings
+ * @vsi: pointer to the VSI structure
+ *
+ * Function configures rings of a VSI for loopback test without
+ * enabling interrupts or informing the kernel about new queues.
+ *
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_prepare_rings(struct ice_vsi *vsi)
+{
+ int status;
+
+ status = ice_vsi_setup_tx_rings(vsi);
+ if (status)
+ goto err_setup_tx_ring;
+
+ status = ice_vsi_setup_rx_rings(vsi);
+ if (status)
+ goto err_setup_rx_ring;
+
+ status = ice_vsi_cfg(vsi);
+ if (status)
+ goto err_setup_rx_ring;
+
+ status = ice_vsi_start_rx_rings(vsi);
+ if (status)
+ goto err_start_rx_ring;
+
+ return status;
+
+err_start_rx_ring:
+ ice_vsi_free_rx_rings(vsi);
+err_setup_rx_ring:
+ ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+err_setup_tx_ring:
+ ice_vsi_free_tx_rings(vsi);
+
+ return status;
+}
+
+/**
+ * ice_lbtest_disable_rings - disable Tx/Rx test rings after loopback test
+ * @vsi: pointer to the VSI structure
+ *
+ * Function stops and frees VSI rings after a loopback test.
+ * Returns 0 on success, negative on failure.
+ */
+static int ice_lbtest_disable_rings(struct ice_vsi *vsi)
+{
+ int status;
+
+ status = ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, 0);
+ if (status)
+ netdev_err(vsi->netdev, "Failed to stop Tx rings, VSI %d error %d\n",
+ vsi->vsi_num, status);
+
+ status = ice_vsi_stop_rx_rings(vsi);
+ if (status)
+ netdev_err(vsi->netdev, "Failed to stop Rx rings, VSI %d error %d\n",
+ vsi->vsi_num, status);
+
+ ice_vsi_free_tx_rings(vsi);
+ ice_vsi_free_rx_rings(vsi);
+
+ return status;
+}
+
+/**
+ * ice_lbtest_create_frame - create test packet
+ * @pf: pointer to the PF structure
+ * @ret_data: allocated frame buffer
+ * @size: size of the packet data
+ *
+ * Function allocates a frame with a test pattern on specific offsets.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int ice_lbtest_create_frame(struct ice_pf *pf, u8 **ret_data, u16 size)
+{
+ u8 *data;
+
+ if (!pf)
+ return -EINVAL;
+
+ data = devm_kzalloc(&pf->pdev->dev, size, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ /* Since the ethernet test frame should always be at least
+ * 64 bytes long, fill some octets in the payload with test data.
+ */
+ memset(data, 0xFF, size);
+ data[32] = 0xDE;
+ data[42] = 0xAD;
+ data[44] = 0xBE;
+ data[46] = 0xEF;
+
+ *ret_data = data;
+
+ return 0;
+}
+
+/**
+ * ice_lbtest_check_frame - verify received loopback frame
+ * @frame: pointer to the raw packet data
+ *
+ * Function verifies received test frame with a pattern.
+ * Returns true if frame matches the pattern, false otherwise.
+ */
+static bool ice_lbtest_check_frame(u8 *frame)
+{
+ /* Validate bytes of a frame under offsets chosen earlier */
+ if (frame[32] == 0xDE &&
+ frame[42] == 0xAD &&
+ frame[44] == 0xBE &&
+ frame[46] == 0xEF &&
+ frame[48] == 0xFF)
+ return true;
+
+ return false;
+}
+
+/**
+ * ice_diag_send - send test frames to the test ring
+ * @tx_ring: pointer to the transmit ring
+ * @data: pointer to the raw packet data
+ * @size: size of the packet to send
+ *
+ * Function sends loopback packets on a test Tx ring.
+ */
+static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size)
+{
+ struct ice_tx_desc *tx_desc;
+ struct ice_tx_buf *tx_buf;
+ dma_addr_t dma;
+ u64 td_cmd;
+
+ tx_desc = ICE_TX_DESC(tx_ring, tx_ring->next_to_use);
+ tx_buf = &tx_ring->tx_buf[tx_ring->next_to_use];
+
+ dma = dma_map_single(tx_ring->dev, data, size, DMA_TO_DEVICE);
+ if (dma_mapping_error(tx_ring->dev, dma))
+ return -EINVAL;
+
+ tx_desc->buf_addr = cpu_to_le64(dma);
+
+ /* These flags are required for a descriptor to be pushed out */
+ td_cmd = (u64)(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS);
+ tx_desc->cmd_type_offset_bsz =
+ cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
+ (td_cmd << ICE_TXD_QW1_CMD_S) |
+ ((u64)0 << ICE_TXD_QW1_OFFSET_S) |
+ ((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) |
+ ((u64)0 << ICE_TXD_QW1_L2TAG1_S));
+
+ tx_buf->next_to_watch = tx_desc;
+
+ /* Force memory write to complete before letting h/w know
+ * there are new descriptors to fetch.
+ */
+ wmb();
+
+ tx_ring->next_to_use++;
+ if (tx_ring->next_to_use >= tx_ring->count)
+ tx_ring->next_to_use = 0;
+
+ writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
+
+ /* Wait until the packets get transmitted to the receive queue. */
+ usleep_range(1000, 2000);
+ dma_unmap_single(tx_ring->dev, dma, size, DMA_TO_DEVICE);
+
+ return 0;
+}
+
+#define ICE_LB_FRAME_SIZE 64
+/**
+ * ice_lbtest_receive_frames - receive and verify test frames
+ * @rx_ring: pointer to the receive ring
+ *
+ * Function receives loopback packets and verify their correctness.
+ * Returns number of received valid frames.
+ */
+static int ice_lbtest_receive_frames(struct ice_ring *rx_ring)
+{
+ struct ice_rx_buf *rx_buf;
+ int valid_frames, i;
+ u8 *received_buf;
+
+ valid_frames = 0;
+
+ for (i = 0; i < rx_ring->count; i++) {
+ union ice_32b_rx_flex_desc *rx_desc;
+
+ rx_desc = ICE_RX_DESC(rx_ring, i);
+
+ if (!(rx_desc->wb.status_error0 &
+ cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
+ continue;
+
+ rx_buf = &rx_ring->rx_buf[i];
+ received_buf = page_address(rx_buf->page);
+
+ if (ice_lbtest_check_frame(received_buf))
+ valid_frames++;
+ }
+
+ return valid_frames;
+}
+
+/**
+ * ice_loopback_test - perform a loopback test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_loopback_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
+ struct ice_pf *pf = orig_vsi->back;
+ struct ice_ring *tx_ring, *rx_ring;
+ u8 broadcast[ETH_ALEN], ret = 0;
+ int num_frames, valid_frames;
+ LIST_HEAD(tmp_list);
+ u8 *tx_frame;
+ int i;
+
+ netdev_info(netdev, "loopback test\n");
+
+ test_vsi = ice_lb_vsi_setup(pf, pf->hw.port_info);
+ if (!test_vsi) {
+ netdev_err(netdev, "Failed to create a VSI for the loopback test");
+ return 1;
+ }
+
+ test_vsi->netdev = netdev;
+ tx_ring = test_vsi->tx_rings[0];
+ rx_ring = test_vsi->rx_rings[0];
+
+ if (ice_lbtest_prepare_rings(test_vsi)) {
+ ret = 2;
+ goto lbtest_vsi_close;
+ }
+
+ if (ice_alloc_rx_bufs(rx_ring, rx_ring->count)) {
+ ret = 3;
+ goto lbtest_rings_dis;
+ }
+
+ /* Enable MAC loopback in firmware */
+ if (ice_aq_set_mac_loopback(&pf->hw, true, NULL)) {
+ ret = 4;
+ goto lbtest_mac_dis;
+ }
+
+ /* Test VSI needs to receive broadcast packets */
+ eth_broadcast_addr(broadcast);
+ if (ice_add_mac_to_list(test_vsi, &tmp_list, broadcast)) {
+ ret = 5;
+ goto lbtest_mac_dis;
+ }
+
+ if (ice_add_mac(&pf->hw, &tmp_list)) {
+ ret = 6;
+ goto free_mac_list;
+ }
+
+ if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
+ ret = 7;
+ goto remove_mac_filters;
+ }
+
+ num_frames = min_t(int, tx_ring->count, 32);
+ for (i = 0; i < num_frames; i++) {
+ if (ice_diag_send(tx_ring, tx_frame, ICE_LB_FRAME_SIZE)) {
+ ret = 8;
+ goto lbtest_free_frame;
+ }
+ }
+
+ valid_frames = ice_lbtest_receive_frames(rx_ring);
+ if (!valid_frames)
+ ret = 9;
+ else if (valid_frames != num_frames)
+ ret = 10;
+
+lbtest_free_frame:
+ devm_kfree(&pf->pdev->dev, tx_frame);
+remove_mac_filters:
+ if (ice_remove_mac(&pf->hw, &tmp_list))
+ netdev_err(netdev, "Could not remove MAC filter for the test VSI");
+free_mac_list:
+ ice_free_fltr_list(&pf->pdev->dev, &tmp_list);
+lbtest_mac_dis:
+ /* Disable MAC loopback after the test is completed. */
+ if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
+ netdev_err(netdev, "Could not disable MAC loopback\n");
+lbtest_rings_dis:
+ if (ice_lbtest_disable_rings(test_vsi))
+ netdev_err(netdev, "Could not disable test rings\n");
+lbtest_vsi_close:
+ test_vsi->netdev = NULL;
+ if (ice_vsi_release(test_vsi))
+ netdev_err(netdev, "Failed to remove the test VSI");
+
+ return ret;
+}
+
+/**
+ * ice_intr_test - perform an interrupt test on a given net_device
+ * @netdev: network interface device structure
+ *
+ * This function performs one of the self-tests required by ethtool.
+ * Returns 0 on success, non-zero on failure.
+ */
+static u64 ice_intr_test(struct net_device *netdev)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ u16 swic_old = pf->sw_int_count;
+
+ netdev_info(netdev, "interrupt test\n");
+
+ wr32(&pf->hw, GLINT_DYN_CTL(pf->oicr_idx),
+ GLINT_DYN_CTL_SW_ITR_INDX_M |
+ GLINT_DYN_CTL_INTENA_MSK_M |
+ GLINT_DYN_CTL_SWINT_TRIG_M);
+
+ usleep_range(1000, 2000);
+ return (swic_old == pf->sw_int_count);
+}
+
+/**
+ * ice_self_test - handler function for performing a self-test by ethtool
+ * @netdev: network interface device structure
+ * @eth_test: ethtool_test structure
+ * @data: required by ethtool.self_test
+ *
+ * This function is called after invoking 'ethtool -t devname' command where
+ * devname is the name of the network device on which ethtool should operate.
+ * It performs a set of self-tests to check if a device works properly.
+ */
+static void
+ice_self_test(struct net_device *netdev, struct ethtool_test *eth_test,
+ u64 *data)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ bool if_running = netif_running(netdev);
+ struct ice_pf *pf = np->vsi->back;
+
+ if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+ netdev_info(netdev, "offline testing starting\n");
+
+ set_bit(__ICE_TESTING, pf->state);
+
+ if (ice_active_vfs(pf)) {
+ dev_warn(&pf->pdev->dev,
+ "Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
+ data[ICE_ETH_TEST_REG] = 1;
+ data[ICE_ETH_TEST_EEPROM] = 1;
+ data[ICE_ETH_TEST_INTR] = 1;
+ data[ICE_ETH_TEST_LOOP] = 1;
+ data[ICE_ETH_TEST_LINK] = 1;
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ clear_bit(__ICE_TESTING, pf->state);
+ goto skip_ol_tests;
+ }
+ /* If the device is online then take it offline */
+ if (if_running)
+ /* indicate we're in test mode */
+ ice_stop(netdev);
+
+ data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+ data[ICE_ETH_TEST_EEPROM] = ice_eeprom_test(netdev);
+ data[ICE_ETH_TEST_INTR] = ice_intr_test(netdev);
+ data[ICE_ETH_TEST_LOOP] = ice_loopback_test(netdev);
+ data[ICE_ETH_TEST_REG] = ice_reg_test(netdev);
+
+ if (data[ICE_ETH_TEST_LINK] ||
+ data[ICE_ETH_TEST_EEPROM] ||
+ data[ICE_ETH_TEST_LOOP] ||
+ data[ICE_ETH_TEST_INTR] ||
+ data[ICE_ETH_TEST_REG])
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ clear_bit(__ICE_TESTING, pf->state);
+
+ if (if_running) {
+ int status = ice_open(netdev);
+
+ if (status) {
+ dev_err(&pf->pdev->dev,
+ "Could not open device %s, err %d",
+ pf->int_name, status);
+ }
+ }
+ } else {
+ /* Online tests */
+ netdev_info(netdev, "online testing starting\n");
+
+ data[ICE_ETH_TEST_LINK] = ice_link_test(netdev);
+ if (data[ICE_ETH_TEST_LINK])
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+
+ /* Offline only tests, not run in online; pass by default */
+ data[ICE_ETH_TEST_REG] = 0;
+ data[ICE_ETH_TEST_EEPROM] = 0;
+ data[ICE_ETH_TEST_INTR] = 0;
+ data[ICE_ETH_TEST_LOOP] = 0;
+ }
+
+skip_ol_tests:
+ netdev_info(netdev, "testing finished\n");
+}
+
static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
@@ -295,17 +881,17 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
ice_for_each_alloc_txq(vsi, i) {
snprintf(p, ETH_GSTRING_LEN,
- "tx-queue-%u.tx_packets", i);
+ "tx_queue_%u_packets", i);
p += ETH_GSTRING_LEN;
- snprintf(p, ETH_GSTRING_LEN, "tx-queue-%u.tx_bytes", i);
+ snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
p += ETH_GSTRING_LEN;
}
ice_for_each_alloc_rxq(vsi, i) {
snprintf(p, ETH_GSTRING_LEN,
- "rx-queue-%u.rx_packets", i);
+ "rx_queue_%u_packets", i);
p += ETH_GSTRING_LEN;
- snprintf(p, ETH_GSTRING_LEN, "rx-queue-%u.rx_bytes", i);
+ snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
p += ETH_GSTRING_LEN;
}
@@ -320,21 +906,24 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
snprintf(p, ETH_GSTRING_LEN,
- "port.tx-priority-%u-xon", i);
+ "tx_priority_%u_xon.nic", i);
p += ETH_GSTRING_LEN;
snprintf(p, ETH_GSTRING_LEN,
- "port.tx-priority-%u-xoff", i);
+ "tx_priority_%u_xoff.nic", i);
p += ETH_GSTRING_LEN;
}
for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
snprintf(p, ETH_GSTRING_LEN,
- "port.rx-priority-%u-xon", i);
+ "rx_priority_%u_xon.nic", i);
p += ETH_GSTRING_LEN;
snprintf(p, ETH_GSTRING_LEN,
- "port.rx-priority-%u-xoff", i);
+ "rx_priority_%u_xoff.nic", i);
p += ETH_GSTRING_LEN;
}
break;
+ case ETH_SS_TEST:
+ memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
+ break;
case ETH_SS_PRIV_FLAGS:
for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
snprintf(p, ETH_GSTRING_LEN, "%s",
@@ -371,6 +960,185 @@ ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
}
/**
+ * ice_set_fec_cfg - Set link FEC options
+ * @netdev: network interface device structure
+ * @req_fec: FEC mode to configure
+ */
+static int ice_set_fec_cfg(struct net_device *netdev, enum ice_fec_mode req_fec)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_aqc_set_phy_cfg_data config = { 0 };
+ struct ice_aqc_get_phy_caps_data *caps;
+ struct ice_vsi *vsi = np->vsi;
+ u8 sw_cfg_caps, sw_cfg_fec;
+ struct ice_port_info *pi;
+ enum ice_status status;
+ int err = 0;
+
+ pi = vsi->port_info;
+ if (!pi)
+ return -EOPNOTSUPP;
+
+ /* Changing the FEC parameters is not supported if not the PF VSI */
+ if (vsi->type != ICE_VSI_PF) {
+ netdev_info(netdev, "Changing FEC parameters only supported for PF VSI\n");
+ return -EOPNOTSUPP;
+ }
+
+ /* Get last SW configuration */
+ caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ if (!caps)
+ return -ENOMEM;
+
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG,
+ caps, NULL);
+ if (status) {
+ err = -EAGAIN;
+ goto done;
+ }
+
+ /* Copy SW configuration returned from PHY caps to PHY config */
+ ice_copy_phy_caps_to_cfg(caps, &config);
+ sw_cfg_caps = caps->caps;
+ sw_cfg_fec = caps->link_fec_options;
+
+ /* Get toloplogy caps, then copy PHY FEC topoloy caps to PHY config */
+ memset(caps, 0, sizeof(*caps));
+
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+ caps, NULL);
+ if (status) {
+ err = -EAGAIN;
+ goto done;
+ }
+
+ config.caps |= (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC);
+ config.link_fec_opt = caps->link_fec_options;
+
+ ice_cfg_phy_fec(&config, req_fec);
+
+ /* If FEC mode has changed, then set PHY configuration and enable AN. */
+ if ((config.caps & ICE_AQ_PHY_ENA_AUTO_FEC) !=
+ (sw_cfg_caps & ICE_AQC_PHY_EN_AUTO_FEC) ||
+ config.link_fec_opt != sw_cfg_fec) {
+ if (caps->caps & ICE_AQC_PHY_AN_MODE)
+ config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
+
+ status = ice_aq_set_phy_cfg(pi->hw, pi->lport, &config, NULL);
+
+ if (status)
+ err = -EAGAIN;
+ }
+
+done:
+ devm_kfree(&vsi->back->pdev->dev, caps);
+ return err;
+}
+
+/**
+ * ice_set_fecparam - Set FEC link options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_set_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_vsi *vsi = np->vsi;
+ enum ice_fec_mode fec;
+
+ switch (fecparam->fec) {
+ case ETHTOOL_FEC_AUTO:
+ fec = ICE_FEC_AUTO;
+ break;
+ case ETHTOOL_FEC_RS:
+ fec = ICE_FEC_RS;
+ break;
+ case ETHTOOL_FEC_BASER:
+ fec = ICE_FEC_BASER;
+ break;
+ case ETHTOOL_FEC_OFF:
+ case ETHTOOL_FEC_NONE:
+ fec = ICE_FEC_NONE;
+ break;
+ default:
+ dev_warn(&vsi->back->pdev->dev, "Unsupported FEC mode: %d\n",
+ fecparam->fec);
+ return -EINVAL;
+ }
+
+ return ice_set_fec_cfg(netdev, fec);
+}
+
+/**
+ * ice_get_fecparam - Get link FEC options
+ * @netdev: network interface device structure
+ * @fecparam: Ethtool structure to retrieve FEC parameters
+ */
+static int
+ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_aqc_get_phy_caps_data *caps;
+ struct ice_link_status *link_info;
+ struct ice_vsi *vsi = np->vsi;
+ struct ice_port_info *pi;
+ enum ice_status status;
+ int err = 0;
+
+ pi = vsi->port_info;
+
+ if (!pi)
+ return -EOPNOTSUPP;
+ link_info = &pi->phy.link_info;
+
+ /* Set FEC mode based on negotiated link info */
+ switch (link_info->fec_info) {
+ case ICE_AQ_LINK_25G_KR_FEC_EN:
+ fecparam->active_fec = ETHTOOL_FEC_BASER;
+ break;
+ case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+ /* fall through */
+ case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+ fecparam->active_fec = ETHTOOL_FEC_RS;
+ break;
+ default:
+ fecparam->active_fec = ETHTOOL_FEC_OFF;
+ break;
+ }
+
+ caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ if (!caps)
+ return -ENOMEM;
+
+ status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
+ caps, NULL);
+ if (status) {
+ err = -EAGAIN;
+ goto done;
+ }
+
+ /* Set supported/configured FEC modes based on PHY capability */
+ if (caps->caps & ICE_AQC_PHY_EN_AUTO_FEC)
+ fecparam->fec |= ETHTOOL_FEC_AUTO;
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+ fecparam->fec |= ETHTOOL_FEC_BASER;
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+ fecparam->fec |= ETHTOOL_FEC_RS;
+ if (caps->link_fec_options == 0)
+ fecparam->fec |= ETHTOOL_FEC_OFF;
+
+done:
+ devm_kfree(&vsi->back->pdev->dev, caps);
+ return err;
+}
+
+/**
* ice_get_priv_flags - report device private flags
* @netdev: network interface device structure
*
@@ -433,10 +1201,11 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
bitmap_xor(change_flags, pf->flags, orig_flags, ICE_PF_FLAGS_NBITS);
- if (test_bit(ICE_FLAG_DISABLE_FW_LLDP, change_flags)) {
- if (test_bit(ICE_FLAG_DISABLE_FW_LLDP, pf->flags)) {
+ if (test_bit(ICE_FLAG_ENABLE_FW_LLDP, change_flags)) {
+ if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags)) {
enum ice_status status;
+ /* Disable FW LLDP engine */
status = ice_aq_cfg_lldp_mib_change(&pf->hw, false,
NULL);
/* If unregistering for LLDP events fails, this is
@@ -450,7 +1219,7 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
/* The AQ call to stop the FW LLDP agent will generate
* an error if the agent is already stopped.
*/
- status = ice_aq_stop_lldp(&pf->hw, true, NULL);
+ status = ice_aq_stop_lldp(&pf->hw, true, true, NULL);
if (status)
dev_warn(&pf->pdev->dev,
"Fail to stop LLDP agent\n");
@@ -458,9 +1227,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
* will likely not need DCB, so failure to init is
* not a concern of ethtool
*/
- status = ice_init_pf_dcb(pf);
+ status = ice_init_pf_dcb(pf, true);
if (status)
dev_warn(&pf->pdev->dev, "Fail to init DCB\n");
+
+ /* Forward LLDP packets to default VSI so that they
+ * are passed up the stack
+ */
+ ice_cfg_sw_lldp(vsi, false, true);
} else {
enum ice_status status;
bool dcbx_agent_status;
@@ -468,12 +1242,12 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
/* AQ command to start FW LLDP agent will return an
* error if the agent is already started
*/
- status = ice_aq_start_lldp(&pf->hw, NULL);
+ status = ice_aq_start_lldp(&pf->hw, true, NULL);
if (status)
dev_warn(&pf->pdev->dev,
"Fail to start LLDP Agent\n");
- /* AQ command to start FW DCBx agent will fail if
+ /* AQ command to start FW DCBX agent will fail if
* the agent is already started
*/
status = ice_aq_start_stop_dcbx(&pf->hw, true,
@@ -491,15 +1265,14 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
* registration/init failed but do not return error
* state to ethtool
*/
- status = ice_aq_cfg_lldp_mib_change(&pf->hw, false,
- NULL);
- if (status)
- dev_dbg(&pf->pdev->dev,
- "Fail to reg for MIB change\n");
-
- status = ice_init_pf_dcb(pf);
+ status = ice_init_pf_dcb(pf, true);
if (status)
dev_dbg(&pf->pdev->dev, "Fail to init DCB\n");
+
+ /* Remove rule to direct LLDP packets to default VSI.
+ * The FW LLDP engine will now be consuming them.
+ */
+ ice_cfg_sw_lldp(vsi, false, false);
}
}
clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
@@ -529,6 +1302,8 @@ static int ice_get_sset_count(struct net_device *netdev, int sset)
* not safe.
*/
return ICE_ALL_STATS_LEN(netdev);
+ case ETH_SS_TEST:
+ return ICE_TEST_LEN;
case ETH_SS_PRIV_FLAGS:
return ICE_PRIV_FLAG_ARRAY_SIZE;
default:
@@ -628,7 +1403,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_100M_SGMII) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100baseT_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
100baseT_Full);
}
@@ -636,14 +1412,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_1G_SGMII) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseT_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseT_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_KX) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseKX_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseKX_Full);
}
@@ -651,14 +1429,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_1000BASE_LX) {
ethtool_link_ksettings_add_link_mode(ks, supported,
1000baseX_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_1000MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
1000baseX_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_T) {
ethtool_link_ksettings_add_link_mode(ks, supported,
2500baseT_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
2500baseT_Full);
}
@@ -666,7 +1446,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_2500BASE_KX) {
ethtool_link_ksettings_add_link_mode(ks, supported,
2500baseX_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_2500MB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
2500baseX_Full);
}
@@ -674,7 +1455,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_5GBASE_KR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
5000baseT_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_5GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
5000baseT_Full);
}
@@ -684,28 +1466,32 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_10G_SFI_C2C) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseT_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseT_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_KR_CR1) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseKR_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseKR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_SR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseSR_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseSR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_10GBASE_LR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
10000baseLR_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_10GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
10000baseLR_Full);
}
@@ -717,7 +1503,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_25G_AUI_C2C) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseCR_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
25000baseCR_Full);
}
@@ -725,7 +1512,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_LR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseSR_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
25000baseSR_Full);
}
@@ -734,14 +1522,16 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_25GBASE_KR1) {
ethtool_link_ksettings_add_link_mode(ks, supported,
25000baseKR_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_25GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
25000baseKR_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_KR4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseKR4_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
40000baseKR4_Full);
}
@@ -750,21 +1540,24 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_40G_XLAUI) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseCR4_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
40000baseCR4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_SR4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseSR4_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
40000baseSR4_Full);
}
if (phy_types_low & ICE_PHY_TYPE_LOW_40GBASE_LR4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
40000baseLR4_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_40GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
40000baseLR4_Full);
}
@@ -779,7 +1572,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_50G_AUI1) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseCR2_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
50000baseCR2_Full);
}
@@ -787,7 +1581,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_KR_PAM4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseKR2_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
50000baseKR2_Full);
}
@@ -797,7 +1592,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_50GBASE_LR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
50000baseSR2_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_50GB)
ethtool_link_ksettings_add_link_mode(ks, advertising,
50000baseSR2_Full);
}
@@ -814,7 +1610,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_high & ICE_PHY_TYPE_HIGH_100G_AUI2) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseCR4_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
need_add_adv_mode = true;
}
if (need_add_adv_mode) {
@@ -826,7 +1623,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_SR2) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseSR4_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
need_add_adv_mode = true;
}
if (need_add_adv_mode) {
@@ -838,7 +1636,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_low & ICE_PHY_TYPE_LOW_100GBASE_DR) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseLR4_ER4_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
need_add_adv_mode = true;
}
if (need_add_adv_mode) {
@@ -851,7 +1650,8 @@ ice_phy_type_to_ethtool(struct net_device *netdev,
phy_types_high & ICE_PHY_TYPE_HIGH_100GBASE_KR2_PAM4) {
ethtool_link_ksettings_add_link_mode(ks, supported,
100000baseKR4_Full);
- if (hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
+ if (!hw_link_info->req_speeds ||
+ hw_link_info->req_speeds & ICE_AQ_LINK_SPEED_100GB)
need_add_adv_mode = true;
}
if (need_add_adv_mode)
@@ -1275,6 +2075,7 @@ ice_get_link_ksettings(struct net_device *netdev,
struct ethtool_link_ksettings *ks)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_aqc_get_phy_caps_data *caps;
struct ice_link_status *hw_link_info;
struct ice_vsi *vsi = np->vsi;
@@ -1345,6 +2146,40 @@ ice_get_link_ksettings(struct net_device *netdev,
break;
}
+ caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ if (!caps)
+ goto done;
+
+ if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_TOPO_CAP,
+ caps, NULL))
+ netdev_info(netdev, "Get phy capability failed.\n");
+
+ /* Set supported FEC modes based on PHY capability */
+ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_NONE);
+
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_EN ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_CLAUSE74_EN)
+ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_BASER);
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_CLAUSE91_EN)
+ ethtool_link_ksettings_add_link_mode(ks, supported, FEC_RS);
+
+ if (ice_aq_get_phy_caps(vsi->port_info, false, ICE_AQC_REPORT_SW_CFG,
+ caps, NULL))
+ netdev_info(netdev, "Get phy capability failed.\n");
+
+ /* Set advertised FEC modes based on PHY capability */
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_NONE);
+
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+ ethtool_link_ksettings_add_link_mode(ks, advertising,
+ FEC_BASER);
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+ ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
+
+done:
+ devm_kfree(&vsi->back->pdev->dev, caps);
return 0;
}
@@ -2371,8 +3206,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec,
if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) {
rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high;
- wr32(&pf->hw, GLINT_RATE(vsi->hw_base_vector +
- rc->ring->q_vector->v_idx),
+ wr32(&pf->hw, GLINT_RATE(rc->ring->q_vector->reg_idx),
ice_intrl_usec_to_reg(ec->rx_coalesce_usecs_high,
pf->hw.intrl_gran));
}
@@ -2533,6 +3367,7 @@ static const struct ethtool_ops ice_ethtool_ops = {
.get_regs = ice_get_regs,
.get_msglevel = ice_get_msglevel,
.set_msglevel = ice_set_msglevel,
+ .self_test = ice_self_test,
.get_link = ethtool_op_get_link,
.get_eeprom_len = ice_get_eeprom_len,
.get_eeprom = ice_get_eeprom,
@@ -2557,6 +3392,8 @@ static const struct ethtool_ops ice_ethtool_ops = {
.get_ts_info = ethtool_op_get_ts_info,
.get_per_queue_coalesce = ice_get_per_q_coalesce,
.set_per_queue_coalesce = ice_set_per_q_coalesce,
+ .get_fecparam = ice_get_fecparam,
+ .set_fecparam = ice_set_fecparam,
};
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index ec25f26069b0..6c5ce05742b1 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -6,6 +6,9 @@
#ifndef _ICE_HW_AUTOGEN_H_
#define _ICE_HW_AUTOGEN_H_
+#define PF0INT_ITR_0(_i) (0x03000004 + ((_i) * 4096))
+#define PF0INT_ITR_1(_i) (0x03000008 + ((_i) * 4096))
+#define PF0INT_ITR_2(_i) (0x0300000C + ((_i) * 4096))
#define QTX_COMM_DBELL(_DBQM) (0x002C0000 + ((_DBQM) * 4))
#define QTX_COMM_HEAD(_DBQM) (0x000E0000 + ((_DBQM) * 4))
#define QTX_COMM_HEAD_HEAD_S 0
@@ -155,6 +158,7 @@
#define PFINT_OICR_HMC_ERR_M BIT(26)
#define PFINT_OICR_PE_CRITERR_M BIT(28)
#define PFINT_OICR_VFLR_M BIT(29)
+#define PFINT_OICR_SWINT_M BIT(31)
#define PFINT_OICR_CTL 0x0016CA80
#define PFINT_OICR_CTL_MSIX_INDX_M ICE_M(0x7FF, 0)
#define PFINT_OICR_CTL_ITR_INDX_S 11
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index fbf1eba0cc2a..35c5ff910afa 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -137,6 +137,8 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
* for PF or EMP this field should be set to zero
*/
switch (vsi->type) {
+ case ICE_VSI_LB:
+ /* fall through */
case ICE_VSI_PF:
tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
break;
@@ -251,6 +253,10 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi)
if (!vsi->rx_rings)
goto err_rxrings;
+ /* There is no need to allocate q_vectors for a loopback VSI. */
+ if (vsi->type == ICE_VSI_LB)
+ return 0;
+
/* allocate memory for q_vector pointers */
vsi->q_vectors = devm_kcalloc(&pf->pdev->dev, vsi->num_q_vectors,
sizeof(*vsi->q_vectors), GFP_KERNEL);
@@ -275,6 +281,8 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
{
switch (vsi->type) {
case ICE_VSI_PF:
+ /* fall through */
+ case ICE_VSI_LB:
vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC;
vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
break;
@@ -313,10 +321,14 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
vsi->alloc_rxq = vf->num_vf_qs;
/* pf->num_vf_msix includes (VF miscellaneous vector +
* data queue interrupts). Since vsi->num_q_vectors is number
- * of queues vectors, subtract 1 from the original vector
- * count
+ * of queues vectors, subtract 1 (ICE_NONQ_VECS_VF) from the
+ * original vector count
*/
- vsi->num_q_vectors = pf->num_vf_msix - 1;
+ vsi->num_q_vectors = pf->num_vf_msix - ICE_NONQ_VECS_VF;
+ break;
+ case ICE_VSI_LB:
+ vsi->alloc_txq = 1;
+ vsi->alloc_rxq = 1;
break;
default:
dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
@@ -516,6 +528,10 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type type, u16 vf_id)
if (ice_vsi_alloc_arrays(vsi))
goto err_rings;
break;
+ case ICE_VSI_LB:
+ if (ice_vsi_alloc_arrays(vsi))
+ goto err_rings;
+ break;
default:
dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
goto unlock_pf;
@@ -732,6 +748,8 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
BIT(cap->rss_table_entry_width));
vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI;
break;
+ case ICE_VSI_LB:
+ break;
default:
dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n",
vsi->type);
@@ -924,6 +942,9 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
break;
+ case ICE_VSI_LB:
+ dev_dbg(&pf->pdev->dev, "Unsupported VSI type %d\n", vsi->type);
+ return;
default:
dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
return;
@@ -955,6 +976,8 @@ static int ice_vsi_init(struct ice_vsi *vsi)
ctxt->info = vsi->info;
switch (vsi->type) {
+ case ICE_VSI_LB:
+ /* fall through */
case ICE_VSI_PF:
ctxt->flags = ICE_AQ_VSI_TYPE_PF;
break;
@@ -1145,61 +1168,32 @@ err_out:
static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
- int num_q_vectors = 0;
+ u16 num_q_vectors;
+
+ /* SRIOV doesn't grab irq_tracker entries for each VSI */
+ if (vsi->type == ICE_VSI_VF)
+ return 0;
- if (vsi->sw_base_vector || vsi->hw_base_vector) {
- dev_dbg(&pf->pdev->dev, "VSI %d has non-zero HW base vector %d or SW base vector %d\n",
- vsi->vsi_num, vsi->hw_base_vector, vsi->sw_base_vector);
+ if (vsi->base_vector) {
+ dev_dbg(&pf->pdev->dev, "VSI %d has non-zero base vector %d\n",
+ vsi->vsi_num, vsi->base_vector);
return -EEXIST;
}
if (!test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
return -ENOENT;
- switch (vsi->type) {
- case ICE_VSI_PF:
- num_q_vectors = vsi->num_q_vectors;
- /* reserve slots from OS requested IRQs */
- vsi->sw_base_vector = ice_get_res(pf, pf->sw_irq_tracker,
- num_q_vectors, vsi->idx);
- if (vsi->sw_base_vector < 0) {
- dev_err(&pf->pdev->dev,
- "Failed to get tracking for %d SW vectors for VSI %d, err=%d\n",
- num_q_vectors, vsi->vsi_num,
- vsi->sw_base_vector);
- return -ENOENT;
- }
- pf->num_avail_sw_msix -= num_q_vectors;
-
- /* reserve slots from HW interrupts */
- vsi->hw_base_vector = ice_get_res(pf, pf->hw_irq_tracker,
- num_q_vectors, vsi->idx);
- break;
- case ICE_VSI_VF:
- /* take VF misc vector and data vectors into account */
- num_q_vectors = pf->num_vf_msix;
- /* For VF VSI, reserve slots only from HW interrupts */
- vsi->hw_base_vector = ice_get_res(pf, pf->hw_irq_tracker,
- num_q_vectors, vsi->idx);
- break;
- default:
- dev_warn(&pf->pdev->dev, "Unknown VSI type %d\n", vsi->type);
- break;
- }
-
- if (vsi->hw_base_vector < 0) {
+ num_q_vectors = vsi->num_q_vectors;
+ /* reserve slots from OS requested IRQs */
+ vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
+ vsi->idx);
+ if (vsi->base_vector < 0) {
dev_err(&pf->pdev->dev,
- "Failed to get tracking for %d HW vectors for VSI %d, err=%d\n",
- num_q_vectors, vsi->vsi_num, vsi->hw_base_vector);
- if (vsi->type != ICE_VSI_VF) {
- ice_free_res(pf->sw_irq_tracker,
- vsi->sw_base_vector, vsi->idx);
- pf->num_avail_sw_msix += num_q_vectors;
- }
+ "Failed to get tracking for %d vectors for VSI %d, err=%d\n",
+ num_q_vectors, vsi->vsi_num, vsi->base_vector);
return -ENOENT;
}
-
- pf->num_avail_hw_msix -= num_q_vectors;
+ pf->num_avail_sw_msix -= num_q_vectors;
return 0;
}
@@ -1842,8 +1836,73 @@ ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
}
/**
+ * ice_cfg_txq_interrupt - configure interrupt on Tx queue
+ * @vsi: the VSI being configured
+ * @txq: Tx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Tx queue by associating Tx queue to MSI-X vector
+ * within the function space.
+ */
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+#else
+static void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx)
+#endif /* CONFIG_PCI_IOV */
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ itr_idx = (itr_idx << QINT_TQCTL_ITR_INDX_S) & QINT_TQCTL_ITR_INDX_M;
+
+ val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
+ ((msix_idx << QINT_TQCTL_MSIX_INDX_S) & QINT_TQCTL_MSIX_INDX_M);
+
+ wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+}
+
+/**
+ * ice_cfg_rxq_interrupt - configure interrupt on Rx queue
+ * @vsi: the VSI being configured
+ * @rxq: Rx queue being mapped to MSI-X vector
+ * @msix_idx: MSI-X vector index within the function
+ * @itr_idx: ITR index of the interrupt cause
+ *
+ * Configure interrupt on Rx queue by associating Rx queue to MSI-X vector
+ * within the function space.
+ */
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+#else
+static void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx)
+#endif /* CONFIG_PCI_IOV */
+{
+ struct ice_pf *pf = vsi->back;
+ struct ice_hw *hw = &pf->hw;
+ u32 val;
+
+ itr_idx = (itr_idx << QINT_RQCTL_ITR_INDX_S) & QINT_RQCTL_ITR_INDX_M;
+
+ val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
+ ((msix_idx << QINT_RQCTL_MSIX_INDX_S) & QINT_RQCTL_MSIX_INDX_M);
+
+ wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+
+ ice_flush(hw);
+}
+
+/**
* ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW
* @vsi: the VSI being configured
+ *
+ * This configures MSIX mode interrupts for the PF VSI, and should not be used
+ * for the VF VSI.
*/
void ice_vsi_cfg_msix(struct ice_vsi *vsi)
{
@@ -1873,43 +1932,17 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi)
* tracked for this PF.
*/
for (q = 0; q < q_vector->num_ring_tx; q++) {
- int itr_idx = (q_vector->tx.itr_idx <<
- QINT_TQCTL_ITR_INDX_S) &
- QINT_TQCTL_ITR_INDX_M;
- u32 val;
-
- if (vsi->type == ICE_VSI_VF)
- val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
- (((i + 1) << QINT_TQCTL_MSIX_INDX_S) &
- QINT_TQCTL_MSIX_INDX_M);
- else
- val = QINT_TQCTL_CAUSE_ENA_M | itr_idx |
- ((reg_idx << QINT_TQCTL_MSIX_INDX_S) &
- QINT_TQCTL_MSIX_INDX_M);
- wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), val);
+ ice_cfg_txq_interrupt(vsi, txq, reg_idx,
+ q_vector->tx.itr_idx);
txq++;
}
for (q = 0; q < q_vector->num_ring_rx; q++) {
- int itr_idx = (q_vector->rx.itr_idx <<
- QINT_RQCTL_ITR_INDX_S) &
- QINT_RQCTL_ITR_INDX_M;
- u32 val;
-
- if (vsi->type == ICE_VSI_VF)
- val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
- (((i + 1) << QINT_RQCTL_MSIX_INDX_S) &
- QINT_RQCTL_MSIX_INDX_M);
- else
- val = QINT_RQCTL_CAUSE_ENA_M | itr_idx |
- ((reg_idx << QINT_RQCTL_MSIX_INDX_S) &
- QINT_RQCTL_MSIX_INDX_M);
- wr32(hw, QINT_RQCTL(vsi->rxq_map[rxq]), val);
+ ice_cfg_rxq_interrupt(vsi, rxq, reg_idx,
+ q_vector->rx.itr_idx);
rxq++;
}
}
-
- ice_flush(hw);
}
/**
@@ -2024,6 +2057,19 @@ int ice_vsi_stop_rx_rings(struct ice_vsi *vsi)
}
/**
+ * ice_trigger_sw_intr - trigger a software interrupt
+ * @hw: pointer to the HW structure
+ * @q_vector: interrupt vector to trigger the software interrupt for
+ */
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector)
+{
+ wr32(hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ (ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S) |
+ GLINT_DYN_CTL_SWINT_TRIG_M |
+ GLINT_DYN_CTL_INTENA_M);
+}
+
+/**
* ice_vsi_stop_tx_rings - Disable Tx rings
* @vsi: the VSI being configured
* @rst_src: reset source
@@ -2070,8 +2116,9 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
break;
for (i = 0; i < vsi->tc_cfg.tc_info[tc].qcount_tx; i++) {
- if (!rings || !rings[q_idx] ||
- !rings[q_idx]->q_vector) {
+ struct ice_q_vector *q_vector;
+
+ if (!rings || !rings[q_idx]) {
err = -EINVAL;
goto err_out;
}
@@ -2091,9 +2138,10 @@ ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
/* trigger a software interrupt for the vector
* associated to the queue to schedule NAPI handler
*/
- wr32(hw, GLINT_DYN_CTL(rings[i]->q_vector->reg_idx),
- GLINT_DYN_CTL_SWINT_TRIG_M |
- GLINT_DYN_CTL_INTENA_MSK_M);
+ q_vector = rings[i]->q_vector;
+ if (q_vector)
+ ice_trigger_sw_intr(hw, q_vector);
+
q_idx++;
}
status = ice_dis_vsi_txq(vsi->port_info, vsi->idx, tc,
@@ -2234,7 +2282,14 @@ ice_vsi_set_q_vectors_reg_idx(struct ice_vsi *vsi)
goto clear_reg_idx;
}
- q_vector->reg_idx = q_vector->v_idx + vsi->hw_base_vector;
+ if (vsi->type == ICE_VSI_VF) {
+ struct ice_vf *vf = &vsi->back->vf[vsi->vf_id];
+
+ q_vector->reg_idx = ice_calc_vf_reg_idx(vf, q_vector);
+ } else {
+ q_vector->reg_idx =
+ q_vector->v_idx + vsi->base_vector;
+ }
}
return 0;
@@ -2290,6 +2345,56 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
}
+#define ICE_ETH_P_LLDP 0x88CC
+
+/**
+ * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
+ * @vsi: the VSI being configured
+ * @tx: bool to determine Tx or Rx rule
+ * @create: bool to determine create or remove Rule
+ */
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
+{
+ struct ice_fltr_list_entry *list;
+ struct ice_pf *pf = vsi->back;
+ LIST_HEAD(tmp_add_list);
+ enum ice_status status;
+
+ list = devm_kzalloc(&pf->pdev->dev, sizeof(*list), GFP_KERNEL);
+ if (!list)
+ return;
+
+ list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
+ list->fltr_info.vsi_handle = vsi->idx;
+ list->fltr_info.l_data.ethertype_mac.ethertype = ICE_ETH_P_LLDP;
+
+ if (tx) {
+ list->fltr_info.fltr_act = ICE_DROP_PACKET;
+ list->fltr_info.flag = ICE_FLTR_TX;
+ list->fltr_info.src_id = ICE_SRC_ID_VSI;
+ } else {
+ list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
+ list->fltr_info.flag = ICE_FLTR_RX;
+ list->fltr_info.src_id = ICE_SRC_ID_LPORT;
+ }
+
+ INIT_LIST_HEAD(&list->list_entry);
+ list_add(&list->list_entry, &tmp_add_list);
+
+ if (create)
+ status = ice_add_eth_mac(&pf->hw, &tmp_add_list);
+ else
+ status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
+
+ if (status)
+ dev_err(&pf->pdev->dev,
+ "Fail %s %s LLDP rule on VSI %i error: %d\n",
+ create ? "adding" : "removing", tx ? "TX" : "RX",
+ vsi->vsi_num, status);
+
+ ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+}
+
/**
* ice_vsi_setup - Set up a VSI by a given type
* @pf: board private structure
@@ -2310,6 +2415,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
struct device *dev = &pf->pdev->dev;
+ enum ice_status status;
struct ice_vsi *vsi;
int ret, i;
@@ -2389,23 +2495,24 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
if (ret)
goto unroll_alloc_q_vector;
- /* Setup Vector base only during VF init phase or when VF asks
- * for more vectors than assigned number. In all other cases,
- * assign hw_base_vector to the value given earlier.
- */
- if (test_bit(ICE_VF_STATE_CFG_INTR, pf->vf[vf_id].vf_states)) {
- ret = ice_vsi_setup_vector_base(vsi);
- if (ret)
- goto unroll_vector_base;
- } else {
- vsi->hw_base_vector = pf->vf[vf_id].first_vector_idx;
- }
ret = ice_vsi_set_q_vectors_reg_idx(vsi);
if (ret)
goto unroll_vector_base;
pf->q_left_tx -= vsi->alloc_txq;
pf->q_left_rx -= vsi->alloc_rxq;
+
+ /* Do not exit if configuring RSS had an issue, at least
+ * receive traffic on first queue. Hence no need to capture
+ * return value
+ */
+ if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+ ice_vsi_cfg_rss_lut_key(vsi);
+ break;
+ case ICE_VSI_LB:
+ ret = ice_vsi_alloc_rings(vsi);
+ if (ret)
+ goto unroll_vsi_init;
break;
default:
/* clean up the resources and exit */
@@ -2416,12 +2523,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
for (i = 0; i < vsi->tc_cfg.numtc; i++)
max_txqs[i] = pf->num_lan_tx;
- ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
- max_txqs);
- if (ret) {
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+ if (status) {
dev_err(&pf->pdev->dev,
"VSI %d failed lan queue config, error %d\n",
- vsi->vsi_num, ret);
+ vsi->vsi_num, status);
goto unroll_vector_base;
}
@@ -2430,19 +2537,28 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
* out PAUSE or PFC frames. If enabled, FW can still send FC frames.
* The rule is added once for PF VSI in order to create appropriate
* recipe, since VSI/VSI list is ignored with drop action...
+ * Also add rules to handle LLDP Tx and Rx packets. Tx LLDP packets
+ * need to be dropped so that VFs cannot send LLDP packets to reconfig
+ * DCB settings in the HW. Also, if the FW DCBX engine is not running
+ * then Rx LLDP packets need to be redirected up the stack.
*/
- if (vsi->type == ICE_VSI_PF)
+ if (vsi->type == ICE_VSI_PF) {
ice_vsi_add_rem_eth_mac(vsi, true);
+ /* Tx LLDP packets */
+ ice_cfg_sw_lldp(vsi, true, true);
+
+ /* Rx LLDP packets */
+ if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags))
+ ice_cfg_sw_lldp(vsi, false, true);
+ }
+
return vsi;
unroll_vector_base:
/* reclaim SW interrupts back to the common pool */
- ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx);
+ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
pf->num_avail_sw_msix += vsi->num_q_vectors;
- /* reclaim HW interrupt back to the common pool */
- ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, vsi->idx);
- pf->num_avail_hw_msix += vsi->num_q_vectors;
unroll_alloc_q_vector:
ice_vsi_free_q_vectors(vsi);
unroll_vsi_init:
@@ -2463,17 +2579,17 @@ unroll_get_qs:
static void ice_vsi_release_msix(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
- u16 vector = vsi->hw_base_vector;
struct ice_hw *hw = &pf->hw;
u32 txq = 0;
u32 rxq = 0;
int i, q;
- for (i = 0; i < vsi->num_q_vectors; i++, vector++) {
+ for (i = 0; i < vsi->num_q_vectors; i++) {
struct ice_q_vector *q_vector = vsi->q_vectors[i];
+ u16 reg_idx = q_vector->reg_idx;
- wr32(hw, GLINT_ITR(ICE_IDX_ITR0, vector), 0);
- wr32(hw, GLINT_ITR(ICE_IDX_ITR1, vector), 0);
+ wr32(hw, GLINT_ITR(ICE_IDX_ITR0, reg_idx), 0);
+ wr32(hw, GLINT_ITR(ICE_IDX_ITR1, reg_idx), 0);
for (q = 0; q < q_vector->num_ring_tx; q++) {
wr32(hw, QINT_TQCTL(vsi->txq_map[txq]), 0);
txq++;
@@ -2495,7 +2611,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi)
void ice_vsi_free_irq(struct ice_vsi *vsi)
{
struct ice_pf *pf = vsi->back;
- int base = vsi->sw_base_vector;
+ int base = vsi->base_vector;
if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags)) {
int i;
@@ -2591,11 +2707,11 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
int count = 0;
int i;
- if (!res || index >= res->num_entries)
+ if (!res || index >= res->end)
return -EINVAL;
id |= ICE_RES_VALID_BIT;
- for (i = index; i < res->num_entries && res->list[i] == id; i++) {
+ for (i = index; i < res->end && res->list[i] == id; i++) {
res->list[i] = 0;
count++;
}
@@ -2613,10 +2729,9 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
*/
static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
{
- int start = res->search_hint;
- int end = start;
+ int start = 0, end = 0;
- if ((start + needed) > res->num_entries)
+ if (needed > res->end)
return -ENOMEM;
id |= ICE_RES_VALID_BIT;
@@ -2625,7 +2740,7 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
/* skip already allocated entries */
if (res->list[end++] & ICE_RES_VALID_BIT) {
start = end;
- if ((start + needed) > res->num_entries)
+ if ((start + needed) > res->end)
break;
}
@@ -2636,13 +2751,9 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
while (i != end)
res->list[i++] = id;
- if (end == res->num_entries)
- end = 0;
-
- res->search_hint = end;
return start;
}
- } while (1);
+ } while (end < res->end);
return -ENOMEM;
}
@@ -2654,16 +2765,11 @@ static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
* @needed: size of the block needed
* @id: identifier to track owner
*
- * Returns the base item index of the block, or -ENOMEM for error
- * The search_hint trick and lack of advanced fit-finding only works
- * because we're highly likely to have all the same sized requests.
- * Linear search time and any fragmentation should be minimal.
+ * Returns the base item index of the block, or negative for error
*/
int
ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
{
- int ret;
-
if (!res || !pf)
return -EINVAL;
@@ -2674,16 +2780,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
return -EINVAL;
}
- /* search based on search_hint */
- ret = ice_search_res(res, needed, id);
-
- if (ret < 0) {
- /* previous search failed. Reset search hint and try again */
- res->search_hint = 0;
- ret = ice_search_res(res, needed, id);
- }
-
- return ret;
+ return ice_search_res(res, needed, id);
}
/**
@@ -2692,7 +2789,7 @@ ice_get_res(struct ice_pf *pf, struct ice_res_tracker *res, u16 needed, u16 id)
*/
void ice_vsi_dis_irq(struct ice_vsi *vsi)
{
- int base = vsi->sw_base_vector;
+ int base = vsi->base_vector;
struct ice_pf *pf = vsi->back;
struct ice_hw *hw = &pf->hw;
u32 val;
@@ -2738,6 +2835,21 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi)
}
/**
+ * ice_napi_del - Remove NAPI handler for the VSI
+ * @vsi: VSI for which NAPI handler is to be removed
+ */
+void ice_napi_del(struct ice_vsi *vsi)
+{
+ int v_idx;
+
+ if (!vsi->netdev)
+ return;
+
+ ice_for_each_q_vector(vsi, v_idx)
+ netif_napi_del(&vsi->q_vectors[v_idx]->napi);
+}
+
+/**
* ice_vsi_release - Delete a VSI and free its resources
* @vsi: the VSI being removed
*
@@ -2745,60 +2857,61 @@ void ice_vsi_dis_irq(struct ice_vsi *vsi)
*/
int ice_vsi_release(struct ice_vsi *vsi)
{
- struct ice_vf *vf = NULL;
struct ice_pf *pf;
if (!vsi->back)
return -ENODEV;
pf = vsi->back;
- if (vsi->type == ICE_VSI_VF)
- vf = &pf->vf[vsi->vf_id];
- /* do not unregister and free netdevs while driver is in the reset
- * recovery pending state. Since reset/rebuild happens through PF
- * service task workqueue, its not a good idea to unregister netdev
- * that is associated to the PF that is running the work queue items
- * currently. This is done to avoid check_flush_dependency() warning
- * on this wq
+ /* do not unregister while driver is in the reset recovery pending
+ * state. Since reset/rebuild happens through PF service task workqueue,
+ * it's not a good idea to unregister netdev that is associated to the
+ * PF that is running the work queue items currently. This is done to
+ * avoid check_flush_dependency() warning on this wq
*/
- if (vsi->netdev && !ice_is_reset_in_progress(pf->state)) {
- ice_napi_del(vsi);
+ if (vsi->netdev && !ice_is_reset_in_progress(pf->state))
unregister_netdev(vsi->netdev);
- free_netdev(vsi->netdev);
- vsi->netdev = NULL;
- }
if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
ice_rss_clean(vsi);
/* Disable VSI and free resources */
- ice_vsi_dis_irq(vsi);
+ if (vsi->type != ICE_VSI_LB)
+ ice_vsi_dis_irq(vsi);
ice_vsi_close(vsi);
- /* reclaim interrupt vectors back to PF */
+ /* SR-IOV determines needed MSIX resources all at once instead of per
+ * VSI since when VFs are spawned we know how many VFs there are and how
+ * many interrupts each VF needs. SR-IOV MSIX resources are also
+ * cleared in the same manner.
+ */
if (vsi->type != ICE_VSI_VF) {
/* reclaim SW interrupts back to the common pool */
- ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx);
+ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
pf->num_avail_sw_msix += vsi->num_q_vectors;
- /* reclaim HW interrupts back to the common pool */
- ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector, vsi->idx);
- pf->num_avail_hw_msix += vsi->num_q_vectors;
- } else if (test_bit(ICE_VF_STATE_CFG_INTR, vf->vf_states)) {
- /* Reclaim VF resources back only while freeing all VFs or
- * vector reassignment is requested
- */
- ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx,
- vsi->idx);
- pf->num_avail_hw_msix += pf->num_vf_msix;
}
- if (vsi->type == ICE_VSI_PF)
+ if (vsi->type == ICE_VSI_PF) {
ice_vsi_add_rem_eth_mac(vsi, false);
+ ice_cfg_sw_lldp(vsi, true, false);
+ /* The Rx rule will only exist to remove if the LLDP FW
+ * engine is currently stopped
+ */
+ if (!test_bit(ICE_FLAG_ENABLE_FW_LLDP, pf->flags))
+ ice_cfg_sw_lldp(vsi, false, false);
+ }
ice_remove_vsi_fltr(&pf->hw, vsi->idx);
ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
ice_vsi_delete(vsi);
ice_vsi_free_q_vectors(vsi);
+
+ /* make sure unregister_netdev() was called by checking __ICE_DOWN */
+ if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) {
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ }
+
ice_vsi_clear_rings(vsi);
ice_vsi_put_qs(vsi);
@@ -2825,6 +2938,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
struct ice_vf *vf = NULL;
+ enum ice_status status;
struct ice_pf *pf;
int ret, i;
@@ -2838,24 +2952,17 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
ice_vsi_free_q_vectors(vsi);
+ /* SR-IOV determines needed MSIX resources all at once instead of per
+ * VSI since when VFs are spawned we know how many VFs there are and how
+ * many interrupts each VF needs. SR-IOV MSIX resources are also
+ * cleared in the same manner.
+ */
if (vsi->type != ICE_VSI_VF) {
/* reclaim SW interrupts back to the common pool */
- ice_free_res(pf->sw_irq_tracker, vsi->sw_base_vector, vsi->idx);
+ ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
pf->num_avail_sw_msix += vsi->num_q_vectors;
- vsi->sw_base_vector = 0;
- /* reclaim HW interrupts back to the common pool */
- ice_free_res(pf->hw_irq_tracker, vsi->hw_base_vector,
- vsi->idx);
- pf->num_avail_hw_msix += vsi->num_q_vectors;
- } else {
- /* Reclaim VF resources back to the common pool for reset and
- * and rebuild, with vector reassignment
- */
- ice_free_res(pf->hw_irq_tracker, vf->first_vector_idx,
- vsi->idx);
- pf->num_avail_hw_msix += pf->num_vf_msix;
+ vsi->base_vector = 0;
}
- vsi->hw_base_vector = 0;
ice_vsi_clear_rings(vsi);
ice_vsi_free_arrays(vsi);
@@ -2881,10 +2988,6 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
if (ret)
goto err_rings;
- ret = ice_vsi_setup_vector_base(vsi);
- if (ret)
- goto err_vectors;
-
ret = ice_vsi_set_q_vectors_reg_idx(vsi);
if (ret)
goto err_vectors;
@@ -2929,12 +3032,12 @@ int ice_vsi_rebuild(struct ice_vsi *vsi)
for (i = 0; i < vsi->tc_cfg.numtc; i++)
max_txqs[i] = pf->num_lan_tx;
- ret = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
- max_txqs);
- if (ret) {
+ status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
+ max_txqs);
+ if (status) {
dev_err(&pf->pdev->dev,
"VSI %d failed lan queue config, error %d\n",
- vsi->vsi_num, ret);
+ vsi->vsi_num, status);
goto err_vectors;
}
return 0;
@@ -2956,7 +3059,7 @@ err_vsi:
/**
* ice_is_reset_in_progress - check for a reset in progress
- * @state: pf state field
+ * @state: PF state field
*/
bool ice_is_reset_in_progress(unsigned long *state)
{
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index a91d3553cc89..6e43ef03bfc3 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -19,6 +19,14 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
void ice_vsi_cfg_msix(struct ice_vsi *vsi);
+#ifdef CONFIG_PCI_IOV
+void
+ice_cfg_txq_interrupt(struct ice_vsi *vsi, u16 txq, u16 msix_idx, u16 itr_idx);
+
+void
+ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx);
+#endif /* CONFIG_PCI_IOV */
+
int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
@@ -37,6 +45,8 @@ ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc);
+void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create);
+
void ice_vsi_delete(struct ice_vsi *vsi);
int ice_vsi_clear(struct ice_vsi *vsi);
@@ -49,6 +59,8 @@ struct ice_vsi *
ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
enum ice_vsi_type type, u16 vf_id);
+void ice_napi_del(struct ice_vsi *vsi);
+
int ice_vsi_release(struct ice_vsi *vsi);
void ice_vsi_close(struct ice_vsi *vsi);
@@ -64,6 +76,8 @@ bool ice_is_reset_in_progress(unsigned long *state);
void ice_vsi_free_q_vectors(struct ice_vsi *vsi);
+void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector);
+
void ice_vsi_put_qs(struct ice_vsi *vsi);
#ifdef CONFIG_DCB
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 7843abf4d44d..28ec0d57941d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -61,9 +61,10 @@ static u32 ice_get_tx_pending(struct ice_ring *ring)
static void ice_check_for_hang_subtask(struct ice_pf *pf)
{
struct ice_vsi *vsi = NULL;
+ struct ice_hw *hw;
unsigned int i;
- u32 v, v_idx;
int packets;
+ u32 v;
ice_for_each_vsi(pf, v)
if (pf->vsi[v] && pf->vsi[v]->type == ICE_VSI_PF) {
@@ -77,12 +78,12 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
return;
+ hw = &vsi->back->hw;
+
for (i = 0; i < vsi->num_txq; i++) {
struct ice_ring *tx_ring = vsi->tx_rings[i];
if (tx_ring && tx_ring->desc) {
- int itr = ICE_ITR_NONE;
-
/* If packet counter has not changed the queue is
* likely stalled, so force an interrupt for this
* queue.
@@ -93,12 +94,7 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
packets = tx_ring->stats.pkts & INT_MAX;
if (tx_ring->tx_stats.prev_pkt == packets) {
/* Trigger sw interrupt to revive the queue */
- v_idx = tx_ring->q_vector->v_idx;
- wr32(&vsi->back->hw,
- GLINT_DYN_CTL(vsi->hw_base_vector + v_idx),
- (itr << GLINT_DYN_CTL_ITR_INDX_S) |
- GLINT_DYN_CTL_SWINT_TRIG_M |
- GLINT_DYN_CTL_INTENA_MSK_M);
+ ice_trigger_sw_intr(hw, tx_ring->q_vector);
continue;
}
@@ -113,6 +109,67 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
}
/**
+ * ice_init_mac_fltr - Set initial MAC filters
+ * @pf: board private structure
+ *
+ * Set initial set of MAC filters for PF VSI; configure filters for permanent
+ * address and broadcast address. If an error is encountered, netdevice will be
+ * unregistered.
+ */
+static int ice_init_mac_fltr(struct ice_pf *pf)
+{
+ LIST_HEAD(tmp_add_list);
+ u8 broadcast[ETH_ALEN];
+ struct ice_vsi *vsi;
+ int status;
+
+ vsi = ice_find_vsi_by_type(pf, ICE_VSI_PF);
+ if (!vsi)
+ return -EINVAL;
+
+ /* To add a MAC filter, first add the MAC to a list and then
+ * pass the list to ice_add_mac.
+ */
+
+ /* Add a unicast MAC filter so the VSI can get its packets */
+ status = ice_add_mac_to_list(vsi, &tmp_add_list,
+ vsi->port_info->mac.perm_addr);
+ if (status)
+ goto unregister;
+
+ /* VSI needs to receive broadcast traffic, so add the broadcast
+ * MAC address to the list as well.
+ */
+ eth_broadcast_addr(broadcast);
+ status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
+ if (status)
+ goto free_mac_list;
+
+ /* Program MAC filters for entries in tmp_add_list */
+ status = ice_add_mac(&pf->hw, &tmp_add_list);
+ if (status)
+ status = -ENOMEM;
+
+free_mac_list:
+ ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
+
+unregister:
+ /* We aren't useful with no MAC filters, so unregister if we
+ * had an error
+ */
+ if (status && vsi->netdev->reg_state == NETREG_REGISTERED) {
+ dev_err(&pf->pdev->dev,
+ "Could not add MAC filters error %d. Unregistering device\n",
+ status);
+ unregister_netdev(vsi->netdev);
+ free_netdev(vsi->netdev);
+ vsi->netdev = NULL;
+ }
+
+ return status;
+}
+
+/**
* ice_add_mac_to_sync_list - creates list of MAC addresses to be synced
* @netdev: the net device on which the sync is happening
* @addr: MAC address to sync
@@ -567,7 +624,11 @@ static void ice_reset_subtask(struct ice_pf *pf)
*/
void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
{
+ struct ice_aqc_get_phy_caps_data *caps;
+ enum ice_status status;
+ const char *fec_req;
const char *speed;
+ const char *fec;
const char *fc;
if (!vsi)
@@ -584,6 +645,12 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
}
switch (vsi->port_info->phy.link_info.link_speed) {
+ case ICE_AQ_LINK_SPEED_100GB:
+ speed = "100 G";
+ break;
+ case ICE_AQ_LINK_SPEED_50GB:
+ speed = "50 G";
+ break;
case ICE_AQ_LINK_SPEED_40GB:
speed = "40 G";
break;
@@ -615,13 +682,13 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
switch (vsi->port_info->fc.current_mode) {
case ICE_FC_FULL:
- fc = "RX/TX";
+ fc = "Rx/Tx";
break;
case ICE_FC_TX_PAUSE:
- fc = "TX";
+ fc = "Tx";
break;
case ICE_FC_RX_PAUSE:
- fc = "RX";
+ fc = "Rx";
break;
case ICE_FC_NONE:
fc = "None";
@@ -631,8 +698,47 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup)
break;
}
- netdev_info(vsi->netdev, "NIC Link is up %sbps, Flow Control: %s\n",
- speed, fc);
+ /* Get FEC mode based on negotiated link info */
+ switch (vsi->port_info->phy.link_info.fec_info) {
+ case ICE_AQ_LINK_25G_RS_528_FEC_EN:
+ /* fall through */
+ case ICE_AQ_LINK_25G_RS_544_FEC_EN:
+ fec = "RS-FEC";
+ break;
+ case ICE_AQ_LINK_25G_KR_FEC_EN:
+ fec = "FC-FEC/BASE-R";
+ break;
+ default:
+ fec = "NONE";
+ break;
+ }
+
+ /* Get FEC mode requested based on PHY caps last SW configuration */
+ caps = devm_kzalloc(&vsi->back->pdev->dev, sizeof(*caps), GFP_KERNEL);
+ if (!caps) {
+ fec_req = "Unknown";
+ goto done;
+ }
+
+ status = ice_aq_get_phy_caps(vsi->port_info, false,
+ ICE_AQC_REPORT_SW_CFG, caps, NULL);
+ if (status)
+ netdev_info(vsi->netdev, "Get phy capability failed.\n");
+
+ if (caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_528_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_RS_544_REQ)
+ fec_req = "RS-FEC";
+ else if (caps->link_fec_options & ICE_AQC_PHY_FEC_10G_KR_40G_KR4_REQ ||
+ caps->link_fec_options & ICE_AQC_PHY_FEC_25G_KR_REQ)
+ fec_req = "FC-FEC/BASE-R";
+ else
+ fec_req = "NONE";
+
+ devm_kfree(&vsi->back->pdev->dev, caps);
+
+done:
+ netdev_info(vsi->netdev, "NIC Link is up %sbps, Requested FEC: %s, FEC: %s, Flow Control: %s\n",
+ speed, fec_req, fec, fc);
}
/**
@@ -664,7 +770,7 @@ static void ice_vsi_link_event(struct ice_vsi *vsi, bool link_up)
/**
* ice_link_event - process the link event
- * @pf: pf that the link event is associated with
+ * @pf: PF that the link event is associated with
* @pi: port_info for the port that the link event is associated with
* @link_up: true if the physical link is up and false if it is down
* @link_speed: current link speed received from the link event
@@ -774,7 +880,7 @@ static int ice_init_link_events(struct ice_port_info *pi)
/**
* ice_handle_link_event - handle link event via ARQ
- * @pf: pf that the link event is associated with
+ * @pf: PF that the link event is associated with
* @event: event structure containing link status info
*/
static int
@@ -1161,16 +1267,16 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
}
}
- /* see if one of the VFs needs to be reset */
- for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
+ /* check to see if one of the VFs caused the MDD */
+ for (i = 0; i < pf->num_alloc_vfs; i++) {
struct ice_vf *vf = &pf->vf[i];
- mdd_detected = false;
+ bool vf_mdd_detected = false;
reg = rd32(hw, VP_MDET_TX_PQM(i));
if (reg & VP_MDET_TX_PQM_VALID_M) {
wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF);
- mdd_detected = true;
+ vf_mdd_detected = true;
dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
i);
}
@@ -1178,7 +1284,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
reg = rd32(hw, VP_MDET_TX_TCLAN(i));
if (reg & VP_MDET_TX_TCLAN_VALID_M) {
wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF);
- mdd_detected = true;
+ vf_mdd_detected = true;
dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
i);
}
@@ -1186,7 +1292,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
reg = rd32(hw, VP_MDET_TX_TDPU(i));
if (reg & VP_MDET_TX_TDPU_VALID_M) {
wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF);
- mdd_detected = true;
+ vf_mdd_detected = true;
dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
i);
}
@@ -1194,19 +1300,18 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
reg = rd32(hw, VP_MDET_RX(i));
if (reg & VP_MDET_RX_VALID_M) {
wr32(hw, VP_MDET_RX(i), 0xFFFF);
- mdd_detected = true;
+ vf_mdd_detected = true;
dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
i);
}
- if (mdd_detected) {
+ if (vf_mdd_detected) {
vf->num_mdd_events++;
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
- set_bit(ICE_VF_STATE_DIS, vf->vf_states);
+ if (vf->num_mdd_events > 1)
+ dev_info(&pf->pdev->dev, "VF %d has had %llu MDD events since last boot\n",
+ i, vf->num_mdd_events);
}
}
-
}
/**
@@ -1327,7 +1432,7 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
{
int q_vectors = vsi->num_q_vectors;
struct ice_pf *pf = vsi->back;
- int base = vsi->sw_base_vector;
+ int base = vsi->base_vector;
int rx_int_idx = 0;
int tx_int_idx = 0;
int vector, err;
@@ -1408,7 +1513,7 @@ static void ice_ena_misc_vector(struct ice_pf *pf)
wr32(hw, PFINT_OICR_ENA, val);
/* SW_ITR_IDX = 0, but don't change INTENA */
- wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx),
+ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
GLINT_DYN_CTL_SW_ITR_INDX_M | GLINT_DYN_CTL_INTENA_MSK_M);
}
@@ -1430,6 +1535,11 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
oicr = rd32(hw, PFINT_OICR);
ena_mask = rd32(hw, PFINT_OICR_ENA);
+ if (oicr & PFINT_OICR_SWINT_M) {
+ ena_mask &= ~PFINT_OICR_SWINT_M;
+ pf->sw_int_count++;
+ }
+
if (oicr & PFINT_OICR_MAL_DETECT_M) {
ena_mask &= ~PFINT_OICR_MAL_DETECT_M;
set_bit(__ICE_MDD_EVENT_PENDING, pf->state);
@@ -1556,15 +1666,13 @@ static void ice_free_irq_msix_misc(struct ice_pf *pf)
ice_flush(hw);
if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags) && pf->msix_entries) {
- synchronize_irq(pf->msix_entries[pf->sw_oicr_idx].vector);
+ synchronize_irq(pf->msix_entries[pf->oicr_idx].vector);
devm_free_irq(&pf->pdev->dev,
- pf->msix_entries[pf->sw_oicr_idx].vector, pf);
+ pf->msix_entries[pf->oicr_idx].vector, pf);
}
pf->num_avail_sw_msix += 1;
- ice_free_res(pf->sw_irq_tracker, pf->sw_oicr_idx, ICE_RES_MISC_VEC_ID);
- pf->num_avail_hw_msix += 1;
- ice_free_res(pf->hw_irq_tracker, pf->hw_oicr_idx, ICE_RES_MISC_VEC_ID);
+ ice_free_res(pf->irq_tracker, pf->oicr_idx, ICE_RES_MISC_VEC_ID);
}
/**
@@ -1618,43 +1726,31 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
if (ice_is_reset_in_progress(pf->state))
goto skip_req_irq;
- /* reserve one vector in sw_irq_tracker for misc interrupts */
- oicr_idx = ice_get_res(pf, pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+ /* reserve one vector in irq_tracker for misc interrupts */
+ oicr_idx = ice_get_res(pf, pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
if (oicr_idx < 0)
return oicr_idx;
pf->num_avail_sw_msix -= 1;
- pf->sw_oicr_idx = oicr_idx;
-
- /* reserve one vector in hw_irq_tracker for misc interrupts */
- oicr_idx = ice_get_res(pf, pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
- if (oicr_idx < 0) {
- ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
- pf->num_avail_sw_msix += 1;
- return oicr_idx;
- }
- pf->num_avail_hw_msix -= 1;
- pf->hw_oicr_idx = oicr_idx;
+ pf->oicr_idx = oicr_idx;
err = devm_request_irq(&pf->pdev->dev,
- pf->msix_entries[pf->sw_oicr_idx].vector,
+ pf->msix_entries[pf->oicr_idx].vector,
ice_misc_intr, 0, pf->int_name, pf);
if (err) {
dev_err(&pf->pdev->dev,
"devm_request_irq for %s failed: %d\n",
pf->int_name, err);
- ice_free_res(pf->sw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
+ ice_free_res(pf->irq_tracker, 1, ICE_RES_MISC_VEC_ID);
pf->num_avail_sw_msix += 1;
- ice_free_res(pf->hw_irq_tracker, 1, ICE_RES_MISC_VEC_ID);
- pf->num_avail_hw_msix += 1;
return err;
}
skip_req_irq:
ice_ena_misc_vector(pf);
- ice_ena_ctrlq_interrupts(hw, pf->hw_oicr_idx);
- wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->hw_oicr_idx),
+ ice_ena_ctrlq_interrupts(hw, pf->oicr_idx);
+ wr32(hw, GLINT_ITR(ICE_RX_ITR, pf->oicr_idx),
ITR_REG_ALIGN(ICE_ITR_8K) >> ICE_ITR_GRAN_S);
ice_flush(hw);
@@ -1664,21 +1760,6 @@ skip_req_irq:
}
/**
- * ice_napi_del - Remove NAPI handler for the VSI
- * @vsi: VSI for which NAPI handler is to be removed
- */
-void ice_napi_del(struct ice_vsi *vsi)
-{
- int v_idx;
-
- if (!vsi->netdev)
- return;
-
- ice_for_each_q_vector(vsi, v_idx)
- netif_napi_del(&vsi->q_vectors[v_idx]->napi);
-}
-
-/**
* ice_napi_add - register NAPI handler for the VSI
* @vsi: VSI for which NAPI handler is to be registered
*
@@ -1803,8 +1884,8 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size)
* @pf: board private structure
* @pi: pointer to the port_info instance
*
- * Returns pointer to the successfully allocated VSI sw struct on success,
- * otherwise returns NULL on failure.
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
*/
static struct ice_vsi *
ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
@@ -1813,6 +1894,20 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
}
/**
+ * ice_lb_vsi_setup - Set up a loopback VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Returns pointer to the successfully allocated VSI software struct
+ * on success, otherwise returns NULL on failure.
+ */
+struct ice_vsi *
+ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+ return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID);
+}
+
+/**
* ice_vlan_rx_add_vid - Add a VLAN ID filter to HW offload
* @netdev: network interface to be adjusted
* @proto: unused protocol
@@ -1900,8 +1995,6 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto,
*/
static int ice_setup_pf_sw(struct ice_pf *pf)
{
- LIST_HEAD(tmp_add_list);
- u8 broadcast[ETH_ALEN];
struct ice_vsi *vsi;
int status = 0;
@@ -1926,38 +2019,12 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
*/
ice_napi_add(vsi);
- /* To add a MAC filter, first add the MAC to a list and then
- * pass the list to ice_add_mac.
- */
-
- /* Add a unicast MAC filter so the VSI can get its packets */
- status = ice_add_mac_to_list(vsi, &tmp_add_list,
- vsi->port_info->mac.perm_addr);
+ status = ice_init_mac_fltr(pf);
if (status)
goto unroll_napi_add;
- /* VSI needs to receive broadcast traffic, so add the broadcast
- * MAC address to the list as well.
- */
- eth_broadcast_addr(broadcast);
- status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
- if (status)
- goto free_mac_list;
-
- /* program MAC filters for entries in tmp_add_list */
- status = ice_add_mac(&pf->hw, &tmp_add_list);
- if (status) {
- dev_err(&pf->pdev->dev, "Could not add MAC filters\n");
- status = -ENOMEM;
- goto free_mac_list;
- }
-
- ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
return status;
-free_mac_list:
- ice_free_fltr_list(&pf->pdev->dev, &tmp_add_list);
-
unroll_napi_add:
if (vsi) {
ice_napi_del(vsi);
@@ -2149,14 +2216,9 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf)
if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
ice_dis_msix(pf);
- if (pf->sw_irq_tracker) {
- devm_kfree(&pf->pdev->dev, pf->sw_irq_tracker);
- pf->sw_irq_tracker = NULL;
- }
-
- if (pf->hw_irq_tracker) {
- devm_kfree(&pf->pdev->dev, pf->hw_irq_tracker);
- pf->hw_irq_tracker = NULL;
+ if (pf->irq_tracker) {
+ devm_kfree(&pf->pdev->dev, pf->irq_tracker);
+ pf->irq_tracker = NULL;
}
}
@@ -2166,7 +2228,7 @@ static void ice_clear_interrupt_scheme(struct ice_pf *pf)
*/
static int ice_init_interrupt_scheme(struct ice_pf *pf)
{
- int vectors = 0, hw_vectors = 0;
+ int vectors;
if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
vectors = ice_ena_msix_range(pf);
@@ -2177,31 +2239,18 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
return vectors;
/* set up vector assignment tracking */
- pf->sw_irq_tracker =
- devm_kzalloc(&pf->pdev->dev, sizeof(*pf->sw_irq_tracker) +
+ pf->irq_tracker =
+ devm_kzalloc(&pf->pdev->dev, sizeof(*pf->irq_tracker) +
(sizeof(u16) * vectors), GFP_KERNEL);
- if (!pf->sw_irq_tracker) {
+ if (!pf->irq_tracker) {
ice_dis_msix(pf);
return -ENOMEM;
}
/* populate SW interrupts pool with number of OS granted IRQs. */
pf->num_avail_sw_msix = vectors;
- pf->sw_irq_tracker->num_entries = vectors;
-
- /* set up HW vector assignment tracking */
- hw_vectors = pf->hw.func_caps.common_cap.num_msix_vectors;
- pf->hw_irq_tracker =
- devm_kzalloc(&pf->pdev->dev, sizeof(*pf->hw_irq_tracker) +
- (sizeof(u16) * hw_vectors), GFP_KERNEL);
- if (!pf->hw_irq_tracker) {
- ice_clear_interrupt_scheme(pf);
- return -ENOMEM;
- }
-
- /* populate HW interrupts pool with number of HW supported irqs. */
- pf->num_avail_hw_msix = hw_vectors;
- pf->hw_irq_tracker->num_entries = hw_vectors;
+ pf->irq_tracker->num_entries = vectors;
+ pf->irq_tracker->end = pf->irq_tracker->num_entries;
return 0;
}
@@ -2252,7 +2301,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
if (!pf)
return -ENOMEM;
- /* set up for high or low dma */
+ /* set up for high or low DMA */
err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
if (err)
err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
@@ -2302,7 +2351,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
ice_init_pf(pf);
- err = ice_init_pf_dcb(pf);
+ err = ice_init_pf_dcb(pf, false);
if (err) {
clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
clear_bit(ICE_FLAG_DCB_ENA, pf->flags);
@@ -2368,7 +2417,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
err = ice_setup_pf_sw(pf);
if (err) {
- dev_err(dev, "probe failed due to setup pf switch:%d\n", err);
+ dev_err(dev, "probe failed due to setup PF switch:%d\n", err);
goto err_alloc_sw_unroll;
}
@@ -2625,7 +2674,7 @@ static int __init ice_module_init(void)
status = pci_register_driver(&ice_driver);
if (status) {
- pr_err("failed to register pci driver, err %d\n", status);
+ pr_err("failed to register PCI driver, err %d\n", status);
destroy_workqueue(ice_wq);
}
@@ -2725,21 +2774,21 @@ free_lists:
ice_free_fltr_list(&pf->pdev->dev, &a_mac_list);
if (err) {
- netdev_err(netdev, "can't set mac %pM. filter update failed\n",
+ netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
mac);
return err;
}
/* change the netdev's MAC address */
memcpy(netdev->dev_addr, mac, netdev->addr_len);
- netdev_dbg(vsi->netdev, "updated mac address to %pM\n",
+ netdev_dbg(vsi->netdev, "updated MAC address to %pM\n",
netdev->dev_addr);
/* write new MAC address to the firmware */
flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
if (status) {
- netdev_err(netdev, "can't set mac %pM. write to firmware failed.\n",
+ netdev_err(netdev, "can't set MAC %pM. write to firmware failed.\n",
mac);
}
return 0;
@@ -2876,6 +2925,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
(netdev->features & NETIF_F_HW_VLAN_CTAG_TX))
ret = ice_vsi_manage_vlan_insertion(vsi);
+ if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+ !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ ret = ice_cfg_vlan_pruning(vsi, true, false);
+ else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) &&
+ (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ ret = ice_cfg_vlan_pruning(vsi, false, false);
+
return ret;
}
@@ -2901,7 +2957,7 @@ static int ice_vsi_vlan_setup(struct ice_vsi *vsi)
*
* Return 0 on success and negative value on error
*/
-static int ice_vsi_cfg(struct ice_vsi *vsi)
+int ice_vsi_cfg(struct ice_vsi *vsi)
{
int err;
@@ -2933,7 +2989,7 @@ static void ice_napi_enable_all(struct ice_vsi *vsi)
if (!vsi->netdev)
return;
- ice_for_each_q_vector(vsi, q_idx) {
+ ice_for_each_q_vector(vsi, q_idx) {
struct ice_q_vector *q_vector = vsi->q_vectors[q_idx];
if (q_vector->rx.ring || q_vector->tx.ring)
@@ -3456,7 +3512,7 @@ int ice_down(struct ice_vsi *vsi)
*
* Return 0 on success, negative on failure
*/
-static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
+int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
{
int i, err = 0;
@@ -3482,7 +3538,7 @@ static int ice_vsi_setup_tx_rings(struct ice_vsi *vsi)
*
* Return 0 on success, negative on failure
*/
-static int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
+int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
{
int i, err = 0;
@@ -3658,7 +3714,7 @@ static int ice_pf_ena_all_vsi(struct ice_pf *pf, bool locked)
}
/**
- * ice_vsi_rebuild_all - rebuild all VSIs in pf
+ * ice_vsi_rebuild_all - rebuild all VSIs in PF
* @pf: the PF
*/
static int ice_vsi_rebuild_all(struct ice_pf *pf)
@@ -3728,7 +3784,7 @@ static int ice_vsi_replay_all(struct ice_pf *pf)
/**
* ice_rebuild - rebuild after reset
- * @pf: pf to rebuild
+ * @pf: PF to rebuild
*/
static void ice_rebuild(struct ice_pf *pf)
{
@@ -3740,7 +3796,7 @@ static void ice_rebuild(struct ice_pf *pf)
if (test_bit(__ICE_DOWN, pf->state))
goto clear_recovery;
- dev_dbg(dev, "rebuilding pf\n");
+ dev_dbg(dev, "rebuilding PF\n");
ret = ice_init_all_ctrlq(hw);
if (ret) {
@@ -3768,12 +3824,6 @@ static void ice_rebuild(struct ice_pf *pf)
ice_dcb_rebuild(pf);
- /* reset search_hint of irq_trackers to 0 since interrupts are
- * reclaimed and could be allocated from beginning during VSI rebuild
- */
- pf->sw_irq_tracker->search_hint = 0;
- pf->hw_irq_tracker->search_hint = 0;
-
err = ice_vsi_rebuild_all(pf);
if (err) {
dev_err(dev, "ice_vsi_rebuild_all failed\n");
@@ -3857,16 +3907,16 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
u8 count = 0;
if (new_mtu == netdev->mtu) {
- netdev_warn(netdev, "mtu is already %u\n", netdev->mtu);
+ netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
return 0;
}
if (new_mtu < netdev->min_mtu) {
- netdev_err(netdev, "new mtu invalid. min_mtu is %d\n",
+ netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
netdev->min_mtu);
return -EINVAL;
} else if (new_mtu > netdev->max_mtu) {
- netdev_err(netdev, "new mtu invalid. max_mtu is %d\n",
+ netdev_err(netdev, "new MTU invalid. max_mtu is %d\n",
netdev->min_mtu);
return -EINVAL;
}
@@ -3882,7 +3932,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
} while (count < 100);
if (count == 100) {
- netdev_err(netdev, "can't change mtu. Device is busy\n");
+ netdev_err(netdev, "can't change MTU. Device is busy\n");
return -EBUSY;
}
@@ -3894,18 +3944,18 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
err = ice_down(vsi);
if (err) {
- netdev_err(netdev, "change mtu if_up err %d\n", err);
+ netdev_err(netdev, "change MTU if_up err %d\n", err);
return err;
}
err = ice_up(vsi);
if (err) {
- netdev_err(netdev, "change mtu if_up err %d\n", err);
+ netdev_err(netdev, "change MTU if_up err %d\n", err);
return err;
}
}
- netdev_dbg(netdev, "changed mtu to %d\n", new_mtu);
+ netdev_info(netdev, "changed MTU to %d\n", new_mtu);
return 0;
}
@@ -4241,7 +4291,7 @@ static void ice_tx_timeout(struct net_device *netdev)
*
* Returns 0 on success, negative value on failure
*/
-static int ice_open(struct net_device *netdev)
+int ice_open(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
@@ -4278,7 +4328,7 @@ static int ice_open(struct net_device *netdev)
*
* Returns success only - not allowed to fail
*/
-static int ice_stop(struct net_device *netdev)
+int ice_stop(struct net_device *netdev)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 62571d33d0d6..bcb431f1bd92 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -119,7 +119,7 @@ ice_read_sr_word_aq(struct ice_hw *hw, u16 offset, u16 *data)
status = ice_read_sr_aq(hw, offset, 1, data, true);
if (!status)
- *data = le16_to_cpu(*(__le16 *)data);
+ *data = le16_to_cpu(*(__force __le16 *)data);
return status;
}
@@ -174,7 +174,7 @@ ice_read_sr_buf_aq(struct ice_hw *hw, u16 offset, u16 *words, u16 *data)
} while (words_read < *words);
for (i = 0; i < *words; i++)
- data[i] = le16_to_cpu(((__le16 *)data)[i]);
+ data[i] = le16_to_cpu(((__force __le16 *)data)[i]);
read_nvm_buf_aq_exit:
*words = words_read;
@@ -316,3 +316,34 @@ ice_read_sr_buf(struct ice_hw *hw, u16 offset, u16 *words, u16 *data)
return status;
}
+
+/**
+ * ice_nvm_validate_checksum
+ * @hw: pointer to the HW struct
+ *
+ * Verify NVM PFA checksum validity (0x0706)
+ */
+enum ice_status ice_nvm_validate_checksum(struct ice_hw *hw)
+{
+ struct ice_aqc_nvm_checksum *cmd;
+ struct ice_aq_desc desc;
+ enum ice_status status;
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (status)
+ return status;
+
+ cmd = &desc.params.nvm_checksum;
+
+ ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_nvm_checksum);
+ cmd->flags = ICE_AQC_NVM_CHECKSUM_VERIFY;
+
+ status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
+ ice_release_nvm(hw);
+
+ if (!status)
+ if (le16_to_cpu(cmd->checksum) != ICE_AQC_NVM_CHECKSUM_CORRECT)
+ status = ICE_ERR_NVM_CHECKSUM;
+
+ return status;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index 17afe6acb18a..c01597885629 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -26,6 +26,7 @@ enum ice_status {
ICE_ERR_IN_USE = -16,
ICE_ERR_MAX_LIMIT = -17,
ICE_ERR_RESET_ONGOING = -18,
+ ICE_ERR_NVM_CHECKSUM = -51,
ICE_ERR_BUF_TOO_SHORT = -52,
ICE_ERR_NVM_BLANK_MODE = -53,
ICE_ERR_AQ_ERROR = -100,
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 9f1f595ae7e6..8271fd651725 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -799,7 +799,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
daddr = f_info->l_data.ethertype_mac.mac_addr;
/* fall-through */
case ICE_SW_LKUP_ETHERTYPE:
- off = (__be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
+ off = (__force __be16 *)(eth_hdr + ICE_ETH_ETHTYPE_OFFSET);
*off = cpu_to_be16(f_info->l_data.ethertype_mac.ethertype);
break;
case ICE_SW_LKUP_MAC_VLAN:
@@ -829,7 +829,7 @@ ice_fill_sw_rule(struct ice_hw *hw, struct ice_fltr_info *f_info,
ether_addr_copy(eth_hdr + ICE_ETH_DA_OFFSET, daddr);
if (!(vlan_id > ICE_MAX_VLAN_ID)) {
- off = (__be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
+ off = (__force __be16 *)(eth_hdr + ICE_ETH_VLAN_TCI_OFFSET);
*off = cpu_to_be16(vlan_id);
}
@@ -1973,6 +1973,10 @@ ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
* ice_add_eth_mac - Add ethertype and MAC based filter rule
* @hw: pointer to the hardware structure
* @em_list: list of ether type MAC filter, MAC is optional
+ *
+ * This function requires the caller to populate the entries in
+ * the filter list with the necessary fields (including flags to
+ * indicate Tx or Rx rules).
*/
enum ice_status
ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list)
@@ -1990,7 +1994,6 @@ ice_add_eth_mac(struct ice_hw *hw, struct list_head *em_list)
l_type != ICE_SW_LKUP_ETHERTYPE)
return ICE_ERR_PARAM;
- em_list_itr->fltr_info.flag = ICE_FLTR_TX;
em_list_itr->status = ice_add_rule_internal(hw, l_type,
em_list_itr);
if (em_list_itr->status)
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index 732b0b9b2e15..cb123fbe30be 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -8,9 +8,11 @@
#define ICE_SW_CFG_MAX_BUF_LEN 2048
#define ICE_DFLT_VSI_INVAL 0xff
+#define ICE_FLTR_RX BIT(0)
+#define ICE_FLTR_TX BIT(1)
+#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX)
#define ICE_VSI_INVAL_ID 0xffff
#define ICE_INVAL_Q_HANDLE 0xFFFF
-#define ICE_INVAL_Q_HANDLE 0xFFFF
/* VSI queue context structure */
struct ice_q_ctx {
@@ -69,9 +71,6 @@ struct ice_fltr_info {
/* rule ID returned by firmware once filter rule is created */
u16 fltr_rule_id;
u16 flag;
-#define ICE_FLTR_RX BIT(0)
-#define ICE_FLTR_TX BIT(1)
-#define ICE_FLTR_TX_RX (ICE_FLTR_RX | ICE_FLTR_TX)
/* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */
u16 src;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 2364eaf33d23..3c83230434b6 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -55,7 +55,7 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring)
if (!tx_ring->tx_buf)
return;
- /* Free all the Tx ring sk_bufss */
+ /* Free all the Tx ring sk_buffs */
for (i = 0; i < tx_ring->count; i++)
ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
@@ -1101,7 +1101,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
* ice_adjust_itr_by_size_and_speed - Adjust ITR based on current traffic
* @port_info: port_info structure containing the current link speed
* @avg_pkt_size: average size of Tx or Rx packets based on clean routine
- * @itr: itr value to update
+ * @itr: ITR value to update
*
* Calculate how big of an increment should be applied to the ITR value passed
* in based on wmem_default, SKB overhead, Ethernet overhead, and the current
@@ -1316,7 +1316,7 @@ clear_counts:
*/
static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
{
- /* The itr value is reported in microseconds, and the register value is
+ /* The ITR value is reported in microseconds, and the register value is
* recorded in 2 microsecond units. For this reason we only need to
* shift by the GLINT_DYN_CTL_INTERVAL_S - ICE_ITR_GRAN_S to apply this
* granularity as a shift instead of division. The mask makes sure the
@@ -1645,7 +1645,7 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
return;
dma_error:
- /* clear dma mappings for failed tx_buf map */
+ /* clear DMA mappings for failed tx_buf map */
for (;;) {
tx_buf = &tx_ring->tx_buf[i];
ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
@@ -1874,10 +1874,10 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
cd_mss = skb_shinfo(skb)->gso_size;
/* record cdesc_qw1 with TSO parameters */
- off->cd_qw1 |= ICE_TX_DESC_DTYPE_CTX |
- (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
- (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
- (cd_mss << ICE_TXD_CTX_QW1_MSS_S);
+ off->cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+ (ICE_TX_CTX_DESC_TSO << ICE_TXD_CTX_QW1_CMD_S) |
+ (cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+ (cd_mss << ICE_TXD_CTX_QW1_MSS_S));
first->tx_flags |= ICE_TX_FLAGS_TSO;
return 1;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 66e05032ee56..ec76aba347b9 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -58,19 +58,19 @@ struct ice_tx_buf {
unsigned int bytecount;
unsigned short gso_segs;
u32 tx_flags;
- DEFINE_DMA_UNMAP_ADDR(dma);
DEFINE_DMA_UNMAP_LEN(len);
+ DEFINE_DMA_UNMAP_ADDR(dma);
};
struct ice_tx_offload_params {
- u8 header_len;
+ u64 cd_qw1;
+ struct ice_ring *tx_ring;
u32 td_cmd;
u32 td_offset;
u32 td_l2tag1;
- u16 cd_l2tag2;
u32 cd_tunnel_params;
- u64 cd_qw1;
- struct ice_ring *tx_ring;
+ u16 cd_l2tag2;
+ u8 header_len;
};
struct ice_rx_buf {
@@ -150,6 +150,7 @@ enum ice_rx_dtype {
/* descriptor ring, associated with a VSI */
struct ice_ring {
+ /* CL1 - 1st cacheline starts here */
struct ice_ring *next; /* pointer to next ring in q_vector */
void *desc; /* Descriptor ring memory */
struct device *dev; /* Used for DMA mapping */
@@ -161,11 +162,11 @@ struct ice_ring {
struct ice_tx_buf *tx_buf;
struct ice_rx_buf *rx_buf;
};
+ /* CL2 - 2nd cacheline starts here */
u16 q_index; /* Queue number of ring */
- u32 txq_teid; /* Added Tx queue TEID */
-#ifdef CONFIG_DCB
- u8 dcb_tc; /* Traffic class of ring */
-#endif /* CONFIG_DCB */
+ u16 q_handle; /* Queue handle per TC */
+
+ u8 ring_active:1; /* is ring online or not */
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
@@ -173,8 +174,7 @@ struct ice_ring {
/* used in interrupt processing */
u16 next_to_use;
u16 next_to_clean;
-
- u8 ring_active; /* is ring online or not */
+ u16 next_to_alloc;
/* stats structs */
struct ice_q_stats stats;
@@ -184,10 +184,17 @@ struct ice_ring {
struct ice_rxq_stats rx_stats;
};
- unsigned int size; /* length of descriptor ring in bytes */
- dma_addr_t dma; /* physical address of ring */
struct rcu_head rcu; /* to avoid race on free */
- u16 next_to_alloc;
+ /* CLX - the below items are only accessed infrequently and should be
+ * in their own cache line if possible
+ */
+ dma_addr_t dma; /* physical address of ring */
+ unsigned int size; /* length of descriptor ring in bytes */
+ u32 txq_teid; /* Added Tx queue TEID */
+ u16 rx_buf_len;
+#ifdef CONFIG_DCB
+ u8 dcb_tc; /* Traffic class of ring */
+#endif /* CONFIG_DCB */
} ____cacheline_internodealigned_in_smp;
struct ice_ring_container {
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index a862af4cbf78..24bbef8bbe69 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -23,6 +23,7 @@ static inline bool ice_is_tc_ena(u8 bitmap, u8 tc)
/* debug masks - set these bits in hw->debug_mask to control output */
#define ICE_DBG_INIT BIT_ULL(1)
+#define ICE_DBG_FW_LOG BIT_ULL(3)
#define ICE_DBG_LINK BIT_ULL(4)
#define ICE_DBG_PHY BIT_ULL(5)
#define ICE_DBG_QCTX BIT_ULL(6)
@@ -61,6 +62,13 @@ enum ice_fc_mode {
ICE_FC_DFLT
};
+enum ice_fec_mode {
+ ICE_FEC_NONE = 0,
+ ICE_FEC_RS,
+ ICE_FEC_BASER,
+ ICE_FEC_AUTO
+};
+
enum ice_set_fc_aq_failures {
ICE_SET_FC_AQ_FAIL_NONE = 0,
ICE_SET_FC_AQ_FAIL_GET,
@@ -86,12 +94,14 @@ enum ice_media_type {
enum ice_vsi_type {
ICE_VSI_PF = 0,
ICE_VSI_VF,
+ ICE_VSI_LB = 6,
};
struct ice_link_status {
/* Refer to ice_aq_phy_type for bits definition */
u64 phy_type_low;
u64 phy_type_high;
+ u8 topo_media_conflict;
u16 max_frame_size;
u16 link_speed;
u16 req_speeds;
@@ -99,6 +109,7 @@ struct ice_link_status {
u8 link_info;
u8 an_info;
u8 ext_info;
+ u8 fec_info;
u8 pacing;
/* Refer to #define from module_type[ICE_MODULE_TYPE_TOTAL_BYTE] of
* ice_aqc_get_phy_caps structure
@@ -423,7 +434,7 @@ struct ice_hw {
struct ice_fw_log_cfg fw_log;
/* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL
- * register. Used for determining the itr/intrl granularity during
+ * register. Used for determining the ITR/intrl granularity during
* initialization.
*/
#define ICE_MAX_AGG_BW_200G 0x0
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index a805cbdd69be..5d24b539648f 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -103,7 +103,7 @@ ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
u16 link_speed;
if (link_up)
- link_speed = ICE_AQ_LINK_SPEED_40GB;
+ link_speed = ICE_AQ_LINK_SPEED_100GB;
else
link_speed = ICE_AQ_LINK_SPEED_UNKNOWN;
@@ -141,32 +141,20 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf)
}
/**
- * ice_get_vf_vector - get VF interrupt vector register offset
- * @vf_msix: number of MSIx vector per VF on a PF
- * @vf_id: VF identifier
- * @i: index of MSIx vector
- */
-static u32 ice_get_vf_vector(int vf_msix, int vf_id, int i)
-{
- return ((i == 0) ? VFINT_DYN_CTLN(vf_id) :
- VFINT_DYN_CTLN(((vf_msix - 1) * (vf_id)) + (i - 1)));
-}
-
-/**
* ice_free_vf_res - Free a VF's resources
* @vf: pointer to the VF info
*/
static void ice_free_vf_res(struct ice_vf *vf)
{
struct ice_pf *pf = vf->pf;
- int i, pf_vf_msix;
+ int i, last_vector_idx;
/* First, disable VF's configuration API to prevent OS from
* accessing the VF's VSI after it's freed or invalidated.
*/
clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
- /* free vsi & disconnect it from the parent uplink */
+ /* free VSI and disconnect it from the parent uplink */
if (vf->lan_vsi_idx) {
ice_vsi_release(pf->vsi[vf->lan_vsi_idx]);
vf->lan_vsi_idx = 0;
@@ -174,13 +162,10 @@ static void ice_free_vf_res(struct ice_vf *vf)
vf->num_mac = 0;
}
- pf_vf_msix = pf->num_vf_msix;
+ last_vector_idx = vf->first_vector_idx + pf->num_vf_msix - 1;
/* Disable interrupts so that VF starts in a known state */
- for (i = 0; i < pf_vf_msix; i++) {
- u32 reg_idx;
-
- reg_idx = ice_get_vf_vector(pf_vf_msix, vf->vf_id, i);
- wr32(&pf->hw, reg_idx, VFINT_DYN_CTLN_CLEARPBA_M);
+ for (i = vf->first_vector_idx; i <= last_vector_idx; i++) {
+ wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M);
ice_flush(&pf->hw);
}
/* reset some of the state variables keeping track of the resources */
@@ -205,8 +190,7 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
- first = vf->first_vector_idx +
- hw->func_caps.common_cap.msix_vector_first_id;
+ first = vf->first_vector_idx;
last = first + pf->num_vf_msix - 1;
for (v = first; v <= last; v++) {
u32 reg;
@@ -232,6 +216,42 @@ static void ice_dis_vf_mappings(struct ice_vf *vf)
}
/**
+ * ice_sriov_free_msix_res - Reset/free any used MSIX resources
+ * @pf: pointer to the PF structure
+ *
+ * If MSIX entries from the pf->irq_tracker were needed then we need to
+ * reset the irq_tracker->end and give back the entries we needed to
+ * num_avail_sw_msix.
+ *
+ * If no MSIX entries were taken from the pf->irq_tracker then just clear
+ * the pf->sriov_base_vector.
+ *
+ * Returns 0 on success, and -EINVAL on error.
+ */
+static int ice_sriov_free_msix_res(struct ice_pf *pf)
+{
+ struct ice_res_tracker *res;
+
+ if (!pf)
+ return -EINVAL;
+
+ res = pf->irq_tracker;
+ if (!res)
+ return -EINVAL;
+
+ /* give back irq_tracker resources used */
+ if (pf->sriov_base_vector < res->num_entries) {
+ res->end = res->num_entries;
+ pf->num_avail_sw_msix +=
+ res->num_entries - pf->sriov_base_vector;
+ }
+
+ pf->sriov_base_vector = 0;
+
+ return 0;
+}
+
+/**
* ice_free_vfs - Free all VFs
* @pf: pointer to the PF structure
*/
@@ -246,15 +266,6 @@ void ice_free_vfs(struct ice_pf *pf)
while (test_and_set_bit(__ICE_VF_DIS, pf->state))
usleep_range(1000, 2000);
- /* Disable IOV before freeing resources. This lets any VF drivers
- * running in the host get themselves cleaned up before we yank
- * the carpet out from underneath their feet.
- */
- if (!pci_vfs_assigned(pf->pdev))
- pci_disable_sriov(pf->pdev);
- else
- dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
-
/* Avoid wait time by stopping all VFs at the same time */
for (i = 0; i < pf->num_alloc_vfs; i++) {
struct ice_vsi *vsi;
@@ -270,6 +281,15 @@ void ice_free_vfs(struct ice_pf *pf)
clear_bit(ICE_VF_STATE_ENA, pf->vf[i].vf_states);
}
+ /* Disable IOV before freeing resources. This lets any VF drivers
+ * running in the host get themselves cleaned up before we yank
+ * the carpet out from underneath their feet.
+ */
+ if (!pci_vfs_assigned(pf->pdev))
+ pci_disable_sriov(pf->pdev);
+ else
+ dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n");
+
tmp = pf->num_alloc_vfs;
pf->num_vf_qps = 0;
pf->num_alloc_vfs = 0;
@@ -288,6 +308,10 @@ void ice_free_vfs(struct ice_pf *pf)
}
}
+ if (ice_sriov_free_msix_res(pf))
+ dev_err(&pf->pdev->dev,
+ "Failed to free MSIX resources used by SR-IOV\n");
+
devm_kfree(&pf->pdev->dev, pf->vf);
pf->vf = NULL;
@@ -457,6 +481,22 @@ ice_vf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, u16 vf_id)
}
/**
+ * ice_calc_vf_first_vector_idx - Calculate absolute MSIX vector index in HW
+ * @pf: pointer to PF structure
+ * @vf: pointer to VF that the first MSIX vector index is being calculated for
+ *
+ * This returns the first MSIX vector index in HW that is used by this VF and
+ * this will always be the OICR index in the AVF driver so any functionality
+ * using vf->first_vector_idx for queue configuration will have to increment by
+ * 1 to avoid meddling with the OICR index.
+ */
+static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
+{
+ return pf->hw.func_caps.common_cap.msix_vector_first_id +
+ pf->sriov_base_vector + vf->vf_id * pf->num_vf_msix;
+}
+
+/**
* ice_alloc_vsi_res - Setup VF VSI and its resources
* @vf: pointer to the VF structure
*
@@ -470,8 +510,10 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
struct ice_vsi *vsi;
int status = 0;
- vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
+ /* first vector index is the VFs OICR index */
+ vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
+ vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
if (!vsi) {
dev_err(&pf->pdev->dev, "Failed to create VF VSI\n");
return -ENOMEM;
@@ -480,14 +522,6 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
vf->lan_vsi_idx = vsi->idx;
vf->lan_vsi_num = vsi->vsi_num;
- /* first vector index is the VFs OICR index */
- vf->first_vector_idx = vsi->hw_base_vector;
- /* Since hw_base_vector holds the vector where data queue interrupts
- * starts, increment by 1 since VFs allocated vectors include OICR intr
- * as well.
- */
- vsi->hw_base_vector += 1;
-
/* Check if port VLAN exist before, and restore it accordingly */
if (vf->port_vlan_id) {
ice_vsi_manage_pvid(vsi, vf->port_vlan_id, true);
@@ -580,8 +614,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
hw = &pf->hw;
vsi = pf->vsi[vf->lan_vsi_idx];
- first = vf->first_vector_idx +
- hw->func_caps.common_cap.msix_vector_first_id;
+ first = vf->first_vector_idx;
last = (first + pf->num_vf_msix) - 1;
abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
@@ -687,6 +720,97 @@ ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res)
}
/**
+ * ice_calc_vf_reg_idx - Calculate the VF's register index in the PF space
+ * @vf: VF to calculate the register index for
+ * @q_vector: a q_vector associated to the VF
+ */
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector)
+{
+ struct ice_pf *pf;
+
+ if (!vf || !q_vector)
+ return -EINVAL;
+
+ pf = vf->pf;
+
+ /* always add one to account for the OICR being the first MSIX */
+ return pf->sriov_base_vector + pf->num_vf_msix * vf->vf_id +
+ q_vector->v_idx + 1;
+}
+
+/**
+ * ice_get_max_valid_res_idx - Get the max valid resource index
+ * @res: pointer to the resource to find the max valid index for
+ *
+ * Start from the end of the ice_res_tracker and return right when we find the
+ * first res->list entry with the ICE_RES_VALID_BIT set. This function is only
+ * valid for SR-IOV because it is the only consumer that manipulates the
+ * res->end and this is always called when res->end is set to res->num_entries.
+ */
+static int ice_get_max_valid_res_idx(struct ice_res_tracker *res)
+{
+ int i;
+
+ if (!res)
+ return -EINVAL;
+
+ for (i = res->num_entries - 1; i >= 0; i--)
+ if (res->list[i] & ICE_RES_VALID_BIT)
+ return i;
+
+ return 0;
+}
+
+/**
+ * ice_sriov_set_msix_res - Set any used MSIX resources
+ * @pf: pointer to PF structure
+ * @num_msix_needed: number of MSIX vectors needed for all SR-IOV VFs
+ *
+ * This function allows SR-IOV resources to be taken from the end of the PF's
+ * allowed HW MSIX vectors so in many cases the irq_tracker will not
+ * be needed. In these cases we just set the pf->sriov_base_vector and return
+ * success.
+ *
+ * If SR-IOV needs to use any pf->irq_tracker entries it updates the
+ * irq_tracker->end based on the first entry needed for SR-IOV. This makes it
+ * so any calls to ice_get_res() using the irq_tracker will not try to use
+ * resources at or beyond the newly set value.
+ *
+ * Return 0 on success, and -EINVAL when there are not enough MSIX vectors in
+ * in the PF's space available for SR-IOV.
+ */
+static int ice_sriov_set_msix_res(struct ice_pf *pf, u16 num_msix_needed)
+{
+ int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+ u16 pf_total_msix_vectors =
+ pf->hw.func_caps.common_cap.num_msix_vectors;
+ struct ice_res_tracker *res = pf->irq_tracker;
+ int sriov_base_vector;
+
+ if (max_valid_res_idx < 0)
+ return max_valid_res_idx;
+
+ sriov_base_vector = pf_total_msix_vectors - num_msix_needed;
+
+ /* make sure we only grab irq_tracker entries from the list end and
+ * that we have enough available MSIX vectors
+ */
+ if (sriov_base_vector <= max_valid_res_idx)
+ return -EINVAL;
+
+ pf->sriov_base_vector = sriov_base_vector;
+
+ /* dip into irq_tracker entries and update used resources */
+ if (num_msix_needed > (pf_total_msix_vectors - res->num_entries)) {
+ pf->num_avail_sw_msix -=
+ res->num_entries - pf->sriov_base_vector;
+ res->end = pf->sriov_base_vector;
+ }
+
+ return 0;
+}
+
+/**
* ice_check_avail_res - check if vectors and queues are available
* @pf: pointer to the PF structure
*
@@ -696,11 +820,16 @@ ice_determine_res(struct ice_pf *pf, u16 avail_res, u16 max_res, u16 min_res)
*/
static int ice_check_avail_res(struct ice_pf *pf)
{
- u16 num_msix, num_txq, num_rxq;
+ int max_valid_res_idx = ice_get_max_valid_res_idx(pf->irq_tracker);
+ u16 num_msix, num_txq, num_rxq, num_avail_msix;
- if (!pf->num_alloc_vfs)
+ if (!pf->num_alloc_vfs || max_valid_res_idx < 0)
return -EINVAL;
+ /* add 1 to max_valid_res_idx to account for it being 0-based */
+ num_avail_msix = pf->hw.func_caps.common_cap.num_msix_vectors -
+ (max_valid_res_idx + 1);
+
/* Grab from HW interrupts common pool
* Note: By the time the user decides it needs more vectors in a VF
* its already too late since one must decide this prior to creating the
@@ -717,11 +846,11 @@ static int ice_check_avail_res(struct ice_pf *pf)
* grab default interrupt vectors (5 as supported by AVF driver).
*/
if (pf->num_alloc_vfs <= 16) {
- num_msix = ice_determine_res(pf, pf->num_avail_hw_msix,
+ num_msix = ice_determine_res(pf, num_avail_msix,
ICE_MAX_INTR_PER_VF,
ICE_MIN_INTR_PER_VF);
} else if (pf->num_alloc_vfs <= ICE_MAX_VF_COUNT) {
- num_msix = ice_determine_res(pf, pf->num_avail_hw_msix,
+ num_msix = ice_determine_res(pf, num_avail_msix,
ICE_DFLT_INTR_PER_VF,
ICE_MIN_INTR_PER_VF);
} else {
@@ -750,6 +879,9 @@ static int ice_check_avail_res(struct ice_pf *pf)
if (!num_txq || !num_rxq)
return -EIO;
+ if (ice_sriov_set_msix_res(pf, num_msix * pf->num_alloc_vfs))
+ return -EINVAL;
+
/* since AVF driver works with only queue pairs which means, it expects
* to have equal number of Rx and Tx queues, so take the minimum of
* available Tx or Rx queues
@@ -938,6 +1070,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
vf->num_vf_qs = 0;
}
+ if (ice_sriov_free_msix_res(pf))
+ dev_err(&pf->pdev->dev,
+ "Failed to free MSIX resources used by SR-IOV\n");
+
if (ice_check_avail_res(pf)) {
dev_err(&pf->pdev->dev,
"Cannot allocate VF resources, try with fewer number of VFs\n");
@@ -1119,7 +1255,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
int i, ret;
/* Disable global interrupt 0 so we don't try to handle the VFLR. */
- wr32(hw, GLINT_DYN_CTL(pf->hw_oicr_idx),
+ wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
ICE_ITR_NONE << GLINT_DYN_CTL_ITR_INDX_S);
ice_flush(hw);
@@ -1134,7 +1270,7 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
GFP_KERNEL);
if (!vfs) {
ret = -ENOMEM;
- goto err_unroll_sriov;
+ goto err_pci_disable_sriov;
}
pf->vf = vfs;
@@ -1154,12 +1290,19 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
pf->num_alloc_vfs = num_alloc_vfs;
/* VF resources get allocated during reset */
- if (!ice_reset_all_vfs(pf, true))
+ if (!ice_reset_all_vfs(pf, true)) {
+ ret = -EIO;
goto err_unroll_sriov;
+ }
goto err_unroll_intr;
err_unroll_sriov:
+ pf->vf = NULL;
+ devm_kfree(&pf->pdev->dev, vfs);
+ vfs = NULL;
+ pf->num_alloc_vfs = 0;
+err_pci_disable_sriov:
pci_disable_sriov(pf->pdev);
err_unroll_intr:
/* rearm interrupts here */
@@ -1168,8 +1311,8 @@ err_unroll_intr:
}
/**
- * ice_pf_state_is_nominal - checks the pf for nominal state
- * @pf: pointer to pf to check
+ * ice_pf_state_is_nominal - checks the PF for nominal state
+ * @pf: pointer to PF to check
*
* Check the PF's state for a collection of bits that would indicate
* the PF is in a state that would inhibit normal operation for
@@ -1496,7 +1639,7 @@ static void ice_vc_reset_vf_msg(struct ice_vf *vf)
/**
* ice_find_vsi_from_id
- * @pf: the pf structure to search for the VSI
+ * @pf: the PF structure to search for the VSI
* @id: ID of the VSI it is searching for
*
* searches for the VSI with the given ID
@@ -1807,28 +1950,37 @@ error_param:
static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
{
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
- struct virtchnl_irq_map_info *irqmap_info =
- (struct virtchnl_irq_map_info *)msg;
+ struct virtchnl_irq_map_info *irqmap_info;
u16 vsi_id, vsi_q_id, vector_id;
struct virtchnl_vector_map *map;
- struct ice_vsi *vsi = NULL;
struct ice_pf *pf = vf->pf;
+ u16 num_q_vectors_mapped;
+ struct ice_vsi *vsi;
unsigned long qmap;
- u16 num_q_vectors;
int i;
- num_q_vectors = irqmap_info->num_vectors - ICE_NONQ_VECS_VF;
+ irqmap_info = (struct virtchnl_irq_map_info *)msg;
+ num_q_vectors_mapped = irqmap_info->num_vectors;
+
vsi = pf->vsi[vf->lan_vsi_idx];
+ if (!vsi) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+ /* Check to make sure number of VF vectors mapped is not greater than
+ * number of VF vectors originally allocated, and check that
+ * there is actually at least a single VF queue vector mapped
+ */
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
- !vsi || vsi->num_q_vectors < num_q_vectors ||
- irqmap_info->num_vectors == 0) {
+ pf->num_vf_msix < num_q_vectors_mapped ||
+ !irqmap_info->num_vectors) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
- for (i = 0; i < num_q_vectors; i++) {
- struct ice_q_vector *q_vector = vsi->q_vectors[i];
+ for (i = 0; i < num_q_vectors_mapped; i++) {
+ struct ice_q_vector *q_vector;
map = &irqmap_info->vecmap[i];
@@ -1836,7 +1988,21 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
vsi_id = map->vsi_id;
/* validate msg params */
if (!(vector_id < pf->hw.func_caps.common_cap
- .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id)) {
+ .num_msix_vectors) || !ice_vc_isvalid_vsi_id(vf, vsi_id) ||
+ (!vector_id && (map->rxq_map || map->txq_map))) {
+ v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ goto error_param;
+ }
+
+ /* No need to map VF miscellaneous or rogue vector */
+ if (!vector_id)
+ continue;
+
+ /* Subtract non queue vector from vector_id passed by VF
+ * to get actual number of VSI queue vector array index
+ */
+ q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF];
+ if (!q_vector) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
@@ -1852,6 +2018,8 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
q_vector->num_ring_rx++;
q_vector->rx.itr_idx = map->rxitr_idx;
vsi->rx_rings[vsi_q_id]->q_vector = q_vector;
+ ice_cfg_rxq_interrupt(vsi, vsi_q_id, vector_id,
+ q_vector->rx.itr_idx);
}
qmap = map->txq_map;
@@ -1864,11 +2032,11 @@ static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg)
q_vector->num_ring_tx++;
q_vector->tx.itr_idx = map->txitr_idx;
vsi->tx_rings[vsi_q_id]->q_vector = q_vector;
+ ice_cfg_txq_interrupt(vsi, vsi_q_id, vector_id,
+ q_vector->tx.itr_idx);
}
}
- if (vsi)
- ice_vsi_cfg_msix(vsi);
error_param:
/* send the response to the VF */
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret,
@@ -1903,9 +2071,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
}
vsi = pf->vsi[vf->lan_vsi_idx];
- if (!vsi) {
+ if (!vsi)
goto error_param;
- }
if (qci->num_queue_pairs > ICE_MAX_BASE_QS_PER_VF) {
dev_err(&pf->pdev->dev,
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 3725aea16840..c3ca522c245a 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -49,29 +49,34 @@ struct ice_vf {
struct ice_pf *pf;
s16 vf_id; /* VF ID in the PF space */
- u32 driver_caps; /* reported by VF driver */
+ u16 lan_vsi_idx; /* index into PF struct */
int first_vector_idx; /* first vector index of this VF */
struct ice_sw *vf_sw_id; /* switch ID the VF VSIs connect to */
struct virtchnl_version_info vf_ver;
+ u32 driver_caps; /* reported by VF driver */
struct virtchnl_ether_addr dflt_lan_addr;
u16 port_vlan_id;
- u8 pf_set_mac; /* VF MAC address set by VMM admin */
- u8 trusted;
- u16 lan_vsi_idx; /* index into PF struct */
+ u8 pf_set_mac:1; /* VF MAC address set by VMM admin */
+ u8 trusted:1;
+ u8 spoofchk:1;
+ u8 link_forced:1;
+ u8 link_up:1; /* only valid if VF link is forced */
+ /* VSI indices - actual VSI pointers are maintained in the PF structure
+ * When assigned, these will be non-zero, because VSI 0 is always
+ * the main LAN VSI for the PF.
+ */
u16 lan_vsi_num; /* ID as used by firmware */
+ unsigned int tx_rate; /* Tx bandwidth limit in Mbps */
+ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
+
u64 num_mdd_events; /* number of MDD events detected */
u64 num_inval_msgs; /* number of continuous invalid msgs */
u64 num_valid_msgs; /* number of valid msgs detected */
unsigned long vf_caps; /* VF's adv. capabilities */
- DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */
- unsigned int tx_rate; /* Tx bandwidth limit in Mbps */
- u8 link_forced;
- u8 link_up; /* only valid if VF link is forced */
- u8 spoofchk;
+ u8 num_req_qs; /* num of queue pairs requested by VF */
u16 num_mac;
u16 num_vlan;
u16 num_vf_qs; /* num of queue configured per VF */
- u8 num_req_qs; /* num of queue pairs requested by VF */
};
#ifdef CONFIG_PCI_IOV
@@ -96,6 +101,8 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted);
int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state);
int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena);
+
+int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector);
#else /* CONFIG_PCI_IOV */
#define ice_process_vflr_event(pf) do {} while (0)
#define ice_free_vfs(pf) do {} while (0)
@@ -161,5 +168,11 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev,
return -EOPNOTSUPP;
}
+static inline int
+ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf,
+ struct ice_q_vector __always_unused *q_vector)
+{
+ return 0;
+}
#endif /* CONFIG_PCI_IOV */
#endif /* _ICE_VIRTCHNL_PF_H_ */
diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c
index bafdcf70a353..3ec2ce0725d5 100644
--- a/drivers/net/ethernet/intel/igb/e1000_82575.c
+++ b/drivers/net/ethernet/intel/igb/e1000_82575.c
@@ -638,7 +638,7 @@ static s32 igb_get_invariants_82575(struct e1000_hw *hw)
dev_spec->sgmii_active = true;
break;
}
- /* fall through for I2C based SGMII */
+ /* fall through - for I2C based SGMII */
case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES:
/* read media type from SFP EEPROM */
ret_val = igb_set_sfp_media_type_82575(hw);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 39f33afc479c..fc925adbd9fa 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -753,6 +753,7 @@ u32 igb_rd32(struct e1000_hw *hw, u32 reg)
struct net_device *netdev = igb->netdev;
hw->hw_addr = NULL;
netdev_err(netdev, "PCIe link lost\n");
+ WARN(1, "igb: Failed to read reg 0x%x!\n", reg);
}
return value;
@@ -6695,7 +6696,7 @@ static int __igb_notify_dca(struct device *dev, void *data)
igb_setup_dca(adapter);
break;
}
- /* Fall Through since DCA is disabled. */
+ /* Fall Through - since DCA is disabled. */
case DCA_PROVIDER_REMOVE:
if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
/* without this a class_device is left
diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index 51a8b8769c67..59258d791106 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -10,50 +10,6 @@
#include "igc.h"
/**
- * igc_set_pcie_completion_timeout - set pci-e completion timeout
- * @hw: pointer to the HW structure
- */
-static s32 igc_set_pcie_completion_timeout(struct igc_hw *hw)
-{
- u32 gcr = rd32(IGC_GCR);
- u16 pcie_devctl2;
- s32 ret_val = 0;
-
- /* only take action if timeout value is defaulted to 0 */
- if (gcr & IGC_GCR_CMPL_TMOUT_MASK)
- goto out;
-
- /* if capabilities version is type 1 we can write the
- * timeout of 10ms to 200ms through the GCR register
- */
- if (!(gcr & IGC_GCR_CAP_VER2)) {
- gcr |= IGC_GCR_CMPL_TMOUT_10ms;
- goto out;
- }
-
- /* for version 2 capabilities we need to write the config space
- * directly in order to set the completion timeout value for
- * 16ms to 55ms
- */
- ret_val = igc_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
- &pcie_devctl2);
- if (ret_val)
- goto out;
-
- pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms;
-
- ret_val = igc_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2,
- &pcie_devctl2);
-out:
- /* disable completion timeout resend */
- gcr &= ~IGC_GCR_CMPL_TMOUT_RESEND;
-
- wr32(IGC_GCR, gcr);
-
- return ret_val;
-}
-
-/**
* igc_reset_hw_base - Reset hardware
* @hw: pointer to the HW structure
*
@@ -72,11 +28,6 @@ static s32 igc_reset_hw_base(struct igc_hw *hw)
if (ret_val)
hw_dbg("PCI-E Master disable polling has failed.\n");
- /* set the completion timeout for interface */
- ret_val = igc_set_pcie_completion_timeout(hw);
- if (ret_val)
- hw_dbg("PCI-E Set completion timeout has failed.\n");
-
hw_dbg("Masking off all interrupts\n");
wr32(IGC_IMC, 0xffffffff);
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index a9a30268de59..fc0ccfe38a20 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -5,8 +5,8 @@
#define _IGC_DEFINES_H_
/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
-#define REQ_TX_DESCRIPTOR_MULTIPLE 8
-#define REQ_RX_DESCRIPTOR_MULTIPLE 8
+#define REQ_TX_DESCRIPTOR_MULTIPLE 8
+#define REQ_RX_DESCRIPTOR_MULTIPLE 8
#define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */
@@ -29,12 +29,6 @@
/* Status of Master requests. */
#define IGC_STATUS_GIO_MASTER_ENABLE 0x00080000
-/* PCI Express Control */
-#define IGC_GCR_CMPL_TMOUT_MASK 0x0000F000
-#define IGC_GCR_CMPL_TMOUT_10ms 0x00001000
-#define IGC_GCR_CMPL_TMOUT_RESEND 0x00010000
-#define IGC_GCR_CAP_VER2 0x00040000
-
/* Receive Address
* Number of high/low register pairs in the RAR. The RAR (Receive Address
* Registers) holds the directed and multicast addresses that we monitor.
@@ -72,6 +66,9 @@
#define IGC_CONNSW_AUTOSENSE_EN 0x1
+/* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
+#define MAX_JUMBO_FRAME_SIZE 0x2600
+
/* PBA constants */
#define IGC_PBA_34K 0x0022
@@ -264,9 +261,6 @@
#define IGC_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */
#define IGC_TCTL_MULR 0x10000000 /* Multiple request support */
-#define IGC_CT_SHIFT 4
-#define IGC_COLLISION_THRESHOLD 15
-
/* Flow Control Constants */
#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001
#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100
@@ -398,7 +392,7 @@
#define IGC_MDIC_ERROR 0x40000000
#define IGC_MDIC_DEST 0x80000000
-#define IGC_N0_QUEUE -1
+#define IGC_N0_QUEUE -1
#define IGC_MAX_MAC_HDR_LEN 127
#define IGC_MAX_NETWORK_HDR_LEN 511
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index 7c88b7bd4799..1039a224ac80 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -114,11 +114,8 @@ struct igc_nvm_operations {
struct igc_phy_operations {
s32 (*acquire)(struct igc_hw *hw);
- s32 (*check_polarity)(struct igc_hw *hw);
s32 (*check_reset_block)(struct igc_hw *hw);
s32 (*force_speed_duplex)(struct igc_hw *hw);
- s32 (*get_cfg_done)(struct igc_hw *hw);
- s32 (*get_cable_length)(struct igc_hw *hw);
s32 (*get_phy_info)(struct igc_hw *hw);
s32 (*read_reg)(struct igc_hw *hw, u32 address, u16 *data);
void (*release)(struct igc_hw *hw);
diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index f7683d3ae47c..ba4646737288 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -8,7 +8,6 @@
#include "igc_hw.h"
/* forward declaration */
-static s32 igc_set_default_fc(struct igc_hw *hw);
static s32 igc_set_fc_watermarks(struct igc_hw *hw);
/**
@@ -96,13 +95,10 @@ s32 igc_setup_link(struct igc_hw *hw)
goto out;
/* If requested flow control is set to default, set flow control
- * based on the EEPROM flow control settings.
+ * to the both 'rx' and 'tx' pause frames.
*/
- if (hw->fc.requested_mode == igc_fc_default) {
- ret_val = igc_set_default_fc(hw);
- if (ret_val)
- goto out;
- }
+ if (hw->fc.requested_mode == igc_fc_default)
+ hw->fc.requested_mode = igc_fc_full;
/* We want to save off the original Flow Control configuration just
* in case we get disconnected and then reconnected into a different
@@ -136,19 +132,6 @@ out:
}
/**
- * igc_set_default_fc - Set flow control default values
- * @hw: pointer to the HW structure
- *
- * Read the EEPROM for the default values for flow control and store the
- * values.
- */
-static s32 igc_set_default_fc(struct igc_hw *hw)
-{
- hw->fc.requested_mode = igc_fc_full;
- return 0;
-}
-
-/**
* igc_force_mac_fc - Force the MAC's flow control settings
* @hw: pointer to the HW structure
*
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 34fa0e60a780..93f3b4e6185b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -72,6 +72,27 @@ void igc_reset(struct igc_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
struct igc_hw *hw = &adapter->hw;
+ struct igc_fc_info *fc = &hw->fc;
+ u32 pba, hwm;
+
+ /* Repartition PBA for greater than 9k MTU if required */
+ pba = IGC_PBA_34K;
+
+ /* flow control settings
+ * The high water mark must be low enough to fit one full frame
+ * after transmitting the pause frame. As such we must have enough
+ * space to allow for us to complete our current transmit and then
+ * receive the frame that is in progress from the link partner.
+ * Set it to:
+ * - the full Rx FIFO size minus one full Tx plus one full Rx frame
+ */
+ hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
+
+ fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */
+ fc->low_water = fc->high_water - 16;
+ fc->pause_time = 0xFFFF;
+ fc->send_xon = 1;
+ fc->current_mode = fc->requested_mode;
hw->mac.ops.reset_hw(hw);
@@ -3934,6 +3955,7 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
hw->hw_addr = NULL;
netif_device_detach(netdev);
netdev_err(netdev, "PCIe link lost, device now detached\n");
+ WARN(1, "igc: Failed to read reg 0x%x!\n", reg);
}
return value;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 269bd73be1a0..94dc0a272644 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -437,6 +437,7 @@ struct mvneta_port {
struct device_node *dn;
unsigned int tx_csum_limit;
struct phylink *phylink;
+ struct phylink_config phylink_config;
struct phy *comphy;
struct mvneta_bm *bm_priv;
@@ -3356,9 +3357,11 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr)
return 0;
}
-static void mvneta_validate(struct net_device *ndev, unsigned long *supported,
+static void mvneta_validate(struct phylink_config *config,
+ unsigned long *supported,
struct phylink_link_state *state)
{
+ struct net_device *ndev = to_net_dev(config->dev);
struct mvneta_port *pp = netdev_priv(ndev);
__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
@@ -3408,9 +3411,10 @@ static void mvneta_validate(struct net_device *ndev, unsigned long *supported,
phylink_helper_basex_speed(state);
}
-static int mvneta_mac_link_state(struct net_device *ndev,
+static int mvneta_mac_link_state(struct phylink_config *config,
struct phylink_link_state *state)
{
+ struct net_device *ndev = to_net_dev(config->dev);
struct mvneta_port *pp = netdev_priv(ndev);
u32 gmac_stat;
@@ -3438,8 +3442,9 @@ static int mvneta_mac_link_state(struct net_device *ndev,
return 1;
}
-static void mvneta_mac_an_restart(struct net_device *ndev)
+static void mvneta_mac_an_restart(struct phylink_config *config)
{
+ struct net_device *ndev = to_net_dev(config->dev);
struct mvneta_port *pp = netdev_priv(ndev);
u32 gmac_an = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG);
@@ -3449,9 +3454,10 @@ static void mvneta_mac_an_restart(struct net_device *ndev)
gmac_an & ~MVNETA_GMAC_INBAND_RESTART_AN);
}
-static void mvneta_mac_config(struct net_device *ndev, unsigned int mode,
- const struct phylink_link_state *state)
+static void mvneta_mac_config(struct phylink_config *config, unsigned int mode,
+ const struct phylink_link_state *state)
{
+ struct net_device *ndev = to_net_dev(config->dev);
struct mvneta_port *pp = netdev_priv(ndev);
u32 new_ctrl0, gmac_ctrl0 = mvreg_read(pp, MVNETA_GMAC_CTRL_0);
u32 new_ctrl2, gmac_ctrl2 = mvreg_read(pp, MVNETA_GMAC_CTRL_2);
@@ -3581,9 +3587,10 @@ static void mvneta_set_eee(struct mvneta_port *pp, bool enable)
mvreg_write(pp, MVNETA_LPI_CTRL_1, lpi_ctl1);
}
-static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode,
- phy_interface_t interface)
+static void mvneta_mac_link_down(struct phylink_config *config,
+ unsigned int mode, phy_interface_t interface)
{
+ struct net_device *ndev = to_net_dev(config->dev);
struct mvneta_port *pp = netdev_priv(ndev);
u32 val;
@@ -3600,10 +3607,11 @@ static void mvneta_mac_link_down(struct net_device *ndev, unsigned int mode,
mvneta_set_eee(pp, false);
}
-static void mvneta_mac_link_up(struct net_device *ndev, unsigned int mode,
+static void mvneta_mac_link_up(struct phylink_config *config, unsigned int mode,
phy_interface_t interface,
struct phy_device *phy)
{
+ struct net_device *ndev = to_net_dev(config->dev);
struct mvneta_port *pp = netdev_priv(ndev);
u32 val;
@@ -4500,8 +4508,14 @@ static int mvneta_probe(struct platform_device *pdev)
comphy = NULL;
}
- phylink = phylink_create(dev, pdev->dev.fwnode, phy_mode,
- &mvneta_phylink_ops);
+ pp = netdev_priv(dev);
+ spin_lock_init(&pp->lock);
+
+ pp->phylink_config.dev = &dev->dev;
+ pp->phylink_config.type = PHYLINK_NETDEV;
+
+ phylink = phylink_create(&pp->phylink_config, pdev->dev.fwnode,
+ phy_mode, &mvneta_phylink_ops);
if (IS_ERR(phylink)) {
err = PTR_ERR(phylink);
goto err_free_irq;
@@ -4513,8 +4527,6 @@ static int mvneta_probe(struct platform_device *pdev)
dev->ethtool_ops = &mvneta_eth_tool_ops;
- pp = netdev_priv(dev);
- spin_lock_init(&pp->lock);
pp->phylink = phylink;
pp->comphy = comphy;
pp->phy_interface = phy_mode;
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
index 6171270a016c..d67c970f02e5 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2.h
@@ -148,6 +148,8 @@
#define MVPP22_CLS_C2_ATTR2 0x1b6c
#define MVPP22_CLS_C2_ATTR2_RSS_EN BIT(30)
#define MVPP22_CLS_C2_ATTR3 0x1b70
+#define MVPP22_CLS_C2_TCAM_CTRL 0x1b90
+#define MVPP22_CLS_C2_TCAM_BYPASS_FIFO BIT(0)
/* Descriptor Manager Top Registers */
#define MVPP2_RXQ_NUM_REG 0x2040
@@ -624,6 +626,7 @@
#define MVPP2_N_RFS_RULES (MVPP2_N_RFS_ENTRIES_PER_FLOW * 7)
/* RSS constants */
+#define MVPP22_N_RSS_TABLES 8
#define MVPP22_RSS_TABLE_ENTRIES 32
/* IPv6 max L3 address size */
@@ -725,6 +728,10 @@ enum mvpp2_prs_l3_cast {
/* Definitions */
struct mvpp2_dbgfs_entries;
+struct mvpp2_rss_table {
+ u32 indir[MVPP22_RSS_TABLE_ENTRIES];
+};
+
/* Shared Packet Processor resources */
struct mvpp2 {
/* Shared registers' base addresses */
@@ -788,6 +795,9 @@ struct mvpp2 {
/* Debugfs entries private data */
struct mvpp2_dbgfs_entries *dbgfs_entries;
+
+ /* RSS Indirection tables */
+ struct mvpp2_rss_table *rss_tables[MVPP22_N_RSS_TABLES];
};
struct mvpp2_pcpu_stats {
@@ -905,6 +915,7 @@ struct mvpp2_port {
phy_interface_t phy_interface;
struct phylink *phylink;
+ struct phylink_config phylink_config;
struct phy *comphy;
struct mvpp2_bm_pool *pool_long;
@@ -919,12 +930,14 @@ struct mvpp2_port {
u32 tx_time_coal;
- /* RSS indirection table */
- u32 indir[MVPP22_RSS_TABLE_ENTRIES];
-
/* List of steering rules active on that port */
- struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_RULES];
+ struct mvpp2_ethtool_fs *rfs_rules[MVPP2_N_RFS_ENTRIES_PER_FLOW];
int n_rfs_rules;
+
+ /* Each port has its own view of the rss contexts, so that it can number
+ * them from 0
+ */
+ int rss_ctx[MVPP22_N_RSS_TABLES];
};
/* The mvpp2_tx_desc and mvpp2_rx_desc structures describe the
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
index a57d17ab91f0..e47c00c5f829 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.c
@@ -923,6 +923,12 @@ void mvpp2_cls_init(struct mvpp2 *priv)
mvpp2_cls_c2_write(priv, &c2);
}
+ /* Disable the FIFO stages in C2 engine, which are only used in BIST
+ * mode
+ */
+ mvpp2_write(priv, MVPP22_CLS_C2_TCAM_CTRL,
+ MVPP22_CLS_C2_TCAM_BYPASS_FIFO);
+
mvpp2_cls_port_init_flows(priv);
}
@@ -963,12 +969,22 @@ u32 mvpp2_cls_c2_hit_count(struct mvpp2 *priv, int c2_index)
return mvpp2_read(priv, MVPP22_CLS_C2_HIT_CTR);
}
-static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
+static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port, u32 ctx)
{
struct mvpp2_cls_c2_entry c2;
+ u8 qh, ql;
mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+ /* The RxQ number is used to select the RSS table. It that case, we set
+ * it to be the ctx number.
+ */
+ qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+ ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+ c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+ MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
mvpp2_cls_c2_write(port->priv, &c2);
@@ -977,22 +993,45 @@ static void mvpp2_rss_port_c2_enable(struct mvpp2_port *port)
static void mvpp2_rss_port_c2_disable(struct mvpp2_port *port)
{
struct mvpp2_cls_c2_entry c2;
+ u8 qh, ql;
mvpp2_cls_c2_read(port->priv, MVPP22_CLS_C2_RSS_ENTRY(port->id), &c2);
+ /* Reset the default destination RxQ to the port's first rx queue. */
+ qh = (port->first_rxq >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+ ql = port->first_rxq & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+
+ c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
+ MVPP22_CLS_C2_ATTR0_QLOW(ql);
+
c2.attr[2] &= ~MVPP22_CLS_C2_ATTR2_RSS_EN;
mvpp2_cls_c2_write(port->priv, &c2);
}
-void mvpp22_port_rss_enable(struct mvpp2_port *port)
+static inline int mvpp22_rss_ctx(struct mvpp2_port *port, int port_rss_ctx)
+{
+ return port->rss_ctx[port_rss_ctx];
+}
+
+int mvpp22_port_rss_enable(struct mvpp2_port *port)
{
- mvpp2_rss_port_c2_enable(port);
+ if (mvpp22_rss_ctx(port, 0) < 0)
+ return -EINVAL;
+
+ mvpp2_rss_port_c2_enable(port, mvpp22_rss_ctx(port, 0));
+
+ return 0;
}
-void mvpp22_port_rss_disable(struct mvpp2_port *port)
+int mvpp22_port_rss_disable(struct mvpp2_port *port)
{
+ if (mvpp22_rss_ctx(port, 0) < 0)
+ return -EINVAL;
+
mvpp2_rss_port_c2_disable(port);
+
+ return 0;
}
static void mvpp22_port_c2_lookup_disable(struct mvpp2_port *port, int entry)
@@ -1029,7 +1068,7 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port,
struct flow_action_entry *act;
struct mvpp2_cls_c2_entry c2;
u8 qh, ql, pmap;
- int index;
+ int index, ctx;
memset(&c2, 0, sizeof(c2));
@@ -1069,14 +1108,36 @@ static int mvpp2_port_c2_tcam_rule_add(struct mvpp2_port *port,
*/
c2.act = MVPP22_CLS_C2_ACT_COLOR(MVPP22_C2_COL_NO_UPD_LOCK);
+ /* Update RSS status after matching this entry */
+ if (act->queue.ctx)
+ c2.attr[2] |= MVPP22_CLS_C2_ATTR2_RSS_EN;
+
+ /* Always lock the RSS_EN decision. We might have high prio
+ * rules steering to an RXQ, and a lower one steering to RSS,
+ * we don't want the low prio RSS rule overwriting this flag.
+ */
+ c2.act = MVPP22_CLS_C2_ACT_RSS_EN(MVPP22_C2_UPD_LOCK);
+
/* Mark packet as "forwarded to software", needed for RSS */
c2.act |= MVPP22_CLS_C2_ACT_FWD(MVPP22_C2_FWD_SW_LOCK);
c2.act |= MVPP22_CLS_C2_ACT_QHIGH(MVPP22_C2_UPD_LOCK) |
MVPP22_CLS_C2_ACT_QLOW(MVPP22_C2_UPD_LOCK);
- qh = ((act->queue.index + port->first_rxq) >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
- ql = (act->queue.index + port->first_rxq) & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+ if (act->queue.ctx) {
+ /* Get the global ctx number */
+ ctx = mvpp22_rss_ctx(port, act->queue.ctx);
+ if (ctx < 0)
+ return -EINVAL;
+
+ qh = (ctx >> 3) & MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+ ql = ctx & MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+ } else {
+ qh = ((act->queue.index + port->first_rxq) >> 3) &
+ MVPP22_CLS_C2_ATTR0_QHIGH_MASK;
+ ql = (act->queue.index + port->first_rxq) &
+ MVPP22_CLS_C2_ATTR0_QLOW_MASK;
+ }
c2.attr[0] = MVPP22_CLS_C2_ATTR0_QHIGH(qh) |
MVPP22_CLS_C2_ATTR0_QLOW(ql);
@@ -1196,6 +1257,13 @@ static int mvpp2_cls_rfs_parse_rule(struct mvpp2_rfs_rule *rule)
if (act->id != FLOW_ACTION_QUEUE && act->id != FLOW_ACTION_DROP)
return -EOPNOTSUPP;
+ /* When both an RSS context and an queue index are set, the index
+ * is considered as an offset to be added to the indirection table
+ * entries. We don't support this, so reject this rule.
+ */
+ if (act->queue.ctx && act->queue.index)
+ return -EOPNOTSUPP;
+
/* For now, only use the C2 engine which has a HEK size limited to 64
* bits for TCAM matching.
*/
@@ -1212,7 +1280,7 @@ int mvpp2_ethtool_cls_rule_get(struct mvpp2_port *port,
{
struct mvpp2_ethtool_fs *efs;
- if (rxnfc->fs.location >= MVPP2_N_RFS_RULES)
+ if (rxnfc->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW)
return -EINVAL;
efs = port->rfs_rules[rxnfc->fs.location];
@@ -1232,8 +1300,7 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port,
struct mvpp2_ethtool_fs *efs, *old_efs;
int ret = 0;
- if (info->fs.location >= 4 ||
- info->fs.location < 0)
+ if (info->fs.location >= MVPP2_N_RFS_ENTRIES_PER_FLOW)
return -EINVAL;
efs = kzalloc(sizeof(*efs), GFP_KERNEL);
@@ -1242,6 +1309,12 @@ int mvpp2_ethtool_cls_rule_ins(struct mvpp2_port *port,
input.fs = &info->fs;
+ /* We need to manually set the rss_ctx, since this info isn't present
+ * in info->fs
+ */
+ if (info->fs.flow_type & FLOW_RSS)
+ input.rss_ctx = info->rss_context;
+
ethtool_rule = ethtool_rx_flow_rule_create(&input);
if (IS_ERR(ethtool_rule)) {
ret = PTR_ERR(ethtool_rule);
@@ -1328,19 +1401,160 @@ static inline u32 mvpp22_rxfh_indir(struct mvpp2_port *port, u32 rxq)
return port->first_rxq + ((rxq * nrxqs + rxq / cpus) % port->nrxqs);
}
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table)
+static void mvpp22_rss_fill_table(struct mvpp2_port *port,
+ struct mvpp2_rss_table *table,
+ u32 rss_ctx)
{
struct mvpp2 *priv = port->priv;
int i;
for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++) {
- u32 sel = MVPP22_RSS_INDEX_TABLE(table) |
+ u32 sel = MVPP22_RSS_INDEX_TABLE(rss_ctx) |
MVPP22_RSS_INDEX_TABLE_ENTRY(i);
mvpp2_write(priv, MVPP22_RSS_INDEX, sel);
mvpp2_write(priv, MVPP22_RSS_TABLE_ENTRY,
- mvpp22_rxfh_indir(port, port->indir[i]));
+ mvpp22_rxfh_indir(port, table->indir[i]));
+ }
+}
+
+static int mvpp22_rss_context_create(struct mvpp2_port *port, u32 *rss_ctx)
+{
+ struct mvpp2 *priv = port->priv;
+ u32 ctx;
+
+ /* Find the first free RSS table */
+ for (ctx = 0; ctx < MVPP22_N_RSS_TABLES; ctx++) {
+ if (!priv->rss_tables[ctx])
+ break;
+ }
+
+ if (ctx == MVPP22_N_RSS_TABLES)
+ return -EINVAL;
+
+ priv->rss_tables[ctx] = kzalloc(sizeof(*priv->rss_tables[ctx]),
+ GFP_KERNEL);
+ if (!priv->rss_tables[ctx])
+ return -ENOMEM;
+
+ *rss_ctx = ctx;
+
+ /* Set the table width: replace the whole classifier Rx queue number
+ * with the ones configured in RSS table entries.
+ */
+ mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(ctx));
+ mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+
+ mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_QUEUE(ctx));
+ mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE, MVPP22_RSS_TABLE_POINTER(ctx));
+
+ return 0;
+}
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *port_ctx)
+{
+ u32 rss_ctx;
+ int ret, i;
+
+ ret = mvpp22_rss_context_create(port, &rss_ctx);
+ if (ret)
+ return ret;
+
+ /* Find the first available context number in the port, starting from 1.
+ * Context 0 on each port is reserved for the default context.
+ */
+ for (i = 1; i < MVPP22_N_RSS_TABLES; i++) {
+ if (port->rss_ctx[i] < 0)
+ break;
+ }
+
+ if (i == MVPP22_N_RSS_TABLES)
+ return -EINVAL;
+
+ port->rss_ctx[i] = rss_ctx;
+ *port_ctx = i;
+
+ return 0;
+}
+
+static struct mvpp2_rss_table *mvpp22_rss_table_get(struct mvpp2 *priv,
+ int rss_ctx)
+{
+ if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+ return NULL;
+
+ return priv->rss_tables[rss_ctx];
+}
+
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 port_ctx)
+{
+ struct mvpp2 *priv = port->priv;
+ struct ethtool_rxnfc *rxnfc;
+ int i, rss_ctx, ret;
+
+ rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+
+ if (rss_ctx < 0 || rss_ctx >= MVPP22_N_RSS_TABLES)
+ return -EINVAL;
+
+ /* Invalidate any active classification rule that use this context */
+ for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
+ if (!port->rfs_rules[i])
+ continue;
+
+ rxnfc = &port->rfs_rules[i]->rxnfc;
+ if (!(rxnfc->fs.flow_type & FLOW_RSS) ||
+ rxnfc->rss_context != port_ctx)
+ continue;
+
+ ret = mvpp2_ethtool_cls_rule_del(port, rxnfc);
+ if (ret) {
+ netdev_warn(port->dev,
+ "couldn't remove classification rule %d associated to this context",
+ rxnfc->fs.location);
+ }
}
+
+ kfree(priv->rss_tables[rss_ctx]);
+
+ priv->rss_tables[rss_ctx] = NULL;
+ port->rss_ctx[port_ctx] = -1;
+
+ return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 port_ctx,
+ const u32 *indir)
+{
+ int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+ struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+ rss_ctx);
+
+ if (!rss_table)
+ return -EINVAL;
+
+ memcpy(rss_table->indir, indir,
+ MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+ mvpp22_rss_fill_table(port, rss_table, rss_ctx);
+
+ return 0;
+}
+
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 port_ctx,
+ u32 *indir)
+{
+ int rss_ctx = mvpp22_rss_ctx(port, port_ctx);
+ struct mvpp2_rss_table *rss_table = mvpp22_rss_table_get(port->priv,
+ rss_ctx);
+
+ if (!rss_table)
+ return -EINVAL;
+
+ memcpy(indir, rss_table->indir,
+ MVPP22_RSS_TABLE_ENTRIES * sizeof(rss_table->indir[0]));
+
+ return 0;
}
int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info)
@@ -1424,32 +1638,32 @@ int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info)
return 0;
}
-void mvpp22_port_rss_init(struct mvpp2_port *port)
+int mvpp22_port_rss_init(struct mvpp2_port *port)
{
- struct mvpp2 *priv = port->priv;
- int i;
+ struct mvpp2_rss_table *table;
+ u32 context = 0;
+ int i, ret;
- /* Set the table width: replace the whole classifier Rx queue number
- * with the ones configured in RSS table entries.
- */
- mvpp2_write(priv, MVPP22_RSS_INDEX, MVPP22_RSS_INDEX_TABLE(port->id));
- mvpp2_write(priv, MVPP22_RSS_WIDTH, 8);
+ for (i = 0; i < MVPP22_N_RSS_TABLES; i++)
+ port->rss_ctx[i] = -1;
- /* The default RxQ is used as a key to select the RSS table to use.
- * We use one RSS table per port.
- */
- mvpp2_write(priv, MVPP22_RSS_INDEX,
- MVPP22_RSS_INDEX_QUEUE(port->first_rxq));
- mvpp2_write(priv, MVPP22_RXQ2RSS_TABLE,
- MVPP22_RSS_TABLE_POINTER(port->id));
+ ret = mvpp22_rss_context_create(port, &context);
+ if (ret)
+ return ret;
+
+ table = mvpp22_rss_table_get(port->priv, context);
+ if (!table)
+ return -EINVAL;
+
+ port->rss_ctx[0] = context;
/* Configure the first table to evenly distribute the packets across
* real Rx Queues. The table entries map a hash to a port Rx Queue.
*/
for (i = 0; i < MVPP22_RSS_TABLE_ENTRIES; i++)
- port->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
+ table->indir[i] = ethtool_rxfh_indir_default(i, port->nrxqs);
- mvpp22_rss_fill_table(port, port->id);
+ mvpp22_rss_fill_table(port, table, mvpp22_rss_ctx(port, 0));
/* Configure default flows */
mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_IP4, MVPP22_CLS_HEK_IP4_2T);
@@ -1458,4 +1672,6 @@ void mvpp22_port_rss_init(struct mvpp2_port *port)
mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_TCP6, MVPP22_CLS_HEK_IP6_5T);
mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP4, MVPP22_CLS_HEK_IP4_5T);
mvpp2_port_rss_hash_opts_set(port, MVPP22_FLOW_UDP6, MVPP22_CLS_HEK_IP6_5T);
+
+ return 0;
}
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
index 56b617375a65..26cc1176e758 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_cls.h
@@ -249,11 +249,18 @@ struct mvpp2_cls_lookup_entry {
u32 data;
};
-void mvpp22_rss_fill_table(struct mvpp2_port *port, u32 table);
-void mvpp22_port_rss_init(struct mvpp2_port *port);
+int mvpp22_port_rss_init(struct mvpp2_port *port);
-void mvpp22_port_rss_enable(struct mvpp2_port *port);
-void mvpp22_port_rss_disable(struct mvpp2_port *port);
+int mvpp22_port_rss_enable(struct mvpp2_port *port);
+int mvpp22_port_rss_disable(struct mvpp2_port *port);
+
+int mvpp22_port_rss_ctx_create(struct mvpp2_port *port, u32 *rss_ctx);
+int mvpp22_port_rss_ctx_delete(struct mvpp2_port *port, u32 rss_ctx);
+
+int mvpp22_port_rss_ctx_indir_set(struct mvpp2_port *port, u32 rss_ctx,
+ const u32 *indir);
+int mvpp22_port_rss_ctx_indir_get(struct mvpp2_port *port, u32 rss_ctx,
+ u32 *indir);
int mvpp2_ethtool_rxfh_get(struct mvpp2_port *port, struct ethtool_rxnfc *info);
int mvpp2_ethtool_rxfh_set(struct mvpp2_port *port, struct ethtool_rxnfc *info);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 7a67e23a2c2b..772caf0bdecd 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -56,9 +56,9 @@ static struct {
/* The prototype is added here to be used in start_dev when using ACPI. This
* will be removed once phylink is used for all modes (dt+ACPI).
*/
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state);
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
phy_interface_t interface, struct phy_device *phy);
/* Queue modes */
@@ -3237,9 +3237,9 @@ static void mvpp2_start_dev(struct mvpp2_port *port)
struct phylink_link_state state = {
.interface = port->phy_interface,
};
- mvpp2_mac_config(port->dev, MLO_AN_INBAND, &state);
- mvpp2_mac_link_up(port->dev, MLO_AN_INBAND, port->phy_interface,
- NULL);
+ mvpp2_mac_config(&port->phylink_config, MLO_AN_INBAND, &state);
+ mvpp2_mac_link_up(&port->phylink_config, MLO_AN_INBAND,
+ port->phy_interface, NULL);
}
netif_tx_start_all_queues(port->dev);
@@ -3954,7 +3954,7 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
ret = mvpp2_ethtool_cls_rule_get(port, info);
break;
case ETHTOOL_GRXCLSRLALL:
- for (i = 0; i < MVPP2_N_RFS_RULES; i++) {
+ for (i = 0; i < MVPP2_N_RFS_ENTRIES_PER_FLOW; i++) {
if (port->rfs_rules[i])
rules[loc++] = i;
}
@@ -4000,24 +4000,25 @@ static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
u8 *hfunc)
{
struct mvpp2_port *port = netdev_priv(dev);
+ int ret = 0;
if (!mvpp22_rss_is_supported())
return -EOPNOTSUPP;
if (indir)
- memcpy(indir, port->indir,
- ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
+ ret = mvpp22_port_rss_ctx_indir_get(port, 0, indir);
if (hfunc)
*hfunc = ETH_RSS_HASH_CRC32;
- return 0;
+ return ret;
}
static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
const u8 *key, const u8 hfunc)
{
struct mvpp2_port *port = netdev_priv(dev);
+ int ret = 0;
if (!mvpp22_rss_is_supported())
return -EOPNOTSUPP;
@@ -4028,15 +4029,58 @@ static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
if (key)
return -EOPNOTSUPP;
- if (indir) {
- memcpy(port->indir, indir,
- ARRAY_SIZE(port->indir) * sizeof(port->indir[0]));
- mvpp22_rss_fill_table(port, port->id);
- }
+ if (indir)
+ ret = mvpp22_port_rss_ctx_indir_set(port, 0, indir);
- return 0;
+ return ret;
+}
+
+static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir,
+ u8 *key, u8 *hfunc, u32 rss_context)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret = 0;
+
+ if (!mvpp22_rss_is_supported())
+ return -EOPNOTSUPP;
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_CRC32;
+
+ if (indir)
+ ret = mvpp22_port_rss_ctx_indir_get(port, rss_context, indir);
+
+ return ret;
}
+static int mvpp2_ethtool_set_rxfh_context(struct net_device *dev,
+ const u32 *indir, const u8 *key,
+ const u8 hfunc, u32 *rss_context,
+ bool delete)
+{
+ struct mvpp2_port *port = netdev_priv(dev);
+ int ret;
+
+ if (!mvpp22_rss_is_supported())
+ return -EOPNOTSUPP;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
+ return -EOPNOTSUPP;
+
+ if (key)
+ return -EOPNOTSUPP;
+
+ if (delete)
+ return mvpp22_port_rss_ctx_delete(port, *rss_context);
+
+ if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
+ ret = mvpp22_port_rss_ctx_create(port, rss_context);
+ if (ret)
+ return ret;
+ }
+
+ return mvpp22_port_rss_ctx_indir_set(port, *rss_context, indir);
+}
/* Device ops */
static const struct net_device_ops mvpp2_netdev_ops = {
@@ -4073,7 +4117,8 @@ static const struct ethtool_ops mvpp2_eth_tool_ops = {
.get_rxfh_indir_size = mvpp2_ethtool_get_rxfh_indir_size,
.get_rxfh = mvpp2_ethtool_get_rxfh,
.set_rxfh = mvpp2_ethtool_set_rxfh,
-
+ .get_rxfh_context = mvpp2_ethtool_get_rxfh_context,
+ .set_rxfh_context = mvpp2_ethtool_set_rxfh_context,
};
/* Used for PPv2.1, or PPv2.2 with the old Device Tree binding that
@@ -4416,11 +4461,12 @@ static void mvpp2_port_copy_mac_addr(struct net_device *dev, struct mvpp2 *priv,
eth_hw_addr_random(dev);
}
-static void mvpp2_phylink_validate(struct net_device *dev,
+static void mvpp2_phylink_validate(struct phylink_config *config,
unsigned long *supported,
struct phylink_link_state *state)
{
- struct mvpp2_port *port = netdev_priv(dev);
+ struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+ phylink_config);
__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
/* Invalid combinations */
@@ -4544,10 +4590,11 @@ static void mvpp2_gmac_link_state(struct mvpp2_port *port,
state->pause |= MLO_PAUSE_TX;
}
-static int mvpp2_phylink_mac_link_state(struct net_device *dev,
+static int mvpp2_phylink_mac_link_state(struct phylink_config *config,
struct phylink_link_state *state)
{
- struct mvpp2_port *port = netdev_priv(dev);
+ struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+ phylink_config);
if (port->priv->hw_version == MVPP22 && port->gop_id == 0) {
u32 mode = readl(port->base + MVPP22_XLG_CTRL3_REG);
@@ -4563,9 +4610,10 @@ static int mvpp2_phylink_mac_link_state(struct net_device *dev,
return 1;
}
-static void mvpp2_mac_an_restart(struct net_device *dev)
+static void mvpp2_mac_an_restart(struct phylink_config *config)
{
- struct mvpp2_port *port = netdev_priv(dev);
+ struct mvpp2_port *port = container_of(config, struct mvpp2_port,
+ phylink_config);
u32 val = readl(port->base + MVPP2_GMAC_AUTONEG_CONFIG);
writel(val | MVPP2_GMAC_IN_BAND_RESTART_AN,
@@ -4750,9 +4798,10 @@ static void mvpp2_gmac_config(struct mvpp2_port *port, unsigned int mode,
}
}
-static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state)
{
+ struct net_device *dev = to_net_dev(config->dev);
struct mvpp2_port *port = netdev_priv(dev);
bool change_interface = port->phy_interface != state->interface;
@@ -4792,9 +4841,10 @@ static void mvpp2_mac_config(struct net_device *dev, unsigned int mode,
mvpp2_port_enable(port);
}
-static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
+static void mvpp2_mac_link_up(struct phylink_config *config, unsigned int mode,
phy_interface_t interface, struct phy_device *phy)
{
+ struct net_device *dev = to_net_dev(config->dev);
struct mvpp2_port *port = netdev_priv(dev);
u32 val;
@@ -4819,9 +4869,10 @@ static void mvpp2_mac_link_up(struct net_device *dev, unsigned int mode,
netif_tx_wake_all_queues(dev);
}
-static void mvpp2_mac_link_down(struct net_device *dev, unsigned int mode,
- phy_interface_t interface)
+static void mvpp2_mac_link_down(struct phylink_config *config,
+ unsigned int mode, phy_interface_t interface)
{
+ struct net_device *dev = to_net_dev(config->dev);
struct mvpp2_port *port = netdev_priv(dev);
u32 val;
@@ -5078,8 +5129,11 @@ static int mvpp2_port_probe(struct platform_device *pdev,
/* Phylink isn't used w/ ACPI as of now */
if (port_node) {
- phylink = phylink_create(dev, port_fwnode, phy_mode,
- &mvpp2_phylink_ops);
+ port->phylink_config.dev = &dev->dev;
+ port->phylink_config.type = PHYLINK_NETDEV;
+
+ phylink = phylink_create(&port->phylink_config, port_fwnode,
+ phy_mode, &mvpp2_phylink_ops);
if (IS_ERR(phylink)) {
err = PTR_ERR(phylink);
goto err_free_port_pcpu;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 6ee6de7f0160..182762898361 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1098,6 +1098,12 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
goto err_register_params;
}
+ if (mlxsw_driver->init) {
+ err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+ if (err)
+ goto err_driver_init;
+ }
+
err = mlxsw_hwmon_init(mlxsw_core, mlxsw_bus_info, &mlxsw_core->hwmon);
if (err)
goto err_hwmon_init;
@@ -1107,22 +1113,17 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
if (err)
goto err_thermal_init;
- if (mlxsw_driver->init) {
- err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
- if (err)
- goto err_driver_init;
- }
-
if (mlxsw_driver->params_register && !reload)
devlink_params_publish(devlink);
return 0;
-err_driver_init:
- mlxsw_thermal_fini(mlxsw_core->thermal);
err_thermal_init:
mlxsw_hwmon_fini(mlxsw_core->hwmon);
err_hwmon_init:
+ if (mlxsw_core->driver->fini)
+ mlxsw_core->driver->fini(mlxsw_core);
+err_driver_init:
if (mlxsw_driver->params_unregister && !reload)
mlxsw_driver->params_unregister(mlxsw_core);
err_register_params:
@@ -1187,10 +1188,10 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core,
if (mlxsw_core->driver->params_unregister && !reload)
devlink_params_unpublish(devlink);
- if (mlxsw_core->driver->fini)
- mlxsw_core->driver->fini(mlxsw_core);
mlxsw_thermal_fini(mlxsw_core->thermal);
mlxsw_hwmon_fini(mlxsw_core->hwmon);
+ if (mlxsw_core->driver->fini)
+ mlxsw_core->driver->fini(mlxsw_core);
if (mlxsw_core->driver->params_unregister && !reload)
mlxsw_core->driver->params_unregister(mlxsw_core);
if (!reload)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
index 72539a9a3847..d2c7ce67c300 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c
@@ -92,33 +92,20 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module,
u16 temp;
} temp_thresh;
char mcia_pl[MLXSW_REG_MCIA_LEN] = {0};
- char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
- u16 module_temp;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ unsigned int module_temp;
bool qsfp;
int err;
- mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
- 1);
- err = mlxsw_reg_query(core, MLXSW_REG(mtbr), mtbr_pl);
+ mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+ false, false);
+ err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl);
if (err)
return err;
-
- /* Don't read temperature thresholds for module with no valid info. */
- mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &module_temp, NULL);
- switch (module_temp) {
- case MLXSW_REG_MTBR_BAD_SENS_INFO: /* fall-through */
- case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
- case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
- case MLXSW_REG_MTBR_INDEX_NA:
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &module_temp, NULL, NULL);
+ if (!module_temp) {
*temp = 0;
return 0;
- default:
- /* Do not consider thresholds for zero temperature. */
- if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) {
- *temp = 0;
- return 0;
- }
- break;
}
/* Read Free Side Device Temperature Thresholds from page 03h
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 496dc904c5ed..056e3f55ae6c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -23,6 +23,14 @@ struct mlxsw_hwmon_attr {
char name[32];
};
+static int mlxsw_hwmon_get_attr_index(int index, int count)
+{
+ if (index >= count)
+ return index % count + MLXSW_REG_MTMP_GBOX_INDEX_MIN;
+
+ return index;
+}
+
struct mlxsw_hwmon {
struct mlxsw_core *core;
const struct mlxsw_bus_info *bus_info;
@@ -33,6 +41,7 @@ struct mlxsw_hwmon {
struct mlxsw_hwmon_attr hwmon_attrs[MLXSW_HWMON_ATTR_COUNT];
unsigned int attrs_count;
u8 sensor_count;
+ u8 module_sensor_count;
};
static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
@@ -44,10 +53,12 @@ static ssize_t mlxsw_hwmon_temp_show(struct device *dev,
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
unsigned int temp;
+ int index;
int err;
- mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
- false, false);
+ index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+ mlxsw_hwmon->module_sensor_count);
+ mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
@@ -66,10 +77,12 @@ static ssize_t mlxsw_hwmon_temp_max_show(struct device *dev,
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
unsigned int temp_max;
+ int index;
int err;
- mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index,
- false, false);
+ index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+ mlxsw_hwmon->module_sensor_count);
+ mlxsw_reg_mtmp_pack(mtmp_pl, index, false, false);
err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to query temp sensor\n");
@@ -88,6 +101,7 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
char mtmp_pl[MLXSW_REG_MTMP_LEN];
unsigned long val;
+ int index;
int err;
err = kstrtoul(buf, 10, &val);
@@ -96,7 +110,9 @@ static ssize_t mlxsw_hwmon_temp_rst_store(struct device *dev,
if (val != 1)
return -EINVAL;
- mlxsw_reg_mtmp_pack(mtmp_pl, mlwsw_hwmon_attr->type_index, true, true);
+ index = mlxsw_hwmon_get_attr_index(mlwsw_hwmon_attr->type_index,
+ mlxsw_hwmon->module_sensor_count);
+ mlxsw_reg_mtmp_pack(mtmp_pl, index, true, true);
err = mlxsw_reg_write(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
if (err) {
dev_err(mlxsw_hwmon->bus_info->dev, "Failed to reset temp sensor history\n");
@@ -198,38 +214,18 @@ static ssize_t mlxsw_hwmon_module_temp_show(struct device *dev,
struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
- char mtbr_pl[MLXSW_REG_MTBR_LEN] = {0};
- u16 temp;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ unsigned int temp;
u8 module;
int err;
module = mlwsw_hwmon_attr->type_index - mlxsw_hwmon->sensor_count;
- mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX + module,
- 1);
- err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtbr), mtbr_pl);
- if (err) {
- dev_err(dev, "Failed to query module temperature sensor\n");
+ mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + module,
+ false, false);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err)
return err;
- }
-
- mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
- /* Update status and temperature cache. */
- switch (temp) {
- case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
- case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
- case MLXSW_REG_MTBR_INDEX_NA:
- temp = 0;
- break;
- case MLXSW_REG_MTBR_BAD_SENS_INFO:
- /* Untrusted cable is connected. Reading temperature from its
- * sensor is faulty.
- */
- temp = 0;
- break;
- default:
- temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
- break;
- }
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
return sprintf(buf, "%u\n", temp);
}
@@ -333,6 +329,20 @@ mlxsw_hwmon_module_temp_label_show(struct device *dev,
mlwsw_hwmon_attr->type_index);
}
+static ssize_t
+mlxsw_hwmon_gbox_temp_label_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlxsw_hwmon_attr *mlwsw_hwmon_attr =
+ container_of(attr, struct mlxsw_hwmon_attr, dev_attr);
+ struct mlxsw_hwmon *mlxsw_hwmon = mlwsw_hwmon_attr->hwmon;
+ int index = mlwsw_hwmon_attr->type_index -
+ mlxsw_hwmon->module_sensor_count + 1;
+
+ return sprintf(buf, "gearbox %03u\n", index);
+}
+
enum mlxsw_hwmon_attr_type {
MLXSW_HWMON_ATTR_TYPE_TEMP,
MLXSW_HWMON_ATTR_TYPE_TEMP_MAX,
@@ -345,6 +355,7 @@ enum mlxsw_hwmon_attr_type {
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_CRIT,
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_EMERG,
MLXSW_HWMON_ATTR_TYPE_TEMP_MODULE_LABEL,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
};
static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
@@ -428,6 +439,13 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
"temp%u_label", num + 1);
break;
+ case MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL:
+ mlxsw_hwmon_attr->dev_attr.show =
+ mlxsw_hwmon_gbox_temp_label_show;
+ mlxsw_hwmon_attr->dev_attr.attr.mode = 0444;
+ snprintf(mlxsw_hwmon_attr->name, sizeof(mlxsw_hwmon_attr->name),
+ "temp%u_label", num + 1);
+ break;
default:
WARN_ON(1);
}
@@ -556,6 +574,54 @@ static int mlxsw_hwmon_module_init(struct mlxsw_hwmon *mlxsw_hwmon)
index, index);
index++;
}
+ mlxsw_hwmon->module_sensor_count = index;
+
+ return 0;
+}
+
+static int mlxsw_hwmon_gearbox_init(struct mlxsw_hwmon *mlxsw_hwmon)
+{
+ int index, max_index, sensor_index;
+ char mgpir_pl[MLXSW_REG_MGPIR_LEN];
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ u8 gbox_num;
+ int err;
+
+ mlxsw_reg_mgpir_pack(mgpir_pl);
+ err = mlxsw_reg_query(mlxsw_hwmon->core, MLXSW_REG(mgpir), mgpir_pl);
+ if (err)
+ return err;
+
+ mlxsw_reg_mgpir_unpack(mgpir_pl, &gbox_num, NULL, NULL);
+ if (!gbox_num)
+ return 0;
+
+ index = mlxsw_hwmon->module_sensor_count;
+ max_index = mlxsw_hwmon->module_sensor_count + gbox_num;
+ while (index < max_index) {
+ sensor_index = index % mlxsw_hwmon->module_sensor_count +
+ MLXSW_REG_MTMP_GBOX_INDEX_MIN;
+ mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, true, true);
+ err = mlxsw_reg_write(mlxsw_hwmon->core,
+ MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ dev_err(mlxsw_hwmon->bus_info->dev, "Failed to setup temp sensor number %d\n",
+ sensor_index);
+ return err;
+ }
+ mlxsw_hwmon_attr_add(mlxsw_hwmon, MLXSW_HWMON_ATTR_TYPE_TEMP,
+ index, index);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_MAX, index,
+ index);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_RST, index,
+ index);
+ mlxsw_hwmon_attr_add(mlxsw_hwmon,
+ MLXSW_HWMON_ATTR_TYPE_TEMP_GBOX_LABEL,
+ index, index);
+ index++;
+ }
return 0;
}
@@ -586,6 +652,10 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
if (err)
goto err_temp_module_init;
+ err = mlxsw_hwmon_gearbox_init(mlxsw_hwmon);
+ if (err)
+ goto err_temp_gearbox_init;
+
mlxsw_hwmon->groups[0] = &mlxsw_hwmon->group;
mlxsw_hwmon->group.attrs = mlxsw_hwmon->attrs;
@@ -602,6 +672,7 @@ int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
return 0;
err_hwmon_register:
+err_temp_gearbox_init:
err_temp_module_init:
err_fans_init:
err_temp_init:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
index d3e851e7ca72..cfab0e330a47 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -449,39 +449,31 @@ static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev,
struct mlxsw_thermal_module *tz = tzdev->devdata;
struct mlxsw_thermal *thermal = tz->parent;
struct device *dev = thermal->bus_info->dev;
- char mtbr_pl[MLXSW_REG_MTBR_LEN];
- u16 temp;
+ char mtmp_pl[MLXSW_REG_MTMP_LEN];
+ unsigned int temp;
int err;
/* Read module temperature. */
- mlxsw_reg_mtbr_pack(mtbr_pl, MLXSW_REG_MTBR_BASE_MODULE_INDEX +
- tz->module, 1);
- err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtbr), mtbr_pl);
- if (err)
- return err;
-
- mlxsw_reg_mtbr_temp_unpack(mtbr_pl, 0, &temp, NULL);
- /* Update temperature. */
- switch (temp) {
- case MLXSW_REG_MTBR_NO_CONN: /* fall-through */
- case MLXSW_REG_MTBR_NO_TEMP_SENS: /* fall-through */
- case MLXSW_REG_MTBR_INDEX_NA: /* fall-through */
- case MLXSW_REG_MTBR_BAD_SENS_INFO:
+ mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN +
+ tz->module, false, false);
+ err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+ if (err) {
+ /* Do not return error - in case of broken module's sensor
+ * it will cause error message flooding.
+ */
temp = 0;
- break;
- default:
- temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp);
- /* Reset all trip point. */
- mlxsw_thermal_module_trips_reset(tz);
- /* Update trip points. */
- err = mlxsw_thermal_module_trips_update(dev, thermal->core,
- tz);
- if (err)
- return err;
- break;
+ *p_temp = (int) temp;
+ return 0;
}
-
+ mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
*p_temp = (int) temp;
+
+ if (!temp)
+ return 0;
+
+ /* Update trip points. */
+ mlxsw_thermal_module_trips_update(dev, thermal->core, tz);
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
index 06aea1999518..95f408d0e103 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/i2c.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -43,11 +43,10 @@
#define MLXSW_I2C_PREP_SIZE (MLXSW_I2C_ADDR_WIDTH + 28)
#define MLXSW_I2C_MBOX_SIZE 20
#define MLXSW_I2C_MBOX_OUT_PARAM_OFF 12
-#define MLXSW_I2C_MAX_BUFF_SIZE 32
#define MLXSW_I2C_MBOX_OFFSET_BITS 20
#define MLXSW_I2C_MBOX_SIZE_BITS 12
#define MLXSW_I2C_ADDR_BUF_SIZE 4
-#define MLXSW_I2C_BLK_MAX 32
+#define MLXSW_I2C_BLK_DEF 32
#define MLXSW_I2C_RETRY 5
#define MLXSW_I2C_TIMEOUT_MSECS 5000
#define MLXSW_I2C_MAX_DATA_SIZE 256
@@ -62,6 +61,7 @@
* @dev: I2C device;
* @core: switch core pointer;
* @bus_info: bus info block;
+ * @block_size: maximum block size allowed to pass to under layer;
*/
struct mlxsw_i2c {
struct {
@@ -74,6 +74,7 @@ struct mlxsw_i2c {
struct device *dev;
struct mlxsw_core *core;
struct mlxsw_bus_info bus_info;
+ u16 block_size;
};
#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) { \
@@ -315,20 +316,26 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
struct i2c_client *client = to_i2c_client(dev);
struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
- u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE];
int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j;
unsigned long end;
+ u8 *tran_buf;
struct i2c_msg write_tran =
- MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE);
+ MLXSW_I2C_WRITE_MSG(client, NULL, MLXSW_I2C_PUSH_CMD_SIZE);
int err;
+ tran_buf = kmalloc(mlxsw_i2c->block_size + MLXSW_I2C_ADDR_BUF_SIZE,
+ GFP_KERNEL);
+ if (!tran_buf)
+ return -ENOMEM;
+
+ write_tran.buf = tran_buf;
for (i = 0; i < num; i++) {
- chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ?
- MLXSW_I2C_BLK_MAX : in_mbox_size;
+ chunk_size = (in_mbox_size > mlxsw_i2c->block_size) ?
+ mlxsw_i2c->block_size : in_mbox_size;
write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size;
mlxsw_i2c_set_slave_addr(tran_buf, off);
memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox +
- MLXSW_I2C_BLK_MAX * i, chunk_size);
+ mlxsw_i2c->block_size * i, chunk_size);
j = 0;
end = jiffies + timeout;
@@ -342,9 +349,10 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
(j++ < MLXSW_I2C_RETRY));
if (err != 1) {
- if (!err)
+ if (!err) {
err = -EIO;
- return err;
+ goto mlxsw_i2c_write_exit;
+ }
}
off += chunk_size;
@@ -355,24 +363,27 @@ mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0);
if (err) {
dev_err(&client->dev, "Could not start transaction");
- return -EIO;
+ err = -EIO;
+ goto mlxsw_i2c_write_exit;
}
/* Wait until go bit is cleared. */
err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status);
if (err) {
dev_err(&client->dev, "HW semaphore is not released");
- return err;
+ goto mlxsw_i2c_write_exit;
}
/* Validate transaction completion status. */
if (*p_status) {
dev_err(&client->dev, "Bad transaction completion status %x\n",
*p_status);
- return -EIO;
+ err = -EIO;
}
- return 0;
+mlxsw_i2c_write_exit:
+ kfree(tran_buf);
+ return err;
}
/* Routine executes I2C command. */
@@ -395,8 +406,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
if (in_mbox) {
reg_size = mlxsw_i2c_get_reg_size(in_mbox);
- num = reg_size / MLXSW_I2C_BLK_MAX;
- if (reg_size % MLXSW_I2C_BLK_MAX)
+ num = reg_size / mlxsw_i2c->block_size;
+ if (reg_size % mlxsw_i2c->block_size)
num++;
if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
@@ -416,7 +427,7 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
} else {
/* No input mailbox is case of initialization query command. */
reg_size = MLXSW_I2C_MAX_DATA_SIZE;
- num = reg_size / MLXSW_I2C_BLK_MAX;
+ num = reg_size / mlxsw_i2c->block_size;
if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
dev_err(&client->dev, "Could not acquire lock");
@@ -432,8 +443,8 @@ mlxsw_i2c_cmd(struct device *dev, u16 opcode, u32 in_mod, size_t in_mbox_size,
/* Send read transaction to get output mailbox content. */
read_tran[1].buf = out_mbox;
for (i = 0; i < num; i++) {
- chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ?
- MLXSW_I2C_BLK_MAX : reg_size;
+ chunk_size = (reg_size > mlxsw_i2c->block_size) ?
+ mlxsw_i2c->block_size : reg_size;
read_tran[1].len = chunk_size;
mlxsw_i2c_set_slave_addr(tran_buf, off);
@@ -509,8 +520,20 @@ mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (!mbox)
return -ENOMEM;
+ err = mlxsw_cmd_query_fw(mlxsw_core, mbox);
+ if (err)
+ goto mbox_put;
+
+ mlxsw_i2c->bus_info.fw_rev.major =
+ mlxsw_cmd_mbox_query_fw_fw_rev_major_get(mbox);
+ mlxsw_i2c->bus_info.fw_rev.minor =
+ mlxsw_cmd_mbox_query_fw_fw_rev_minor_get(mbox);
+ mlxsw_i2c->bus_info.fw_rev.subminor =
+ mlxsw_cmd_mbox_query_fw_fw_rev_subminor_get(mbox);
+
err = mlxsw_core_resources_query(mlxsw_core, mbox, res);
+mbox_put:
mlxsw_cmd_mbox_free(mbox);
return err;
}
@@ -534,6 +557,7 @@ static const struct mlxsw_bus mlxsw_i2c_bus = {
static int mlxsw_i2c_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
+ const struct i2c_adapter_quirks *quirks = client->adapter->quirks;
struct mlxsw_i2c *mlxsw_i2c;
u8 status;
int err;
@@ -542,6 +566,22 @@ static int mlxsw_i2c_probe(struct i2c_client *client,
if (!mlxsw_i2c)
return -ENOMEM;
+ if (quirks) {
+ if ((quirks->max_read_len &&
+ quirks->max_read_len < MLXSW_I2C_BLK_DEF) ||
+ (quirks->max_write_len &&
+ quirks->max_write_len < MLXSW_I2C_BLK_DEF)) {
+ dev_err(&client->dev, "Insufficient transaction buffer length\n");
+ return -EOPNOTSUPP;
+ }
+
+ mlxsw_i2c->block_size = max_t(u16, MLXSW_I2C_BLK_DEF,
+ min_t(u16, quirks->max_read_len,
+ quirks->max_write_len));
+ } else {
+ mlxsw_i2c->block_size = MLXSW_I2C_BLK_DEF;
+ }
+
i2c_set_clientdata(client, mlxsw_i2c);
mutex_init(&mlxsw_i2c->cmd.lock);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
index cf2114273b72..471b0ca6d69a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -67,6 +67,23 @@ static const struct net_device_ops mlxsw_m_port_netdev_ops = {
.ndo_get_devlink_port = mlxsw_m_port_get_devlink_port,
};
+static void mlxsw_m_module_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *drvinfo)
+{
+ struct mlxsw_m_port *mlxsw_m_port = netdev_priv(dev);
+ struct mlxsw_m *mlxsw_m = mlxsw_m_port->mlxsw_m;
+
+ strlcpy(drvinfo->driver, mlxsw_m->bus_info->device_kind,
+ sizeof(drvinfo->driver));
+ snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
+ "%d.%d.%d",
+ mlxsw_m->bus_info->fw_rev.major,
+ mlxsw_m->bus_info->fw_rev.minor,
+ mlxsw_m->bus_info->fw_rev.subminor);
+ strlcpy(drvinfo->bus_info, mlxsw_m->bus_info->device_name,
+ sizeof(drvinfo->bus_info));
+}
+
static int mlxsw_m_get_module_info(struct net_device *netdev,
struct ethtool_modinfo *modinfo)
{
@@ -88,6 +105,7 @@ mlxsw_m_get_module_eeprom(struct net_device *netdev, struct ethtool_eeprom *ee,
}
static const struct ethtool_ops mlxsw_m_port_ethtool_ops = {
+ .get_drvinfo = mlxsw_m_module_get_drvinfo,
.get_module_info = mlxsw_m_get_module_info,
.get_module_eeprom = mlxsw_m_get_module_eeprom,
};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index e8002bfc1e8f..7348c5a5ad6a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -8039,13 +8039,15 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7);
MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN);
+#define MLXSW_REG_MTMP_MODULE_INDEX_MIN 64
+#define MLXSW_REG_MTMP_GBOX_INDEX_MIN 256
/* reg_mtmp_sensor_index
* Sensors index to access.
* 64-127 of sensor_index are mapped to the SFP+/QSFP modules sequentially
* (module 0 is mapped to sensor_index 64).
* Access: Index
*/
-MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtmp, sensor_index, 0x00, 0, 12);
/* Convert to milli degrees Celsius */
#define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125)
@@ -8107,7 +8109,7 @@ MLXSW_ITEM32(reg, mtmp, temperature_threshold_lo, 0x10, 0, 16);
*/
MLXSW_ITEM_BUF(reg, mtmp, sensor_name, 0x18, MLXSW_REG_MTMP_SENSOR_NAME_SIZE);
-static inline void mlxsw_reg_mtmp_pack(char *payload, u8 sensor_index,
+static inline void mlxsw_reg_mtmp_pack(char *payload, u16 sensor_index,
bool max_temp_enable,
bool max_temp_reset)
{
@@ -8156,7 +8158,7 @@ MLXSW_REG_DEFINE(mtbr, MLXSW_REG_MTBR_ID, MLXSW_REG_MTBR_LEN);
* 64-127 are mapped to the SFP+/QSFP modules sequentially).
* Access: Index
*/
-MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 7);
+MLXSW_ITEM32(reg, mtbr, base_sensor_index, 0x00, 0, 12);
/* reg_mtbr_num_rec
* Request: Number of records to read
@@ -8183,7 +8185,7 @@ MLXSW_ITEM32_INDEXED(reg, mtbr, rec_max_temp, MLXSW_REG_MTBR_BASE_LEN, 16,
MLXSW_ITEM32_INDEXED(reg, mtbr, rec_temp, MLXSW_REG_MTBR_BASE_LEN, 0, 16,
MLXSW_REG_MTBR_REC_LEN, 0x00, false);
-static inline void mlxsw_reg_mtbr_pack(char *payload, u8 base_sensor_index,
+static inline void mlxsw_reg_mtbr_pack(char *payload, u16 base_sensor_index,
u8 num_rec)
{
MLXSW_REG_ZERO(mtbr, payload);
@@ -9043,6 +9045,57 @@ static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth,
mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
}
+/* MGPIR - Management General Peripheral Information Register
+ * ----------------------------------------------------------
+ * MGPIR register allows software to query the hardware and
+ * firmware general information of peripheral entities.
+ */
+#define MLXSW_REG_MGPIR_ID 0x9100
+#define MLXSW_REG_MGPIR_LEN 0xA0
+
+MLXSW_REG_DEFINE(mgpir, MLXSW_REG_MGPIR_ID, MLXSW_REG_MGPIR_LEN);
+
+enum mlxsw_reg_mgpir_device_type {
+ MLXSW_REG_MGPIR_DEVICE_TYPE_NONE,
+ MLXSW_REG_MGPIR_DEVICE_TYPE_GEARBOX_DIE,
+};
+
+/* device_type
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, device_type, 0x00, 24, 4);
+
+/* devices_per_flash
+ * Number of devices of device_type per flash (can be shared by few devices).
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, devices_per_flash, 0x00, 16, 8);
+
+/* num_of_devices
+ * Number of devices of device_type.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, mgpir, num_of_devices, 0x00, 0, 8);
+
+static inline void mlxsw_reg_mgpir_pack(char *payload)
+{
+ MLXSW_REG_ZERO(mgpir, payload);
+}
+
+static inline void
+mlxsw_reg_mgpir_unpack(char *payload, u8 *num_of_devices,
+ enum mlxsw_reg_mgpir_device_type *device_type,
+ u8 *devices_per_flash)
+{
+ if (num_of_devices)
+ *num_of_devices = mlxsw_reg_mgpir_num_of_devices_get(payload);
+ if (device_type)
+ *device_type = mlxsw_reg_mgpir_device_type_get(payload);
+ if (devices_per_flash)
+ *devices_per_flash =
+ mlxsw_reg_mgpir_devices_per_flash_get(payload);
+}
+
/* TNGCR - Tunneling NVE General Configuration Register
* ----------------------------------------------------
* The TNGCR register is used for setting up the NVE Tunneling configuration.
@@ -10058,6 +10111,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(mcda),
MLXSW_REG(mgpc),
MLXSW_REG(mprs),
+ MLXSW_REG(mgpir),
MLXSW_REG(tngcr),
MLXSW_REG(tnumt),
MLXSW_REG(tnqcr),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 1cda8a248b12..0ec52be7cc33 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2886,7 +2886,7 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
return false;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
- struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
+ struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
struct in6_addr *gw;
int ifindex, weight;
@@ -2958,7 +2958,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
struct net_device *dev;
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
- dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev;
+ dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
val ^= dev->ifindex;
}
@@ -3960,9 +3960,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
struct fib6_info *rt = mlxsw_sp_rt6->rt;
- if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev &&
+ if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
- &rt->fib6_nh.fib_nh_gw6))
+ &rt->fib6_nh->fib_nh_gw6))
return nh;
continue;
}
@@ -4022,13 +4022,13 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) {
list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
- list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD;
+ list)->rt->fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
return;
}
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
- struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh;
+ struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
struct mlxsw_sp_nexthop *nh;
nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
@@ -4050,7 +4050,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry)
list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
struct fib6_info *rt = mlxsw_sp_rt6->rt;
- rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD;
+ rt->fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
}
}
@@ -4928,7 +4928,8 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
{
/* RTF_CACHE routes are ignored */
- return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_gw_family;
+ return !(rt->fib6_flags & RTF_ADDRCONF) &&
+ rt->fib6_nh->fib_nh_gw_family;
}
static struct fib6_info *
@@ -4987,8 +4988,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt,
enum mlxsw_sp_ipip_type *ret)
{
- return rt->fib6_nh.fib_nh_dev &&
- mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret);
+ return rt->fib6_nh->fib_nh_dev &&
+ mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
}
static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
@@ -4998,7 +4999,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
{
const struct mlxsw_sp_ipip_ops *ipip_ops;
struct mlxsw_sp_ipip_entry *ipip_entry;
- struct net_device *dev = rt->fib6_nh.fib_nh_dev;
+ struct net_device *dev = rt->fib6_nh->fib_nh_dev;
struct mlxsw_sp_rif *rif;
int err;
@@ -5041,11 +5042,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop *nh,
const struct fib6_info *rt)
{
- struct net_device *dev = rt->fib6_nh.fib_nh_dev;
+ struct net_device *dev = rt->fib6_nh->fib_nh_dev;
nh->nh_grp = nh_grp;
- nh->nh_weight = rt->fib6_nh.fib_nh_weight;
- memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr));
+ nh->nh_weight = rt->fib6_nh->fib_nh_weight;
+ memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
@@ -5068,7 +5069,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt)
{
- return rt->fib6_nh.fib_nh_gw_family ||
+ return rt->fib6_nh->fib_nh_gw_family ||
mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
}
diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile
index cb52a3b128ae..5e694dc1f7f8 100644
--- a/drivers/net/ethernet/mscc/Makefile
+++ b/drivers/net/ethernet/mscc/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: (GPL-2.0 OR MIT)
obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o
mscc_ocelot_common-y := ocelot.o ocelot_io.o
-mscc_ocelot_common-y += ocelot_regs.o
+mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o
obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 02ad11e0b0d8..3e692755c02f 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -884,6 +884,13 @@ static int ocelot_set_features(struct net_device *dev,
struct ocelot_port *port = netdev_priv(dev);
netdev_features_t changed = dev->features ^ features;
+ if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) &&
+ port->tc.offload_cnt) {
+ netdev_err(dev,
+ "Cannot disable HW TC offload while offloads active\n");
+ return -EBUSY;
+ }
+
if (changed & NETIF_F_HW_VLAN_CTAG_FILTER)
ocelot_vlan_mode(port, features);
@@ -917,6 +924,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = {
.ndo_vlan_rx_kill_vid = ocelot_vlan_rx_kill_vid,
.ndo_set_features = ocelot_set_features,
.ndo_get_port_parent_id = ocelot_get_port_parent_id,
+ .ndo_setup_tc = ocelot_setup_tc,
};
static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data)
@@ -1636,8 +1644,9 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
dev->netdev_ops = &ocelot_port_netdev_ops;
dev->ethtool_ops = &ocelot_ethtool_ops;
- dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS;
- dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
+ dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_RXFCS |
+ NETIF_F_HW_TC;
+ dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC;
memcpy(dev->dev_addr, ocelot->base_mac, ETH_ALEN);
dev->dev_addr[ETH_ALEN - 1] += port;
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index 541fe41e60b0..04d87a7ecaba 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -22,6 +22,7 @@
#include "ocelot_rew.h"
#include "ocelot_sys.h"
#include "ocelot_qs.h"
+#include "ocelot_tc.h"
#define PGID_AGGR 64
#define PGID_SRC 80
@@ -454,6 +455,8 @@ struct ocelot_port {
phy_interface_t phy_mode;
struct phy *serdes;
+
+ struct ocelot_port_tc tc;
};
u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
diff --git a/drivers/net/ethernet/mscc/ocelot_police.c b/drivers/net/ethernet/mscc/ocelot_police.c
new file mode 100644
index 000000000000..701e82dd749a
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_police.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#include "ocelot_police.h"
+
+enum mscc_qos_rate_mode {
+ MSCC_QOS_RATE_MODE_DISABLED, /* Policer/shaper disabled */
+ MSCC_QOS_RATE_MODE_LINE, /* Measure line rate in kbps incl. IPG */
+ MSCC_QOS_RATE_MODE_DATA, /* Measures data rate in kbps excl. IPG */
+ MSCC_QOS_RATE_MODE_FRAME, /* Measures frame rate in fps */
+ __MSCC_QOS_RATE_MODE_END,
+ NUM_MSCC_QOS_RATE_MODE = __MSCC_QOS_RATE_MODE_END,
+ MSCC_QOS_RATE_MODE_MAX = __MSCC_QOS_RATE_MODE_END - 1,
+};
+
+/* Types for ANA:POL[0-192]:POL_MODE_CFG.FRM_MODE */
+#define POL_MODE_LINERATE 0 /* Incl IPG. Unit: 33 1/3 kbps, 4096 bytes */
+#define POL_MODE_DATARATE 1 /* Excl IPG. Unit: 33 1/3 kbps, 4096 bytes */
+#define POL_MODE_FRMRATE_HI 2 /* Unit: 33 1/3 fps, 32.8 frames */
+#define POL_MODE_FRMRATE_LO 3 /* Unit: 1/3 fps, 0.3 frames */
+
+/* Policer indexes */
+#define POL_IX_PORT 0 /* 0-11 : Port policers */
+#define POL_IX_QUEUE 32 /* 32-127 : Queue policers */
+
+/* Default policer order */
+#define POL_ORDER 0x1d3 /* Ocelot policer order: Serial (QoS -> Port -> VCAP) */
+
+struct qos_policer_conf {
+ enum mscc_qos_rate_mode mode;
+ bool dlb; /* Enable DLB (dual leaky bucket mode */
+ bool cf; /* Coupling flag (ignored in SLB mode) */
+ u32 cir; /* CIR in kbps/fps (ignored in SLB mode) */
+ u32 cbs; /* CBS in bytes/frames (ignored in SLB mode) */
+ u32 pir; /* PIR in kbps/fps */
+ u32 pbs; /* PBS in bytes/frames */
+ u8 ipg; /* Size of IPG when MSCC_QOS_RATE_MODE_LINE is chosen */
+};
+
+static int qos_policer_conf_set(struct ocelot_port *port, u32 pol_ix,
+ struct qos_policer_conf *conf)
+{
+ u32 cf = 0, cir_ena = 0, frm_mode = POL_MODE_LINERATE;
+ u32 cir = 0, cbs = 0, pir = 0, pbs = 0;
+ bool cir_discard = 0, pir_discard = 0;
+ struct ocelot *ocelot = port->ocelot;
+ u32 pbs_max = 0, cbs_max = 0;
+ u8 ipg = 20;
+ u32 value;
+
+ pir = conf->pir;
+ pbs = conf->pbs;
+
+ switch (conf->mode) {
+ case MSCC_QOS_RATE_MODE_LINE:
+ case MSCC_QOS_RATE_MODE_DATA:
+ if (conf->mode == MSCC_QOS_RATE_MODE_LINE) {
+ frm_mode = POL_MODE_LINERATE;
+ ipg = min_t(u8, GENMASK(4, 0), conf->ipg);
+ } else {
+ frm_mode = POL_MODE_DATARATE;
+ }
+ if (conf->dlb) {
+ cir_ena = 1;
+ cir = conf->cir;
+ cbs = conf->cbs;
+ if (cir == 0 && cbs == 0) {
+ /* Discard cir frames */
+ cir_discard = 1;
+ } else {
+ cir = DIV_ROUND_UP(cir, 100);
+ cir *= 3; /* 33 1/3 kbps */
+ cbs = DIV_ROUND_UP(cbs, 4096);
+ cbs = (cbs ? cbs : 1); /* No zero burst size */
+ cbs_max = 60; /* Limit burst size */
+ cf = conf->cf;
+ if (cf)
+ pir += conf->cir;
+ }
+ }
+ if (pir == 0 && pbs == 0) {
+ /* Discard PIR frames */
+ pir_discard = 1;
+ } else {
+ pir = DIV_ROUND_UP(pir, 100);
+ pir *= 3; /* 33 1/3 kbps */
+ pbs = DIV_ROUND_UP(pbs, 4096);
+ pbs = (pbs ? pbs : 1); /* No zero burst size */
+ pbs_max = 60; /* Limit burst size */
+ }
+ break;
+ case MSCC_QOS_RATE_MODE_FRAME:
+ if (pir >= 100) {
+ frm_mode = POL_MODE_FRMRATE_HI;
+ pir = DIV_ROUND_UP(pir, 100);
+ pir *= 3; /* 33 1/3 fps */
+ pbs = (pbs * 10) / 328; /* 32.8 frames */
+ pbs = (pbs ? pbs : 1); /* No zero burst size */
+ pbs_max = GENMASK(6, 0); /* Limit burst size */
+ } else {
+ frm_mode = POL_MODE_FRMRATE_LO;
+ if (pir == 0 && pbs == 0) {
+ /* Discard all frames */
+ pir_discard = 1;
+ cir_discard = 1;
+ } else {
+ pir *= 3; /* 1/3 fps */
+ pbs = (pbs * 10) / 3; /* 0.3 frames */
+ pbs = (pbs ? pbs : 1); /* No zero burst size */
+ pbs_max = 61; /* Limit burst size */
+ }
+ }
+ break;
+ default: /* MSCC_QOS_RATE_MODE_DISABLED */
+ /* Disable policer using maximum rate and zero burst */
+ pir = GENMASK(15, 0);
+ pbs = 0;
+ break;
+ }
+
+ /* Check limits */
+ if (pir > GENMASK(15, 0)) {
+ netdev_err(port->dev, "Invalid pir\n");
+ return -EINVAL;
+ }
+
+ if (cir > GENMASK(15, 0)) {
+ netdev_err(port->dev, "Invalid cir\n");
+ return -EINVAL;
+ }
+
+ if (pbs > pbs_max) {
+ netdev_err(port->dev, "Invalid pbs\n");
+ return -EINVAL;
+ }
+
+ if (cbs > cbs_max) {
+ netdev_err(port->dev, "Invalid cbs\n");
+ return -EINVAL;
+ }
+
+ value = (ANA_POL_MODE_CFG_IPG_SIZE(ipg) |
+ ANA_POL_MODE_CFG_FRM_MODE(frm_mode) |
+ (cf ? ANA_POL_MODE_CFG_DLB_COUPLED : 0) |
+ (cir_ena ? ANA_POL_MODE_CFG_CIR_ENA : 0) |
+ ANA_POL_MODE_CFG_OVERSHOOT_ENA);
+
+ ocelot_write_gix(ocelot, value, ANA_POL_MODE_CFG, pol_ix);
+
+ ocelot_write_gix(ocelot,
+ ANA_POL_PIR_CFG_PIR_RATE(pir) |
+ ANA_POL_PIR_CFG_PIR_BURST(pbs),
+ ANA_POL_PIR_CFG, pol_ix);
+
+ ocelot_write_gix(ocelot,
+ (pir_discard ? GENMASK(22, 0) : 0),
+ ANA_POL_PIR_STATE, pol_ix);
+
+ ocelot_write_gix(ocelot,
+ ANA_POL_CIR_CFG_CIR_RATE(cir) |
+ ANA_POL_CIR_CFG_CIR_BURST(cbs),
+ ANA_POL_CIR_CFG, pol_ix);
+
+ ocelot_write_gix(ocelot,
+ (cir_discard ? GENMASK(22, 0) : 0),
+ ANA_POL_CIR_STATE, pol_ix);
+
+ return 0;
+}
+
+int ocelot_port_policer_add(struct ocelot_port *port,
+ struct ocelot_policer *pol)
+{
+ struct ocelot *ocelot = port->ocelot;
+ struct qos_policer_conf pp = { 0 };
+ int err;
+
+ if (!pol)
+ return -EINVAL;
+
+ pp.mode = MSCC_QOS_RATE_MODE_DATA;
+ pp.pir = pol->rate;
+ pp.pbs = pol->burst;
+
+ netdev_dbg(port->dev,
+ "%s: port %u pir %u kbps, pbs %u bytes\n",
+ __func__, port->chip_port, pp.pir, pp.pbs);
+
+ err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp);
+ if (err)
+ return err;
+
+ ocelot_rmw_gix(ocelot,
+ ANA_PORT_POL_CFG_PORT_POL_ENA |
+ ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER),
+ ANA_PORT_POL_CFG_PORT_POL_ENA |
+ ANA_PORT_POL_CFG_POL_ORDER_M,
+ ANA_PORT_POL_CFG, port->chip_port);
+
+ return 0;
+}
+
+int ocelot_port_policer_del(struct ocelot_port *port)
+{
+ struct ocelot *ocelot = port->ocelot;
+ struct qos_policer_conf pp = { 0 };
+ int err;
+
+ netdev_dbg(port->dev, "%s: port %u\n", __func__, port->chip_port);
+
+ pp.mode = MSCC_QOS_RATE_MODE_DISABLED;
+
+ err = qos_policer_conf_set(port, POL_IX_PORT + port->chip_port, &pp);
+ if (err)
+ return err;
+
+ ocelot_rmw_gix(ocelot,
+ ANA_PORT_POL_CFG_POL_ORDER(POL_ORDER),
+ ANA_PORT_POL_CFG_PORT_POL_ENA |
+ ANA_PORT_POL_CFG_POL_ORDER_M,
+ ANA_PORT_POL_CFG, port->chip_port);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mscc/ocelot_police.h b/drivers/net/ethernet/mscc/ocelot_police.h
new file mode 100644
index 000000000000..d1137f79efda
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_police.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_POLICE_H_
+#define _MSCC_OCELOT_POLICE_H_
+
+#include "ocelot.h"
+
+struct ocelot_policer {
+ u32 rate; /* kilobit per second */
+ u32 burst; /* bytes */
+};
+
+int ocelot_port_policer_add(struct ocelot_port *port,
+ struct ocelot_policer *pol);
+
+int ocelot_port_policer_del(struct ocelot_port *port);
+
+#endif /* _MSCC_OCELOT_POLICE_H_ */
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c
new file mode 100644
index 000000000000..a0eaadccfecc
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_tc.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot Switch TC driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#include "ocelot_tc.h"
+#include "ocelot_police.h"
+#include <net/pkt_cls.h>
+
+static int ocelot_setup_tc_cls_matchall(struct ocelot_port *port,
+ struct tc_cls_matchall_offload *f,
+ bool ingress)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct ocelot_policer pol = { 0 };
+ struct flow_action_entry *action;
+ int err;
+
+ netdev_dbg(port->dev, "%s: port %u command %d cookie %lu\n",
+ __func__, port->chip_port, f->command, f->cookie);
+
+ if (!ingress) {
+ NL_SET_ERR_MSG_MOD(extack, "Only ingress is supported");
+ return -EOPNOTSUPP;
+ }
+
+ switch (f->command) {
+ case TC_CLSMATCHALL_REPLACE:
+ if (!flow_offload_has_one_action(&f->rule->action)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only one action is supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (port->tc.block_shared) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Rate limit is not supported on shared blocks");
+ return -EOPNOTSUPP;
+ }
+
+ action = &f->rule->action.entries[0];
+
+ if (action->id != FLOW_ACTION_POLICE) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported action");
+ return -EOPNOTSUPP;
+ }
+
+ if (port->tc.police_id && port->tc.police_id != f->cookie) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only one policer per port is supported\n");
+ return -EEXIST;
+ }
+
+ pol.rate = (u32)div_u64(action->police.rate_bytes_ps, 1000) * 8;
+ pol.burst = (u32)div_u64(action->police.rate_bytes_ps *
+ PSCHED_NS2TICKS(action->police.burst),
+ PSCHED_TICKS_PER_SEC);
+
+ err = ocelot_port_policer_add(port, &pol);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Could not add policer\n");
+ return err;
+ }
+
+ port->tc.police_id = f->cookie;
+ port->tc.offload_cnt++;
+ return 0;
+ case TC_CLSMATCHALL_DESTROY:
+ if (port->tc.police_id != f->cookie)
+ return -ENOENT;
+
+ err = ocelot_port_policer_del(port);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Could not delete policer\n");
+ return err;
+ }
+ port->tc.police_id = 0;
+ port->tc.offload_cnt--;
+ return 0;
+ case TC_CLSMATCHALL_STATS: /* fall through */
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int ocelot_setup_tc_block_cb(enum tc_setup_type type,
+ void *type_data,
+ void *cb_priv, bool ingress)
+{
+ struct ocelot_port *port = cb_priv;
+
+ if (!tc_cls_can_offload_and_chain0(port->dev, type_data))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case TC_SETUP_CLSMATCHALL:
+ netdev_dbg(port->dev, "tc_block_cb: TC_SETUP_CLSMATCHALL %s\n",
+ ingress ? "ingress" : "egress");
+
+ return ocelot_setup_tc_cls_matchall(port, type_data, ingress);
+ case TC_SETUP_CLSFLOWER:
+ netdev_dbg(port->dev, "tc_block_cb: TC_SETUP_CLSFLOWER %s\n",
+ ingress ? "ingress" : "egress");
+
+ return -EOPNOTSUPP;
+ default:
+ netdev_dbg(port->dev, "tc_block_cb: type %d %s\n",
+ type,
+ ingress ? "ingress" : "egress");
+
+ return -EOPNOTSUPP;
+ }
+}
+
+static int ocelot_setup_tc_block_cb_ig(enum tc_setup_type type,
+ void *type_data,
+ void *cb_priv)
+{
+ return ocelot_setup_tc_block_cb(type, type_data,
+ cb_priv, true);
+}
+
+static int ocelot_setup_tc_block_cb_eg(enum tc_setup_type type,
+ void *type_data,
+ void *cb_priv)
+{
+ return ocelot_setup_tc_block_cb(type, type_data,
+ cb_priv, false);
+}
+
+static int ocelot_setup_tc_block(struct ocelot_port *port,
+ struct tc_block_offload *f)
+{
+ tc_setup_cb_t *cb;
+
+ netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n",
+ f->command, f->binder_type);
+
+ if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
+ cb = ocelot_setup_tc_block_cb_ig;
+ port->tc.block_shared = tcf_block_shared(f->block);
+ } else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
+ cb = ocelot_setup_tc_block_cb_eg;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ switch (f->command) {
+ case TC_BLOCK_BIND:
+ return tcf_block_cb_register(f->block, cb, port,
+ port, f->extack);
+ case TC_BLOCK_UNBIND:
+ tcf_block_cb_unregister(f->block, cb, port);
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data)
+{
+ struct ocelot_port *port = netdev_priv(dev);
+
+ switch (type) {
+ case TC_SETUP_BLOCK:
+ return ocelot_setup_tc_block(port, type_data);
+ default:
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.h b/drivers/net/ethernet/mscc/ocelot_tc.h
new file mode 100644
index 000000000000..61757c2250a6
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_tc.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/* Microsemi Ocelot Switch driver
+ *
+ * Copyright (c) 2019 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_TC_H_
+#define _MSCC_OCELOT_TC_H_
+
+#include <linux/netdevice.h>
+
+struct ocelot_port_tc {
+ bool block_shared;
+ unsigned long offload_cnt;
+
+ unsigned long police_id;
+};
+
+int ocelot_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
+
+#endif /* _MSCC_OCELOT_TC_H_ */
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index c5e96ce20f59..89fe091c958d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -140,6 +140,7 @@ struct qed_cxt_mngr;
struct qed_sb_sp_info;
struct qed_ll2_info;
struct qed_mcp_info;
+struct qed_llh_info;
struct qed_rt_data {
u32 *init_val;
@@ -741,6 +742,7 @@ struct qed_dev {
#define QED_DEV_ID_MASK 0xff00
#define QED_DEV_ID_MASK_BB 0x1600
#define QED_DEV_ID_MASK_AH 0x8000
+#define QED_IS_E4(dev) (QED_IS_BB(dev) || QED_IS_AH(dev))
u16 chip_num;
#define CHIP_NUM_MASK 0xffff
@@ -801,6 +803,11 @@ struct qed_dev {
u8 num_hwfns;
struct qed_hwfn hwfns[MAX_HWFNS_PER_DEVICE];
+ /* Engine affinity */
+ u8 l2_affin_hint;
+ u8 fir_affin;
+ u8 iwarp_affin;
+
/* SRIOV */
struct qed_hw_sriov_info *p_iov_info;
#define IS_QED_SRIOV(cdev) (!!(cdev)->p_iov_info)
@@ -815,6 +822,10 @@ struct qed_dev {
/* Recovery */
bool recov_in_prog;
+ /* LLH info */
+ u8 ppfid_bitmap;
+ struct qed_llh_info *p_llh_info;
+
/* Linux specific here */
struct qede_dev *edev;
struct pci_dev *pdev;
@@ -852,6 +863,9 @@ struct qed_dev {
u32 rdma_max_inline;
u32 rdma_max_srq_sge;
u16 tunn_feature_mask;
+
+ struct devlink *dl;
+ bool iwarp_cmt;
};
#define NUM_OF_VFS(dev) (QED_IS_BB(dev) ? MAX_NUM_VFS_BB \
@@ -904,6 +918,14 @@ void qed_set_fw_mac_addr(__le16 *fw_msb,
__le16 *fw_mid, __le16 *fw_lsb, u8 *mac);
#define QED_LEADING_HWFN(dev) (&dev->hwfns[0])
+#define QED_IS_CMT(dev) ((dev)->num_hwfns > 1)
+/* Macros for getting the engine-affinitized hwfn (FIR: fcoe,iscsi,roce) */
+#define QED_FIR_AFFIN_HWFN(dev) (&(dev)->hwfns[dev->fir_affin])
+#define QED_IWARP_AFFIN_HWFN(dev) (&(dev)->hwfns[dev->iwarp_affin])
+#define QED_AFFIN_HWFN(dev) \
+ (QED_IS_IWARP_PERSONALITY(QED_LEADING_HWFN(dev)) ? \
+ QED_IWARP_AFFIN_HWFN(dev) : QED_FIR_AFFIN_HWFN(dev))
+#define QED_AFFIN_HWFN_IDX(dev) (IS_LEAD_HWFN(QED_AFFIN_HWFN(dev)) ? 0 : 1)
/* Flags for indication of required queues */
#define PQ_FLAGS_RLS (BIT(0))
@@ -923,8 +945,6 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf);
u16 qed_get_cm_pq_idx_ofld_mtc(struct qed_hwfn *p_hwfn, u8 tc);
u16 qed_get_cm_pq_idx_llt_mtc(struct qed_hwfn *p_hwfn, u8 tc);
-#define QED_LEADING_HWFN(dev) (&dev->hwfns[0])
-
/* doorbell recovery mechanism */
void qed_db_recovery_dp(struct qed_hwfn *p_hwfn);
void qed_db_recovery_execute(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index e61d1d905415..8e1bdf58b9e7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -2351,7 +2351,8 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
/* Write via DMAE since the PSWRQ2_REG_ILT_MEMORY line is a wide-bus */
qed_dmae_host2grc(p_hwfn, p_ptt, (u64) (uintptr_t)&ilt_hw_entry,
- reg_offset, sizeof(ilt_hw_entry) / sizeof(u32), 0);
+ reg_offset, sizeof(ilt_hw_entry) / sizeof(u32),
+ NULL);
if (elem_type == QED_ELEM_CXT) {
u32 last_cid_allocated = (1 + (iid / elems_per_p)) *
@@ -2457,7 +2458,7 @@ qed_cxt_free_ilt_range(struct qed_hwfn *p_hwfn,
(u64) (uintptr_t) &ilt_hw_entry,
reg_offset,
sizeof(ilt_hw_entry) / sizeof(u32),
- 0);
+ NULL);
}
qed_ptt_release(p_hwfn, p_ptt);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index 979f1e4bc18b..8525e6bf6ae5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -2537,7 +2537,7 @@ static u32 qed_grc_dump_addr_range(struct qed_hwfn *p_hwfn,
(len >= s_platform_defs[dev_data->platform_id].dmae_thresh ||
wide_bus)) {
if (!qed_dmae_grc2host(p_hwfn, p_ptt, DWORDS_TO_BYTES(addr),
- (u64)(uintptr_t)(dump_buf), len, 0))
+ (u64)(uintptr_t)(dump_buf), len, NULL))
return len;
dev_data->use_dmae = 0;
DP_VERBOSE(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index fccdb06fc5c5..eec7cb65c7e6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -361,6 +361,926 @@ void qed_db_recovery_execute(struct qed_hwfn *p_hwfn)
/******************** Doorbell Recovery end ****************/
+/********************************** NIG LLH ***********************************/
+
+enum qed_llh_filter_type {
+ QED_LLH_FILTER_TYPE_MAC,
+ QED_LLH_FILTER_TYPE_PROTOCOL,
+};
+
+struct qed_llh_mac_filter {
+ u8 addr[ETH_ALEN];
+};
+
+struct qed_llh_protocol_filter {
+ enum qed_llh_prot_filter_type_t type;
+ u16 source_port_or_eth_type;
+ u16 dest_port;
+};
+
+union qed_llh_filter {
+ struct qed_llh_mac_filter mac;
+ struct qed_llh_protocol_filter protocol;
+};
+
+struct qed_llh_filter_info {
+ bool b_enabled;
+ u32 ref_cnt;
+ enum qed_llh_filter_type type;
+ union qed_llh_filter filter;
+};
+
+struct qed_llh_info {
+ /* Number of LLH filters banks */
+ u8 num_ppfid;
+
+#define MAX_NUM_PPFID 8
+ u8 ppfid_array[MAX_NUM_PPFID];
+
+ /* Array of filters arrays:
+ * "num_ppfid" elements of filters banks, where each is an array of
+ * "NIG_REG_LLH_FUNC_FILTER_EN_SIZE" filters.
+ */
+ struct qed_llh_filter_info **pp_filters;
+};
+
+static void qed_llh_free(struct qed_dev *cdev)
+{
+ struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+ u32 i;
+
+ if (p_llh_info) {
+ if (p_llh_info->pp_filters)
+ for (i = 0; i < p_llh_info->num_ppfid; i++)
+ kfree(p_llh_info->pp_filters[i]);
+
+ kfree(p_llh_info->pp_filters);
+ }
+
+ kfree(p_llh_info);
+ cdev->p_llh_info = NULL;
+}
+
+static int qed_llh_alloc(struct qed_dev *cdev)
+{
+ struct qed_llh_info *p_llh_info;
+ u32 size, i;
+
+ p_llh_info = kzalloc(sizeof(*p_llh_info), GFP_KERNEL);
+ if (!p_llh_info)
+ return -ENOMEM;
+ cdev->p_llh_info = p_llh_info;
+
+ for (i = 0; i < MAX_NUM_PPFID; i++) {
+ if (!(cdev->ppfid_bitmap & (0x1 << i)))
+ continue;
+
+ p_llh_info->ppfid_array[p_llh_info->num_ppfid] = i;
+ DP_VERBOSE(cdev, QED_MSG_SP, "ppfid_array[%d] = %hhd\n",
+ p_llh_info->num_ppfid, i);
+ p_llh_info->num_ppfid++;
+ }
+
+ size = p_llh_info->num_ppfid * sizeof(*p_llh_info->pp_filters);
+ p_llh_info->pp_filters = kzalloc(size, GFP_KERNEL);
+ if (!p_llh_info->pp_filters)
+ return -ENOMEM;
+
+ size = NIG_REG_LLH_FUNC_FILTER_EN_SIZE *
+ sizeof(**p_llh_info->pp_filters);
+ for (i = 0; i < p_llh_info->num_ppfid; i++) {
+ p_llh_info->pp_filters[i] = kzalloc(size, GFP_KERNEL);
+ if (!p_llh_info->pp_filters[i])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int qed_llh_shadow_sanity(struct qed_dev *cdev,
+ u8 ppfid, u8 filter_idx, const char *action)
+{
+ struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+
+ if (ppfid >= p_llh_info->num_ppfid) {
+ DP_NOTICE(cdev,
+ "LLH shadow [%s]: using ppfid %d while only %d ppfids are available\n",
+ action, ppfid, p_llh_info->num_ppfid);
+ return -EINVAL;
+ }
+
+ if (filter_idx >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
+ DP_NOTICE(cdev,
+ "LLH shadow [%s]: using filter_idx %d while only %d filters are available\n",
+ action, filter_idx, NIG_REG_LLH_FUNC_FILTER_EN_SIZE);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+#define QED_LLH_INVALID_FILTER_IDX 0xff
+
+static int
+qed_llh_shadow_search_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ union qed_llh_filter *p_filter, u8 *p_filter_idx)
+{
+ struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+ struct qed_llh_filter_info *p_filters;
+ int rc;
+ u8 i;
+
+ rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "search");
+ if (rc)
+ return rc;
+
+ *p_filter_idx = QED_LLH_INVALID_FILTER_IDX;
+
+ p_filters = p_llh_info->pp_filters[ppfid];
+ for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+ if (!memcmp(p_filter, &p_filters[i].filter,
+ sizeof(*p_filter))) {
+ *p_filter_idx = i;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+qed_llh_shadow_get_free_idx(struct qed_dev *cdev, u8 ppfid, u8 *p_filter_idx)
+{
+ struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+ struct qed_llh_filter_info *p_filters;
+ int rc;
+ u8 i;
+
+ rc = qed_llh_shadow_sanity(cdev, ppfid, 0, "get_free_idx");
+ if (rc)
+ return rc;
+
+ *p_filter_idx = QED_LLH_INVALID_FILTER_IDX;
+
+ p_filters = p_llh_info->pp_filters[ppfid];
+ for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+ if (!p_filters[i].b_enabled) {
+ *p_filter_idx = i;
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+__qed_llh_shadow_add_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ u8 filter_idx,
+ enum qed_llh_filter_type type,
+ union qed_llh_filter *p_filter, u32 *p_ref_cnt)
+{
+ struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+ struct qed_llh_filter_info *p_filters;
+ int rc;
+
+ rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "add");
+ if (rc)
+ return rc;
+
+ p_filters = p_llh_info->pp_filters[ppfid];
+ if (!p_filters[filter_idx].ref_cnt) {
+ p_filters[filter_idx].b_enabled = true;
+ p_filters[filter_idx].type = type;
+ memcpy(&p_filters[filter_idx].filter, p_filter,
+ sizeof(p_filters[filter_idx].filter));
+ }
+
+ *p_ref_cnt = ++p_filters[filter_idx].ref_cnt;
+
+ return 0;
+}
+
+static int
+qed_llh_shadow_add_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_filter_type type,
+ union qed_llh_filter *p_filter,
+ u8 *p_filter_idx, u32 *p_ref_cnt)
+{
+ int rc;
+
+ /* Check if the same filter already exist */
+ rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx);
+ if (rc)
+ return rc;
+
+ /* Find a new entry in case of a new filter */
+ if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+ rc = qed_llh_shadow_get_free_idx(cdev, ppfid, p_filter_idx);
+ if (rc)
+ return rc;
+ }
+
+ /* No free entry was found */
+ if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+ DP_NOTICE(cdev,
+ "Failed to find an empty LLH filter to utilize [ppfid %d]\n",
+ ppfid);
+ return -EINVAL;
+ }
+
+ return __qed_llh_shadow_add_filter(cdev, ppfid, *p_filter_idx, type,
+ p_filter, p_ref_cnt);
+}
+
+static int
+__qed_llh_shadow_remove_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 filter_idx, u32 *p_ref_cnt)
+{
+ struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+ struct qed_llh_filter_info *p_filters;
+ int rc;
+
+ rc = qed_llh_shadow_sanity(cdev, ppfid, filter_idx, "remove");
+ if (rc)
+ return rc;
+
+ p_filters = p_llh_info->pp_filters[ppfid];
+ if (!p_filters[filter_idx].ref_cnt) {
+ DP_NOTICE(cdev,
+ "LLH shadow: trying to remove a filter with ref_cnt=0\n");
+ return -EINVAL;
+ }
+
+ *p_ref_cnt = --p_filters[filter_idx].ref_cnt;
+ if (!p_filters[filter_idx].ref_cnt)
+ memset(&p_filters[filter_idx],
+ 0, sizeof(p_filters[filter_idx]));
+
+ return 0;
+}
+
+static int
+qed_llh_shadow_remove_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ union qed_llh_filter *p_filter,
+ u8 *p_filter_idx, u32 *p_ref_cnt)
+{
+ int rc;
+
+ rc = qed_llh_shadow_search_filter(cdev, ppfid, p_filter, p_filter_idx);
+ if (rc)
+ return rc;
+
+ /* No matching filter was found */
+ if (*p_filter_idx == QED_LLH_INVALID_FILTER_IDX) {
+ DP_NOTICE(cdev, "Failed to find a filter in the LLH shadow\n");
+ return -EINVAL;
+ }
+
+ return __qed_llh_shadow_remove_filter(cdev, ppfid, *p_filter_idx,
+ p_ref_cnt);
+}
+
+static int qed_llh_abs_ppfid(struct qed_dev *cdev, u8 ppfid, u8 *p_abs_ppfid)
+{
+ struct qed_llh_info *p_llh_info = cdev->p_llh_info;
+
+ if (ppfid >= p_llh_info->num_ppfid) {
+ DP_NOTICE(cdev,
+ "ppfid %d is not valid, available indices are 0..%hhd\n",
+ ppfid, p_llh_info->num_ppfid - 1);
+ return -EINVAL;
+ }
+
+ *p_abs_ppfid = p_llh_info->ppfid_array[ppfid];
+
+ return 0;
+}
+
+static int
+qed_llh_set_engine_affin(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ struct qed_dev *cdev = p_hwfn->cdev;
+ enum qed_eng eng;
+ u8 ppfid;
+ int rc;
+
+ rc = qed_mcp_get_engine_config(p_hwfn, p_ptt);
+ if (rc != 0 && rc != -EOPNOTSUPP) {
+ DP_NOTICE(p_hwfn,
+ "Failed to get the engine affinity configuration\n");
+ return rc;
+ }
+
+ /* RoCE PF is bound to a single engine */
+ if (QED_IS_ROCE_PERSONALITY(p_hwfn)) {
+ eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+ rc = qed_llh_set_roce_affinity(cdev, eng);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Failed to set the RoCE engine affinity\n");
+ return rc;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Set the engine affinity of RoCE packets as %d\n",
+ eng);
+ }
+
+ /* Storage PF is bound to a single engine while L2 PF uses both */
+ if (QED_IS_FCOE_PERSONALITY(p_hwfn) || QED_IS_ISCSI_PERSONALITY(p_hwfn))
+ eng = cdev->fir_affin ? QED_ENG1 : QED_ENG0;
+ else /* L2_PERSONALITY */
+ eng = QED_BOTH_ENG;
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Failed to set the engine affinity of ppfid %d\n",
+ ppfid);
+ return rc;
+ }
+ }
+
+ DP_VERBOSE(cdev, QED_MSG_SP,
+ "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+ eng);
+
+ return 0;
+}
+
+static int qed_llh_hw_init_pf(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt)
+{
+ struct qed_dev *cdev = p_hwfn->cdev;
+ u8 ppfid, abs_ppfid;
+ int rc;
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ u32 addr;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ return rc;
+
+ addr = NIG_REG_LLH_PPFID2PFID_TBL_0 + abs_ppfid * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_hwfn->rel_pf_id);
+ }
+
+ if (test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) &&
+ !QED_IS_FCOE_PERSONALITY(p_hwfn)) {
+ rc = qed_llh_add_mac_filter(cdev, 0,
+ p_hwfn->hw_info.hw_mac_addr);
+ if (rc)
+ DP_NOTICE(cdev,
+ "Failed to add an LLH filter with the primary MAC\n");
+ }
+
+ if (QED_IS_CMT(cdev)) {
+ rc = qed_llh_set_engine_affin(p_hwfn, p_ptt);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+u8 qed_llh_get_num_ppfid(struct qed_dev *cdev)
+{
+ return cdev->p_llh_info->num_ppfid;
+}
+
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_MASK 0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_SHIFT 0
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_MASK 0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_SHIFT 2
+
+int qed_llh_set_ppfid_affinity(struct qed_dev *cdev, u8 ppfid, enum qed_eng eng)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u32 addr, val, eng_sel;
+ u8 abs_ppfid;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!QED_IS_CMT(cdev))
+ goto out;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto out;
+
+ switch (eng) {
+ case QED_ENG0:
+ eng_sel = 0;
+ break;
+ case QED_ENG1:
+ eng_sel = 1;
+ break;
+ case QED_BOTH_ENG:
+ eng_sel = 2;
+ break;
+ default:
+ DP_NOTICE(cdev, "Invalid affinity value for ppfid [%d]\n", eng);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+ val = qed_rd(p_hwfn, p_ptt, addr);
+ SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE, eng_sel);
+ qed_wr(p_hwfn, p_ptt, addr, val);
+
+ /* The iWARP affinity is set as the affinity of ppfid 0 */
+ if (!ppfid && QED_IS_IWARP_PERSONALITY(p_hwfn))
+ cdev->iwarp_affin = (eng == QED_ENG1) ? 1 : 0;
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u32 addr, val, eng_sel;
+ u8 ppfid, abs_ppfid;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!QED_IS_CMT(cdev))
+ goto out;
+
+ switch (eng) {
+ case QED_ENG0:
+ eng_sel = 0;
+ break;
+ case QED_ENG1:
+ eng_sel = 1;
+ break;
+ case QED_BOTH_ENG:
+ eng_sel = 2;
+ qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL,
+ 0xf); /* QP bit 15 */
+ break;
+ default:
+ DP_NOTICE(cdev, "Invalid affinity value for RoCE [%d]\n", eng);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ for (ppfid = 0; ppfid < cdev->p_llh_info->num_ppfid; ppfid++) {
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto out;
+
+ addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+ val = qed_rd(p_hwfn, p_ptt, addr);
+ SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_ROCE, eng_sel);
+ qed_wr(p_hwfn, p_ptt, addr, val);
+ }
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+struct qed_llh_filter_details {
+ u64 value;
+ u32 mode;
+ u32 protocol_type;
+ u32 hdr_sel;
+ u32 enable;
+};
+
+static int
+qed_llh_access_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 abs_ppfid,
+ u8 filter_idx,
+ struct qed_llh_filter_details *p_details)
+{
+ struct qed_dmae_params params = {0};
+ u32 addr;
+ u8 pfid;
+ int rc;
+
+ /* The NIG/LLH registers that are accessed in this function have only 16
+ * rows which are exposed to a PF. I.e. only the 16 filters of its
+ * default ppfid. Accessing filters of other ppfids requires pretending
+ * to another PFs.
+ * The calculation of PPFID->PFID in AH is based on the relative index
+ * of a PF on its port.
+ * For BB the pfid is actually the abs_ppfid.
+ */
+ if (QED_IS_BB(p_hwfn->cdev))
+ pfid = abs_ppfid;
+ else
+ pfid = abs_ppfid * p_hwfn->cdev->num_ports_in_engine +
+ MFW_PORT(p_hwfn);
+
+ /* Filter enable - should be done first when removing a filter */
+ if (!p_details->enable) {
+ qed_fid_pretend(p_hwfn, p_ptt,
+ pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ p_hwfn->rel_pf_id <<
+ PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+ }
+
+ /* Filter value */
+ addr = NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * filter_idx * 0x4;
+
+ params.flags = QED_DMAE_FLAG_PF_DST;
+ params.dst_pfid = pfid;
+ rc = qed_dmae_host2grc(p_hwfn,
+ p_ptt,
+ (u64)(uintptr_t)&p_details->value,
+ addr, 2 /* size_in_dwords */,
+ &params);
+ if (rc)
+ return rc;
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ /* Filter mode */
+ addr = NIG_REG_LLH_FUNC_FILTER_MODE + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->mode);
+
+ /* Filter protocol type */
+ addr = NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->protocol_type);
+
+ /* Filter header select */
+ addr = NIG_REG_LLH_FUNC_FILTER_HDR_SEL + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->hdr_sel);
+
+ /* Filter enable - should be done last when adding a filter */
+ if (p_details->enable) {
+ addr = NIG_REG_LLH_FUNC_FILTER_EN + filter_idx * 0x4;
+ qed_wr(p_hwfn, p_ptt, addr, p_details->enable);
+ }
+
+ qed_fid_pretend(p_hwfn, p_ptt,
+ p_hwfn->rel_pf_id <<
+ PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+ return 0;
+}
+
+static int
+qed_llh_add_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt,
+ u8 abs_ppfid,
+ u8 filter_idx, u8 filter_prot_type, u32 high, u32 low)
+{
+ struct qed_llh_filter_details filter_details;
+
+ filter_details.enable = 1;
+ filter_details.value = ((u64)high << 32) | low;
+ filter_details.hdr_sel = 0;
+ filter_details.protocol_type = filter_prot_type;
+ /* Mode: 0: MAC-address classification 1: protocol classification */
+ filter_details.mode = filter_prot_type ? 1 : 0;
+
+ return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ &filter_details);
+}
+
+static int
+qed_llh_remove_filter(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt, u8 abs_ppfid, u8 filter_idx)
+{
+ struct qed_llh_filter_details filter_details = {0};
+
+ return qed_llh_access_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ &filter_details);
+}
+
+int qed_llh_add_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ union qed_llh_filter filter = {};
+ u8 filter_idx, abs_ppfid;
+ u32 high, low, ref_cnt;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+ goto out;
+
+ memcpy(filter.mac.addr, mac_addr, ETH_ALEN);
+ rc = qed_llh_shadow_add_filter(cdev, ppfid,
+ QED_LLH_FILTER_TYPE_MAC,
+ &filter, &filter_idx, &ref_cnt);
+ if (rc)
+ goto err;
+
+ /* Configure the LLH only in case of a new the filter */
+ if (ref_cnt == 1) {
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ high = mac_addr[1] | (mac_addr[0] << 8);
+ low = mac_addr[5] | (mac_addr[4] << 8) | (mac_addr[3] << 16) |
+ (mac_addr[2] << 24);
+ rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+ 0, high, low);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Added MAC filter [%pM] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(cdev,
+ "LLH: Failed to add MAC filter [%pM] to ppfid %hhd\n",
+ mac_addr, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+static int
+qed_llh_protocol_filter_stringify(struct qed_dev *cdev,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type,
+ u16 dest_port, u8 *str, size_t str_len)
+{
+ switch (type) {
+ case QED_LLH_FILTER_ETHERTYPE:
+ snprintf(str, str_len, "Ethertype 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_TCP_SRC_PORT:
+ snprintf(str, str_len, "TCP src port 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_UDP_SRC_PORT:
+ snprintf(str, str_len, "UDP src port 0x%04x",
+ source_port_or_eth_type);
+ break;
+ case QED_LLH_FILTER_TCP_DEST_PORT:
+ snprintf(str, str_len, "TCP dst port 0x%04x", dest_port);
+ break;
+ case QED_LLH_FILTER_UDP_DEST_PORT:
+ snprintf(str, str_len, "UDP dst port 0x%04x", dest_port);
+ break;
+ case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+ snprintf(str, str_len, "TCP src/dst ports 0x%04x/0x%04x",
+ source_port_or_eth_type, dest_port);
+ break;
+ case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+ snprintf(str, str_len, "UDP src/dst ports 0x%04x/0x%04x",
+ source_port_or_eth_type, dest_port);
+ break;
+ default:
+ DP_NOTICE(cdev,
+ "Non valid LLH protocol filter type %d\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+qed_llh_protocol_filter_to_hilo(struct qed_dev *cdev,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type,
+ u16 dest_port, u32 *p_high, u32 *p_low)
+{
+ *p_high = 0;
+ *p_low = 0;
+
+ switch (type) {
+ case QED_LLH_FILTER_ETHERTYPE:
+ *p_high = source_port_or_eth_type;
+ break;
+ case QED_LLH_FILTER_TCP_SRC_PORT:
+ case QED_LLH_FILTER_UDP_SRC_PORT:
+ *p_low = source_port_or_eth_type << 16;
+ break;
+ case QED_LLH_FILTER_TCP_DEST_PORT:
+ case QED_LLH_FILTER_UDP_DEST_PORT:
+ *p_low = dest_port;
+ break;
+ case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+ case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+ *p_low = (source_port_or_eth_type << 16) | dest_port;
+ break;
+ default:
+ DP_NOTICE(cdev,
+ "Non valid LLH protocol filter type %d\n", type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int
+qed_llh_add_protocol_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type, u16 dest_port)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u8 filter_idx, abs_ppfid, str[32], type_bitmap;
+ union qed_llh_filter filter = {};
+ u32 high, low, ref_cnt;
+ int rc = 0;
+
+ if (!p_ptt)
+ return -EAGAIN;
+
+ if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+ goto out;
+
+ rc = qed_llh_protocol_filter_stringify(cdev, type,
+ source_port_or_eth_type,
+ dest_port, str, sizeof(str));
+ if (rc)
+ goto err;
+
+ filter.protocol.type = type;
+ filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+ filter.protocol.dest_port = dest_port;
+ rc = qed_llh_shadow_add_filter(cdev,
+ ppfid,
+ QED_LLH_FILTER_TYPE_PROTOCOL,
+ &filter, &filter_idx, &ref_cnt);
+ if (rc)
+ goto err;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ /* Configure the LLH only in case of a new the filter */
+ if (ref_cnt == 1) {
+ rc = qed_llh_protocol_filter_to_hilo(cdev, type,
+ source_port_or_eth_type,
+ dest_port, &high, &low);
+ if (rc)
+ goto err;
+
+ type_bitmap = 0x1 << type;
+ rc = qed_llh_add_filter(p_hwfn, p_ptt, abs_ppfid,
+ filter_idx, type_bitmap, high, low);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Added protocol filter [%s] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(p_hwfn,
+ "LLH: Failed to add protocol filter [%s] to ppfid %hhd\n",
+ str, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ return rc;
+}
+
+void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN])
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ union qed_llh_filter filter = {};
+ u8 filter_idx, abs_ppfid;
+ int rc = 0;
+ u32 ref_cnt;
+
+ if (!p_ptt)
+ return;
+
+ if (!test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits))
+ goto out;
+
+ ether_addr_copy(filter.mac.addr, mac_addr);
+ rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
+ &ref_cnt);
+ if (rc)
+ goto err;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ /* Remove from the LLH in case the filter is not in use */
+ if (!ref_cnt) {
+ rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+ filter_idx);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Removed MAC filter [%pM] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ mac_addr, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(cdev,
+ "LLH: Failed to remove MAC filter [%pM] from ppfid %hhd\n",
+ mac_addr, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+}
+
+void qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type, u16 dest_port)
+{
+ struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+ u8 filter_idx, abs_ppfid, str[32];
+ union qed_llh_filter filter = {};
+ int rc = 0;
+ u32 ref_cnt;
+
+ if (!p_ptt)
+ return;
+
+ if (!test_bit(QED_MF_LLH_PROTO_CLSS, &cdev->mf_bits))
+ goto out;
+
+ rc = qed_llh_protocol_filter_stringify(cdev, type,
+ source_port_or_eth_type,
+ dest_port, str, sizeof(str));
+ if (rc)
+ goto err;
+
+ filter.protocol.type = type;
+ filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+ filter.protocol.dest_port = dest_port;
+ rc = qed_llh_shadow_remove_filter(cdev, ppfid, &filter, &filter_idx,
+ &ref_cnt);
+ if (rc)
+ goto err;
+
+ rc = qed_llh_abs_ppfid(cdev, ppfid, &abs_ppfid);
+ if (rc)
+ goto err;
+
+ /* Remove from the LLH in case the filter is not in use */
+ if (!ref_cnt) {
+ rc = qed_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+ filter_idx);
+ if (rc)
+ goto err;
+ }
+
+ DP_VERBOSE(cdev,
+ QED_MSG_SP,
+ "LLH: Removed protocol filter [%s] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+ str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+ goto out;
+
+err: DP_NOTICE(cdev,
+ "LLH: Failed to remove protocol filter [%s] from ppfid %hhd\n",
+ str, ppfid);
+out:
+ qed_ptt_release(p_hwfn, p_ptt);
+}
+
+/******************************* NIG LLH - End ********************************/
+
#define QED_MIN_DPIS (4)
#define QED_MIN_PWM_REGION (QED_WID_SIZE * QED_MIN_DPIS)
@@ -461,6 +1381,8 @@ void qed_resc_free(struct qed_dev *cdev)
kfree(cdev->reset_stats);
cdev->reset_stats = NULL;
+ qed_llh_free(cdev);
+
for_each_hwfn(cdev, i) {
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
@@ -1428,6 +2350,13 @@ int qed_resc_alloc(struct qed_dev *cdev)
goto alloc_err;
}
+ rc = qed_llh_alloc(cdev);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Failed to allocate memory for the llh_info structure\n");
+ goto alloc_err;
+ }
+
cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL);
if (!cdev->reset_stats)
goto alloc_no_mem;
@@ -1879,6 +2808,10 @@ static int qed_hw_init_port(struct qed_hwfn *p_hwfn,
{
int rc = 0;
+ /* In CMT the gate should be cleared by the 2nd hwfn */
+ if (!QED_IS_CMT(p_hwfn->cdev) || !IS_LEAD_HWFN(p_hwfn))
+ STORE_RT_REG(p_hwfn, NIG_REG_BRB_GATE_DNTFWD_PORT_RT_OFFSET, 0);
+
rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode);
if (rc)
return rc;
@@ -1964,6 +2897,13 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn,
if (rc)
return rc;
+ /* Use the leading hwfn since in CMT only NIG #0 is operational */
+ if (IS_LEAD_HWFN(p_hwfn)) {
+ rc = qed_llh_hw_init_pf(p_hwfn, p_ptt);
+ if (rc)
+ return rc;
+ }
+
if (b_hw_start) {
/* enable interrupts */
qed_int_igu_enable(p_hwfn, p_ptt, int_mode);
@@ -2393,6 +3333,12 @@ int qed_hw_stop(struct qed_dev *cdev)
qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DB_ENABLE, 0);
qed_wr(p_hwfn, p_ptt, QM_REG_PF_EN, 0);
+ if (IS_LEAD_HWFN(p_hwfn) &&
+ test_bit(QED_MF_LLH_MAC_CLSS, &cdev->mf_bits) &&
+ !QED_IS_FCOE_PERSONALITY(p_hwfn))
+ qed_llh_remove_mac_filter(cdev, 0,
+ p_hwfn->hw_info.hw_mac_addr);
+
if (!cdev->recov_in_prog) {
rc = qed_mcp_unload_done(p_hwfn, p_ptt);
if (rc) {
@@ -2868,6 +3814,36 @@ static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn)
return 0;
}
+static int qed_hw_get_ppfid_bitmap(struct qed_hwfn *p_hwfn,
+ struct qed_ptt *p_ptt)
+{
+ struct qed_dev *cdev = p_hwfn->cdev;
+ u8 native_ppfid_idx;
+ int rc;
+
+ /* Calculation of BB/AH is different for native_ppfid_idx */
+ if (QED_IS_BB(cdev))
+ native_ppfid_idx = p_hwfn->rel_pf_id;
+ else
+ native_ppfid_idx = p_hwfn->rel_pf_id /
+ cdev->num_ports_in_engine;
+
+ rc = qed_mcp_get_ppfid_bitmap(p_hwfn, p_ptt);
+ if (rc != 0 && rc != -EOPNOTSUPP)
+ return rc;
+ else if (rc == -EOPNOTSUPP)
+ cdev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+
+ if (!(cdev->ppfid_bitmap & (0x1 << native_ppfid_idx))) {
+ DP_INFO(p_hwfn,
+ "Fix the PPFID bitmap to include the native PPFID [native_ppfid_idx %hhd, orig_bitmap 0x%hhx]\n",
+ native_ppfid_idx, cdev->ppfid_bitmap);
+ cdev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+ }
+
+ return 0;
+}
+
static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
{
struct qed_resc_unlock_params resc_unlock_params;
@@ -2925,6 +3901,13 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
"Failed to release the resource lock for the resource allocation commands\n");
}
+ /* PPFID bitmap */
+ if (IS_LEAD_HWFN(p_hwfn)) {
+ rc = qed_hw_get_ppfid_bitmap(p_hwfn, p_ptt);
+ if (rc)
+ return rc;
+ }
+
/* Sanity for ILT */
if ((b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_K2)) ||
(!b_ah && (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB))) {
@@ -3443,6 +4426,7 @@ static void qed_nvm_info_free(struct qed_hwfn *p_hwfn)
static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
void __iomem *p_regview,
void __iomem *p_doorbells,
+ u64 db_phys_addr,
enum qed_pci_personality personality)
{
struct qed_dev *cdev = p_hwfn->cdev;
@@ -3451,6 +4435,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn,
/* Split PCI bars evenly between hwfns */
p_hwfn->regview = p_regview;
p_hwfn->doorbells = p_doorbells;
+ p_hwfn->db_phys_addr = db_phys_addr;
if (IS_VF(p_hwfn->cdev))
return qed_vf_hw_prepare(p_hwfn);
@@ -3546,7 +4531,9 @@ int qed_hw_prepare(struct qed_dev *cdev,
/* Initialize the first hwfn - will learn number of hwfns */
rc = qed_hw_prepare_single(p_hwfn,
cdev->regview,
- cdev->doorbells, personality);
+ cdev->doorbells,
+ cdev->db_phys_addr,
+ personality);
if (rc)
return rc;
@@ -3555,22 +4542,25 @@ int qed_hw_prepare(struct qed_dev *cdev,
/* Initialize the rest of the hwfns */
if (cdev->num_hwfns > 1) {
void __iomem *p_regview, *p_doorbell;
- u8 __iomem *addr;
+ u64 db_phys_addr;
+ u32 offset;
/* adjust bar offset for second engine */
- addr = cdev->regview +
- qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
- BAR_ID_0) / 2;
- p_regview = addr;
+ offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+ BAR_ID_0) / 2;
+ p_regview = cdev->regview + offset;
+
+ offset = qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+ BAR_ID_1) / 2;
- addr = cdev->doorbells +
- qed_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
- BAR_ID_1) / 2;
- p_doorbell = addr;
+ p_doorbell = cdev->doorbells + offset;
+
+ db_phys_addr = cdev->db_phys_addr + offset;
/* prepare second hw function */
rc = qed_hw_prepare_single(&cdev->hwfns[1], p_regview,
- p_doorbell, personality);
+ p_doorbell, db_phys_addr,
+ personality);
/* in case of error, need to free the previously
* initiliazed hwfn 0.
@@ -3951,269 +4941,6 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id)
return 0;
}
-static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low,
- u8 *p_filter)
-{
- *p_high = p_filter[1] | (p_filter[0] << 8);
- *p_low = p_filter[5] | (p_filter[4] << 8) |
- (p_filter[3] << 16) | (p_filter[2] << 24);
-}
-
-int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt, u8 *p_filter)
-{
- u32 high = 0, low = 0, en;
- int i;
-
- if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits))
- return 0;
-
- qed_llh_mac_to_filter(&high, &low, p_filter);
-
- /* Find a free entry and utilize it */
- for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
- en = qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32));
- if (en)
- continue;
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- 2 * i * sizeof(u32), low);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- (2 * i + 1) * sizeof(u32), high);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
- i * sizeof(u32), 0);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1);
- break;
- }
- if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
- DP_NOTICE(p_hwfn,
- "Failed to find an empty LLH filter to utilize\n");
- return -EINVAL;
- }
-
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "mac: %pM is added at %d\n",
- p_filter, i);
-
- return 0;
-}
-
-void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt, u8 *p_filter)
-{
- u32 high = 0, low = 0;
- int i;
-
- if (!test_bit(QED_MF_LLH_MAC_CLSS, &p_hwfn->cdev->mf_bits))
- return;
-
- qed_llh_mac_to_filter(&high, &low, p_filter);
-
- /* Find the entry and clean it */
- for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
- if (qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- 2 * i * sizeof(u32)) != low)
- continue;
- if (qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- (2 * i + 1) * sizeof(u32)) != high)
- continue;
-
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- (2 * i + 1) * sizeof(u32), 0);
-
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "mac: %pM is removed from %d\n",
- p_filter, i);
- break;
- }
- if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
- DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n");
-}
-
-int
-qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- u16 source_port_or_eth_type,
- u16 dest_port, enum qed_llh_port_filter_type_t type)
-{
- u32 high = 0, low = 0, en;
- int i;
-
- if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits))
- return 0;
-
- switch (type) {
- case QED_LLH_FILTER_ETHERTYPE:
- high = source_port_or_eth_type;
- break;
- case QED_LLH_FILTER_TCP_SRC_PORT:
- case QED_LLH_FILTER_UDP_SRC_PORT:
- low = source_port_or_eth_type << 16;
- break;
- case QED_LLH_FILTER_TCP_DEST_PORT:
- case QED_LLH_FILTER_UDP_DEST_PORT:
- low = dest_port;
- break;
- case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
- case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
- low = (source_port_or_eth_type << 16) | dest_port;
- break;
- default:
- DP_NOTICE(p_hwfn,
- "Non valid LLH protocol filter type %d\n", type);
- return -EINVAL;
- }
- /* Find a free entry and utilize it */
- for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
- en = qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32));
- if (en)
- continue;
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- 2 * i * sizeof(u32), low);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- (2 * i + 1) * sizeof(u32), high);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 1);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
- i * sizeof(u32), 1 << type);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1);
- break;
- }
- if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
- DP_NOTICE(p_hwfn,
- "Failed to find an empty LLH filter to utilize\n");
- return -EINVAL;
- }
- switch (type) {
- case QED_LLH_FILTER_ETHERTYPE:
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "ETH type %x is added at %d\n",
- source_port_or_eth_type, i);
- break;
- case QED_LLH_FILTER_TCP_SRC_PORT:
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "TCP src port %x is added at %d\n",
- source_port_or_eth_type, i);
- break;
- case QED_LLH_FILTER_UDP_SRC_PORT:
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "UDP src port %x is added at %d\n",
- source_port_or_eth_type, i);
- break;
- case QED_LLH_FILTER_TCP_DEST_PORT:
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "TCP dst port %x is added at %d\n", dest_port, i);
- break;
- case QED_LLH_FILTER_UDP_DEST_PORT:
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "UDP dst port %x is added at %d\n", dest_port, i);
- break;
- case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "TCP src/dst ports %x/%x are added at %d\n",
- source_port_or_eth_type, dest_port, i);
- break;
- case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
- DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
- "UDP src/dst ports %x/%x are added at %d\n",
- source_port_or_eth_type, dest_port, i);
- break;
- }
- return 0;
-}
-
-void
-qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- u16 source_port_or_eth_type,
- u16 dest_port,
- enum qed_llh_port_filter_type_t type)
-{
- u32 high = 0, low = 0;
- int i;
-
- if (!test_bit(QED_MF_LLH_PROTO_CLSS, &p_hwfn->cdev->mf_bits))
- return;
-
- switch (type) {
- case QED_LLH_FILTER_ETHERTYPE:
- high = source_port_or_eth_type;
- break;
- case QED_LLH_FILTER_TCP_SRC_PORT:
- case QED_LLH_FILTER_UDP_SRC_PORT:
- low = source_port_or_eth_type << 16;
- break;
- case QED_LLH_FILTER_TCP_DEST_PORT:
- case QED_LLH_FILTER_UDP_DEST_PORT:
- low = dest_port;
- break;
- case QED_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
- case QED_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
- low = (source_port_or_eth_type << 16) | dest_port;
- break;
- default:
- DP_NOTICE(p_hwfn,
- "Non valid LLH protocol filter type %d\n", type);
- return;
- }
-
- for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
- if (!qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)))
- continue;
- if (!qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32)))
- continue;
- if (!(qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
- i * sizeof(u32)) & BIT(type)))
- continue;
- if (qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- 2 * i * sizeof(u32)) != low)
- continue;
- if (qed_rd(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- (2 * i + 1) * sizeof(u32)) != high)
- continue;
-
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE +
- i * sizeof(u32), 0);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0);
- qed_wr(p_hwfn, p_ptt,
- NIG_REG_LLH_FUNC_FILTER_VALUE +
- (2 * i + 1) * sizeof(u32), 0);
- break;
- }
-
- if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
- DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n");
-}
-
static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
u32 hw_addr, void *p_eth_qzone,
size_t eth_qzone_size, u8 timeset)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index e4b4e3b78e8a..47376d4d071f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -241,11 +241,17 @@ enum qed_dmae_address_type_t {
#define QED_DMAE_FLAG_VF_SRC 0x00000002
#define QED_DMAE_FLAG_VF_DST 0x00000004
#define QED_DMAE_FLAG_COMPLETION_DST 0x00000008
+#define QED_DMAE_FLAG_PORT 0x00000010
+#define QED_DMAE_FLAG_PF_SRC 0x00000020
+#define QED_DMAE_FLAG_PF_DST 0x00000040
struct qed_dmae_params {
u32 flags; /* consists of QED_DMAE_FLAG_* values */
u8 src_vfid;
u8 dst_vfid;
+ u8 port_id;
+ u8 src_pfid;
+ u8 dst_pfid;
};
/**
@@ -257,7 +263,7 @@ struct qed_dmae_params {
* @param source_addr
* @param grc_addr (dmae_data_offset)
* @param size_in_dwords
- * @param flags (one of the flags defined above)
+ * @param p_params (default parameters will be used in case of NULL)
*/
int
qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
@@ -265,7 +271,7 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
u64 source_addr,
u32 grc_addr,
u32 size_in_dwords,
- u32 flags);
+ struct qed_dmae_params *p_params);
/**
* @brief qed_dmae_grc2host - Read data from dmae data offset
@@ -275,11 +281,11 @@ qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
* @param grc_addr (dmae_data_offset)
* @param dest_addr
* @param size_in_dwords
- * @param flags - one of the flags defined above
+ * @param p_params (default parameters will be used in case of NULL)
*/
int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
u32 grc_addr, dma_addr_t dest_addr, u32 size_in_dwords,
- u32 flags);
+ struct qed_dmae_params *p_params);
/**
* @brief qed_dmae_host2host - copy data from to source address
@@ -290,7 +296,7 @@ int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
* @param source_addr
* @param dest_addr
* @param size_in_dwords
- * @param params
+ * @param p_params (default parameters will be used in case of NULL)
*/
int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
@@ -368,26 +374,66 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn,
u8 *dst_id);
/**
- * @brief qed_llh_add_mac_filter - configures a MAC filter in llh
+ * @brief qed_llh_get_num_ppfid - Return the allocated number of LLH filter
+ * banks that are allocated to the PF.
*
- * @param p_hwfn
- * @param p_ptt
- * @param p_filter - MAC to add
+ * @param cdev
+ *
+ * @return u8 - Number of LLH filter banks
*/
-int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt, u8 *p_filter);
+u8 qed_llh_get_num_ppfid(struct qed_dev *cdev);
+
+enum qed_eng {
+ QED_ENG0,
+ QED_ENG1,
+ QED_BOTH_ENG,
+};
/**
- * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh
+ * @brief qed_llh_set_ppfid_affinity - Set the engine affinity for the given
+ * LLH filter bank.
+ *
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param eng
+ *
+ * @return int
+ */
+int qed_llh_set_ppfid_affinity(struct qed_dev *cdev,
+ u8 ppfid, enum qed_eng eng);
+
+/**
+ * @brief qed_llh_set_roce_affinity - Set the RoCE engine affinity
+ *
+ * @param cdev
+ * @param eng
+ *
+ * @return int
+ */
+int qed_llh_set_roce_affinity(struct qed_dev *cdev, enum qed_eng eng);
+
+/**
+ * @brief qed_llh_add_mac_filter - Add a LLH MAC filter into the given filter
+ * bank.
+ *
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param mac_addr - MAC to add
+ */
+int qed_llh_add_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN]);
+
+/**
+ * @brief qed_llh_remove_mac_filter - Remove a LLH MAC filter from the given
+ * filter bank.
*
- * @param p_hwfn
* @param p_ptt
* @param p_filter - MAC to remove
*/
-void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt, u8 *p_filter);
+void qed_llh_remove_mac_filter(struct qed_dev *cdev,
+ u8 ppfid, u8 mac_addr[ETH_ALEN]);
-enum qed_llh_port_filter_type_t {
+enum qed_llh_prot_filter_type_t {
QED_LLH_FILTER_ETHERTYPE,
QED_LLH_FILTER_TCP_SRC_PORT,
QED_LLH_FILTER_TCP_DEST_PORT,
@@ -398,36 +444,37 @@ enum qed_llh_port_filter_type_t {
};
/**
- * @brief qed_llh_add_protocol_filter - configures a protocol filter in llh
+ * @brief qed_llh_add_protocol_filter - Add a LLH protocol filter into the
+ * given filter bank.
*
- * @param p_hwfn
- * @param p_ptt
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param type - type of filters and comparing
* @param source_port_or_eth_type - source port or ethertype to add
* @param dest_port - destination port to add
* @param type - type of filters and comparing
*/
int
-qed_llh_add_protocol_filter(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- u16 source_port_or_eth_type,
- u16 dest_port,
- enum qed_llh_port_filter_type_t type);
+qed_llh_add_protocol_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type, u16 dest_port);
/**
- * @brief qed_llh_remove_protocol_filter - remove a protocol filter in llh
+ * @brief qed_llh_remove_protocol_filter - Remove a LLH protocol filter from
+ * the given filter bank.
*
- * @param p_hwfn
- * @param p_ptt
+ * @param cdev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param type - type of filters and comparing
* @param source_port_or_eth_type - source port or ethertype to add
* @param dest_port - destination port to add
- * @param type - type of filters and comparing
*/
void
-qed_llh_remove_protocol_filter(struct qed_hwfn *p_hwfn,
- struct qed_ptt *p_ptt,
- u16 source_port_or_eth_type,
- u16 dest_port,
- enum qed_llh_port_filter_type_t type);
+qed_llh_remove_protocol_filter(struct qed_dev *cdev,
+ u8 ppfid,
+ enum qed_llh_prot_filter_type_t type,
+ u16 source_port_or_eth_type, u16 dest_port);
/**
* *@brief Cleanup of previous driver remains prior to load
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index 46dc93d3b9b5..de31a382f58e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -745,7 +745,7 @@ struct qed_hash_fcoe_con {
static int qed_fill_fcoe_dev_info(struct qed_dev *cdev,
struct qed_dev_fcoe_info *info)
{
- struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
int rc;
memset(info, 0, sizeof(*info));
@@ -806,15 +806,15 @@ static int qed_fcoe_stop(struct qed_dev *cdev)
return -EINVAL;
}
- p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
+ p_ptt = qed_ptt_acquire(QED_AFFIN_HWFN(cdev));
if (!p_ptt)
return -EAGAIN;
/* Stop the fcoe */
- rc = qed_sp_fcoe_func_stop(QED_LEADING_HWFN(cdev), p_ptt,
+ rc = qed_sp_fcoe_func_stop(QED_AFFIN_HWFN(cdev), p_ptt,
QED_SPQ_MODE_EBLOCK, NULL);
cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
- qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+ qed_ptt_release(QED_AFFIN_HWFN(cdev), p_ptt);
return rc;
}
@@ -828,8 +828,8 @@ static int qed_fcoe_start(struct qed_dev *cdev, struct qed_fcoe_tid *tasks)
return 0;
}
- rc = qed_sp_fcoe_func_start(QED_LEADING_HWFN(cdev),
- QED_SPQ_MODE_EBLOCK, NULL);
+ rc = qed_sp_fcoe_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+ NULL);
if (rc) {
DP_NOTICE(cdev, "Failed to start fcoe\n");
return rc;
@@ -849,7 +849,7 @@ static int qed_fcoe_start(struct qed_dev *cdev, struct qed_fcoe_tid *tasks)
return -ENOMEM;
}
- rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev), tid_info);
+ rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info);
if (rc) {
DP_NOTICE(cdev, "Failed to gather task information\n");
qed_fcoe_stop(cdev);
@@ -884,7 +884,7 @@ static int qed_fcoe_acquire_conn(struct qed_dev *cdev,
}
/* Acquire the connection */
- rc = qed_fcoe_acquire_connection(QED_LEADING_HWFN(cdev), NULL,
+ rc = qed_fcoe_acquire_connection(QED_AFFIN_HWFN(cdev), NULL,
&hash_con->con);
if (rc) {
DP_NOTICE(cdev, "Failed to acquire Connection\n");
@@ -898,7 +898,7 @@ static int qed_fcoe_acquire_conn(struct qed_dev *cdev,
hash_add(cdev->connections, &hash_con->node, *handle);
if (p_doorbell)
- *p_doorbell = qed_fcoe_get_db_addr(QED_LEADING_HWFN(cdev),
+ *p_doorbell = qed_fcoe_get_db_addr(QED_AFFIN_HWFN(cdev),
*handle);
return 0;
@@ -916,7 +916,7 @@ static int qed_fcoe_release_conn(struct qed_dev *cdev, u32 handle)
}
hlist_del(&hash_con->node);
- qed_fcoe_release_connection(QED_LEADING_HWFN(cdev), hash_con->con);
+ qed_fcoe_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con);
kfree(hash_con);
return 0;
@@ -971,7 +971,7 @@ static int qed_fcoe_offload_conn(struct qed_dev *cdev,
con->d_id.addr_mid = conn_info->d_id.addr_mid;
con->d_id.addr_lo = conn_info->d_id.addr_lo;
- return qed_sp_fcoe_conn_offload(QED_LEADING_HWFN(cdev), con,
+ return qed_sp_fcoe_conn_offload(QED_AFFIN_HWFN(cdev), con,
QED_SPQ_MODE_EBLOCK, NULL);
}
@@ -992,13 +992,13 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
con = hash_con->con;
con->terminate_params = terminate_params;
- return qed_sp_fcoe_conn_destroy(QED_LEADING_HWFN(cdev), con,
+ return qed_sp_fcoe_conn_destroy(QED_AFFIN_HWFN(cdev), con,
QED_SPQ_MODE_EBLOCK, NULL);
}
static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
{
- return qed_fcoe_get_stats(QED_LEADING_HWFN(cdev), stats);
+ return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats);
}
void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 37edaa847512..e054f6c69e3a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12612,8 +12612,10 @@ struct public_drv_mb {
#define DRV_MSG_CODE_BIST_TEST 0x001e0000
#define DRV_MSG_CODE_SET_LED_MODE 0x00200000
-#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000
+#define DRV_MSG_CODE_RESOURCE_CMD 0x00230000
#define DRV_MSG_CODE_GET_TLV_DONE 0x002f0000
+#define DRV_MSG_CODE_GET_ENGINE_CONFIG 0x00370000
+#define DRV_MSG_CODE_GET_PPFID_BITMAP 0x43000000
#define RESOURCE_CMD_REQ_RESC_MASK 0x0000001F
#define RESOURCE_CMD_REQ_RESC_SHIFT 0
@@ -12802,6 +12804,18 @@ struct public_drv_mb {
#define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR (1 << 0)
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_MASK 0x00000001
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_SHIFT 0
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_MASK 0x00000002
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_SHIFT 1
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_MASK 0x00000004
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_SHIFT 2
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_MASK 0x00000008
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_SHIFT 3
+
+#define FW_MB_PARAM_PPFID_BITMAP_MASK 0xFF
+#define FW_MB_PARAM_PPFID_BITMAP_SHIFT 0
+
u32 drv_pulse_mb;
#define DRV_PULSE_SEQ_MASK 0x00007fff
#define DRV_PULSE_SYSTEM_TIME_MASK 0xffff0000
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 72ec1c6bdf70..a4de9e3ef72c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -392,11 +392,15 @@ u32 qed_vfid_to_concrete(struct qed_hwfn *p_hwfn, u8 vfid)
}
/* DMAE */
+#define QED_DMAE_FLAGS_IS_SET(params, flag) \
+ ((params) != NULL && ((params)->flags & QED_DMAE_FLAG_##flag))
+
static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
const u8 is_src_type_grc,
const u8 is_dst_type_grc,
struct qed_dmae_params *p_params)
{
+ u8 src_pfid, dst_pfid, port_id;
u16 opcode_b = 0;
u32 opcode = 0;
@@ -407,14 +411,18 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
opcode |= (is_src_type_grc ? DMAE_CMD_SRC_MASK_GRC
: DMAE_CMD_SRC_MASK_PCIE) <<
DMAE_CMD_SRC_SHIFT;
- opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_SRC_PF_ID_MASK) <<
+ src_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_SRC) ?
+ p_params->src_pfid : p_hwfn->rel_pf_id;
+ opcode |= ((src_pfid & DMAE_CMD_SRC_PF_ID_MASK) <<
DMAE_CMD_SRC_PF_ID_SHIFT);
/* The destination of the DMA can be: 0-None 1-PCIe 2-GRC 3-None */
opcode |= (is_dst_type_grc ? DMAE_CMD_DST_MASK_GRC
: DMAE_CMD_DST_MASK_PCIE) <<
DMAE_CMD_DST_SHIFT;
- opcode |= ((p_hwfn->rel_pf_id & DMAE_CMD_DST_PF_ID_MASK) <<
+ dst_pfid = QED_DMAE_FLAGS_IS_SET(p_params, PF_DST) ?
+ p_params->dst_pfid : p_hwfn->rel_pf_id;
+ opcode |= ((dst_pfid & DMAE_CMD_DST_PF_ID_MASK) <<
DMAE_CMD_DST_PF_ID_SHIFT);
/* Whether to write a completion word to the completion destination:
@@ -425,12 +433,14 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
DMAE_CMD_SRC_ADDR_RESET_SHIFT);
- if (p_params->flags & QED_DMAE_FLAG_COMPLETION_DST)
+ if (QED_DMAE_FLAGS_IS_SET(p_params, COMPLETION_DST))
opcode |= (1 << DMAE_CMD_COMP_FUNC_SHIFT);
opcode |= (DMAE_CMD_ENDIANITY << DMAE_CMD_ENDIANITY_MODE_SHIFT);
- opcode |= ((p_hwfn->port_id) << DMAE_CMD_PORT_ID_SHIFT);
+ port_id = (QED_DMAE_FLAGS_IS_SET(p_params, PORT)) ?
+ p_params->port_id : p_hwfn->port_id;
+ opcode |= (port_id << DMAE_CMD_PORT_ID_SHIFT);
/* reset source address in next go */
opcode |= (DMAE_CMD_SRC_ADDR_RESET_MASK <<
@@ -441,7 +451,7 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
DMAE_CMD_DST_ADDR_RESET_SHIFT);
/* SRC/DST VFID: all 1's - pf, otherwise VF id */
- if (p_params->flags & QED_DMAE_FLAG_VF_SRC) {
+ if (QED_DMAE_FLAGS_IS_SET(p_params, VF_SRC)) {
opcode |= 1 << DMAE_CMD_SRC_VF_ID_VALID_SHIFT;
opcode_b |= p_params->src_vfid << DMAE_CMD_SRC_VF_ID_SHIFT;
} else {
@@ -449,7 +459,7 @@ static void qed_dmae_opcode(struct qed_hwfn *p_hwfn,
DMAE_CMD_SRC_VF_ID_SHIFT;
}
- if (p_params->flags & QED_DMAE_FLAG_VF_DST) {
+ if (QED_DMAE_FLAGS_IS_SET(p_params, VF_DST)) {
opcode |= 1 << DMAE_CMD_DST_VF_ID_VALID_SHIFT;
opcode_b |= p_params->dst_vfid << DMAE_CMD_DST_VF_ID_SHIFT;
} else {
@@ -733,7 +743,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
for (i = 0; i <= cnt_split; i++) {
offset = length_limit * i;
- if (!(p_params->flags & QED_DMAE_FLAG_RW_REPL_SRC)) {
+ if (!QED_DMAE_FLAGS_IS_SET(p_params, RW_REPL_SRC)) {
if (src_type == QED_DMAE_ADDRESS_GRC)
src_addr_split = src_addr + offset;
else
@@ -771,14 +781,12 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
- u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags)
+ u64 source_addr, u32 grc_addr, u32 size_in_dwords,
+ struct qed_dmae_params *p_params)
{
u32 grc_addr_in_dw = grc_addr / sizeof(u32);
- struct qed_dmae_params params;
int rc;
- memset(&params, 0, sizeof(struct qed_dmae_params));
- params.flags = flags;
mutex_lock(&p_hwfn->dmae_info.mutex);
@@ -786,7 +794,7 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
grc_addr_in_dw,
QED_DMAE_ADDRESS_HOST_VIRT,
QED_DMAE_ADDRESS_GRC,
- size_in_dwords, &params);
+ size_in_dwords, p_params);
mutex_unlock(&p_hwfn->dmae_info.mutex);
@@ -796,21 +804,19 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn,
int qed_dmae_grc2host(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt,
u32 grc_addr,
- dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+ dma_addr_t dest_addr, u32 size_in_dwords,
+ struct qed_dmae_params *p_params)
{
u32 grc_addr_in_dw = grc_addr / sizeof(u32);
- struct qed_dmae_params params;
int rc;
- memset(&params, 0, sizeof(struct qed_dmae_params));
- params.flags = flags;
mutex_lock(&p_hwfn->dmae_info.mutex);
rc = qed_dmae_execute_command(p_hwfn, p_ptt, grc_addr_in_dw,
dest_addr, QED_DMAE_ADDRESS_GRC,
QED_DMAE_ADDRESS_HOST_VIRT,
- size_in_dwords, &params);
+ size_in_dwords, p_params);
mutex_unlock(&p_hwfn->dmae_info.mutex);
@@ -842,7 +848,6 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
struct qed_ptt *p_ptt, const char *phase)
{
u32 size = PAGE_SIZE / 2, val;
- struct qed_dmae_params params;
int rc = 0;
dma_addr_t p_phys;
void *p_virt;
@@ -875,9 +880,8 @@ int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
(u64)p_phys,
p_virt, (u64)(p_phys + size), (u8 *)p_virt + size, size);
- memset(&params, 0, sizeof(params));
rc = qed_dmae_host2host(p_hwfn, p_ptt, p_phys, p_phys + size,
- size / 4 /* size_in_dwords */, &params);
+ size / 4, NULL);
if (rc) {
DP_NOTICE(p_hwfn,
"DMAE sanity [%s]: qed_dmae_host2host() failed. rc = %d.\n",
diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
index 34193c2f1699..a868d7f88601 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c
@@ -131,7 +131,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn,
rc = qed_dmae_host2grc(p_hwfn, p_ptt,
(uintptr_t)(p_init_val + i),
- addr + (i << 2), segment, 0);
+ addr + (i << 2), segment, NULL);
if (rc)
return rc;
@@ -194,7 +194,7 @@ static int qed_init_array_dmae(struct qed_hwfn *p_hwfn,
} else {
rc = qed_dmae_host2grc(p_hwfn, p_ptt,
(uintptr_t)(buf + dmae_data_offset),
- addr, size, 0);
+ addr, size, NULL);
}
return rc;
@@ -205,6 +205,7 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
u32 addr, u32 fill, u32 fill_count)
{
static u32 zero_buffer[DMAE_MAX_RW_SIZE];
+ struct qed_dmae_params params = {};
memset(zero_buffer, 0, sizeof(u32) * DMAE_MAX_RW_SIZE);
@@ -214,10 +215,10 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn,
* 3. p_hwfb->temp_data,
* 4. fill_count
*/
-
+ params.flags = QED_DMAE_FLAG_RW_REPL_SRC;
return qed_dmae_host2grc(p_hwfn, p_ptt,
(uintptr_t)(&zero_buffer[0]),
- addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC);
+ addr, fill_count, &params);
}
static void qed_init_fill(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index fdfedbc8e431..4e8118a08654 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1508,10 +1508,10 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn,
qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&phys_addr,
CAU_REG_SB_ADDR_MEMORY +
- igu_sb_id * sizeof(u64), 2, 0);
+ igu_sb_id * sizeof(u64), 2, NULL);
qed_dmae_host2grc(p_hwfn, p_ptt, (u64)(uintptr_t)&sb_entry,
CAU_REG_SB_VAR_MEMORY +
- igu_sb_id * sizeof(u64), 2, 0);
+ igu_sb_id * sizeof(u64), 2, NULL);
} else {
/* Initialize Status Block Address */
STORE_RT_REG_AGG(p_hwfn,
@@ -2362,7 +2362,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
sb_id * sizeof(u64),
- (u64)(uintptr_t)&sb_entry, 2, 0);
+ (u64)(uintptr_t)&sb_entry, 2, NULL);
if (rc) {
DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
return rc;
@@ -2376,7 +2376,7 @@ int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
rc = qed_dmae_host2grc(p_hwfn, p_ptt,
(u64)(uintptr_t)&sb_entry,
CAU_REG_SB_VAR_MEMORY +
- sb_id * sizeof(u64), 2, 0);
+ sb_id * sizeof(u64), 2, NULL);
if (rc) {
DP_ERR(p_hwfn, "dmae_host2grc failed %d\n", rc);
return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 4f8a685d1a55..5585c18053ec 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -1082,7 +1082,7 @@ struct qed_hash_iscsi_con {
static int qed_fill_iscsi_dev_info(struct qed_dev *cdev,
struct qed_dev_iscsi_info *info)
{
- struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_hwfn *hwfn = QED_AFFIN_HWFN(cdev);
int rc;
@@ -1141,8 +1141,8 @@ static int qed_iscsi_stop(struct qed_dev *cdev)
}
/* Stop the iscsi */
- rc = qed_sp_iscsi_func_stop(QED_LEADING_HWFN(cdev),
- QED_SPQ_MODE_EBLOCK, NULL);
+ rc = qed_sp_iscsi_func_stop(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+ NULL);
cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
return rc;
@@ -1161,9 +1161,8 @@ static int qed_iscsi_start(struct qed_dev *cdev,
return 0;
}
- rc = qed_sp_iscsi_func_start(QED_LEADING_HWFN(cdev),
- QED_SPQ_MODE_EBLOCK, NULL, event_context,
- async_event_cb);
+ rc = qed_sp_iscsi_func_start(QED_AFFIN_HWFN(cdev), QED_SPQ_MODE_EBLOCK,
+ NULL, event_context, async_event_cb);
if (rc) {
DP_NOTICE(cdev, "Failed to start iscsi\n");
return rc;
@@ -1182,8 +1181,7 @@ static int qed_iscsi_start(struct qed_dev *cdev,
return -ENOMEM;
}
- rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev),
- tid_info);
+ rc = qed_cxt_get_tid_mem_info(QED_AFFIN_HWFN(cdev), tid_info);
if (rc) {
DP_NOTICE(cdev, "Failed to gather task information\n");
qed_iscsi_stop(cdev);
@@ -1215,7 +1213,7 @@ static int qed_iscsi_acquire_conn(struct qed_dev *cdev,
return -ENOMEM;
/* Acquire the connection */
- rc = qed_iscsi_acquire_connection(QED_LEADING_HWFN(cdev), NULL,
+ rc = qed_iscsi_acquire_connection(QED_AFFIN_HWFN(cdev), NULL,
&hash_con->con);
if (rc) {
DP_NOTICE(cdev, "Failed to acquire Connection\n");
@@ -1229,7 +1227,7 @@ static int qed_iscsi_acquire_conn(struct qed_dev *cdev,
hash_add(cdev->connections, &hash_con->node, *handle);
if (p_doorbell)
- *p_doorbell = qed_iscsi_get_db_addr(QED_LEADING_HWFN(cdev),
+ *p_doorbell = qed_iscsi_get_db_addr(QED_AFFIN_HWFN(cdev),
*handle);
return 0;
@@ -1247,7 +1245,7 @@ static int qed_iscsi_release_conn(struct qed_dev *cdev, u32 handle)
}
hlist_del(&hash_con->node);
- qed_iscsi_release_connection(QED_LEADING_HWFN(cdev), hash_con->con);
+ qed_iscsi_release_connection(QED_AFFIN_HWFN(cdev), hash_con->con);
kfree(hash_con);
return 0;
@@ -1324,7 +1322,7 @@ static int qed_iscsi_offload_conn(struct qed_dev *cdev,
/* Set default values on other connection fields */
con->offl_flags = 0x1;
- return qed_sp_iscsi_conn_offload(QED_LEADING_HWFN(cdev), con,
+ return qed_sp_iscsi_conn_offload(QED_AFFIN_HWFN(cdev), con,
QED_SPQ_MODE_EBLOCK, NULL);
}
@@ -1351,7 +1349,7 @@ static int qed_iscsi_update_conn(struct qed_dev *cdev,
con->first_seq_length = conn_info->first_seq_length;
con->exp_stat_sn = conn_info->exp_stat_sn;
- return qed_sp_iscsi_conn_update(QED_LEADING_HWFN(cdev), con,
+ return qed_sp_iscsi_conn_update(QED_AFFIN_HWFN(cdev), con,
QED_SPQ_MODE_EBLOCK, NULL);
}
@@ -1366,8 +1364,7 @@ static int qed_iscsi_clear_conn_sq(struct qed_dev *cdev, u32 handle)
return -EINVAL;
}
- return qed_sp_iscsi_conn_clear_sq(QED_LEADING_HWFN(cdev),
- hash_con->con,
+ return qed_sp_iscsi_conn_clear_sq(QED_AFFIN_HWFN(cdev), hash_con->con,
QED_SPQ_MODE_EBLOCK, NULL);
}
@@ -1385,14 +1382,13 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
hash_con->con->abortive_dsconnect = abrt_conn;
- return qed_sp_iscsi_conn_terminate(QED_LEADING_HWFN(cdev),
- hash_con->con,
+ return qed_sp_iscsi_conn_terminate(QED_AFFIN_HWFN(cdev), hash_con->con,
QED_SPQ_MODE_EBLOCK, NULL);
}
static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
{
- return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
+ return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats);
}
static int qed_iscsi_change_mac(struct qed_dev *cdev,
@@ -1407,8 +1403,7 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
return -EINVAL;
}
- return qed_sp_iscsi_mac_update(QED_LEADING_HWFN(cdev),
- hash_con->con,
+ return qed_sp_iscsi_mac_update(QED_AFFIN_HWFN(cdev), hash_con->con,
QED_SPQ_MODE_EBLOCK, NULL);
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
index ded556b7bab5..7c71ea15251f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c
@@ -2528,7 +2528,7 @@ qed_iwarp_ll2_slowpath(void *cxt,
memset(fpdu, 0, sizeof(*fpdu));
}
-static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn)
{
struct qed_iwarp_info *iwarp_info = &p_hwfn->p_rdma_info->iwarp;
int rc = 0;
@@ -2563,8 +2563,9 @@ static int qed_iwarp_ll2_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
iwarp_info->ll2_mpa_handle = QED_IWARP_HANDLE_INVAL;
}
- qed_llh_remove_mac_filter(p_hwfn,
- p_ptt, p_hwfn->p_rdma_info->iwarp.mac_addr);
+ qed_llh_remove_mac_filter(p_hwfn->cdev, 0,
+ p_hwfn->p_rdma_info->iwarp.mac_addr);
+
return rc;
}
@@ -2608,8 +2609,7 @@ qed_iwarp_ll2_alloc_buffers(struct qed_hwfn *p_hwfn,
static int
qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
- struct qed_rdma_start_in_params *params,
- struct qed_ptt *p_ptt)
+ struct qed_rdma_start_in_params *params)
{
struct qed_iwarp_info *iwarp_info;
struct qed_ll2_acquire_data data;
@@ -2628,7 +2628,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
ether_addr_copy(p_hwfn->p_rdma_info->iwarp.mac_addr, params->mac_addr);
- rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+ rc = qed_llh_add_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
if (rc)
return rc;
@@ -2653,7 +2653,7 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
rc = qed_ll2_acquire_connection(p_hwfn, &data);
if (rc) {
DP_NOTICE(p_hwfn, "Failed to acquire LL2 connection\n");
- qed_llh_remove_mac_filter(p_hwfn, p_ptt, params->mac_addr);
+ qed_llh_remove_mac_filter(p_hwfn->cdev, 0, params->mac_addr);
return rc;
}
@@ -2757,12 +2757,12 @@ qed_iwarp_ll2_start(struct qed_hwfn *p_hwfn,
&iwarp_info->mpa_buf_list);
return rc;
err:
- qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+ qed_iwarp_ll2_stop(p_hwfn);
return rc;
}
-int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn,
struct qed_rdma_start_in_params *params)
{
struct qed_iwarp_info *iwarp_info;
@@ -2794,10 +2794,10 @@ int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
qed_iwarp_async_event);
qed_ooo_setup(p_hwfn);
- return qed_iwarp_ll2_start(p_hwfn, params, p_ptt);
+ return qed_iwarp_ll2_start(p_hwfn, params);
}
-int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn)
{
int rc;
@@ -2808,7 +2808,7 @@ int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
qed_spq_unregister_async_cb(p_hwfn, PROTOCOLID_IWARP);
- return qed_iwarp_ll2_stop(p_hwfn, p_ptt);
+ return qed_iwarp_ll2_stop(p_hwfn);
}
static void qed_iwarp_qp_in_error(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
index 7ac959038324..c1b2057d23b8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.h
@@ -183,13 +183,13 @@ struct qed_iwarp_listener {
int qed_iwarp_alloc(struct qed_hwfn *p_hwfn);
-int qed_iwarp_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
+int qed_iwarp_setup(struct qed_hwfn *p_hwfn,
struct qed_rdma_start_in_params *params);
void qed_iwarp_init_fw_ramrod(struct qed_hwfn *p_hwfn,
struct iwarp_init_func_ramrod_data *p_ramrod);
-int qed_iwarp_stop(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+int qed_iwarp_stop(struct qed_hwfn *p_hwfn);
void qed_iwarp_resc_free(struct qed_hwfn *p_hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 57641728df69..9f36e7948222 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -2111,7 +2111,7 @@ int qed_get_rxq_coalesce(struct qed_hwfn *p_hwfn,
rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
p_cid->sb_igu_id * sizeof(u64),
- (u64)(uintptr_t)&sb_entry, 2, 0);
+ (u64)(uintptr_t)&sb_entry, 2, NULL);
if (rc) {
DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
return rc;
@@ -2144,7 +2144,7 @@ int qed_get_txq_coalesce(struct qed_hwfn *p_hwfn,
rc = qed_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
p_cid->sb_igu_id * sizeof(u64),
- (u64)(uintptr_t)&sb_entry, 2, 0);
+ (u64)(uintptr_t)&sb_entry, 2, NULL);
if (rc) {
DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
return rc;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index b5f419b71287..19a1a58d60f8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -239,9 +239,8 @@ out_post1:
buffer->phys_addr = new_phys_addr;
out_post:
- rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle,
- buffer->phys_addr, 0, buffer, 1);
-
+ rc = qed_ll2_post_rx_buffer(p_hwfn, cdev->ll2->handle,
+ buffer->phys_addr, 0, buffer, 1);
if (rc)
qed_ll2_dealloc_buffer(cdev, buffer);
}
@@ -926,16 +925,15 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
return 0;
}
-static void qed_ll2_stop_ooo(struct qed_dev *cdev)
+static void qed_ll2_stop_ooo(struct qed_hwfn *p_hwfn)
{
- struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
- u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+ u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
- DP_VERBOSE(cdev, QED_MSG_STORAGE, "Stopping LL2 OOO queue [%02x]\n",
- *handle);
+ DP_VERBOSE(p_hwfn, (QED_MSG_STORAGE | QED_MSG_LL2),
+ "Stopping LL2 OOO queue [%02x]\n", *handle);
- qed_ll2_terminate_connection(hwfn, *handle);
- qed_ll2_release_connection(hwfn, *handle);
+ qed_ll2_terminate_connection(p_hwfn, *handle);
+ qed_ll2_release_connection(p_hwfn, *handle);
*handle = QED_LL2_UNUSED_HANDLE;
}
@@ -1574,12 +1572,12 @@ int qed_ll2_establish_connection(void *cxt, u8 connection_handle)
if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
- qed_llh_add_protocol_filter(p_hwfn, p_ptt,
- ETH_P_FCOE, 0,
- QED_LLH_FILTER_ETHERTYPE);
- qed_llh_add_protocol_filter(p_hwfn, p_ptt,
- ETH_P_FIP, 0,
- QED_LLH_FILTER_ETHERTYPE);
+ qed_llh_add_protocol_filter(p_hwfn->cdev, 0,
+ QED_LLH_FILTER_ETHERTYPE,
+ ETH_P_FCOE, 0);
+ qed_llh_add_protocol_filter(p_hwfn->cdev, 0,
+ QED_LLH_FILTER_ETHERTYPE,
+ ETH_P_FIP, 0);
}
out:
@@ -1980,12 +1978,12 @@ int qed_ll2_terminate_connection(void *cxt, u8 connection_handle)
if (p_ll2_conn->input.conn_type == QED_LL2_TYPE_FCOE) {
if (!test_bit(QED_MF_UFP_SPECIFIC, &p_hwfn->cdev->mf_bits))
- qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
- ETH_P_FCOE, 0,
- QED_LLH_FILTER_ETHERTYPE);
- qed_llh_remove_protocol_filter(p_hwfn, p_ptt,
- ETH_P_FIP, 0,
- QED_LLH_FILTER_ETHERTYPE);
+ qed_llh_remove_protocol_filter(p_hwfn->cdev, 0,
+ QED_LLH_FILTER_ETHERTYPE,
+ ETH_P_FCOE, 0);
+ qed_llh_remove_protocol_filter(p_hwfn->cdev, 0,
+ QED_LLH_FILTER_ETHERTYPE,
+ ETH_P_FIP, 0);
}
out:
@@ -2086,12 +2084,12 @@ static void _qed_ll2_get_port_stats(struct qed_hwfn *p_hwfn,
TSTORM_LL2_PORT_STAT_OFFSET(MFW_PORT(p_hwfn)),
sizeof(port_stats));
- p_stats->gsi_invalid_hdr = HILO_64_REGPAIR(port_stats.gsi_invalid_hdr);
- p_stats->gsi_invalid_pkt_length =
+ p_stats->gsi_invalid_hdr += HILO_64_REGPAIR(port_stats.gsi_invalid_hdr);
+ p_stats->gsi_invalid_pkt_length +=
HILO_64_REGPAIR(port_stats.gsi_invalid_pkt_length);
- p_stats->gsi_unsupported_pkt_typ =
+ p_stats->gsi_unsupported_pkt_typ +=
HILO_64_REGPAIR(port_stats.gsi_unsupported_pkt_typ);
- p_stats->gsi_crcchksm_error =
+ p_stats->gsi_crcchksm_error +=
HILO_64_REGPAIR(port_stats.gsi_crcchksm_error);
}
@@ -2109,9 +2107,9 @@ static void _qed_ll2_get_tstats(struct qed_hwfn *p_hwfn,
CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid);
qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats));
- p_stats->packet_too_big_discard =
+ p_stats->packet_too_big_discard +=
HILO_64_REGPAIR(tstats.packet_too_big_discard);
- p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard);
+ p_stats->no_buff_discard += HILO_64_REGPAIR(tstats.no_buff_discard);
}
static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn,
@@ -2128,12 +2126,12 @@ static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn,
CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid);
qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats));
- p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
- p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
- p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
- p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
- p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
- p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
+ p_stats->rcv_ucast_bytes += HILO_64_REGPAIR(ustats.rcv_ucast_bytes);
+ p_stats->rcv_mcast_bytes += HILO_64_REGPAIR(ustats.rcv_mcast_bytes);
+ p_stats->rcv_bcast_bytes += HILO_64_REGPAIR(ustats.rcv_bcast_bytes);
+ p_stats->rcv_ucast_pkts += HILO_64_REGPAIR(ustats.rcv_ucast_pkts);
+ p_stats->rcv_mcast_pkts += HILO_64_REGPAIR(ustats.rcv_mcast_pkts);
+ p_stats->rcv_bcast_pkts += HILO_64_REGPAIR(ustats.rcv_bcast_pkts);
}
static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn,
@@ -2150,23 +2148,21 @@ static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn,
CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id);
qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats));
- p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes);
- p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes);
- p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes);
- p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts);
- p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts);
- p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts);
+ p_stats->sent_ucast_bytes += HILO_64_REGPAIR(pstats.sent_ucast_bytes);
+ p_stats->sent_mcast_bytes += HILO_64_REGPAIR(pstats.sent_mcast_bytes);
+ p_stats->sent_bcast_bytes += HILO_64_REGPAIR(pstats.sent_bcast_bytes);
+ p_stats->sent_ucast_pkts += HILO_64_REGPAIR(pstats.sent_ucast_pkts);
+ p_stats->sent_mcast_pkts += HILO_64_REGPAIR(pstats.sent_mcast_pkts);
+ p_stats->sent_bcast_pkts += HILO_64_REGPAIR(pstats.sent_bcast_pkts);
}
-int qed_ll2_get_stats(void *cxt,
- u8 connection_handle, struct qed_ll2_stats *p_stats)
+static int __qed_ll2_get_stats(void *cxt, u8 connection_handle,
+ struct qed_ll2_stats *p_stats)
{
struct qed_hwfn *p_hwfn = cxt;
struct qed_ll2_info *p_ll2_conn = NULL;
struct qed_ptt *p_ptt;
- memset(p_stats, 0, sizeof(*p_stats));
-
if ((connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) ||
!p_hwfn->p_ll2_info)
return -EINVAL;
@@ -2181,15 +2177,26 @@ int qed_ll2_get_stats(void *cxt,
if (p_ll2_conn->input.gsi_enable)
_qed_ll2_get_port_stats(p_hwfn, p_ptt, p_stats);
+
_qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+
_qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
+
if (p_ll2_conn->tx_stats_en)
_qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats);
qed_ptt_release(p_hwfn, p_ptt);
+
return 0;
}
+int qed_ll2_get_stats(void *cxt,
+ u8 connection_handle, struct qed_ll2_stats *p_stats)
+{
+ memset(p_stats, 0, sizeof(*p_stats));
+ return __qed_ll2_get_stats(cxt, connection_handle, p_stats);
+}
+
static void qed_ll2b_release_rx_packet(void *cxt,
u8 connection_handle,
void *cookie,
@@ -2216,7 +2223,7 @@ struct qed_ll2_cbs ll2_cbs = {
.tx_release_cb = &qed_ll2b_complete_tx_packet,
};
-static void qed_ll2_set_conn_data(struct qed_dev *cdev,
+static void qed_ll2_set_conn_data(struct qed_hwfn *p_hwfn,
struct qed_ll2_acquire_data *data,
struct qed_ll2_params *params,
enum qed_ll2_conn_type conn_type,
@@ -2232,7 +2239,7 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
data->input.tx_num_desc = QED_LL2_TX_SIZE;
data->p_connection_handle = handle;
data->cbs = &ll2_cbs;
- ll2_cbs.cookie = QED_LEADING_HWFN(cdev);
+ ll2_cbs.cookie = p_hwfn;
if (lb) {
data->input.tx_tc = PKT_LB_TC;
@@ -2243,74 +2250,102 @@ static void qed_ll2_set_conn_data(struct qed_dev *cdev,
}
}
-static int qed_ll2_start_ooo(struct qed_dev *cdev,
+static int qed_ll2_start_ooo(struct qed_hwfn *p_hwfn,
struct qed_ll2_params *params)
{
- struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
- u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+ u8 *handle = &p_hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
struct qed_ll2_acquire_data data;
int rc;
- qed_ll2_set_conn_data(cdev, &data, params,
+ qed_ll2_set_conn_data(p_hwfn, &data, params,
QED_LL2_TYPE_OOO, handle, true);
- rc = qed_ll2_acquire_connection(hwfn, &data);
+ rc = qed_ll2_acquire_connection(p_hwfn, &data);
if (rc) {
- DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n");
+ DP_INFO(p_hwfn, "Failed to acquire LL2 OOO connection\n");
goto out;
}
- rc = qed_ll2_establish_connection(hwfn, *handle);
+ rc = qed_ll2_establish_connection(p_hwfn, *handle);
if (rc) {
- DP_INFO(cdev, "Failed to establist LL2 OOO connection\n");
+ DP_INFO(p_hwfn, "Failed to establish LL2 OOO connection\n");
goto fail;
}
return 0;
fail:
- qed_ll2_release_connection(hwfn, *handle);
+ qed_ll2_release_connection(p_hwfn, *handle);
out:
*handle = QED_LL2_UNUSED_HANDLE;
return rc;
}
-static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
+static bool qed_ll2_is_storage_eng1(struct qed_dev *cdev)
{
- struct qed_ll2_buffer *buffer, *tmp_buffer;
- enum qed_ll2_conn_type conn_type;
- struct qed_ll2_acquire_data data;
- struct qed_ptt *p_ptt;
- int rc, i;
+ return (QED_IS_FCOE_PERSONALITY(QED_LEADING_HWFN(cdev)) ||
+ QED_IS_ISCSI_PERSONALITY(QED_LEADING_HWFN(cdev))) &&
+ (QED_AFFIN_HWFN(cdev) != QED_LEADING_HWFN(cdev));
+}
+static int __qed_ll2_stop(struct qed_hwfn *p_hwfn)
+{
+ struct qed_dev *cdev = p_hwfn->cdev;
+ int rc;
- /* Initialize LL2 locks & lists */
- INIT_LIST_HEAD(&cdev->ll2->list);
- spin_lock_init(&cdev->ll2->lock);
- cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
- L1_CACHE_BYTES + params->mtu;
+ rc = qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle);
+ if (rc)
+ DP_INFO(cdev, "Failed to terminate LL2 connection\n");
- /*Allocate memory for LL2 */
- DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n",
- cdev->ll2->rx_size);
- for (i = 0; i < QED_LL2_RX_SIZE; i++) {
- buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
- if (!buffer) {
- DP_INFO(cdev, "Failed to allocate LL2 buffers\n");
- goto fail;
- }
+ qed_ll2_release_connection(p_hwfn, cdev->ll2->handle);
- rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data,
- &buffer->phys_addr);
- if (rc) {
- kfree(buffer);
- goto fail;
- }
+ return rc;
+}
- list_add_tail(&buffer->list, &cdev->ll2->list);
+static int qed_ll2_stop(struct qed_dev *cdev)
+{
+ bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+ struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+ int rc = 0, rc2 = 0;
+
+ if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE)
+ return 0;
+
+ qed_llh_remove_mac_filter(cdev, 0, cdev->ll2_mac_address);
+ eth_zero_addr(cdev->ll2_mac_address);
+
+ if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
+ qed_ll2_stop_ooo(p_hwfn);
+
+ /* In CMT mode, LL2 is always started on engine 0 for a storage PF */
+ if (b_is_storage_eng1) {
+ rc2 = __qed_ll2_stop(QED_LEADING_HWFN(cdev));
+ if (rc2)
+ DP_NOTICE(QED_LEADING_HWFN(cdev),
+ "Failed to stop LL2 on engine 0\n");
}
- switch (QED_LEADING_HWFN(cdev)->hw_info.personality) {
+ rc = __qed_ll2_stop(p_hwfn);
+ if (rc)
+ DP_NOTICE(p_hwfn, "Failed to stop LL2\n");
+
+ qed_ll2_kill_buffers(cdev);
+
+ cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
+
+ return rc | rc2;
+}
+
+static int __qed_ll2_start(struct qed_hwfn *p_hwfn,
+ struct qed_ll2_params *params)
+{
+ struct qed_ll2_buffer *buffer, *tmp_buffer;
+ struct qed_dev *cdev = p_hwfn->cdev;
+ enum qed_ll2_conn_type conn_type;
+ struct qed_ll2_acquire_data data;
+ int rc, rx_cnt;
+
+ switch (p_hwfn->hw_info.personality) {
case QED_PCI_FCOE:
conn_type = QED_LL2_TYPE_FCOE;
break;
@@ -2321,33 +2356,34 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
conn_type = QED_LL2_TYPE_ROCE;
break;
default:
+
conn_type = QED_LL2_TYPE_TEST;
}
- qed_ll2_set_conn_data(cdev, &data, params, conn_type,
+ qed_ll2_set_conn_data(p_hwfn, &data, params, conn_type,
&cdev->ll2->handle, false);
- rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &data);
+ rc = qed_ll2_acquire_connection(p_hwfn, &data);
if (rc) {
- DP_INFO(cdev, "Failed to acquire LL2 connection\n");
- goto fail;
+ DP_INFO(p_hwfn, "Failed to acquire LL2 connection\n");
+ return rc;
}
- rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev),
- cdev->ll2->handle);
+ rc = qed_ll2_establish_connection(p_hwfn, cdev->ll2->handle);
if (rc) {
- DP_INFO(cdev, "Failed to establish LL2 connection\n");
- goto release_fail;
+ DP_INFO(p_hwfn, "Failed to establish LL2 connection\n");
+ goto release_conn;
}
/* Post all Rx buffers to FW */
spin_lock_bh(&cdev->ll2->lock);
+ rx_cnt = cdev->ll2->rx_cnt;
list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) {
- rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev),
+ rc = qed_ll2_post_rx_buffer(p_hwfn,
cdev->ll2->handle,
buffer->phys_addr, 0, buffer, 1);
if (rc) {
- DP_INFO(cdev,
+ DP_INFO(p_hwfn,
"Failed to post an Rx buffer; Deleting it\n");
dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr,
cdev->ll2->rx_size, DMA_FROM_DEVICE);
@@ -2355,100 +2391,127 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
list_del(&buffer->list);
kfree(buffer);
} else {
- cdev->ll2->rx_cnt++;
+ rx_cnt++;
}
}
spin_unlock_bh(&cdev->ll2->lock);
- if (!cdev->ll2->rx_cnt) {
- DP_INFO(cdev, "Failed passing even a single Rx buffer\n");
- goto release_terminate;
+ if (rx_cnt == cdev->ll2->rx_cnt) {
+ DP_NOTICE(p_hwfn, "Failed passing even a single Rx buffer\n");
+ goto terminate_conn;
}
+ cdev->ll2->rx_cnt = rx_cnt;
+
+ return 0;
+
+terminate_conn:
+ qed_ll2_terminate_connection(p_hwfn, cdev->ll2->handle);
+release_conn:
+ qed_ll2_release_connection(p_hwfn, cdev->ll2->handle);
+ return rc;
+}
+
+static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
+{
+ bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+ struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+ struct qed_ll2_buffer *buffer;
+ int rx_num_desc, i, rc;
if (!is_valid_ether_addr(params->ll2_mac_address)) {
- DP_INFO(cdev, "Invalid Ethernet address\n");
- goto release_terminate;
+ DP_NOTICE(cdev, "Invalid Ethernet address\n");
+ return -EINVAL;
}
- if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI) {
- DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
- rc = qed_ll2_start_ooo(cdev, params);
+ WARN_ON(!cdev->ll2->cbs);
+
+ /* Initialize LL2 locks & lists */
+ INIT_LIST_HEAD(&cdev->ll2->list);
+ spin_lock_init(&cdev->ll2->lock);
+
+ cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN +
+ L1_CACHE_BYTES + params->mtu;
+
+ /* Allocate memory for LL2.
+ * In CMT mode, in case of a storage PF which is affintized to engine 1,
+ * LL2 is started also on engine 0 and thus we need twofold buffers.
+ */
+ rx_num_desc = QED_LL2_RX_SIZE * (b_is_storage_eng1 ? 2 : 1);
+ DP_INFO(cdev, "Allocating %d LL2 buffers of size %08x bytes\n",
+ rx_num_desc, cdev->ll2->rx_size);
+ for (i = 0; i < rx_num_desc; i++) {
+ buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
+ if (!buffer) {
+ DP_INFO(cdev, "Failed to allocate LL2 buffers\n");
+ rc = -ENOMEM;
+ goto err0;
+ }
+
+ rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data,
+ &buffer->phys_addr);
if (rc) {
- DP_INFO(cdev,
- "Failed to initialize the OOO LL2 queue\n");
- goto release_terminate;
+ kfree(buffer);
+ goto err0;
}
- }
- p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
- if (!p_ptt) {
- DP_INFO(cdev, "Failed to acquire PTT\n");
- goto release_terminate;
+ list_add_tail(&buffer->list, &cdev->ll2->list);
}
- rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
- params->ll2_mac_address);
- qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
+ rc = __qed_ll2_start(p_hwfn, params);
if (rc) {
- DP_ERR(cdev, "Failed to allocate LLH filter\n");
- goto release_terminate_all;
+ DP_NOTICE(cdev, "Failed to start LL2\n");
+ goto err0;
}
- ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address);
- return 0;
-
-release_terminate_all:
-
-release_terminate:
- qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
-release_fail:
- qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
-fail:
- qed_ll2_kill_buffers(cdev);
- cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
- return -EINVAL;
-}
-
-static int qed_ll2_stop(struct qed_dev *cdev)
-{
- struct qed_ptt *p_ptt;
- int rc;
-
- if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE)
- return 0;
+ /* In CMT mode, always need to start LL2 on engine 0 for a storage PF,
+ * since broadcast/mutlicast packets are routed to engine 0.
+ */
+ if (b_is_storage_eng1) {
+ rc = __qed_ll2_start(QED_LEADING_HWFN(cdev), params);
+ if (rc) {
+ DP_NOTICE(QED_LEADING_HWFN(cdev),
+ "Failed to start LL2 on engine 0\n");
+ goto err1;
+ }
+ }
- p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
- if (!p_ptt) {
- DP_INFO(cdev, "Failed to acquire PTT\n");
- goto fail;
+ if (QED_IS_ISCSI_PERSONALITY(p_hwfn)) {
+ DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
+ rc = qed_ll2_start_ooo(p_hwfn, params);
+ if (rc) {
+ DP_NOTICE(cdev, "Failed to start OOO LL2\n");
+ goto err2;
+ }
}
- qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt,
- cdev->ll2_mac_address);
- qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
- eth_zero_addr(cdev->ll2_mac_address);
+ rc = qed_llh_add_mac_filter(cdev, 0, params->ll2_mac_address);
+ if (rc) {
+ DP_NOTICE(cdev, "Failed to add an LLH filter\n");
+ goto err3;
+ }
- if (QED_LEADING_HWFN(cdev)->hw_info.personality == QED_PCI_ISCSI)
- qed_ll2_stop_ooo(cdev);
+ ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address);
- rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev),
- cdev->ll2->handle);
- if (rc)
- DP_INFO(cdev, "Failed to terminate LL2 connection\n");
+ return 0;
+err3:
+ if (QED_IS_ISCSI_PERSONALITY(p_hwfn))
+ qed_ll2_stop_ooo(p_hwfn);
+err2:
+ if (b_is_storage_eng1)
+ __qed_ll2_stop(QED_LEADING_HWFN(cdev));
+err1:
+ __qed_ll2_stop(p_hwfn);
+err0:
qed_ll2_kill_buffers(cdev);
-
- qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle);
cdev->ll2->handle = QED_LL2_UNUSED_HANDLE;
-
return rc;
-fail:
- return -EINVAL;
}
static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
unsigned long xmit_flags)
{
+ struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
struct qed_ll2_tx_pkt_info pkt;
const skb_frag_t *frag;
u8 flags = 0, nr_frags;
@@ -2506,7 +2569,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
* routine may run and free the SKB, so no dereferencing the SKB
* beyond this point unless skb has any fragments.
*/
- rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle,
+ rc = qed_ll2_prepare_tx_packet(p_hwfn, cdev->ll2->handle,
&pkt, 1);
if (rc)
goto err;
@@ -2524,13 +2587,13 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
goto err;
}
- rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev),
+ rc = qed_ll2_set_fragment_of_tx_packet(p_hwfn,
cdev->ll2->handle,
mapping,
skb_frag_size(frag));
/* if failed not much to do here, partial packet has been posted
- * we can't free memory, will need to wait for completion.
+ * we can't free memory, will need to wait for completion
*/
if (rc)
goto err2;
@@ -2540,18 +2603,37 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb,
err:
dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE);
-
err2:
return rc;
}
static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats)
{
+ bool b_is_storage_eng1 = qed_ll2_is_storage_eng1(cdev);
+ struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
+ int rc;
+
if (!cdev->ll2)
return -EINVAL;
- return qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
- cdev->ll2->handle, stats);
+ rc = qed_ll2_get_stats(p_hwfn, cdev->ll2->handle, stats);
+ if (rc) {
+ DP_NOTICE(p_hwfn, "Failed to get LL2 stats\n");
+ return rc;
+ }
+
+ /* In CMT mode, LL2 is always started on engine 0 for a storage PF */
+ if (b_is_storage_eng1) {
+ rc = __qed_ll2_get_stats(QED_LEADING_HWFN(cdev),
+ cdev->ll2->handle, stats);
+ if (rc) {
+ DP_NOTICE(QED_LEADING_HWFN(cdev),
+ "Failed to get LL2 stats on engine 0\n");
+ return rc;
+ }
+ }
+
+ return 0;
}
const struct qed_ll2_ops qed_ll2_ops_pass = {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 6de23b56b294..829dd60ab937 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -48,6 +48,7 @@
#include <linux/crc32.h>
#include <linux/qed/qed_if.h>
#include <linux/qed/qed_ll2_if.h>
+#include <net/devlink.h>
#include "qed.h"
#include "qed_sriov.h"
@@ -342,6 +343,107 @@ static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state)
return 0;
}
+struct qed_devlink {
+ struct qed_dev *cdev;
+};
+
+enum qed_devlink_param_id {
+ QED_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+ QED_DEVLINK_PARAM_ID_IWARP_CMT,
+};
+
+static int qed_dl_param_get(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct qed_devlink *qed_dl;
+ struct qed_dev *cdev;
+
+ qed_dl = devlink_priv(dl);
+ cdev = qed_dl->cdev;
+ ctx->val.vbool = cdev->iwarp_cmt;
+
+ return 0;
+}
+
+static int qed_dl_param_set(struct devlink *dl, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct qed_devlink *qed_dl;
+ struct qed_dev *cdev;
+
+ qed_dl = devlink_priv(dl);
+ cdev = qed_dl->cdev;
+ cdev->iwarp_cmt = ctx->val.vbool;
+
+ return 0;
+}
+
+static const struct devlink_param qed_devlink_params[] = {
+ DEVLINK_PARAM_DRIVER(QED_DEVLINK_PARAM_ID_IWARP_CMT,
+ "iwarp_cmt", DEVLINK_PARAM_TYPE_BOOL,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ qed_dl_param_get, qed_dl_param_set, NULL),
+};
+
+static const struct devlink_ops qed_dl_ops;
+
+static int qed_devlink_register(struct qed_dev *cdev)
+{
+ union devlink_param_value value;
+ struct qed_devlink *qed_dl;
+ struct devlink *dl;
+ int rc;
+
+ dl = devlink_alloc(&qed_dl_ops, sizeof(*qed_dl));
+ if (!dl)
+ return -ENOMEM;
+
+ qed_dl = devlink_priv(dl);
+
+ cdev->dl = dl;
+ qed_dl->cdev = cdev;
+
+ rc = devlink_register(dl, &cdev->pdev->dev);
+ if (rc)
+ goto err_free;
+
+ rc = devlink_params_register(dl, qed_devlink_params,
+ ARRAY_SIZE(qed_devlink_params));
+ if (rc)
+ goto err_unregister;
+
+ value.vbool = false;
+ devlink_param_driverinit_value_set(dl,
+ QED_DEVLINK_PARAM_ID_IWARP_CMT,
+ value);
+
+ devlink_params_publish(dl);
+ cdev->iwarp_cmt = false;
+
+ return 0;
+
+err_unregister:
+ devlink_unregister(dl);
+
+err_free:
+ cdev->dl = NULL;
+ devlink_free(dl);
+
+ return rc;
+}
+
+static void qed_devlink_unregister(struct qed_dev *cdev)
+{
+ if (!cdev->dl)
+ return;
+
+ devlink_params_unregister(cdev->dl, qed_devlink_params,
+ ARRAY_SIZE(qed_devlink_params));
+
+ devlink_unregister(cdev->dl);
+ devlink_free(cdev->dl);
+}
+
/* probing */
static struct qed_dev *qed_probe(struct pci_dev *pdev,
struct qed_probe_params *params)
@@ -370,6 +472,12 @@ static struct qed_dev *qed_probe(struct pci_dev *pdev,
}
DP_INFO(cdev, "PCI init completed successfully\n");
+ rc = qed_devlink_register(cdev);
+ if (rc) {
+ DP_INFO(cdev, "Failed to register devlink.\n");
+ goto err2;
+ }
+
rc = qed_hw_prepare(cdev, QED_PCI_DEFAULT);
if (rc) {
DP_ERR(cdev, "hw prepare failed\n");
@@ -399,6 +507,8 @@ static void qed_remove(struct qed_dev *cdev)
qed_set_power_state(cdev, PCI_D3hot);
+ qed_devlink_unregister(cdev);
+
qed_free_cdev(cdev);
}
@@ -1301,26 +1411,21 @@ static u32 qed_sb_init(struct qed_dev *cdev,
{
struct qed_hwfn *p_hwfn;
struct qed_ptt *p_ptt;
- int hwfn_index;
u16 rel_sb_id;
- u8 n_hwfns;
u32 rc;
- /* RoCE uses single engine and CMT uses two engines. When using both
- * we force only a single engine. Storage uses only engine 0 too.
- */
- if (type == QED_SB_TYPE_L2_QUEUE)
- n_hwfns = cdev->num_hwfns;
- else
- n_hwfns = 1;
-
- hwfn_index = sb_id % n_hwfns;
- p_hwfn = &cdev->hwfns[hwfn_index];
- rel_sb_id = sb_id / n_hwfns;
+ /* RoCE/Storage use a single engine in CMT mode while L2 uses both */
+ if (type == QED_SB_TYPE_L2_QUEUE) {
+ p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns];
+ rel_sb_id = sb_id / cdev->num_hwfns;
+ } else {
+ p_hwfn = QED_AFFIN_HWFN(cdev);
+ rel_sb_id = sb_id;
+ }
DP_VERBOSE(cdev, NETIF_MSG_INTR,
"hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n",
- hwfn_index, rel_sb_id, sb_id);
+ IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id);
if (IS_PF(p_hwfn->cdev)) {
p_ptt = qed_ptt_acquire(p_hwfn);
@@ -1339,20 +1444,26 @@ static u32 qed_sb_init(struct qed_dev *cdev,
}
static u32 qed_sb_release(struct qed_dev *cdev,
- struct qed_sb_info *sb_info, u16 sb_id)
+ struct qed_sb_info *sb_info,
+ u16 sb_id,
+ enum qed_sb_type type)
{
struct qed_hwfn *p_hwfn;
- int hwfn_index;
u16 rel_sb_id;
u32 rc;
- hwfn_index = sb_id % cdev->num_hwfns;
- p_hwfn = &cdev->hwfns[hwfn_index];
- rel_sb_id = sb_id / cdev->num_hwfns;
+ /* RoCE/Storage use a single engine in CMT mode while L2 uses both */
+ if (type == QED_SB_TYPE_L2_QUEUE) {
+ p_hwfn = &cdev->hwfns[sb_id % cdev->num_hwfns];
+ rel_sb_id = sb_id / cdev->num_hwfns;
+ } else {
+ p_hwfn = QED_AFFIN_HWFN(cdev);
+ rel_sb_id = sb_id;
+ }
DP_VERBOSE(cdev, NETIF_MSG_INTR,
"hwfn [%d] <--[init]-- SB %04x [0x%04x upper]\n",
- hwfn_index, rel_sb_id, sb_id);
+ IS_LEAD_HWFN(p_hwfn) ? 0 : 1, rel_sb_id, sb_id);
rc = qed_int_sb_release(p_hwfn, sb_info, rel_sb_id);
@@ -2372,6 +2483,11 @@ static int qed_read_module_eeprom(struct qed_dev *cdev, char *buf,
return rc;
}
+static u8 qed_get_affin_hwfn_idx(struct qed_dev *cdev)
+{
+ return QED_AFFIN_HWFN_IDX(cdev);
+}
+
static struct qed_selftest_ops qed_selftest_ops_pass = {
.selftest_memory = &qed_selftest_memory,
.selftest_interrupt = &qed_selftest_interrupt,
@@ -2419,6 +2535,7 @@ const struct qed_common_ops qed_common_ops_pass = {
.db_recovery_add = &qed_db_recovery_add,
.db_recovery_del = &qed_db_recovery_del,
.read_module_eeprom = &qed_read_module_eeprom,
+ .get_affin_hwfn_idx = &qed_get_affin_hwfn_idx,
};
void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index cc27fd60d689..758702c1ce9c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -3685,3 +3685,68 @@ int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT,
features, &mcp_resp, &mcp_param);
}
+
+int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ struct qed_mcp_mb_params mb_params = {0};
+ struct qed_dev *cdev = p_hwfn->cdev;
+ u8 fir_valid, l2_valid;
+ int rc;
+
+ mb_params.cmd = DRV_MSG_CODE_GET_ENGINE_CONFIG;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc)
+ return rc;
+
+ if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+ DP_INFO(p_hwfn,
+ "The get_engine_config command is unsupported by the MFW\n");
+ return -EOPNOTSUPP;
+ }
+
+ fir_valid = QED_MFW_GET_FIELD(mb_params.mcp_param,
+ FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID);
+ if (fir_valid)
+ cdev->fir_affin =
+ QED_MFW_GET_FIELD(mb_params.mcp_param,
+ FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE);
+
+ l2_valid = QED_MFW_GET_FIELD(mb_params.mcp_param,
+ FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID);
+ if (l2_valid)
+ cdev->l2_affin_hint =
+ QED_MFW_GET_FIELD(mb_params.mcp_param,
+ FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE);
+
+ DP_INFO(p_hwfn,
+ "Engine affinity config: FIR={valid %hhd, value %hhd}, L2_hint={valid %hhd, value %hhd}\n",
+ fir_valid, cdev->fir_affin, l2_valid, cdev->l2_affin_hint);
+
+ return 0;
+}
+
+int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+ struct qed_mcp_mb_params mb_params = {0};
+ struct qed_dev *cdev = p_hwfn->cdev;
+ int rc;
+
+ mb_params.cmd = DRV_MSG_CODE_GET_PPFID_BITMAP;
+ rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ if (rc)
+ return rc;
+
+ if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+ DP_INFO(p_hwfn,
+ "The get_ppfid_bitmap command is unsupported by the MFW\n");
+ return -EOPNOTSUPP;
+ }
+
+ cdev->ppfid_bitmap = QED_MFW_GET_FIELD(mb_params.mcp_param,
+ FW_MB_PARAM_PPFID_BITMAP);
+
+ DP_VERBOSE(p_hwfn, QED_MSG_SP, "PPFID bitmap 0x%hhx\n",
+ cdev->ppfid_bitmap);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 261c1a392e2c..e4f8fe4bd062 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -1186,4 +1186,20 @@ void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
*/
int qed_mcp_nvm_info_populate(struct qed_hwfn *p_hwfn);
+/**
+ * @brief Get the engine affinity configuration.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_get_engine_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
+/**
+ * @brief Get the PPFID bitmap.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+int qed_mcp_get_ppfid_bitmap(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
+
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ptp.c b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
index 1302b308bd87..f3ebdc5e8f85 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ptp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ptp.c
@@ -157,7 +157,8 @@ static int qed_ptp_hw_read_tx_ts(struct qed_dev *cdev, u64 *timestamp)
*timestamp = 0;
val = qed_rd(p_hwfn, p_ptt, NIG_REG_TX_LLH_PTP_BUF_SEQID);
if (!(val & QED_TIMESTAMP_MASK)) {
- DP_INFO(p_hwfn, "Invalid Tx timestamp, buf_seqid = %d\n", val);
+ DP_VERBOSE(p_hwfn, QED_MSG_DEBUG,
+ "Invalid Tx timestamp, buf_seqid = %08x\n", val);
return -EINVAL;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 7873d6dfd91f..f900fde448db 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -700,7 +700,7 @@ static int qed_rdma_setup(struct qed_hwfn *p_hwfn,
return rc;
if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
- rc = qed_iwarp_setup(p_hwfn, p_ptt, params);
+ rc = qed_iwarp_setup(p_hwfn, params);
if (rc)
return rc;
} else {
@@ -742,7 +742,7 @@ static int qed_rdma_stop(void *rdma_cxt)
(ll2_ethertype_en & 0xFFFE));
if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
- rc = qed_iwarp_stop(p_hwfn, p_ptt);
+ rc = qed_iwarp_stop(p_hwfn);
if (rc) {
qed_ptt_release(p_hwfn, p_ptt);
return rc;
@@ -803,7 +803,7 @@ static int qed_rdma_add_user(void *rdma_cxt,
dpi_start_offset +
((out_params->dpi) * p_hwfn->dpi_size));
- out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr +
+ out_params->dpi_phys_addr = p_hwfn->db_phys_addr +
dpi_start_offset +
((out_params->dpi) * p_hwfn->dpi_size);
@@ -818,14 +818,17 @@ static struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt)
{
struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port;
+ struct qed_mcp_link_state *p_link_output;
DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n");
- /* Link may have changed */
- p_port->port_state = p_hwfn->mcp_info->link_output.link_up ?
- QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN;
+ /* The link state is saved only for the leading hwfn */
+ p_link_output = &QED_LEADING_HWFN(p_hwfn->cdev)->mcp_info->link_output;
- p_port->link_speed = p_hwfn->mcp_info->link_output.speed;
+ p_port->port_state = p_link_output->link_up ? QED_RDMA_PORT_UP
+ : QED_RDMA_PORT_DOWN;
+
+ p_port->link_speed = p_link_output->speed;
p_port->max_msg_size = RDMA_MAX_DATA_SIZE_IN_WQE;
@@ -870,7 +873,7 @@ static void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod)
static int qed_fill_rdma_dev_info(struct qed_dev *cdev,
struct qed_dev_rdma_info *info)
{
- struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+ struct qed_hwfn *p_hwfn = QED_AFFIN_HWFN(cdev);
memset(info, 0, sizeof(*info));
@@ -889,9 +892,9 @@ static int qed_rdma_get_sb_start(struct qed_dev *cdev)
int feat_num;
if (cdev->num_hwfns > 1)
- feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE);
+ feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE);
else
- feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) *
+ feat_num = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_PF_L2_QUE) *
cdev->num_hwfns;
return feat_num;
@@ -899,7 +902,7 @@ static int qed_rdma_get_sb_start(struct qed_dev *cdev)
static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev)
{
- int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ);
+ int n_cnq = FEAT_NUM(QED_AFFIN_HWFN(cdev), QED_RDMA_CNQ);
int n_msix = cdev->int_params.rdma_msix_cnt;
return min_t(int, n_cnq, n_msix);
@@ -1653,7 +1656,7 @@ static int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid)
static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev)
{
- return QED_LEADING_HWFN(cdev);
+ return QED_AFFIN_HWFN(cdev);
}
static int qed_rdma_modify_srq(void *rdma_cxt,
@@ -1881,7 +1884,7 @@ err:
static int qed_rdma_init(struct qed_dev *cdev,
struct qed_rdma_start_in_params *params)
{
- return qed_rdma_start(QED_LEADING_HWFN(cdev), params);
+ return qed_rdma_start(QED_AFFIN_HWFN(cdev), params);
}
static void qed_rdma_remove_user(void *rdma_cxt, u16 dpi)
@@ -1899,23 +1902,12 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev,
u8 *old_mac_address,
u8 *new_mac_address)
{
- struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
- struct qed_ptt *p_ptt;
int rc = 0;
- p_ptt = qed_ptt_acquire(p_hwfn);
- if (!p_ptt) {
- DP_ERR(cdev,
- "qed roce ll2 mac filter set: failed to acquire PTT\n");
- return -EINVAL;
- }
-
if (old_mac_address)
- qed_llh_remove_mac_filter(p_hwfn, p_ptt, old_mac_address);
+ qed_llh_remove_mac_filter(cdev, 0, old_mac_address);
if (new_mac_address)
- rc = qed_llh_add_mac_filter(p_hwfn, p_ptt, new_mac_address);
-
- qed_ptt_release(p_hwfn, p_ptt);
+ rc = qed_llh_add_mac_filter(cdev, 0, new_mac_address);
if (rc)
DP_ERR(cdev,
@@ -1924,6 +1916,36 @@ static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev,
return rc;
}
+static int qed_iwarp_set_engine_affin(struct qed_dev *cdev, bool b_reset)
+{
+ enum qed_eng eng;
+ u8 ppfid = 0;
+ int rc;
+
+ /* Make sure iwarp cmt mode is enabled before setting affinity */
+ if (!cdev->iwarp_cmt)
+ return -EINVAL;
+
+ if (b_reset)
+ eng = QED_BOTH_ENG;
+ else
+ eng = cdev->l2_affin_hint ? QED_ENG1 : QED_ENG0;
+
+ rc = qed_llh_set_ppfid_affinity(cdev, ppfid, eng);
+ if (rc) {
+ DP_NOTICE(cdev,
+ "Failed to set the engine affinity of ppfid %d\n",
+ ppfid);
+ return rc;
+ }
+
+ DP_VERBOSE(cdev, (QED_MSG_RDMA | QED_MSG_SP),
+ "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+ eng);
+
+ return 0;
+}
+
static const struct qed_rdma_ops qed_rdma_ops_pass = {
.common = &qed_common_ops_pass,
.fill_dev_info = &qed_fill_rdma_dev_info,
@@ -1963,6 +1985,7 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
.ll2_set_fragment_of_tx_packet = &qed_ll2_set_fragment_of_tx_packet,
.ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter,
.ll2_get_stats = &qed_ll2_get_stats,
+ .iwarp_set_engine_affin = &qed_iwarp_set_engine_affin,
.iwarp_connect = &qed_iwarp_connect,
.iwarp_create_listen = &qed_iwarp_create_listen,
.iwarp_destroy_listen = &qed_iwarp_destroy_listen,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index 5ce825ca5f24..60f850c3bdd6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -254,6 +254,10 @@
0x500840UL
#define NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR \
0x50196cUL
+#define NIG_REG_LLH_PPFID2PFID_TBL_0 \
+ 0x501970UL
+#define NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL \
+ 0x50
#define NIG_REG_LLH_CLS_TYPE_DUALMODE \
0x501964UL
#define NIG_REG_LLH_FUNC_TAG_EN 0x5019b0UL
@@ -1626,6 +1630,8 @@
#define PHY_PCIE_REG_PHY1_K2_E5 \
0x624000UL
#define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL
+#define NIG_REG_PPF_TO_ENGINE_SEL 0x508900UL
+#define NIG_REG_PPF_TO_ENGINE_SEL_SIZE 8
#define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL
#define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL
#define DORQ_REG_PF_DPM_ENABLE 0x100510UL
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index 2f318aaf2b05..78f77b712b10 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -917,10 +917,11 @@ static u8 qed_iov_alloc_vf_igu_sbs(struct qed_hwfn *p_hwfn,
/* Configure igu sb in CAU which were marked valid */
qed_init_cau_sb_entry(p_hwfn, &sb_entry,
p_hwfn->rel_pf_id, vf->abs_vf_id, 1);
+
qed_dmae_host2grc(p_hwfn, p_ptt,
(u64)(uintptr_t)&sb_entry,
CAU_REG_SB_VAR_MEMORY +
- p_block->igu_sb_id * sizeof(u64), 2, 0);
+ p_block->igu_sb_id * sizeof(u64), 2, NULL);
}
vf->num_sbs = (u8) num_rx_queues;
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 92fe226980fd..b972ab07c18b 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -92,6 +92,7 @@ struct qede_stats_common {
u64 non_coalesced_pkts;
u64 coalesced_bytes;
u64 link_change_count;
+ u64 ptp_skip_txts;
/* port */
u64 rx_64_byte_packets;
@@ -189,6 +190,7 @@ struct qede_dev {
const struct qed_eth_ops *ops;
struct qede_ptp *ptp;
+ u64 ptp_skip_txts;
struct qed_dev_eth_info dev_info;
#define QEDE_MAX_RSS_CNT(edev) ((edev)->dev_info.num_queues)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 8911a97ab0ca..e85f9fef930c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -174,6 +174,7 @@ static const struct {
QEDE_STAT(coalesced_bytes),
QEDE_STAT(link_change_count),
+ QEDE_STAT(ptp_skip_txts),
};
#define QEDE_NUM_STATS ARRAY_SIZE(qede_stats_arr)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 02a97c659e29..741377b7c00d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -390,6 +390,7 @@ void qede_fill_by_demand_stats(struct qede_dev *edev)
p_common->brb_discards = stats.common.brb_discards;
p_common->tx_mac_ctrl_frames = stats.common.tx_mac_ctrl_frames;
p_common->link_change_count = stats.common.link_change_count;
+ p_common->ptp_skip_txts = edev->ptp_skip_txts;
if (QEDE_IS_BB(edev)) {
struct qede_stats_bb *p_bb = &edev->stats.bb;
@@ -1306,7 +1307,8 @@ static void qede_free_mem_sb(struct qede_dev *edev, struct qed_sb_info *sb_info,
u16 sb_id)
{
if (sb_info->sb_virt) {
- edev->ops->common->sb_release(edev->cdev, sb_info, sb_id);
+ edev->ops->common->sb_release(edev->cdev, sb_info, sb_id,
+ QED_SB_TYPE_L2_QUEUE);
dma_free_coherent(&edev->pdev->dev, sizeof(*sb_info->sb_virt),
(void *)sb_info->sb_virt, sb_info->sb_phys);
memset(sb_info, 0, sizeof(*sb_info));
@@ -2231,6 +2233,8 @@ out:
if (mode != QEDE_UNLOAD_RECOVERY)
DP_NOTICE(edev, "Link is down\n");
+ edev->ptp_skip_txts = 0;
+
DP_INFO(edev, "Ending qede unload\n");
}
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
index bddb2b5982dc..f815435cf106 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
@@ -30,6 +30,7 @@
* SOFTWARE.
*/
#include "qede_ptp.h"
+#define QEDE_PTP_TX_TIMEOUT (2 * HZ)
struct qede_ptp {
const struct qed_eth_ptp_ops *ops;
@@ -38,6 +39,7 @@ struct qede_ptp {
struct timecounter tc;
struct ptp_clock *clock;
struct work_struct work;
+ unsigned long ptp_tx_start;
struct qede_dev *edev;
struct sk_buff *tx_skb;
@@ -160,18 +162,30 @@ static void qede_ptp_task(struct work_struct *work)
struct qede_dev *edev;
struct qede_ptp *ptp;
u64 timestamp, ns;
+ bool timedout;
int rc;
ptp = container_of(work, struct qede_ptp, work);
edev = ptp->edev;
+ timedout = time_is_before_jiffies(ptp->ptp_tx_start +
+ QEDE_PTP_TX_TIMEOUT);
/* Read Tx timestamp registers */
spin_lock_bh(&ptp->lock);
rc = ptp->ops->read_tx_ts(edev->cdev, &timestamp);
spin_unlock_bh(&ptp->lock);
if (rc) {
- /* Reschedule to keep checking for a valid timestamp value */
- schedule_work(&ptp->work);
+ if (unlikely(timedout)) {
+ DP_INFO(edev, "Tx timestamp is not recorded\n");
+ dev_kfree_skb_any(ptp->tx_skb);
+ ptp->tx_skb = NULL;
+ clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS,
+ &edev->flags);
+ edev->ptp_skip_txts++;
+ } else {
+ /* Reschedule to keep checking for a valid TS value */
+ schedule_work(&ptp->work);
+ }
return;
}
@@ -514,19 +528,28 @@ void qede_ptp_tx_ts(struct qede_dev *edev, struct sk_buff *skb)
if (!ptp)
return;
- if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags))
+ if (test_and_set_bit_lock(QEDE_FLAGS_PTP_TX_IN_PRORGESS,
+ &edev->flags)) {
+ DP_ERR(edev, "Timestamping in progress\n");
+ edev->ptp_skip_txts++;
return;
+ }
if (unlikely(!test_bit(QEDE_FLAGS_TX_TIMESTAMPING_EN, &edev->flags))) {
- DP_NOTICE(edev,
- "Tx timestamping was not enabled, this packet will not be timestamped\n");
+ DP_ERR(edev,
+ "Tx timestamping was not enabled, this packet will not be timestamped\n");
+ clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags);
+ edev->ptp_skip_txts++;
} else if (unlikely(ptp->tx_skb)) {
- DP_NOTICE(edev,
- "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n");
+ DP_ERR(edev,
+ "The device supports only a single outstanding packet to timestamp, this packet will not be timestamped\n");
+ clear_bit_unlock(QEDE_FLAGS_PTP_TX_IN_PRORGESS, &edev->flags);
+ edev->ptp_skip_txts++;
} else {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
/* schedule check for Tx timestamp */
ptp->tx_skb = skb_get(skb);
+ ptp->ptp_tx_start = jiffies;
schedule_work(&ptp->work);
}
}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 884f1f52dcc2..991d7e285736 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -12,6 +12,7 @@
#ifndef _RMNET_MAP_H_
#define _RMNET_MAP_H_
+#include <linux/if_rmnet.h>
struct rmnet_map_control_command {
u8 command_name;
@@ -39,30 +40,6 @@ enum rmnet_map_commands {
RMNET_MAP_COMMAND_ENUM_LENGTH
};
-struct rmnet_map_header {
- u8 pad_len:6;
- u8 reserved_bit:1;
- u8 cd_bit:1;
- u8 mux_id;
- __be16 pkt_len;
-} __aligned(1);
-
-struct rmnet_map_dl_csum_trailer {
- u8 reserved1;
- u8 valid:1;
- u8 reserved2:7;
- u16 csum_start_offset;
- u16 csum_length;
- __be16 csum_value;
-} __aligned(1);
-
-struct rmnet_map_ul_csum_header {
- __be16 csum_start_offset;
- u16 csum_insert_offset:14;
- u16 udp_ip4_ind:1;
- u16 csum_enabled:1;
-} __aligned(1);
-
#define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
(Y)->data)->mux_id)
#define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index d06a61f00e78..962ef3d8c507 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -81,7 +81,7 @@ static const int multicast_filter_limit = 32;
#define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg))
enum mac_version {
- RTL_GIGA_MAC_VER_01 = 0,
+ /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
RTL_GIGA_MAC_VER_02,
RTL_GIGA_MAC_VER_03,
RTL_GIGA_MAC_VER_04,
@@ -132,7 +132,7 @@ enum mac_version {
RTL_GIGA_MAC_VER_49,
RTL_GIGA_MAC_VER_50,
RTL_GIGA_MAC_VER_51,
- RTL_GIGA_MAC_NONE = 0xff,
+ RTL_GIGA_MAC_NONE
};
#define JUMBO_1K ETH_DATA_LEN
@@ -146,7 +146,6 @@ static const struct {
const char *fw_name;
} rtl_chip_infos[] = {
/* PCI devices. */
- [RTL_GIGA_MAC_VER_01] = {"RTL8169" },
[RTL_GIGA_MAC_VER_02] = {"RTL8169s" },
[RTL_GIGA_MAC_VER_03] = {"RTL8110s" },
[RTL_GIGA_MAC_VER_04] = {"RTL8169sb/8110sb" },
@@ -406,8 +405,6 @@ enum rtl_register_content {
RxOK = 0x0001,
/* RxStatusDesc */
- RxBOVF = (1 << 24),
- RxFOVF = (1 << 23),
RxRWT = (1 << 22),
RxRES = (1 << 21),
RxRUNT = (1 << 20),
@@ -503,9 +500,6 @@ enum rtl_register_content {
LinkStatus = 0x02,
FullDup = 0x01,
- /* _TBICSRBit */
- TBILinkOK = 0x02000000,
-
/* ResetCounterCommand */
CounterReset = 0x1,
@@ -632,6 +626,10 @@ struct rtl8169_stats {
struct u64_stats_sync syncp;
};
+struct rtl8169_private;
+typedef void (*rtl_fw_write_t)(struct rtl8169_private *tp, int reg, int val);
+typedef int (*rtl_fw_read_t)(struct rtl8169_private *tp, int reg);
+
struct rtl8169_private {
void __iomem *mmio_addr; /* memory map physical address */
struct pci_dev *pci_dev;
@@ -639,7 +637,7 @@ struct rtl8169_private {
struct phy_device *phydev;
struct napi_struct napi;
u32 msg_enable;
- u16 mac_version;
+ enum mac_version mac_version;
u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
u32 dirty_tx;
@@ -685,6 +683,10 @@ struct rtl8169_private {
const char *fw_name;
struct rtl_fw {
+ rtl_fw_write_t phy_write;
+ rtl_fw_read_t phy_read;
+ rtl_fw_write_t mac_mcu_write;
+ rtl_fw_read_t mac_mcu_read;
const struct firmware *fw;
#define RTL_VER_SIZE 32
@@ -1015,7 +1017,7 @@ static int r8168dp_2_mdio_read(struct rtl8169_private *tp, int reg)
return value;
}
-static void rtl_writephy(struct rtl8169_private *tp, int location, u32 val)
+static void rtl_writephy(struct rtl8169_private *tp, int location, int val)
{
tp->mdio_ops.write(tp, location, val);
}
@@ -1424,7 +1426,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
}
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_17:
+ case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_17:
options = RTL_R8(tp, Config1) & ~PMEnable;
if (wolopts)
options |= PMEnable;
@@ -2252,7 +2254,6 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp)
{ 0xfc8, 0x100, RTL_GIGA_MAC_VER_04 },
{ 0xfc8, 0x040, RTL_GIGA_MAC_VER_03 },
{ 0xfc8, 0x008, RTL_GIGA_MAC_VER_02 },
- { 0xfc8, 0x000, RTL_GIGA_MAC_VER_01 },
/* Catch-all */
{ 0x000, 0x000, RTL_GIGA_MAC_NONE }
@@ -2321,50 +2322,45 @@ static bool rtl_fw_format_ok(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
const struct firmware *fw = rtl_fw->fw;
struct fw_info *fw_info = (struct fw_info *)fw->data;
struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
- char *version = rtl_fw->version;
- bool rc = false;
if (fw->size < FW_OPCODE_SIZE)
- goto out;
+ return false;
if (!fw_info->magic) {
size_t i, size, start;
u8 checksum = 0;
if (fw->size < sizeof(*fw_info))
- goto out;
+ return false;
for (i = 0; i < fw->size; i++)
checksum += fw->data[i];
if (checksum != 0)
- goto out;
+ return false;
start = le32_to_cpu(fw_info->fw_start);
if (start > fw->size)
- goto out;
+ return false;
size = le32_to_cpu(fw_info->fw_len);
if (size > (fw->size - start) / FW_OPCODE_SIZE)
- goto out;
+ return false;
- memcpy(version, fw_info->version, RTL_VER_SIZE);
+ strscpy(rtl_fw->version, fw_info->version, RTL_VER_SIZE);
pa->code = (__le32 *)(fw->data + start);
pa->size = size;
} else {
if (fw->size % FW_OPCODE_SIZE)
- goto out;
+ return false;
- strlcpy(version, tp->fw_name, RTL_VER_SIZE);
+ strscpy(rtl_fw->version, tp->fw_name, RTL_VER_SIZE);
pa->code = (__le32 *)fw->data;
pa->size = fw->size / FW_OPCODE_SIZE;
}
- version[RTL_VER_SIZE - 1] = 0;
- rc = true;
-out:
- return rc;
+ return true;
}
static bool rtl_fw_data_ok(struct rtl8169_private *tp, struct net_device *dev,
@@ -2439,17 +2435,15 @@ out:
return rc;
}
-static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
+static void rtl_fw_write_firmware(struct rtl8169_private *tp,
+ struct rtl_fw *rtl_fw)
{
struct rtl_fw_phy_action *pa = &rtl_fw->phy_action;
- struct mdio_ops org, *ops = &tp->mdio_ops;
- u32 predata, count;
+ rtl_fw_write_t fw_write = rtl_fw->phy_write;
+ rtl_fw_read_t fw_read = rtl_fw->phy_read;
+ int predata = 0, count = 0;
size_t index;
- predata = count = 0;
- org.write = ops->write;
- org.read = ops->read;
-
for (index = 0; index < pa->size; ) {
u32 action = le32_to_cpu(pa->code[index]);
u32 data = action & 0x0000ffff;
@@ -2460,7 +2454,7 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
switch(action & 0xf0000000) {
case PHY_READ:
- predata = rtl_readphy(tp, regno);
+ predata = fw_read(tp, regno);
count++;
index++;
break;
@@ -2477,11 +2471,11 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
break;
case PHY_MDIO_CHG:
if (data == 0) {
- ops->write = org.write;
- ops->read = org.read;
+ fw_write = rtl_fw->phy_write;
+ fw_read = rtl_fw->phy_read;
} else if (data == 1) {
- ops->write = mac_mcu_write;
- ops->read = mac_mcu_read;
+ fw_write = rtl_fw->mac_mcu_write;
+ fw_read = rtl_fw->mac_mcu_read;
}
index++;
@@ -2491,7 +2485,7 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
index++;
break;
case PHY_WRITE:
- rtl_writephy(tp, regno, data);
+ fw_write(tp, regno, data);
index++;
break;
case PHY_READCOUNT_EQ_SKIP:
@@ -2508,7 +2502,7 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
index++;
break;
case PHY_WRITE_PREVIOUS:
- rtl_writephy(tp, regno, predata);
+ fw_write(tp, regno, predata);
index++;
break;
case PHY_SKIPN:
@@ -2523,9 +2517,6 @@ static void rtl_phy_write_fw(struct rtl8169_private *tp, struct rtl_fw *rtl_fw)
BUG();
}
}
-
- ops->write = org.write;
- ops->read = org.read;
}
static void rtl_release_firmware(struct rtl8169_private *tp)
@@ -2539,9 +2530,9 @@ static void rtl_release_firmware(struct rtl8169_private *tp)
static void rtl_apply_firmware(struct rtl8169_private *tp)
{
- /* TODO: release firmware once rtl_phy_write_fw signals failures. */
+ /* TODO: release firmware if rtl_fw_write_firmware signals failure. */
if (tp->rtl_fw)
- rtl_phy_write_fw(tp, tp->rtl_fw);
+ rtl_fw_write_firmware(tp, tp->rtl_fw);
}
static void rtl_apply_firmware_cond(struct rtl8169_private *tp, u8 reg, u16 val)
@@ -2910,50 +2901,59 @@ static void rtl8168c_4_hw_phy_config(struct rtl8169_private *tp)
rtl8168c_3_hw_phy_config(tp);
}
-static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init_0[] = {
- /* Channel Estimation */
- { 0x1f, 0x0001 },
- { 0x06, 0x4064 },
- { 0x07, 0x2863 },
- { 0x08, 0x059c },
- { 0x09, 0x26b4 },
- { 0x0a, 0x6a19 },
- { 0x0b, 0xdcc8 },
- { 0x10, 0xf06d },
- { 0x14, 0x7f68 },
- { 0x18, 0x7fd9 },
- { 0x1c, 0xf0ff },
- { 0x1d, 0x3d9c },
- { 0x1f, 0x0003 },
- { 0x12, 0xf49f },
- { 0x13, 0x070b },
- { 0x1a, 0x05ad },
- { 0x14, 0x94c0 },
+static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = {
+ /* Channel Estimation */
+ { 0x1f, 0x0001 },
+ { 0x06, 0x4064 },
+ { 0x07, 0x2863 },
+ { 0x08, 0x059c },
+ { 0x09, 0x26b4 },
+ { 0x0a, 0x6a19 },
+ { 0x0b, 0xdcc8 },
+ { 0x10, 0xf06d },
+ { 0x14, 0x7f68 },
+ { 0x18, 0x7fd9 },
+ { 0x1c, 0xf0ff },
+ { 0x1d, 0x3d9c },
+ { 0x1f, 0x0003 },
+ { 0x12, 0xf49f },
+ { 0x13, 0x070b },
+ { 0x1a, 0x05ad },
+ { 0x14, 0x94c0 },
- /*
- * Tx Error Issue
- * Enhance line driver power
- */
- { 0x1f, 0x0002 },
- { 0x06, 0x5561 },
- { 0x1f, 0x0005 },
- { 0x05, 0x8332 },
- { 0x06, 0x5561 },
+ /*
+ * Tx Error Issue
+ * Enhance line driver power
+ */
+ { 0x1f, 0x0002 },
+ { 0x06, 0x5561 },
+ { 0x1f, 0x0005 },
+ { 0x05, 0x8332 },
+ { 0x06, 0x5561 },
- /*
- * Can not link to 1Gbps with bad cable
- * Decrease SNR threshold form 21.07dB to 19.04dB
- */
- { 0x1f, 0x0001 },
- { 0x17, 0x0cc0 },
+ /*
+ * Can not link to 1Gbps with bad cable
+ * Decrease SNR threshold form 21.07dB to 19.04dB
+ */
+ { 0x1f, 0x0001 },
+ { 0x17, 0x0cc0 },
- { 0x1f, 0x0000 },
- { 0x0d, 0xf880 }
- };
+ { 0x1f, 0x0000 },
+ { 0x0d, 0xf880 }
+};
- rtl_writephy_batch(tp, phy_reg_init_0);
+static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = {
+ { 0x1f, 0x0002 },
+ { 0x05, 0x669a },
+ { 0x1f, 0x0005 },
+ { 0x05, 0x8330 },
+ { 0x06, 0x669a },
+ { 0x1f, 0x0002 }
+};
+
+static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
+{
+ rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0);
/*
* Rx Error Issue
@@ -2964,17 +2964,9 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
rtl_w0w1_phy(tp, 0x0c, 0xa200, 0x5d00);
if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0002 },
- { 0x05, 0x669a },
- { 0x1f, 0x0005 },
- { 0x05, 0x8330 },
- { 0x06, 0x669a },
- { 0x1f, 0x0002 }
- };
int val;
- rtl_writephy_batch(tp, phy_reg_init);
+ rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1);
val = rtl_readphy(tp, 0x0d);
@@ -3023,62 +3015,12 @@ static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp)
{
- static const struct phy_reg phy_reg_init_0[] = {
- /* Channel Estimation */
- { 0x1f, 0x0001 },
- { 0x06, 0x4064 },
- { 0x07, 0x2863 },
- { 0x08, 0x059c },
- { 0x09, 0x26b4 },
- { 0x0a, 0x6a19 },
- { 0x0b, 0xdcc8 },
- { 0x10, 0xf06d },
- { 0x14, 0x7f68 },
- { 0x18, 0x7fd9 },
- { 0x1c, 0xf0ff },
- { 0x1d, 0x3d9c },
- { 0x1f, 0x0003 },
- { 0x12, 0xf49f },
- { 0x13, 0x070b },
- { 0x1a, 0x05ad },
- { 0x14, 0x94c0 },
-
- /*
- * Tx Error Issue
- * Enhance line driver power
- */
- { 0x1f, 0x0002 },
- { 0x06, 0x5561 },
- { 0x1f, 0x0005 },
- { 0x05, 0x8332 },
- { 0x06, 0x5561 },
-
- /*
- * Can not link to 1Gbps with bad cable
- * Decrease SNR threshold form 21.07dB to 19.04dB
- */
- { 0x1f, 0x0001 },
- { 0x17, 0x0cc0 },
-
- { 0x1f, 0x0000 },
- { 0x0d, 0xf880 }
- };
-
- rtl_writephy_batch(tp, phy_reg_init_0);
+ rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0);
if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0002 },
- { 0x05, 0x669a },
- { 0x1f, 0x0005 },
- { 0x05, 0x8330 },
- { 0x06, 0x669a },
-
- { 0x1f, 0x0002 }
- };
int val;
- rtl_writephy_batch(tp, phy_reg_init);
+ rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1);
val = rtl_readphy(tp, 0x0d);
if ((val & 0x00ff) != 0x006c) {
@@ -3985,7 +3927,6 @@ static void rtl_hw_phy_config(struct net_device *dev)
{
static const rtl_generic_fct phy_configs[] = {
/* PCI devices. */
- [RTL_GIGA_MAC_VER_01] = NULL,
[RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config,
[RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config,
[RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config,
@@ -4050,12 +3991,6 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
schedule_work(&tp->wk.work);
}
-static bool rtl_tbi_enabled(struct rtl8169_private *tp)
-{
- return (tp->mac_version == RTL_GIGA_MAC_VER_01) &&
- (RTL_R8(tp, PHYstatus) & TBI_Enable);
-}
-
static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
{
rtl_hw_phy_config(dev);
@@ -4168,7 +4103,7 @@ static void rtl_wol_suspend_quirk(struct rtl8169_private *tp)
}
}
-static void r8168_pll_power_down(struct rtl8169_private *tp)
+static void rtl_pll_power_down(struct rtl8169_private *tp)
{
if (r8168_check_dash(tp))
return;
@@ -4203,10 +4138,12 @@ static void r8168_pll_power_down(struct rtl8169_private *tp)
rtl_eri_clear_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000);
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
+ default:
+ break;
}
}
-static void r8168_pll_power_up(struct rtl8169_private *tp)
+static void rtl_pll_power_up(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_33:
@@ -4230,6 +4167,8 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
rtl_eri_set_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000);
break;
+ default:
+ break;
}
phy_resume(tp->phydev);
@@ -4237,32 +4176,10 @@ static void r8168_pll_power_up(struct rtl8169_private *tp)
msleep(20);
}
-static void rtl_pll_power_down(struct rtl8169_private *tp)
-{
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
- case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
- break;
- default:
- r8168_pll_power_down(tp);
- }
-}
-
-static void rtl_pll_power_up(struct rtl8169_private *tp)
-{
- switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
- case RTL_GIGA_MAC_VER_13 ... RTL_GIGA_MAC_VER_15:
- break;
- default:
- r8168_pll_power_up(tp);
- }
-}
-
static void rtl_init_rxcfg(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+ case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
RTL_W32(tp, RxConfig, RX_FIFO_THRESH | RX_DMA_BURST);
break;
@@ -4445,6 +4362,11 @@ static void rtl_request_firmware(struct rtl8169_private *tp)
if (!rtl_fw)
goto err_warn;
+ rtl_fw->phy_write = rtl_writephy;
+ rtl_fw->phy_read = rtl_readphy;
+ rtl_fw->mac_mcu_write = mac_mcu_write;
+ rtl_fw->mac_mcu_read = mac_mcu_read;
+
rc = request_firmware(&rtl_fw->fw, tp->fw_name, tp_to_dev(tp));
if (rc < 0)
goto err_free;
@@ -6264,14 +6186,8 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
dev->stats.rx_length_errors++;
if (status & RxCRC)
dev->stats.rx_crc_errors++;
- /* RxFOVF is a reserved bit on later chip versions */
- if (tp->mac_version == RTL_GIGA_MAC_VER_01 &&
- status & RxFOVF) {
- rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
- dev->stats.rx_fifo_errors++;
- } else if (status & (RxRUNT | RxCRC) &&
- !(status & RxRWT) &&
- dev->features & NETIF_F_RXALL) {
+ if (status & (RxRUNT | RxCRC) && !(status & RxRWT) &&
+ dev->features & NETIF_F_RXALL) {
goto process_pkt;
}
} else {
@@ -6451,7 +6367,10 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
if (ret)
return ret;
- if (!tp->supports_gmii)
+ if (tp->supports_gmii)
+ phy_remove_link_mode(phydev,
+ ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
+ else
phy_set_max_speed(phydev, SPEED_100);
phy_support_asym_pause(phydev);
@@ -7046,25 +6965,18 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
data |= (1 << 15);
r8168_mac_ocp_write(tp, 0xe8de, data);
- if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
- return;
-}
-
-static void rtl_hw_init_8168ep(struct rtl8169_private *tp)
-{
- rtl8168ep_stop_cmac(tp);
- rtl_hw_init_8168g(tp);
+ rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
}
static void rtl_hw_initialize(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
+ case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
+ rtl8168ep_stop_cmac(tp);
+ /* fall through */
case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48:
rtl_hw_init_8168g(tp);
break;
- case RTL_GIGA_MAC_VER_49 ... RTL_GIGA_MAC_VER_51:
- rtl_hw_init_8168ep(tp);
- break;
default:
break;
}
@@ -7074,7 +6986,7 @@ static void rtl_hw_initialize(struct rtl8169_private *tp)
static bool rtl_chip_supports_csum_v2(struct rtl8169_private *tp)
{
switch (tp->mac_version) {
- case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+ case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
return false;
default:
@@ -7090,7 +7002,7 @@ static int rtl_jumbo_max(struct rtl8169_private *tp)
switch (tp->mac_version) {
/* RTL8169 */
- case RTL_GIGA_MAC_VER_01 ... RTL_GIGA_MAC_VER_06:
+ case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
return JUMBO_7K;
/* RTL8168b */
case RTL_GIGA_MAC_VER_11:
@@ -7204,11 +7116,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (tp->mac_version == RTL_GIGA_MAC_NONE)
return -ENODEV;
- if (rtl_tbi_enabled(tp)) {
- dev_err(&pdev->dev, "TBI fiber mode not supported\n");
- return -ENODEV;
- }
-
tp->cp_cmd = RTL_R16(tp, CPlusCmd);
if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 &&
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 06545d7399fc..0b5c8d74c683 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -13,6 +13,16 @@ config STMMAC_ETH
if STMMAC_ETH
+config STMMAC_SELFTESTS
+ bool "Support for STMMAC Selftests"
+ depends on INET
+ depends on STMMAC_ETH
+ default n
+ ---help---
+ This adds support for STMMAC Selftests using ethtool. Enable this
+ feature if you are facing problems with your HW and submit the test
+ results to the netdev Mailing List.
+
config STMMAC_PLATFORM
tristate "STMMAC Platform bus support"
depends on STMMAC_ETH
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index c529c21e9bdd..c59926d96bcc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -8,6 +8,8 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o \
stmmac_tc.o dwxgmac2_core.o dwxgmac2_dma.o dwxgmac2_descs.o \
$(stmmac-y)
+stmmac-$(CONFIG_STMMAC_SELFTESTS) += stmmac_selftests.o
+
# Ordering matters. Generic driver must be last.
obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o
obj-$(CONFIG_DWMAC_ANARION) += dwmac-anarion.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 272b9ca66314..1961fe9144ca 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -424,6 +424,7 @@ struct mac_device_info {
const struct stmmac_mode_ops *mode;
const struct stmmac_hwtimestamp *ptp;
const struct stmmac_tc_ops *tc;
+ const struct stmmac_mmc_ops *mmc;
struct mii_regs mii; /* MII register Addresses */
struct mac_link link;
void __iomem *pcsr; /* vpointer to device CSRs */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index 7fdd1760a74c..5ae474ebaaed 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -1,14 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Amlogic Meson6 and Meson8 DWMAC glue layer
*
* Copyright (C) 2014 Beniamino Galvani <b.galvani@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/device.h>
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index c5979569fd60..c06295ec1ef0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -1,14 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Amlogic Meson8b, Meson8m2 and GXBB DWMAC glue layer
*
* Copyright (C) 2016 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/clk.h>
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index ba124a4da793..3c7b779dcd4e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -986,6 +986,18 @@ static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv)
regulator_disable(gmac->regulator);
}
+static void sun8i_dwmac_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+ u32 value = readl(ioaddr + EMAC_BASIC_CTL0);
+
+ if (enable)
+ value |= EMAC_LOOPBACK;
+ else
+ value &= ~EMAC_LOOPBACK;
+
+ writel(value, ioaddr + EMAC_BASIC_CTL0);
+}
+
static const struct stmmac_ops sun8i_dwmac_ops = {
.core_init = sun8i_dwmac_core_init,
.set_mac = sun8i_dwmac_set_mac,
@@ -995,6 +1007,7 @@ static const struct stmmac_ops sun8i_dwmac_ops = {
.flow_ctrl = sun8i_dwmac_flow_ctrl,
.set_umac_addr = sun8i_dwmac_set_umac_addr,
.get_umac_addr = sun8i_dwmac_get_umac_addr,
+ .set_mac_loopback = sun8i_dwmac_set_mac_loopback,
};
static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 184ca13c8f79..56a69fb6f0b9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -146,6 +146,7 @@ enum inter_frame_gap {
#define GMAC_FRAME_FILTER_DAIF 0x00000008 /* DA Inverse Filtering */
#define GMAC_FRAME_FILTER_PM 0x00000010 /* Pass all multicast */
#define GMAC_FRAME_FILTER_DBF 0x00000020 /* Disable Broadcast frames */
+#define GMAC_FRAME_FILTER_PCF 0x00000080 /* Pass Control frames */
#define GMAC_FRAME_FILTER_SAIF 0x00000100 /* Inverse Filtering */
#define GMAC_FRAME_FILTER_SAF 0x00000200 /* Source Address Filter */
#define GMAC_FRAME_FILTER_HPF 0x00000400 /* Hash or perfect Filter */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 0877bde6e860..ebe41dd09bab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -172,7 +172,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
memset(mc_filter, 0, sizeof(mc_filter));
if (dev->flags & IFF_PROMISC) {
- value = GMAC_FRAME_FILTER_PR;
+ value = GMAC_FRAME_FILTER_PR | GMAC_FRAME_FILTER_PCF;
} else if (dev->flags & IFF_ALLMULTI) {
value = GMAC_FRAME_FILTER_PM; /* pass all multi */
} else if (!netdev_mc_empty(dev)) {
@@ -198,6 +198,7 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
}
}
+ value |= GMAC_FRAME_FILTER_HPF;
dwmac1000_set_mchash(ioaddr, mc_filter, mcbitslog2);
/* Handle multiple unicast addresses (perfect filtering) */
@@ -216,6 +217,12 @@ static void dwmac1000_set_filter(struct mac_device_info *hw,
GMAC_ADDR_LOW(reg));
reg++;
}
+
+ while (reg <= perfect_addr_number) {
+ writel(0, ioaddr + GMAC_ADDR_HIGH(reg));
+ writel(0, ioaddr + GMAC_ADDR_LOW(reg));
+ reg++;
+ }
}
#ifdef FRAME_FILTER_DEBUG
@@ -499,6 +506,18 @@ static void dwmac1000_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
x->mac_gmii_rx_proto_engine++;
}
+static void dwmac1000_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+ u32 value = readl(ioaddr + GMAC_CONTROL);
+
+ if (enable)
+ value |= GMAC_CONTROL_LM;
+ else
+ value &= ~GMAC_CONTROL_LM;
+
+ writel(value, ioaddr + GMAC_CONTROL);
+}
+
const struct stmmac_ops dwmac1000_ops = {
.core_init = dwmac1000_core_init,
.set_mac = stmmac_set_mac,
@@ -518,6 +537,7 @@ const struct stmmac_ops dwmac1000_ops = {
.pcs_ctrl_ane = dwmac1000_ctrl_ane,
.pcs_rane = dwmac1000_rane,
.pcs_get_adv_lp = dwmac1000_get_adv_lp,
+ .set_mac_loopback = dwmac1000_set_mac_loopback,
};
int dwmac1000_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index b735143987e1..d621b5189c41 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -160,6 +160,18 @@ static void dwmac100_pmt(struct mac_device_info *hw, unsigned long mode)
return;
}
+static void dwmac100_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+ u32 value = readl(ioaddr + MAC_CONTROL);
+
+ if (enable)
+ value |= MAC_CONTROL_OM;
+ else
+ value &= ~MAC_CONTROL_OM;
+
+ writel(value, ioaddr + MAC_CONTROL);
+}
+
const struct stmmac_ops dwmac100_ops = {
.core_init = dwmac100_core_init,
.set_mac = stmmac_set_mac,
@@ -171,6 +183,7 @@ const struct stmmac_ops dwmac100_ops = {
.pmt = dwmac100_pmt,
.set_umac_addr = dwmac100_set_umac_addr,
.get_umac_addr = dwmac100_get_umac_addr,
+ .set_mac_loopback = dwmac100_set_mac_loopback,
};
int dwmac100_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index eb013d54025a..01c10893b7a5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -64,6 +64,8 @@
#define GMAC_PACKET_FILTER_PR BIT(0)
#define GMAC_PACKET_FILTER_HMC BIT(2)
#define GMAC_PACKET_FILTER_PM BIT(4)
+#define GMAC_PACKET_FILTER_PCF BIT(7)
+#define GMAC_PACKET_FILTER_HPF BIT(10)
#define GMAC_MAX_PERFECT_ADDRESSES 128
@@ -160,6 +162,7 @@ enum power_event {
#define GMAC_CONFIG_PS BIT(15)
#define GMAC_CONFIG_FES BIT(14)
#define GMAC_CONFIG_DM BIT(13)
+#define GMAC_CONFIG_LM BIT(12)
#define GMAC_CONFIG_DCRS BIT(9)
#define GMAC_CONFIG_TE BIT(1)
#define GMAC_CONFIG_RE BIT(0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index b4bb5629de38..5e98da4e14f9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -406,7 +406,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
unsigned int value = 0;
if (dev->flags & IFF_PROMISC) {
- value = GMAC_PACKET_FILTER_PR;
+ value = GMAC_PACKET_FILTER_PR | GMAC_PACKET_FILTER_PCF;
} else if ((dev->flags & IFF_ALLMULTI) ||
(netdev_mc_count(dev) > HASH_TABLE_SIZE)) {
/* Pass all multi */
@@ -440,20 +440,28 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
writel(mc_filter[1], ioaddr + GMAC_HASH_TAB_32_63);
}
+ value |= GMAC_PACKET_FILTER_HPF;
+
/* Handle multiple unicast addresses */
if (netdev_uc_count(dev) > GMAC_MAX_PERFECT_ADDRESSES) {
/* Switch to promiscuous mode if more than 128 addrs
* are required
*/
value |= GMAC_PACKET_FILTER_PR;
- } else if (!netdev_uc_empty(dev)) {
- int reg = 1;
+ } else {
struct netdev_hw_addr *ha;
+ int reg = 1;
netdev_for_each_uc_addr(ha, dev) {
dwmac4_set_umac_addr(hw, ha->addr, reg);
reg++;
}
+
+ while (reg <= GMAC_MAX_PERFECT_ADDRESSES) {
+ writel(0, ioaddr + GMAC_ADDR_HIGH(reg));
+ writel(0, ioaddr + GMAC_ADDR_LOW(reg));
+ reg++;
+ }
}
writel(value, ioaddr + GMAC_PACKET_FILTER);
@@ -703,6 +711,18 @@ static void dwmac4_debug(void __iomem *ioaddr, struct stmmac_extra_stats *x,
x->mac_gmii_rx_proto_engine++;
}
+static void dwmac4_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+ u32 value = readl(ioaddr + GMAC_CONFIG);
+
+ if (enable)
+ value |= GMAC_CONFIG_LM;
+ else
+ value &= ~GMAC_CONFIG_LM;
+
+ writel(value, ioaddr + GMAC_CONFIG);
+}
+
const struct stmmac_ops dwmac4_ops = {
.core_init = dwmac4_core_init,
.set_mac = stmmac_set_mac,
@@ -732,6 +752,7 @@ const struct stmmac_ops dwmac4_ops = {
.pcs_get_adv_lp = dwmac4_get_adv_lp,
.debug = dwmac4_debug,
.set_filter = dwmac4_set_filter,
+ .set_mac_loopback = dwmac4_set_mac_loopback,
};
const struct stmmac_ops dwmac410_ops = {
@@ -763,6 +784,7 @@ const struct stmmac_ops dwmac410_ops = {
.pcs_get_adv_lp = dwmac4_get_adv_lp,
.debug = dwmac4_debug,
.set_filter = dwmac4_set_filter,
+ .set_mac_loopback = dwmac4_set_mac_loopback,
};
const struct stmmac_ops dwmac510_ops = {
@@ -799,6 +821,7 @@ const struct stmmac_ops dwmac510_ops = {
.safety_feat_dump = dwmac5_safety_feat_dump,
.rxp_config = dwmac5_rxp_config,
.flex_pps_config = dwmac5_flex_pps_config,
+ .set_mac_loopback = dwmac4_set_mac_loopback,
};
int dwmac4_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index 545cb9c47433..99f8a391964c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -88,10 +88,6 @@ void dwmac4_dma_stop_rx(void __iomem *ioaddr, u32 chan)
value &= ~DMA_CONTROL_SR;
writel(value, ioaddr + DMA_CHAN_RX_CONTROL(chan));
-
- value = readl(ioaddr + GMAC_CONFIG);
- value &= ~GMAC_CONFIG_RE;
- writel(value, ioaddr + GMAC_CONFIG);
}
void dwmac4_set_tx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 085b700a4994..b8296eb41011 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -29,6 +29,7 @@
#define XGMAC_CONFIG_GPSL GENMASK(29, 16)
#define XGMAC_CONFIG_GPSL_SHIFT 16
#define XGMAC_CONFIG_S2KP BIT(11)
+#define XGMAC_CONFIG_LM BIT(10)
#define XGMAC_CONFIG_IPC BIT(9)
#define XGMAC_CONFIG_JE BIT(8)
#define XGMAC_CONFIG_WD BIT(7)
@@ -39,6 +40,7 @@
#define XGMAC_CORE_INIT_RX 0
#define XGMAC_PACKET_FILTER 0x00000008
#define XGMAC_FILTER_RA BIT(31)
+#define XGMAC_FILTER_PCF BIT(7)
#define XGMAC_FILTER_PM BIT(4)
#define XGMAC_FILTER_HMC BIT(2)
#define XGMAC_FILTER_PR BIT(0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 64b8cb88ea45..bfa7d6913fd4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -310,7 +310,7 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw,
u32 value = XGMAC_FILTER_RA;
if (dev->flags & IFF_PROMISC) {
- value |= XGMAC_FILTER_PR;
+ value |= XGMAC_FILTER_PR | XGMAC_FILTER_PCF;
} else if ((dev->flags & IFF_ALLMULTI) ||
(netdev_mc_count(dev) > HASH_TABLE_SIZE)) {
value |= XGMAC_FILTER_PM;
@@ -321,6 +321,18 @@ static void dwxgmac2_set_filter(struct mac_device_info *hw,
writel(value, ioaddr + XGMAC_PACKET_FILTER);
}
+static void dwxgmac2_set_mac_loopback(void __iomem *ioaddr, bool enable)
+{
+ u32 value = readl(ioaddr + XGMAC_RX_CONFIG);
+
+ if (enable)
+ value |= XGMAC_CONFIG_LM;
+ else
+ value &= ~XGMAC_CONFIG_LM;
+
+ writel(value, ioaddr + XGMAC_RX_CONFIG);
+}
+
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
.set_mac = dwxgmac2_set_mac,
@@ -350,6 +362,7 @@ const struct stmmac_ops dwxgmac210_ops = {
.pcs_get_adv_lp = NULL,
.debug = NULL,
.set_filter = dwxgmac2_set_filter,
+ .set_mac_loopback = dwxgmac2_set_mac_loopback,
};
int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index e79037f511e1..7861a938420a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -299,10 +299,6 @@ static void dwxgmac2_dma_stop_rx(void __iomem *ioaddr, u32 chan)
value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
value &= ~XGMAC_RXST;
writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
-
- value = readl(ioaddr + XGMAC_RX_CONFIG);
- value &= ~XGMAC_CONFIG_RE;
- writel(value, ioaddr + XGMAC_RX_CONFIG);
}
static int dwxgmac2_dma_interrupt(void __iomem *ioaddr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c
index 81b966a8261b..6c61b753b55e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.c
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c
@@ -81,6 +81,7 @@ static const struct stmmac_hwif_entry {
const void *hwtimestamp;
const void *mode;
const void *tc;
+ const void *mmc;
int (*setup)(struct stmmac_priv *priv);
int (*quirks)(struct stmmac_priv *priv);
} stmmac_hw[] = {
@@ -100,6 +101,7 @@ static const struct stmmac_hwif_entry {
.hwtimestamp = &stmmac_ptp,
.mode = NULL,
.tc = NULL,
+ .mmc = &dwmac_mmc_ops,
.setup = dwmac100_setup,
.quirks = stmmac_dwmac1_quirks,
}, {
@@ -117,6 +119,7 @@ static const struct stmmac_hwif_entry {
.hwtimestamp = &stmmac_ptp,
.mode = NULL,
.tc = NULL,
+ .mmc = &dwmac_mmc_ops,
.setup = dwmac1000_setup,
.quirks = stmmac_dwmac1_quirks,
}, {
@@ -134,6 +137,7 @@ static const struct stmmac_hwif_entry {
.hwtimestamp = &stmmac_ptp,
.mode = NULL,
.tc = &dwmac510_tc_ops,
+ .mmc = &dwmac_mmc_ops,
.setup = dwmac4_setup,
.quirks = stmmac_dwmac4_quirks,
}, {
@@ -151,6 +155,7 @@ static const struct stmmac_hwif_entry {
.hwtimestamp = &stmmac_ptp,
.mode = &dwmac4_ring_mode_ops,
.tc = &dwmac510_tc_ops,
+ .mmc = &dwmac_mmc_ops,
.setup = dwmac4_setup,
.quirks = NULL,
}, {
@@ -168,6 +173,7 @@ static const struct stmmac_hwif_entry {
.hwtimestamp = &stmmac_ptp,
.mode = &dwmac4_ring_mode_ops,
.tc = &dwmac510_tc_ops,
+ .mmc = &dwmac_mmc_ops,
.setup = dwmac4_setup,
.quirks = NULL,
}, {
@@ -185,6 +191,7 @@ static const struct stmmac_hwif_entry {
.hwtimestamp = &stmmac_ptp,
.mode = &dwmac4_ring_mode_ops,
.tc = &dwmac510_tc_ops,
+ .mmc = &dwmac_mmc_ops,
.setup = dwmac4_setup,
.quirks = NULL,
}, {
@@ -202,6 +209,7 @@ static const struct stmmac_hwif_entry {
.hwtimestamp = &stmmac_ptp,
.mode = NULL,
.tc = &dwmac510_tc_ops,
+ .mmc = NULL,
.setup = dwxgmac2_setup,
.quirks = NULL,
},
@@ -267,6 +275,7 @@ int stmmac_hwif_init(struct stmmac_priv *priv)
mac->ptp = mac->ptp ? : entry->hwtimestamp;
mac->mode = mac->mode ? : entry->mode;
mac->tc = mac->tc ? : entry->tc;
+ mac->mmc = mac->mmc ? : entry->mmc;
priv->hw = mac;
priv->ptpaddr = priv->ioaddr + entry->regs.ptp_off;
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index 5bb00234d961..2acfbc70e3c8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -6,6 +6,7 @@
#define __STMMAC_HWIF_H__
#include <linux/netdevice.h>
+#include <linux/stmmac.h>
#define stmmac_do_void_callback(__priv, __module, __cname, __arg0, __args...) \
({ \
@@ -324,6 +325,8 @@ struct stmmac_ops {
int (*flex_pps_config)(void __iomem *ioaddr, int index,
struct stmmac_pps_cfg *cfg, bool enable,
u32 sub_second_inc, u32 systime_flags);
+ /* Loopback for selftests */
+ void (*set_mac_loopback)(void __iomem *ioaddr, bool enable);
};
#define stmmac_core_init(__priv, __args...) \
@@ -392,6 +395,8 @@ struct stmmac_ops {
stmmac_do_callback(__priv, mac, rxp_config, __args)
#define stmmac_flex_pps_config(__priv, __args...) \
stmmac_do_callback(__priv, mac, flex_pps_config, __args)
+#define stmmac_set_mac_loopback(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, set_mac_loopback, __args)
/* PTP and HW Timer helpers */
struct stmmac_hwtimestamp {
@@ -464,6 +469,21 @@ struct stmmac_tc_ops {
#define stmmac_tc_setup_cbs(__priv, __args...) \
stmmac_do_callback(__priv, tc, setup_cbs, __args)
+struct stmmac_counters;
+
+struct stmmac_mmc_ops {
+ void (*ctrl)(void __iomem *ioaddr, unsigned int mode);
+ void (*intr_all_mask)(void __iomem *ioaddr);
+ void (*read)(void __iomem *ioaddr, struct stmmac_counters *mmc);
+};
+
+#define stmmac_mmc_ctrl(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mmc, ctrl, __args)
+#define stmmac_mmc_intr_all_mask(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mmc, intr_all_mask, __args)
+#define stmmac_mmc_read(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mmc, read, __args)
+
struct stmmac_regs_off {
u32 ptp_off;
u32 mmc_off;
@@ -482,6 +502,7 @@ extern const struct stmmac_tc_ops dwmac510_tc_ops;
extern const struct stmmac_ops dwxgmac210_ops;
extern const struct stmmac_dma_ops dwxgmac210_dma_ops;
extern const struct stmmac_desc_ops dwxgmac210_desc_ops;
+extern const struct stmmac_mmc_ops dwmac_mmc_ops;
#define GMAC_VERSION 0x00000020 /* GMAC CORE Version */
#define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h
index c037326331f5..e2bd90a4d34f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc.h
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h
@@ -128,8 +128,4 @@ struct stmmac_counters {
unsigned int mmc_rx_icmp_err_octets;
};
-void dwmac_mmc_ctrl(void __iomem *ioaddr, unsigned int mode);
-void dwmac_mmc_intr_all_mask(void __iomem *ioaddr);
-void dwmac_mmc_read(void __iomem *ioaddr, struct stmmac_counters *mmc);
-
#endif /* __MMC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index e9b04c28980f..b8c598125cfe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/io.h>
+#include "hwif.h"
#include "mmc.h"
/* MAC Management Counters register offset */
@@ -128,7 +129,7 @@
#define MMC_RX_ICMP_GD_OCTETS 0x180
#define MMC_RX_ICMP_ERR_OCTETS 0x184
-void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
+static void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
{
u32 value = readl(mmcaddr + MMC_CNTRL);
@@ -141,7 +142,7 @@ void dwmac_mmc_ctrl(void __iomem *mmcaddr, unsigned int mode)
}
/* To mask all all interrupts.*/
-void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr)
+static void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr)
{
writel(MMC_DEFAULT_MASK, mmcaddr + MMC_RX_INTR_MASK);
writel(MMC_DEFAULT_MASK, mmcaddr + MMC_TX_INTR_MASK);
@@ -153,7 +154,7 @@ void dwmac_mmc_intr_all_mask(void __iomem *mmcaddr)
* counter after a read. So all the field of the mmc struct
* have to be incremented.
*/
-void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
+static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
{
mmc->mmc_tx_octetcount_gb += readl(mmcaddr + MMC_TX_OCTETCOUNT_GB);
mmc->mmc_tx_framecount_gb += readl(mmcaddr + MMC_TX_FRAMECOUNT_GB);
@@ -266,3 +267,9 @@ void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS);
mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS);
}
+
+const struct stmmac_mmc_ops dwmac_mmc_ops = {
+ .ctrl = dwmac_mmc_ctrl,
+ .intr_all_mask = dwmac_mmc_intr_all_mask,
+ .read = dwmac_mmc_read,
+};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index dd95d959c1ce..a16ada8b8507 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -229,4 +229,26 @@ int stmmac_dvr_probe(struct device *device,
void stmmac_disable_eee_mode(struct stmmac_priv *priv);
bool stmmac_eee_init(struct stmmac_priv *priv);
+#if IS_ENABLED(CONFIG_STMMAC_SELFTESTS)
+void stmmac_selftest_run(struct net_device *dev,
+ struct ethtool_test *etest, u64 *buf);
+void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data);
+int stmmac_selftest_get_count(struct stmmac_priv *priv);
+#else
+static inline void stmmac_selftest_run(struct net_device *dev,
+ struct ethtool_test *etest, u64 *buf)
+{
+ /* Not enabled */
+}
+static inline void stmmac_selftest_get_strings(struct stmmac_priv *priv,
+ u8 *data)
+{
+ /* Not enabled */
+}
+static inline int stmmac_selftest_get_count(struct stmmac_priv *priv)
+{
+ return -EOPNOTSUPP;
+}
+#endif /* CONFIG_STMMAC_SELFTESTS */
+
#endif /* __STMMAC_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index e09522c5509a..cec51ba34296 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -537,7 +537,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
if (ret) {
/* If supported, for new GMAC chips expose the MMC counters */
if (priv->dma_cap.rmon) {
- dwmac_mmc_read(priv->mmcaddr, &priv->mmc);
+ stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc);
for (i = 0; i < STMMAC_MMC_STATS_LEN; i++) {
char *p;
@@ -589,6 +589,8 @@ static int stmmac_get_sset_count(struct net_device *netdev, int sset)
}
return len;
+ case ETH_SS_TEST:
+ return stmmac_selftest_get_count(priv);
default:
return -EOPNOTSUPP;
}
@@ -625,6 +627,9 @@ static void stmmac_get_strings(struct net_device *dev, u32 stringset, u8 *data)
p += ETH_GSTRING_LEN;
}
break;
+ case ETH_SS_TEST:
+ stmmac_selftest_get_strings(priv, p);
+ break;
default:
WARN_ON(1);
break;
@@ -890,6 +895,7 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
.nway_reset = phy_ethtool_nway_reset,
.get_pauseparam = stmmac_get_pauseparam,
.set_pauseparam = stmmac_set_pauseparam,
+ .self_test = stmmac_selftest_run,
.get_ethtool_stats = stmmac_get_ethtool_stats,
.get_strings = stmmac_get_strings,
.get_wol = stmmac_get_wol,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 65e57b9f6887..268af79e2632 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2058,6 +2058,9 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
&priv->xstats, chan);
struct stmmac_channel *ch = &priv->channel[chan];
+ if (status)
+ status |= handle_rx | handle_tx;
+
if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
napi_schedule_irqoff(&ch->rx_napi);
@@ -2128,10 +2131,10 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
unsigned int mode = MMC_CNTRL_RESET_ON_READ | MMC_CNTRL_COUNTER_RESET |
MMC_CNTRL_PRESET | MMC_CNTRL_FULL_HALF_PRESET;
- dwmac_mmc_intr_all_mask(priv->mmcaddr);
+ stmmac_mmc_intr_all_mask(priv, priv->mmcaddr);
if (priv->dma_cap.rmon) {
- dwmac_mmc_ctrl(priv->mmcaddr, mode);
+ stmmac_mmc_ctrl(priv, priv->mmcaddr, mode);
memset(&priv->mmc, 0, sizeof(struct stmmac_counters));
} else
netdev_info(priv->dev, "No MAC Management Counters available\n");
@@ -2164,8 +2167,8 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv)
stmmac_get_umac_addr(priv, priv->hw, priv->dev->dev_addr, 0);
if (!is_valid_ether_addr(priv->dev->dev_addr))
eth_hw_addr_random(priv->dev);
- netdev_info(priv->dev, "device MAC address %pM\n",
- priv->dev->dev_addr);
+ dev_info(priv->device, "device MAC address %pM\n",
+ priv->dev->dev_addr);
}
}
@@ -4241,9 +4244,8 @@ int stmmac_dvr_probe(struct device *device,
u32 queue, maxq;
int ret = 0;
- ndev = alloc_etherdev_mqs(sizeof(struct stmmac_priv),
- MTL_MAX_TX_QUEUES,
- MTL_MAX_RX_QUEUES);
+ ndev = devm_alloc_etherdev_mqs(device, sizeof(struct stmmac_priv),
+ MTL_MAX_TX_QUEUES, MTL_MAX_RX_QUEUES);
if (!ndev)
return -ENOMEM;
@@ -4275,8 +4277,7 @@ int stmmac_dvr_probe(struct device *device,
priv->wq = create_singlethread_workqueue("stmmac_wq");
if (!priv->wq) {
dev_err(priv->device, "failed to create workqueue\n");
- ret = -ENOMEM;
- goto error_wq;
+ return -ENOMEM;
}
INIT_WORK(&priv->service_task, stmmac_service_task);
@@ -4432,8 +4433,6 @@ error_mdio_register:
}
error_hw_init:
destroy_workqueue(priv->wq);
-error_wq:
- free_netdev(ndev);
return ret;
}
@@ -4470,7 +4469,6 @@ int stmmac_dvr_remove(struct device *dev)
stmmac_mdio_unregister(ndev);
destroy_workqueue(priv->wq);
mutex_destroy(&priv->lock);
- free_netdev(ndev);
return 0;
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
new file mode 100644
index 000000000000..a97b1ea76438
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -0,0 +1,850 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Synopsys, Inc. and/or its affiliates.
+ * stmmac Selftests Support
+ *
+ * Author: Jose Abreu <joabreu@synopsys.com>
+ */
+
+#include <linux/completion.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/phy.h>
+#include <linux/udp.h>
+#include <net/tcp.h>
+#include <net/udp.h>
+#include "stmmac.h"
+
+struct stmmachdr {
+ __be32 version;
+ __be64 magic;
+ u8 id;
+} __packed;
+
+#define STMMAC_TEST_PKT_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+ sizeof(struct stmmachdr))
+#define STMMAC_TEST_PKT_MAGIC 0xdeadcafecafedeadULL
+#define STMMAC_LB_TIMEOUT msecs_to_jiffies(200)
+
+struct stmmac_packet_attrs {
+ int vlan;
+ int vlan_id_in;
+ int vlan_id_out;
+ unsigned char *src;
+ unsigned char *dst;
+ u32 ip_src;
+ u32 ip_dst;
+ int tcp;
+ int sport;
+ int dport;
+ u32 exp_hash;
+ int dont_wait;
+ int timeout;
+ int size;
+ int remove_sa;
+ u8 id;
+};
+
+static u8 stmmac_test_next_id;
+
+static struct sk_buff *stmmac_test_get_udp_skb(struct stmmac_priv *priv,
+ struct stmmac_packet_attrs *attr)
+{
+ struct sk_buff *skb = NULL;
+ struct udphdr *uhdr = NULL;
+ struct tcphdr *thdr = NULL;
+ struct stmmachdr *shdr;
+ struct ethhdr *ehdr;
+ struct iphdr *ihdr;
+ int iplen, size;
+
+ size = attr->size + STMMAC_TEST_PKT_SIZE;
+ if (attr->vlan) {
+ size += 4;
+ if (attr->vlan > 1)
+ size += 4;
+ }
+
+ if (attr->tcp)
+ size += sizeof(struct tcphdr);
+ else
+ size += sizeof(struct udphdr);
+
+ skb = netdev_alloc_skb(priv->dev, size);
+ if (!skb)
+ return NULL;
+
+ prefetchw(skb->data);
+ skb_reserve(skb, NET_IP_ALIGN);
+
+ if (attr->vlan > 1)
+ ehdr = skb_push(skb, ETH_HLEN + 8);
+ else if (attr->vlan)
+ ehdr = skb_push(skb, ETH_HLEN + 4);
+ else if (attr->remove_sa)
+ ehdr = skb_push(skb, ETH_HLEN - 6);
+ else
+ ehdr = skb_push(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+
+ skb_set_network_header(skb, skb->len);
+ ihdr = skb_put(skb, sizeof(*ihdr));
+
+ skb_set_transport_header(skb, skb->len);
+ if (attr->tcp)
+ thdr = skb_put(skb, sizeof(*thdr));
+ else
+ uhdr = skb_put(skb, sizeof(*uhdr));
+
+ if (!attr->remove_sa)
+ eth_zero_addr(ehdr->h_source);
+ eth_zero_addr(ehdr->h_dest);
+ if (attr->src && !attr->remove_sa)
+ ether_addr_copy(ehdr->h_source, attr->src);
+ if (attr->dst)
+ ether_addr_copy(ehdr->h_dest, attr->dst);
+
+ if (!attr->remove_sa) {
+ ehdr->h_proto = htons(ETH_P_IP);
+ } else {
+ __be16 *ptr = (__be16 *)ehdr;
+
+ /* HACK */
+ ptr[3] = htons(ETH_P_IP);
+ }
+
+ if (attr->vlan) {
+ __be16 *tag, *proto;
+
+ if (!attr->remove_sa) {
+ tag = (void *)ehdr + ETH_HLEN;
+ proto = (void *)ehdr + (2 * ETH_ALEN);
+ } else {
+ tag = (void *)ehdr + ETH_HLEN - 6;
+ proto = (void *)ehdr + ETH_ALEN;
+ }
+
+ proto[0] = htons(ETH_P_8021Q);
+ tag[0] = htons(attr->vlan_id_out);
+ tag[1] = htons(ETH_P_IP);
+ if (attr->vlan > 1) {
+ proto[0] = htons(ETH_P_8021AD);
+ tag[1] = htons(ETH_P_8021Q);
+ tag[2] = htons(attr->vlan_id_in);
+ tag[3] = htons(ETH_P_IP);
+ }
+ }
+
+ if (attr->tcp) {
+ thdr->source = htons(attr->sport);
+ thdr->dest = htons(attr->dport);
+ thdr->doff = sizeof(struct tcphdr) / 4;
+ thdr->check = 0;
+ } else {
+ uhdr->source = htons(attr->sport);
+ uhdr->dest = htons(attr->dport);
+ uhdr->len = htons(sizeof(*shdr) + sizeof(*uhdr) + attr->size);
+ uhdr->check = 0;
+ }
+
+ ihdr->ihl = 5;
+ ihdr->ttl = 32;
+ ihdr->version = 4;
+ if (attr->tcp)
+ ihdr->protocol = IPPROTO_TCP;
+ else
+ ihdr->protocol = IPPROTO_UDP;
+ iplen = sizeof(*ihdr) + sizeof(*shdr) + attr->size;
+ if (attr->tcp)
+ iplen += sizeof(*thdr);
+ else
+ iplen += sizeof(*uhdr);
+ ihdr->tot_len = htons(iplen);
+ ihdr->frag_off = 0;
+ ihdr->saddr = 0;
+ ihdr->daddr = htonl(attr->ip_dst);
+ ihdr->tos = 0;
+ ihdr->id = 0;
+ ip_send_check(ihdr);
+
+ shdr = skb_put(skb, sizeof(*shdr));
+ shdr->version = 0;
+ shdr->magic = cpu_to_be64(STMMAC_TEST_PKT_MAGIC);
+ attr->id = stmmac_test_next_id;
+ shdr->id = stmmac_test_next_id++;
+
+ if (attr->size)
+ skb_put(skb, attr->size);
+
+ skb->csum = 0;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ if (attr->tcp) {
+ thdr->check = ~tcp_v4_check(skb->len, ihdr->saddr, ihdr->daddr, 0);
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct tcphdr, check);
+ } else {
+ udp4_hwcsum(skb, ihdr->saddr, ihdr->daddr);
+ }
+
+ skb->protocol = htons(ETH_P_IP);
+ skb->pkt_type = PACKET_HOST;
+ skb->dev = priv->dev;
+
+ return skb;
+}
+
+struct stmmac_test_priv {
+ struct stmmac_packet_attrs *packet;
+ struct packet_type pt;
+ struct completion comp;
+ int double_vlan;
+ int vlan_id;
+ int ok;
+};
+
+static int stmmac_test_loopback_validate(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt,
+ struct net_device *orig_ndev)
+{
+ struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+ struct stmmachdr *shdr;
+ struct ethhdr *ehdr;
+ struct udphdr *uhdr;
+ struct tcphdr *thdr;
+ struct iphdr *ihdr;
+
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ goto out;
+
+ if (skb_linearize(skb))
+ goto out;
+ if (skb_headlen(skb) < (STMMAC_TEST_PKT_SIZE - ETH_HLEN))
+ goto out;
+
+ ehdr = (struct ethhdr *)skb_mac_header(skb);
+ if (tpriv->packet->dst) {
+ if (!ether_addr_equal(ehdr->h_dest, tpriv->packet->dst))
+ goto out;
+ }
+ if (tpriv->packet->src) {
+ if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr))
+ goto out;
+ }
+
+ ihdr = ip_hdr(skb);
+ if (tpriv->double_vlan)
+ ihdr = (struct iphdr *)(skb_network_header(skb) + 4);
+
+ if (tpriv->packet->tcp) {
+ if (ihdr->protocol != IPPROTO_TCP)
+ goto out;
+
+ thdr = (struct tcphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+ if (thdr->dest != htons(tpriv->packet->dport))
+ goto out;
+
+ shdr = (struct stmmachdr *)((u8 *)thdr + sizeof(*thdr));
+ } else {
+ if (ihdr->protocol != IPPROTO_UDP)
+ goto out;
+
+ uhdr = (struct udphdr *)((u8 *)ihdr + 4 * ihdr->ihl);
+ if (uhdr->dest != htons(tpriv->packet->dport))
+ goto out;
+
+ shdr = (struct stmmachdr *)((u8 *)uhdr + sizeof(*uhdr));
+ }
+
+ if (shdr->magic != cpu_to_be64(STMMAC_TEST_PKT_MAGIC))
+ goto out;
+ if (tpriv->packet->exp_hash && !skb->hash)
+ goto out;
+ if (tpriv->packet->id != shdr->id)
+ goto out;
+
+ tpriv->ok = true;
+ complete(&tpriv->comp);
+out:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int __stmmac_test_loopback(struct stmmac_priv *priv,
+ struct stmmac_packet_attrs *attr)
+{
+ struct stmmac_test_priv *tpriv;
+ struct sk_buff *skb = NULL;
+ int ret = 0;
+
+ tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+ if (!tpriv)
+ return -ENOMEM;
+
+ tpriv->ok = false;
+ init_completion(&tpriv->comp);
+
+ tpriv->pt.type = htons(ETH_P_IP);
+ tpriv->pt.func = stmmac_test_loopback_validate;
+ tpriv->pt.dev = priv->dev;
+ tpriv->pt.af_packet_priv = tpriv;
+ tpriv->packet = attr;
+ dev_add_pack(&tpriv->pt);
+
+ skb = stmmac_test_get_udp_skb(priv, attr);
+ if (!skb) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ skb_set_queue_mapping(skb, 0);
+ ret = dev_queue_xmit(skb);
+ if (ret)
+ goto cleanup;
+
+ if (attr->dont_wait)
+ goto cleanup;
+
+ if (!attr->timeout)
+ attr->timeout = STMMAC_LB_TIMEOUT;
+
+ wait_for_completion_timeout(&tpriv->comp, attr->timeout);
+ ret = !tpriv->ok;
+
+cleanup:
+ dev_remove_pack(&tpriv->pt);
+ kfree(tpriv);
+ return ret;
+}
+
+static int stmmac_test_mac_loopback(struct stmmac_priv *priv)
+{
+ struct stmmac_packet_attrs attr = { };
+
+ attr.dst = priv->dev->dev_addr;
+ return __stmmac_test_loopback(priv, &attr);
+}
+
+static int stmmac_test_phy_loopback(struct stmmac_priv *priv)
+{
+ struct stmmac_packet_attrs attr = { };
+ int ret;
+
+ if (!priv->dev->phydev)
+ return -EBUSY;
+
+ ret = phy_loopback(priv->dev->phydev, true);
+ if (ret)
+ return ret;
+
+ attr.dst = priv->dev->dev_addr;
+ ret = __stmmac_test_loopback(priv, &attr);
+
+ phy_loopback(priv->dev->phydev, false);
+ return ret;
+}
+
+static int stmmac_test_mmc(struct stmmac_priv *priv)
+{
+ struct stmmac_counters initial, final;
+ int ret;
+
+ memset(&initial, 0, sizeof(initial));
+ memset(&final, 0, sizeof(final));
+
+ if (!priv->dma_cap.rmon)
+ return -EOPNOTSUPP;
+
+ /* Save previous results into internal struct */
+ stmmac_mmc_read(priv, priv->mmcaddr, &priv->mmc);
+
+ ret = stmmac_test_mac_loopback(priv);
+ if (ret)
+ return ret;
+
+ /* These will be loopback results so no need to save them */
+ stmmac_mmc_read(priv, priv->mmcaddr, &final);
+
+ /*
+ * The number of MMC counters available depends on HW configuration
+ * so we just use this one to validate the feature. I hope there is
+ * not a version without this counter.
+ */
+ if (final.mmc_tx_framecount_g <= initial.mmc_tx_framecount_g)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int stmmac_test_eee(struct stmmac_priv *priv)
+{
+ struct stmmac_extra_stats *initial, *final;
+ int retries = 10;
+ int ret;
+
+ if (!priv->dma_cap.eee || !priv->eee_active)
+ return -EOPNOTSUPP;
+
+ initial = kzalloc(sizeof(*initial), GFP_KERNEL);
+ if (!initial)
+ return -ENOMEM;
+
+ final = kzalloc(sizeof(*final), GFP_KERNEL);
+ if (!final) {
+ ret = -ENOMEM;
+ goto out_free_initial;
+ }
+
+ memcpy(initial, &priv->xstats, sizeof(*initial));
+
+ ret = stmmac_test_mac_loopback(priv);
+ if (ret)
+ goto out_free_final;
+
+ /* We have no traffic in the line so, sooner or later it will go LPI */
+ while (--retries) {
+ memcpy(final, &priv->xstats, sizeof(*final));
+
+ if (final->irq_tx_path_in_lpi_mode_n >
+ initial->irq_tx_path_in_lpi_mode_n)
+ break;
+ msleep(100);
+ }
+
+ if (!retries) {
+ ret = -ETIMEDOUT;
+ goto out_free_final;
+ }
+
+ if (final->irq_tx_path_in_lpi_mode_n <=
+ initial->irq_tx_path_in_lpi_mode_n) {
+ ret = -EINVAL;
+ goto out_free_final;
+ }
+
+ if (final->irq_tx_path_exit_lpi_mode_n <=
+ initial->irq_tx_path_exit_lpi_mode_n) {
+ ret = -EINVAL;
+ goto out_free_final;
+ }
+
+out_free_final:
+ kfree(final);
+out_free_initial:
+ kfree(initial);
+ return ret;
+}
+
+static int stmmac_filter_check(struct stmmac_priv *priv)
+{
+ if (!(priv->dev->flags & IFF_PROMISC))
+ return 0;
+
+ netdev_warn(priv->dev, "Test can't be run in promiscuous mode!\n");
+ return -EOPNOTSUPP;
+}
+
+static int stmmac_test_hfilt(struct stmmac_priv *priv)
+{
+ unsigned char gd_addr[ETH_ALEN] = {0x01, 0x00, 0xcc, 0xcc, 0xdd, 0xdd};
+ unsigned char bd_addr[ETH_ALEN] = {0x09, 0x00, 0xaa, 0xaa, 0xbb, 0xbb};
+ struct stmmac_packet_attrs attr = { };
+ int ret;
+
+ ret = stmmac_filter_check(priv);
+ if (ret)
+ return ret;
+
+ ret = dev_mc_add(priv->dev, gd_addr);
+ if (ret)
+ return ret;
+
+ attr.dst = gd_addr;
+
+ /* Shall receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ if (ret)
+ goto cleanup;
+
+ attr.dst = bd_addr;
+
+ /* Shall NOT receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ ret = !ret;
+
+cleanup:
+ dev_mc_del(priv->dev, gd_addr);
+ return ret;
+}
+
+static int stmmac_test_pfilt(struct stmmac_priv *priv)
+{
+ unsigned char gd_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
+ unsigned char bd_addr[ETH_ALEN] = {0x08, 0x00, 0x22, 0x33, 0x44, 0x55};
+ struct stmmac_packet_attrs attr = { };
+ int ret;
+
+ if (stmmac_filter_check(priv))
+ return -EOPNOTSUPP;
+
+ ret = dev_uc_add(priv->dev, gd_addr);
+ if (ret)
+ return ret;
+
+ attr.dst = gd_addr;
+
+ /* Shall receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ if (ret)
+ goto cleanup;
+
+ attr.dst = bd_addr;
+
+ /* Shall NOT receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ ret = !ret;
+
+cleanup:
+ dev_uc_del(priv->dev, gd_addr);
+ return ret;
+}
+
+static int stmmac_dummy_sync(struct net_device *netdev, const u8 *addr)
+{
+ return 0;
+}
+
+static void stmmac_test_set_rx_mode(struct net_device *netdev)
+{
+ /* As we are in test mode of ethtool we already own the rtnl lock
+ * so no address will change from user. We can just call the
+ * ndo_set_rx_mode() callback directly */
+ if (netdev->netdev_ops->ndo_set_rx_mode)
+ netdev->netdev_ops->ndo_set_rx_mode(netdev);
+}
+
+static int stmmac_test_mcfilt(struct stmmac_priv *priv)
+{
+ unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
+ unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77};
+ struct stmmac_packet_attrs attr = { };
+ int ret;
+
+ if (stmmac_filter_check(priv))
+ return -EOPNOTSUPP;
+
+ /* Remove all MC addresses */
+ __dev_mc_unsync(priv->dev, NULL);
+ stmmac_test_set_rx_mode(priv->dev);
+
+ ret = dev_uc_add(priv->dev, uc_addr);
+ if (ret)
+ goto cleanup;
+
+ attr.dst = uc_addr;
+
+ /* Shall receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ if (ret)
+ goto cleanup;
+
+ attr.dst = mc_addr;
+
+ /* Shall NOT receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ ret = !ret;
+
+cleanup:
+ dev_uc_del(priv->dev, uc_addr);
+ __dev_mc_sync(priv->dev, stmmac_dummy_sync, NULL);
+ stmmac_test_set_rx_mode(priv->dev);
+ return ret;
+}
+
+static int stmmac_test_ucfilt(struct stmmac_priv *priv)
+{
+ unsigned char uc_addr[ETH_ALEN] = {0x00, 0x01, 0x44, 0x55, 0x66, 0x77};
+ unsigned char mc_addr[ETH_ALEN] = {0x01, 0x01, 0x44, 0x55, 0x66, 0x77};
+ struct stmmac_packet_attrs attr = { };
+ int ret;
+
+ if (stmmac_filter_check(priv))
+ return -EOPNOTSUPP;
+
+ /* Remove all UC addresses */
+ __dev_uc_unsync(priv->dev, NULL);
+ stmmac_test_set_rx_mode(priv->dev);
+
+ ret = dev_mc_add(priv->dev, mc_addr);
+ if (ret)
+ goto cleanup;
+
+ attr.dst = mc_addr;
+
+ /* Shall receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ if (ret)
+ goto cleanup;
+
+ attr.dst = uc_addr;
+
+ /* Shall NOT receive packet */
+ ret = __stmmac_test_loopback(priv, &attr);
+ ret = !ret;
+
+cleanup:
+ dev_mc_del(priv->dev, mc_addr);
+ __dev_uc_sync(priv->dev, stmmac_dummy_sync, NULL);
+ stmmac_test_set_rx_mode(priv->dev);
+ return ret;
+}
+
+static int stmmac_test_flowctrl_validate(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt,
+ struct net_device *orig_ndev)
+{
+ struct stmmac_test_priv *tpriv = pt->af_packet_priv;
+ struct ethhdr *ehdr;
+
+ ehdr = (struct ethhdr *)skb_mac_header(skb);
+ if (!ether_addr_equal(ehdr->h_source, orig_ndev->dev_addr))
+ goto out;
+ if (ehdr->h_proto != htons(ETH_P_PAUSE))
+ goto out;
+
+ tpriv->ok = true;
+ complete(&tpriv->comp);
+out:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int stmmac_test_flowctrl(struct stmmac_priv *priv)
+{
+ unsigned char paddr[ETH_ALEN] = {0x01, 0x80, 0xC2, 0x00, 0x00, 0x01};
+ struct phy_device *phydev = priv->dev->phydev;
+ u32 rx_cnt = priv->plat->rx_queues_to_use;
+ struct stmmac_test_priv *tpriv;
+ unsigned int pkt_count;
+ int i, ret = 0;
+
+ if (!phydev || !phydev->pause)
+ return -EOPNOTSUPP;
+
+ tpriv = kzalloc(sizeof(*tpriv), GFP_KERNEL);
+ if (!tpriv)
+ return -ENOMEM;
+
+ tpriv->ok = false;
+ init_completion(&tpriv->comp);
+ tpriv->pt.type = htons(ETH_P_PAUSE);
+ tpriv->pt.func = stmmac_test_flowctrl_validate;
+ tpriv->pt.dev = priv->dev;
+ tpriv->pt.af_packet_priv = tpriv;
+ dev_add_pack(&tpriv->pt);
+
+ /* Compute minimum number of packets to make FIFO full */
+ pkt_count = priv->plat->rx_fifo_size;
+ if (!pkt_count)
+ pkt_count = priv->dma_cap.rx_fifo_size;
+ pkt_count /= 1400;
+ pkt_count *= 2;
+
+ for (i = 0; i < rx_cnt; i++)
+ stmmac_stop_rx(priv, priv->ioaddr, i);
+
+ ret = dev_set_promiscuity(priv->dev, 1);
+ if (ret)
+ goto cleanup;
+
+ ret = dev_mc_add(priv->dev, paddr);
+ if (ret)
+ goto cleanup;
+
+ for (i = 0; i < pkt_count; i++) {
+ struct stmmac_packet_attrs attr = { };
+
+ attr.dst = priv->dev->dev_addr;
+ attr.dont_wait = true;
+ attr.size = 1400;
+
+ ret = __stmmac_test_loopback(priv, &attr);
+ if (ret)
+ goto cleanup;
+ if (tpriv->ok)
+ break;
+ }
+
+ /* Wait for some time in case RX Watchdog is enabled */
+ msleep(200);
+
+ for (i = 0; i < rx_cnt; i++) {
+ struct stmmac_channel *ch = &priv->channel[i];
+
+ stmmac_start_rx(priv, priv->ioaddr, i);
+ local_bh_disable();
+ napi_reschedule(&ch->rx_napi);
+ local_bh_enable();
+ }
+
+ wait_for_completion_timeout(&tpriv->comp, STMMAC_LB_TIMEOUT);
+ ret = !tpriv->ok;
+
+cleanup:
+ dev_mc_del(priv->dev, paddr);
+ dev_set_promiscuity(priv->dev, -1);
+ dev_remove_pack(&tpriv->pt);
+ kfree(tpriv);
+ return ret;
+}
+
+#define STMMAC_LOOPBACK_NONE 0
+#define STMMAC_LOOPBACK_MAC 1
+#define STMMAC_LOOPBACK_PHY 2
+
+static const struct stmmac_test {
+ char name[ETH_GSTRING_LEN];
+ int lb;
+ int (*fn)(struct stmmac_priv *priv);
+} stmmac_selftests[] = {
+ {
+ .name = "MAC Loopback ",
+ .lb = STMMAC_LOOPBACK_MAC,
+ .fn = stmmac_test_mac_loopback,
+ }, {
+ .name = "PHY Loopback ",
+ .lb = STMMAC_LOOPBACK_NONE, /* Test will handle it */
+ .fn = stmmac_test_phy_loopback,
+ }, {
+ .name = "MMC Counters ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_mmc,
+ }, {
+ .name = "EEE ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_eee,
+ }, {
+ .name = "Hash Filter MC ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_hfilt,
+ }, {
+ .name = "Perfect Filter UC ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_pfilt,
+ }, {
+ .name = "MC Filter ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_mcfilt,
+ }, {
+ .name = "UC Filter ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_ucfilt,
+ }, {
+ .name = "Flow Control ",
+ .lb = STMMAC_LOOPBACK_PHY,
+ .fn = stmmac_test_flowctrl,
+ },
+};
+
+void stmmac_selftest_run(struct net_device *dev,
+ struct ethtool_test *etest, u64 *buf)
+{
+ struct stmmac_priv *priv = netdev_priv(dev);
+ int count = stmmac_selftest_get_count(priv);
+ int carrier = netif_carrier_ok(dev);
+ int i, ret;
+
+ memset(buf, 0, sizeof(*buf) * count);
+ stmmac_test_next_id = 0;
+
+ if (etest->flags != ETH_TEST_FL_OFFLINE) {
+ netdev_err(priv->dev, "Only offline tests are supported\n");
+ etest->flags |= ETH_TEST_FL_FAILED;
+ return;
+ } else if (!carrier) {
+ netdev_err(priv->dev, "You need valid Link to execute tests\n");
+ etest->flags |= ETH_TEST_FL_FAILED;
+ return;
+ }
+
+ /* We don't want extra traffic */
+ netif_carrier_off(dev);
+
+ /* Wait for queues drain */
+ msleep(200);
+
+ for (i = 0; i < count; i++) {
+ ret = 0;
+
+ switch (stmmac_selftests[i].lb) {
+ case STMMAC_LOOPBACK_PHY:
+ ret = -EOPNOTSUPP;
+ if (dev->phydev)
+ ret = phy_loopback(dev->phydev, true);
+ if (!ret)
+ break;
+ /* Fallthrough */
+ case STMMAC_LOOPBACK_MAC:
+ ret = stmmac_set_mac_loopback(priv, priv->ioaddr, true);
+ break;
+ case STMMAC_LOOPBACK_NONE:
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+
+ /*
+ * First tests will always be MAC / PHY loobpack. If any of
+ * them is not supported we abort earlier.
+ */
+ if (ret) {
+ netdev_err(priv->dev, "Loopback is not supported\n");
+ etest->flags |= ETH_TEST_FL_FAILED;
+ break;
+ }
+
+ ret = stmmac_selftests[i].fn(priv);
+ if (ret && (ret != -EOPNOTSUPP))
+ etest->flags |= ETH_TEST_FL_FAILED;
+ buf[i] = ret;
+
+ switch (stmmac_selftests[i].lb) {
+ case STMMAC_LOOPBACK_PHY:
+ ret = -EOPNOTSUPP;
+ if (dev->phydev)
+ ret = phy_loopback(dev->phydev, false);
+ if (!ret)
+ break;
+ /* Fallthrough */
+ case STMMAC_LOOPBACK_MAC:
+ stmmac_set_mac_loopback(priv, priv->ioaddr, false);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* Restart everything */
+ if (carrier)
+ netif_carrier_on(dev);
+}
+
+void stmmac_selftest_get_strings(struct stmmac_priv *priv, u8 *data)
+{
+ u8 *p = data;
+ int i;
+
+ for (i = 0; i < stmmac_selftest_get_count(priv); i++) {
+ snprintf(p, ETH_GSTRING_LEN, "%2d. %s", i + 1,
+ stmmac_selftests[i].name);
+ p += ETH_GSTRING_LEN;
+ }
+}
+
+int stmmac_selftest_get_count(struct stmmac_priv *priv)
+{
+ return ARRAY_SIZE(stmmac_selftests);
+}
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 634fc484a0b3..6d3f1f3f90cb 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1423,8 +1423,11 @@ static int cpsw_ndo_open(struct net_device *ndev)
return 0;
err_cleanup:
- cpdma_ctlr_stop(cpsw->dma);
- for_each_slave(priv, cpsw_slave_stop, cpsw);
+ if (!cpsw->usage_count) {
+ cpdma_ctlr_stop(cpsw->dma);
+ for_each_slave(priv, cpsw_slave_stop, cpsw);
+ }
+
pm_runtime_put_sync(cpsw->dev);
netif_carrier_off(priv->ndev);
return ret;
diff --git a/drivers/net/ethernet/xilinx/ll_temac.h b/drivers/net/ethernet/xilinx/ll_temac.h
index 1aeda084b8f1..276292bca334 100644
--- a/drivers/net/ethernet/xilinx/ll_temac.h
+++ b/drivers/net/ethernet/xilinx/ll_temac.h
@@ -361,7 +361,7 @@ struct temac_local {
/* For synchronization of indirect register access. Must be
* shared mutex between interfaces in same TEMAC block.
*/
- struct mutex *indirect_mutex;
+ spinlock_t *indirect_lock;
u32 options; /* Current options word */
int last_link;
unsigned int temac_features;
@@ -388,8 +388,9 @@ struct temac_local {
/* xilinx_temac.c */
int temac_indirect_busywait(struct temac_local *lp);
u32 temac_indirect_in32(struct temac_local *lp, int reg);
+u32 temac_indirect_in32_locked(struct temac_local *lp, int reg);
void temac_indirect_out32(struct temac_local *lp, int reg, u32 value);
-
+void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value);
/* xilinx_temac_mdio.c */
int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 14870d659f7d..21c1b4322ea7 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -22,7 +22,6 @@
*
* TODO:
* - Factor out locallink DMA code into separate driver
- * - Fix multicast assignment.
* - Fix support for hardware checksumming.
* - Testing. Lots and lots of testing.
*
@@ -53,6 +52,7 @@
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/dma-mapping.h>
+#include <linux/processor.h>
#include <linux/platform_data/xilinx-ll-temac.h>
#include "ll_temac.h"
@@ -84,51 +84,118 @@ static void _temac_iow_le(struct temac_local *lp, int offset, u32 value)
return iowrite32(value, lp->regs + offset);
}
+static bool hard_acs_rdy(struct temac_local *lp)
+{
+ return temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK;
+}
+
+static bool hard_acs_rdy_or_timeout(struct temac_local *lp, ktime_t timeout)
+{
+ ktime_t cur = ktime_get();
+
+ return hard_acs_rdy(lp) || ktime_after(cur, timeout);
+}
+
+/* Poll for maximum 20 ms. This is similar to the 2 jiffies @ 100 Hz
+ * that was used before, and should cover MDIO bus speed down to 3200
+ * Hz.
+ */
+#define HARD_ACS_RDY_POLL_NS (20 * NSEC_PER_MSEC)
+
+/**
+ * temac_indirect_busywait - Wait for current indirect register access
+ * to complete.
+ */
int temac_indirect_busywait(struct temac_local *lp)
{
- unsigned long end = jiffies + 2;
+ ktime_t timeout = ktime_add_ns(ktime_get(), HARD_ACS_RDY_POLL_NS);
- while (!(temac_ior(lp, XTE_RDY0_OFFSET) & XTE_RDY0_HARD_ACS_RDY_MASK)) {
- if (time_before_eq(end, jiffies)) {
- WARN_ON(1);
- return -ETIMEDOUT;
- }
- usleep_range(500, 1000);
- }
- return 0;
+ spin_until_cond(hard_acs_rdy_or_timeout(lp, timeout));
+ if (WARN_ON(!hard_acs_rdy(lp)))
+ return -ETIMEDOUT;
+ else
+ return 0;
}
/**
- * temac_indirect_in32
- *
- * lp->indirect_mutex must be held when calling this function
+ * temac_indirect_in32 - Indirect register read access. This function
+ * must be called without lp->indirect_lock being held.
*/
u32 temac_indirect_in32(struct temac_local *lp, int reg)
{
- u32 val;
+ unsigned long flags;
+ int val;
+
+ spin_lock_irqsave(lp->indirect_lock, flags);
+ val = temac_indirect_in32_locked(lp, reg);
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
+ return val;
+}
- if (temac_indirect_busywait(lp))
+/**
+ * temac_indirect_in32_locked - Indirect register read access. This
+ * function must be called with lp->indirect_lock being held. Use
+ * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid
+ * repeated lock/unlock and to ensure uninterrupted access to indirect
+ * registers.
+ */
+u32 temac_indirect_in32_locked(struct temac_local *lp, int reg)
+{
+ /* This initial wait should normally not spin, as we always
+ * try to wait for indirect access to complete before
+ * releasing the indirect_lock.
+ */
+ if (WARN_ON(temac_indirect_busywait(lp)))
return -ETIMEDOUT;
+ /* Initiate read from indirect register */
temac_iow(lp, XTE_CTL0_OFFSET, reg);
- if (temac_indirect_busywait(lp))
+ /* Wait for indirect register access to complete. We really
+ * should not see timeouts, and could even end up causing
+ * problem for following indirect access, so let's make a bit
+ * of WARN noise.
+ */
+ if (WARN_ON(temac_indirect_busywait(lp)))
return -ETIMEDOUT;
- val = temac_ior(lp, XTE_LSW0_OFFSET);
-
- return val;
+ /* Value is ready now */
+ return temac_ior(lp, XTE_LSW0_OFFSET);
}
/**
- * temac_indirect_out32
- *
- * lp->indirect_mutex must be held when calling this function
+ * temac_indirect_out32 - Indirect register write access. This function
+ * must be called without lp->indirect_lock being held.
*/
void temac_indirect_out32(struct temac_local *lp, int reg, u32 value)
{
- if (temac_indirect_busywait(lp))
+ unsigned long flags;
+
+ spin_lock_irqsave(lp->indirect_lock, flags);
+ temac_indirect_out32_locked(lp, reg, value);
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
+}
+
+/**
+ * temac_indirect_out32_locked - Indirect register write access. This
+ * function must be called with lp->indirect_lock being held. Use
+ * this together with spin_lock_irqsave/spin_lock_irqrestore to avoid
+ * repeated lock/unlock and to ensure uninterrupted access to indirect
+ * registers.
+ */
+void temac_indirect_out32_locked(struct temac_local *lp, int reg, u32 value)
+{
+ /* As in temac_indirect_in32_locked(), we should normally not
+ * spin here. And if it happens, we actually end up silently
+ * ignoring the write request. Ouch.
+ */
+ if (WARN_ON(temac_indirect_busywait(lp)))
return;
+ /* Initiate write to indirect register */
temac_iow(lp, XTE_LSW0_OFFSET, value);
temac_iow(lp, XTE_CTL0_OFFSET, CNTLREG_WRITE_ENABLE_MASK | reg);
- temac_indirect_busywait(lp);
+ /* As in temac_indirect_in32_locked(), we should not see timeouts
+ * here. And if it happens, we continue before the write has
+ * completed. Not good.
+ */
+ WARN_ON(temac_indirect_busywait(lp));
}
/**
@@ -344,20 +411,21 @@ out:
static void temac_do_set_mac_address(struct net_device *ndev)
{
struct temac_local *lp = netdev_priv(ndev);
+ unsigned long flags;
/* set up unicast MAC address filter set its mac address */
- mutex_lock(lp->indirect_mutex);
- temac_indirect_out32(lp, XTE_UAW0_OFFSET,
- (ndev->dev_addr[0]) |
- (ndev->dev_addr[1] << 8) |
- (ndev->dev_addr[2] << 16) |
- (ndev->dev_addr[3] << 24));
+ spin_lock_irqsave(lp->indirect_lock, flags);
+ temac_indirect_out32_locked(lp, XTE_UAW0_OFFSET,
+ (ndev->dev_addr[0]) |
+ (ndev->dev_addr[1] << 8) |
+ (ndev->dev_addr[2] << 16) |
+ (ndev->dev_addr[3] << 24));
/* There are reserved bits in EUAW1
* so don't affect them Set MAC bits [47:32] in EUAW1 */
- temac_indirect_out32(lp, XTE_UAW1_OFFSET,
- (ndev->dev_addr[4] & 0x000000ff) |
- (ndev->dev_addr[5] << 8));
- mutex_unlock(lp->indirect_mutex);
+ temac_indirect_out32_locked(lp, XTE_UAW1_OFFSET,
+ (ndev->dev_addr[4] & 0x000000ff) |
+ (ndev->dev_addr[5] << 8));
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
}
static int temac_init_mac_address(struct net_device *ndev, const void *address)
@@ -383,49 +451,58 @@ static int temac_set_mac_address(struct net_device *ndev, void *p)
static void temac_set_multicast_list(struct net_device *ndev)
{
struct temac_local *lp = netdev_priv(ndev);
- u32 multi_addr_msw, multi_addr_lsw, val;
- int i;
+ u32 multi_addr_msw, multi_addr_lsw;
+ int i = 0;
+ unsigned long flags;
+ bool promisc_mode_disabled = false;
- mutex_lock(lp->indirect_mutex);
- if (ndev->flags & (IFF_ALLMULTI | IFF_PROMISC) ||
- netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM) {
- /*
- * We must make the kernel realise we had to move
- * into promisc mode or we start all out war on
- * the cable. If it was a promisc request the
- * flag is already set. If not we assert it.
- */
- ndev->flags |= IFF_PROMISC;
+ if (ndev->flags & (IFF_PROMISC | IFF_ALLMULTI) ||
+ (netdev_mc_count(ndev) > MULTICAST_CAM_TABLE_NUM)) {
temac_indirect_out32(lp, XTE_AFM_OFFSET, XTE_AFM_EPPRM_MASK);
dev_info(&ndev->dev, "Promiscuous mode enabled.\n");
- } else if (!netdev_mc_empty(ndev)) {
+ return;
+ }
+
+ spin_lock_irqsave(lp->indirect_lock, flags);
+
+ if (!netdev_mc_empty(ndev)) {
struct netdev_hw_addr *ha;
- i = 0;
netdev_for_each_mc_addr(ha, ndev) {
- if (i >= MULTICAST_CAM_TABLE_NUM)
+ if (WARN_ON(i >= MULTICAST_CAM_TABLE_NUM))
break;
multi_addr_msw = ((ha->addr[3] << 24) |
(ha->addr[2] << 16) |
(ha->addr[1] << 8) |
(ha->addr[0]));
- temac_indirect_out32(lp, XTE_MAW0_OFFSET,
- multi_addr_msw);
+ temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET,
+ multi_addr_msw);
multi_addr_lsw = ((ha->addr[5] << 8) |
(ha->addr[4]) | (i << 16));
- temac_indirect_out32(lp, XTE_MAW1_OFFSET,
- multi_addr_lsw);
+ temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET,
+ multi_addr_lsw);
i++;
}
- } else {
- val = temac_indirect_in32(lp, XTE_AFM_OFFSET);
- temac_indirect_out32(lp, XTE_AFM_OFFSET,
- val & ~XTE_AFM_EPPRM_MASK);
- temac_indirect_out32(lp, XTE_MAW0_OFFSET, 0);
- temac_indirect_out32(lp, XTE_MAW1_OFFSET, 0);
- dev_info(&ndev->dev, "Promiscuous mode disabled.\n");
}
- mutex_unlock(lp->indirect_mutex);
+
+ /* Clear all or remaining/unused address table entries */
+ while (i < MULTICAST_CAM_TABLE_NUM) {
+ temac_indirect_out32_locked(lp, XTE_MAW0_OFFSET, 0);
+ temac_indirect_out32_locked(lp, XTE_MAW1_OFFSET, i << 16);
+ i++;
+ }
+
+ /* Enable address filter block if currently disabled */
+ if (temac_indirect_in32_locked(lp, XTE_AFM_OFFSET)
+ & XTE_AFM_EPPRM_MASK) {
+ temac_indirect_out32_locked(lp, XTE_AFM_OFFSET, 0);
+ promisc_mode_disabled = true;
+ }
+
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
+
+ if (promisc_mode_disabled)
+ dev_info(&ndev->dev, "Promiscuous mode disabled.\n");
}
static struct temac_option {
@@ -516,17 +593,19 @@ static u32 temac_setoptions(struct net_device *ndev, u32 options)
struct temac_local *lp = netdev_priv(ndev);
struct temac_option *tp = &temac_options[0];
int reg;
+ unsigned long flags;
- mutex_lock(lp->indirect_mutex);
+ spin_lock_irqsave(lp->indirect_lock, flags);
while (tp->opt) {
- reg = temac_indirect_in32(lp, tp->reg) & ~tp->m_or;
- if (options & tp->opt)
+ reg = temac_indirect_in32_locked(lp, tp->reg) & ~tp->m_or;
+ if (options & tp->opt) {
reg |= tp->m_or;
- temac_indirect_out32(lp, tp->reg, reg);
+ temac_indirect_out32_locked(lp, tp->reg, reg);
+ }
tp++;
}
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
lp->options |= options;
- mutex_unlock(lp->indirect_mutex);
return 0;
}
@@ -537,6 +616,7 @@ static void temac_device_reset(struct net_device *ndev)
struct temac_local *lp = netdev_priv(ndev);
u32 timeout;
u32 val;
+ unsigned long flags;
/* Perform a software reset */
@@ -545,7 +625,6 @@ static void temac_device_reset(struct net_device *ndev)
dev_dbg(&ndev->dev, "%s()\n", __func__);
- mutex_lock(lp->indirect_mutex);
/* Reset the receiver and wait for it to finish reset */
temac_indirect_out32(lp, XTE_RXC1_OFFSET, XTE_RXC1_RXRST_MASK);
timeout = 1000;
@@ -571,8 +650,11 @@ static void temac_device_reset(struct net_device *ndev)
}
/* Disable the receiver */
- val = temac_indirect_in32(lp, XTE_RXC1_OFFSET);
- temac_indirect_out32(lp, XTE_RXC1_OFFSET, val & ~XTE_RXC1_RXEN_MASK);
+ spin_lock_irqsave(lp->indirect_lock, flags);
+ val = temac_indirect_in32_locked(lp, XTE_RXC1_OFFSET);
+ temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET,
+ val & ~XTE_RXC1_RXEN_MASK);
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
/* Reset Local Link (DMA) */
lp->dma_out(lp, DMA_CONTROL_REG, DMA_CONTROL_RST);
@@ -592,12 +674,12 @@ static void temac_device_reset(struct net_device *ndev)
"temac_device_reset descriptor allocation failed\n");
}
- temac_indirect_out32(lp, XTE_RXC0_OFFSET, 0);
- temac_indirect_out32(lp, XTE_RXC1_OFFSET, 0);
- temac_indirect_out32(lp, XTE_TXC_OFFSET, 0);
- temac_indirect_out32(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK);
-
- mutex_unlock(lp->indirect_mutex);
+ spin_lock_irqsave(lp->indirect_lock, flags);
+ temac_indirect_out32_locked(lp, XTE_RXC0_OFFSET, 0);
+ temac_indirect_out32_locked(lp, XTE_RXC1_OFFSET, 0);
+ temac_indirect_out32_locked(lp, XTE_TXC_OFFSET, 0);
+ temac_indirect_out32_locked(lp, XTE_FCC_OFFSET, XTE_FCC_RXFLO_MASK);
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
/* Sync default options with HW
* but leave receiver and transmitter disabled. */
@@ -621,13 +703,14 @@ static void temac_adjust_link(struct net_device *ndev)
struct phy_device *phy = ndev->phydev;
u32 mii_speed;
int link_state;
+ unsigned long flags;
/* hash together the state values to decide if something has changed */
link_state = phy->speed | (phy->duplex << 1) | phy->link;
- mutex_lock(lp->indirect_mutex);
if (lp->last_link != link_state) {
- mii_speed = temac_indirect_in32(lp, XTE_EMCFG_OFFSET);
+ spin_lock_irqsave(lp->indirect_lock, flags);
+ mii_speed = temac_indirect_in32_locked(lp, XTE_EMCFG_OFFSET);
mii_speed &= ~XTE_EMCFG_LINKSPD_MASK;
switch (phy->speed) {
@@ -637,11 +720,12 @@ static void temac_adjust_link(struct net_device *ndev)
}
/* Write new speed setting out to TEMAC */
- temac_indirect_out32(lp, XTE_EMCFG_OFFSET, mii_speed);
+ temac_indirect_out32_locked(lp, XTE_EMCFG_OFFSET, mii_speed);
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
+
lp->last_link = link_state;
phy_print_status(phy);
}
- mutex_unlock(lp->indirect_mutex);
}
#ifdef CONFIG_64BIT
@@ -1011,6 +1095,7 @@ static const struct net_device_ops temac_netdev_ops = {
.ndo_open = temac_open,
.ndo_stop = temac_stop,
.ndo_start_xmit = temac_start_xmit,
+ .ndo_set_rx_mode = temac_set_multicast_list,
.ndo_set_mac_address = temac_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
.ndo_do_ioctl = temac_ioctl,
@@ -1076,7 +1161,6 @@ static int temac_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, ndev);
SET_NETDEV_DEV(ndev, &pdev->dev);
- ndev->flags &= ~IFF_MULTICAST; /* clear multicast */
ndev->features = NETIF_F_SG;
ndev->netdev_ops = &temac_netdev_ops;
ndev->ethtool_ops = &temac_ethtool_ops;
@@ -1103,17 +1187,17 @@ static int temac_probe(struct platform_device *pdev)
/* Setup mutex for synchronization of indirect register access */
if (pdata) {
- if (!pdata->indirect_mutex) {
+ if (!pdata->indirect_lock) {
dev_err(&pdev->dev,
- "indirect_mutex missing in platform_data\n");
+ "indirect_lock missing in platform_data\n");
return -EINVAL;
}
- lp->indirect_mutex = pdata->indirect_mutex;
+ lp->indirect_lock = pdata->indirect_lock;
} else {
- lp->indirect_mutex = devm_kmalloc(&pdev->dev,
- sizeof(*lp->indirect_mutex),
- GFP_KERNEL);
- mutex_init(lp->indirect_mutex);
+ lp->indirect_lock = devm_kmalloc(&pdev->dev,
+ sizeof(*lp->indirect_lock),
+ GFP_KERNEL);
+ spin_lock_init(lp->indirect_lock);
}
/* map device registers */
diff --git a/drivers/net/ethernet/xilinx/ll_temac_mdio.c b/drivers/net/ethernet/xilinx/ll_temac_mdio.c
index a4667326f745..6fd2dea4e60f 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_mdio.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_mdio.c
@@ -25,14 +25,15 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
{
struct temac_local *lp = bus->priv;
u32 rc;
+ unsigned long flags;
/* Write the PHY address to the MIIM Access Initiator register.
* When the transfer completes, the PHY register value will appear
* in the LSW0 register */
- mutex_lock(lp->indirect_mutex);
+ spin_lock_irqsave(lp->indirect_lock, flags);
temac_iow(lp, XTE_LSW0_OFFSET, (phy_id << 5) | reg);
- rc = temac_indirect_in32(lp, XTE_MIIMAI_OFFSET);
- mutex_unlock(lp->indirect_mutex);
+ rc = temac_indirect_in32_locked(lp, XTE_MIIMAI_OFFSET);
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
dev_dbg(lp->dev, "temac_mdio_read(phy_id=%i, reg=%x) == %x\n",
phy_id, reg, rc);
@@ -43,6 +44,7 @@ static int temac_mdio_read(struct mii_bus *bus, int phy_id, int reg)
static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
{
struct temac_local *lp = bus->priv;
+ unsigned long flags;
dev_dbg(lp->dev, "temac_mdio_write(phy_id=%i, reg=%x, val=%x)\n",
phy_id, reg, val);
@@ -50,10 +52,10 @@ static int temac_mdio_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
/* First write the desired value into the write data register
* and then write the address into the access initiator register
*/
- mutex_lock(lp->indirect_mutex);
- temac_indirect_out32(lp, XTE_MGTDR_OFFSET, val);
- temac_indirect_out32(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg);
- mutex_unlock(lp->indirect_mutex);
+ spin_lock_irqsave(lp->indirect_lock, flags);
+ temac_indirect_out32_locked(lp, XTE_MGTDR_OFFSET, val);
+ temac_indirect_out32_locked(lp, XTE_MIIMAI_OFFSET, (phy_id << 5) | reg);
+ spin_unlock_irqrestore(lp->indirect_lock, flags);
return 0;
}
@@ -87,9 +89,7 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev)
/* Enable the MDIO bus by asserting the enable bit and writing
* in the clock config */
- mutex_lock(lp->indirect_mutex);
temac_indirect_out32(lp, XTE_MC_OFFSET, 1 << 6 | clk_div);
- mutex_unlock(lp->indirect_mutex);
bus = devm_mdiobus_alloc(&pdev->dev);
if (!bus)
@@ -116,10 +116,8 @@ int temac_mdio_setup(struct temac_local *lp, struct platform_device *pdev)
if (rc)
return rc;
- mutex_lock(lp->indirect_mutex);
dev_dbg(lp->dev, "MDIO bus registered; MC:%x\n",
temac_indirect_in32(lp, XTE_MC_OFFSET));
- mutex_unlock(lp->indirect_mutex);
return 0;
}
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 61550122b563..0ccabde8e9c9 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -831,7 +831,7 @@ static int macvlan_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
struct ifreq ifrr;
int err = -EOPNOTSUPP;
- strncpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
+ strscpy(ifrr.ifr_name, real_dev->name, IFNAMSIZ);
ifrr.ifr_ifru = ifr->ifr_ifru;
switch (cmd) {
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index f99f27800fdb..db5645b0c898 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -416,6 +416,12 @@ config NATIONAL_PHY
---help---
Currently supports the DP83865 PHY.
+config NXP_TJA11XX_PHY
+ tristate "NXP TJA11xx PHYs support"
+ depends on HWMON
+ ---help---
+ Currently supports the NXP TJA1100 and TJA1101 PHY.
+
config QSEMI_PHY
tristate "Quality Semiconductor PHYs"
---help---
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 27d7f9f3b0de..bac339e09042 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_MICROCHIP_PHY) += microchip.o
obj-$(CONFIG_MICROCHIP_T1_PHY) += microchip_t1.o
obj-$(CONFIG_MICROSEMI_PHY) += mscc.o
obj-$(CONFIG_NATIONAL_PHY) += national.o
+obj-$(CONFIG_NXP_TJA11XX_PHY) += nxp-tja11xx.o
obj-$(CONFIG_QSEMI_PHY) += qsemi.o
obj-$(CONFIG_REALTEK_PHY) += realtek.o
obj-$(CONFIG_RENESAS_PHY) += uPD60620.o
diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
index 0fedd28fdb6e..3b29d381116f 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia_main.c
@@ -27,6 +27,7 @@
#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK GENMASK(7, 3)
#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR 0
#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI 2
+#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII 3
#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII 6
#define MDIO_PHYXS_VEND_IF_STATUS_TYPE_OCSGMII 10
@@ -360,6 +361,9 @@ static int aqr107_read_status(struct phy_device *phydev)
case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI:
phydev->interface = PHY_INTERFACE_MODE_10GKR;
break;
+ case MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII:
+ phydev->interface = PHY_INTERFACE_MODE_USXGMII;
+ break;
case MDIO_PHYXS_VEND_IF_STATUS_TYPE_SGMII:
phydev->interface = PHY_INTERFACE_MODE_SGMII;
break;
@@ -488,9 +492,13 @@ static int aqr107_config_init(struct phy_device *phydev)
if (phydev->interface != PHY_INTERFACE_MODE_SGMII &&
phydev->interface != PHY_INTERFACE_MODE_2500BASEX &&
phydev->interface != PHY_INTERFACE_MODE_XGMII &&
+ phydev->interface != PHY_INTERFACE_MODE_USXGMII &&
phydev->interface != PHY_INTERFACE_MODE_10GKR)
return -ENODEV;
+ WARN(phydev->interface == PHY_INTERFACE_MODE_XGMII,
+ "Your devicetree is out of date, please update it. The AQR107 family doesn't support XGMII, maybe you mean USXGMII.\n");
+
ret = aqr107_wait_reset_complete(phydev);
if (!ret)
aqr107_chip_info(phydev);
diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c
index f0c0eefe2202..f6dce6850850 100644
--- a/drivers/net/phy/bcm87xx.c
+++ b/drivers/net/phy/bcm87xx.c
@@ -81,22 +81,18 @@ static int bcm87xx_of_reg_init(struct phy_device *phydev)
}
#endif /* CONFIG_OF_MDIO */
-static int bcm87xx_config_init(struct phy_device *phydev)
+static int bcm87xx_get_features(struct phy_device *phydev)
{
- linkmode_zero(phydev->supported);
linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
phydev->supported);
- linkmode_zero(phydev->advertising);
- linkmode_set_bit(ETHTOOL_LINK_MODE_10000baseR_FEC_BIT,
- phydev->advertising);
- phydev->state = PHY_NOLINK;
- phydev->autoneg = AUTONEG_DISABLE;
-
- bcm87xx_of_reg_init(phydev);
-
return 0;
}
+static int bcm87xx_config_init(struct phy_device *phydev)
+{
+ return bcm87xx_of_reg_init(phydev);
+}
+
static int bcm87xx_config_aneg(struct phy_device *phydev)
{
return -EINVAL;
@@ -194,7 +190,7 @@ static struct phy_driver bcm87xx_driver[] = {
.phy_id = PHY_ID_BCM8706,
.phy_id_mask = 0xffffffff,
.name = "Broadcom BCM8706",
- .features = PHY_10GBIT_FEC_FEATURES,
+ .get_features = bcm87xx_get_features,
.config_init = bcm87xx_config_init,
.config_aneg = bcm87xx_config_aneg,
.read_status = bcm87xx_read_status,
@@ -206,7 +202,7 @@ static struct phy_driver bcm87xx_driver[] = {
.phy_id = PHY_ID_BCM8727,
.phy_id_mask = 0xffffffff,
.name = "Broadcom BCM8727",
- .features = PHY_10GBIT_FEC_FEATURES,
+ .get_features = bcm87xx_get_features,
.config_init = bcm87xx_config_init,
.config_aneg = bcm87xx_config_aneg,
.read_status = bcm87xx_read_status,
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index c71c7d0f53f0..1f1ecee0ee2f 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -34,6 +34,7 @@
#define DP83867_RGMIICTL 0x0032
#define DP83867_STRAP_STS1 0x006E
+#define DP83867_STRAP_STS2 0x006f
#define DP83867_RGMIIDCTL 0x0086
#define DP83867_IO_MUX_CFG 0x0170
#define DP83867_10M_SGMII_CFG 0x016F
@@ -63,19 +64,30 @@
/* STRAP_STS1 bits */
#define DP83867_STRAP_STS1_RESERVED BIT(11)
+/* STRAP_STS2 bits */
+#define DP83867_STRAP_STS2_CLK_SKEW_TX_MASK GENMASK(6, 4)
+#define DP83867_STRAP_STS2_CLK_SKEW_TX_SHIFT 4
+#define DP83867_STRAP_STS2_CLK_SKEW_RX_MASK GENMASK(2, 0)
+#define DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT 0
+#define DP83867_STRAP_STS2_CLK_SKEW_NONE BIT(2)
+
/* PHY CTRL bits */
#define DP83867_PHYCR_FIFO_DEPTH_SHIFT 14
-#define DP83867_PHYCR_FIFO_DEPTH_MASK (3 << 14)
+#define DP83867_PHYCR_FIFO_DEPTH_MAX 0x03
+#define DP83867_PHYCR_FIFO_DEPTH_MASK GENMASK(15, 14)
#define DP83867_PHYCR_RESERVED_MASK BIT(11)
/* RGMIIDCTL bits */
+#define DP83867_RGMII_TX_CLK_DELAY_MAX 0xf
#define DP83867_RGMII_TX_CLK_DELAY_SHIFT 4
+#define DP83867_RGMII_RX_CLK_DELAY_MAX 0xf
+#define DP83867_RGMII_RX_CLK_DELAY_SHIFT 0
/* IO_MUX_CFG bits */
-#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL 0x1f
-
+#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MASK 0x1f
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX 0x0
#define DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN 0x1f
+#define DP83867_IO_MUX_CFG_CLK_O_DISABLE BIT(6)
#define DP83867_IO_MUX_CFG_CLK_O_SEL_MASK (0x1f << 8)
#define DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT 8
@@ -89,13 +101,14 @@ enum {
};
struct dp83867_private {
- int rx_id_delay;
- int tx_id_delay;
- int fifo_depth;
+ u32 rx_id_delay;
+ u32 tx_id_delay;
+ u32 fifo_depth;
int io_impedance;
int port_mirroring;
bool rxctrl_strap_quirk;
- int clk_output_sel;
+ bool set_clk_output;
+ u32 clk_output_sel;
};
static int dp83867_ack_interrupt(struct phy_device *phydev)
@@ -157,38 +170,83 @@ static int dp83867_of_init(struct phy_device *phydev)
if (!of_node)
return -ENODEV;
- dp83867->io_impedance = -EINVAL;
-
/* Optional configuration */
ret = of_property_read_u32(of_node, "ti,clk-output-sel",
&dp83867->clk_output_sel);
- if (ret || dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK)
- /* Keep the default value if ti,clk-output-sel is not set
- * or too high
+ /* If not set, keep default */
+ if (!ret) {
+ dp83867->set_clk_output = true;
+ /* Valid values are 0 to DP83867_CLK_O_SEL_REF_CLK or
+ * DP83867_CLK_O_SEL_OFF.
*/
- dp83867->clk_output_sel = DP83867_CLK_O_SEL_REF_CLK;
+ if (dp83867->clk_output_sel > DP83867_CLK_O_SEL_REF_CLK &&
+ dp83867->clk_output_sel != DP83867_CLK_O_SEL_OFF) {
+ phydev_err(phydev, "ti,clk-output-sel value %u out of range\n",
+ dp83867->clk_output_sel);
+ return -EINVAL;
+ }
+ }
if (of_property_read_bool(of_node, "ti,max-output-impedance"))
dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MAX;
else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
dp83867->io_impedance = DP83867_IO_MUX_CFG_IO_IMPEDANCE_MIN;
+ else
+ dp83867->io_impedance = -1; /* leave at default */
dp83867->rxctrl_strap_quirk = of_property_read_bool(of_node,
"ti,dp83867-rxctrl-strap-quirk");
- ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
- &dp83867->rx_id_delay);
- if (ret &&
- (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
- phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID))
- return ret;
+ /* Existing behavior was to use default pin strapping delay in rgmii
+ * mode, but rgmii should have meant no delay. Warn existing users.
+ */
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII) {
+ const u16 val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_STRAP_STS2);
+ const u16 txskew = (val & DP83867_STRAP_STS2_CLK_SKEW_TX_MASK) >>
+ DP83867_STRAP_STS2_CLK_SKEW_TX_SHIFT;
+ const u16 rxskew = (val & DP83867_STRAP_STS2_CLK_SKEW_RX_MASK) >>
+ DP83867_STRAP_STS2_CLK_SKEW_RX_SHIFT;
+
+ if (txskew != DP83867_STRAP_STS2_CLK_SKEW_NONE ||
+ rxskew != DP83867_STRAP_STS2_CLK_SKEW_NONE)
+ phydev_warn(phydev,
+ "PHY has delays via pin strapping, but phy-mode = 'rgmii'\n"
+ "Should be 'rgmii-id' to use internal delays\n");
+ }
- ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
- &dp83867->tx_id_delay);
- if (ret &&
- (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
- phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID))
- return ret;
+ /* RX delay *must* be specified if internal delay of RX is used. */
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) {
+ ret = of_property_read_u32(of_node, "ti,rx-internal-delay",
+ &dp83867->rx_id_delay);
+ if (ret) {
+ phydev_err(phydev, "ti,rx-internal-delay must be specified\n");
+ return ret;
+ }
+ if (dp83867->rx_id_delay > DP83867_RGMII_RX_CLK_DELAY_MAX) {
+ phydev_err(phydev,
+ "ti,rx-internal-delay value of %u out of range\n",
+ dp83867->rx_id_delay);
+ return -EINVAL;
+ }
+ }
+
+ /* TX delay *must* be specified if internal delay of RX is used. */
+ if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID ||
+ phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) {
+ ret = of_property_read_u32(of_node, "ti,tx-internal-delay",
+ &dp83867->tx_id_delay);
+ if (ret) {
+ phydev_err(phydev, "ti,tx-internal-delay must be specified\n");
+ return ret;
+ }
+ if (dp83867->tx_id_delay > DP83867_RGMII_TX_CLK_DELAY_MAX) {
+ phydev_err(phydev,
+ "ti,tx-internal-delay value of %u out of range\n",
+ dp83867->tx_id_delay);
+ return -EINVAL;
+ }
+ }
if (of_property_read_bool(of_node, "enet-phy-lane-swap"))
dp83867->port_mirroring = DP83867_PORT_MIRROING_EN;
@@ -196,8 +254,20 @@ static int dp83867_of_init(struct phy_device *phydev)
if (of_property_read_bool(of_node, "enet-phy-lane-no-swap"))
dp83867->port_mirroring = DP83867_PORT_MIRROING_DIS;
- return of_property_read_u32(of_node, "ti,fifo-depth",
+ ret = of_property_read_u32(of_node, "ti,fifo-depth",
&dp83867->fifo_depth);
+ if (ret) {
+ phydev_err(phydev,
+ "ti,fifo-depth property is required\n");
+ return ret;
+ }
+ if (dp83867->fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) {
+ phydev_err(phydev,
+ "ti,fifo-depth value %u out of range\n",
+ dp83867->fifo_depth);
+ return -EINVAL;
+ }
+ return 0;
}
#else
static int dp83867_of_init(struct phy_device *phydev)
@@ -206,25 +276,29 @@ static int dp83867_of_init(struct phy_device *phydev)
}
#endif /* CONFIG_OF_MDIO */
-static int dp83867_config_init(struct phy_device *phydev)
+static int dp83867_probe(struct phy_device *phydev)
{
struct dp83867_private *dp83867;
+
+ dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867),
+ GFP_KERNEL);
+ if (!dp83867)
+ return -ENOMEM;
+
+ phydev->priv = dp83867;
+
+ return 0;
+}
+
+static int dp83867_config_init(struct phy_device *phydev)
+{
+ struct dp83867_private *dp83867 = phydev->priv;
int ret, val, bs;
u16 delay;
- if (!phydev->priv) {
- dp83867 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83867),
- GFP_KERNEL);
- if (!dp83867)
- return -ENOMEM;
-
- phydev->priv = dp83867;
- ret = dp83867_of_init(phydev);
- if (ret)
- return ret;
- } else {
- dp83867 = (struct dp83867_private *)phydev->priv;
- }
+ ret = dp83867_of_init(phydev);
+ if (ret)
+ return ret;
/* RX_DV/RX_CTRL strapped in mode 1 or mode 2 workaround */
if (dp83867->rxctrl_strap_quirk)
@@ -256,9 +330,16 @@ static int dp83867_config_init(struct phy_device *phydev)
if (ret)
return ret;
- /* Set up RGMII delays */
+ /* If rgmii mode with no internal delay is selected, we do NOT use
+ * aligned mode as one might expect. Instead we use the PHY's default
+ * based on pin strapping. And the "mode 0" default is to *use*
+ * internal delay with a value of 7 (2.00 ns).
+ *
+ * Set up RGMII delays
+ */
val = phy_read_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIICTL);
+ val &= ~(DP83867_RGMII_TX_CLK_DELAY_EN | DP83867_RGMII_RX_CLK_DELAY_EN);
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
val |= (DP83867_RGMII_TX_CLK_DELAY_EN | DP83867_RGMII_RX_CLK_DELAY_EN);
@@ -275,14 +356,14 @@ static int dp83867_config_init(struct phy_device *phydev)
phy_write_mmd(phydev, DP83867_DEVADDR, DP83867_RGMIIDCTL,
delay);
-
- if (dp83867->io_impedance >= 0)
- phy_modify_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG,
- DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL,
- dp83867->io_impedance &
- DP83867_IO_MUX_CFG_IO_IMPEDANCE_CTRL);
}
+ /* If specified, set io impedance */
+ if (dp83867->io_impedance >= 0)
+ phy_modify_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG,
+ DP83867_IO_MUX_CFG_IO_IMPEDANCE_MASK,
+ dp83867->io_impedance);
+
if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
/* For support SPEED_10 in SGMII mode
* DP83867_10M_SGMII_RATE_ADAPT bit
@@ -321,11 +402,20 @@ static int dp83867_config_init(struct phy_device *phydev)
dp83867_config_port_mirroring(phydev);
/* Clock output selection if muxing property is set */
- if (dp83867->clk_output_sel != DP83867_CLK_O_SEL_REF_CLK)
+ if (dp83867->set_clk_output) {
+ u16 mask = DP83867_IO_MUX_CFG_CLK_O_DISABLE;
+
+ if (dp83867->clk_output_sel == DP83867_CLK_O_SEL_OFF) {
+ val = DP83867_IO_MUX_CFG_CLK_O_DISABLE;
+ } else {
+ mask |= DP83867_IO_MUX_CFG_CLK_O_SEL_MASK;
+ val = dp83867->clk_output_sel <<
+ DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT;
+ }
+
phy_modify_mmd(phydev, DP83867_DEVADDR, DP83867_IO_MUX_CFG,
- DP83867_IO_MUX_CFG_CLK_O_SEL_MASK,
- dp83867->clk_output_sel <<
- DP83867_IO_MUX_CFG_CLK_O_SEL_SHIFT);
+ mask, val);
+ }
return 0;
}
@@ -350,6 +440,7 @@ static struct phy_driver dp83867_driver[] = {
.name = "TI DP83867",
/* PHY_GBIT_FEATURES */
+ .probe = dp83867_probe,
.config_init = dp83867_config_init,
.soft_reset = dp83867_phy_reset,
diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c
index 314486288119..356bd6472f49 100644
--- a/drivers/net/phy/lxt.c
+++ b/drivers/net/phy/lxt.c
@@ -262,6 +262,8 @@ static struct phy_driver lxt97x_driver[] = {
/* PHY_BASIC_FEATURES */
.ack_interrupt = lxt971_ack_interrupt,
.config_intr = lxt971_config_intr,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
}, {
.phy_id = 0x00137a10,
.name = "LXT973-A2",
@@ -271,6 +273,8 @@ static struct phy_driver lxt97x_driver[] = {
.probe = lxt973_probe,
.config_aneg = lxt973_config_aneg,
.read_status = lxt973a2_read_status,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
}, {
.phy_id = 0x00137a10,
.name = "LXT973",
@@ -279,6 +283,8 @@ static struct phy_driver lxt97x_driver[] = {
.flags = 0,
.probe = lxt973_probe,
.config_aneg = lxt973_config_aneg,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
} };
module_phy_driver(lxt97x_driver);
diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
new file mode 100644
index 000000000000..b705d0bd798b
--- /dev/null
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+/* NXP TJA1100 BroadRReach PHY driver
+ *
+ * Copyright (C) 2018 Marek Vasut <marex@denx.de>
+ */
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/kernel.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+#include <linux/hwmon.h>
+#include <linux/bitfield.h>
+
+#define PHY_ID_MASK 0xfffffff0
+#define PHY_ID_TJA1100 0x0180dc40
+#define PHY_ID_TJA1101 0x0180dd00
+
+#define MII_ECTRL 17
+#define MII_ECTRL_LINK_CONTROL BIT(15)
+#define MII_ECTRL_POWER_MODE_MASK GENMASK(14, 11)
+#define MII_ECTRL_POWER_MODE_NO_CHANGE (0x0 << 11)
+#define MII_ECTRL_POWER_MODE_NORMAL (0x3 << 11)
+#define MII_ECTRL_POWER_MODE_STANDBY (0xc << 11)
+#define MII_ECTRL_CONFIG_EN BIT(2)
+#define MII_ECTRL_WAKE_REQUEST BIT(0)
+
+#define MII_CFG1 18
+#define MII_CFG1_AUTO_OP BIT(14)
+#define MII_CFG1_SLEEP_CONFIRM BIT(6)
+#define MII_CFG1_LED_MODE_MASK GENMASK(5, 4)
+#define MII_CFG1_LED_MODE_LINKUP 0
+#define MII_CFG1_LED_ENABLE BIT(3)
+
+#define MII_CFG2 19
+#define MII_CFG2_SLEEP_REQUEST_TO GENMASK(1, 0)
+#define MII_CFG2_SLEEP_REQUEST_TO_16MS 0x3
+
+#define MII_INTSRC 21
+#define MII_INTSRC_TEMP_ERR BIT(1)
+#define MII_INTSRC_UV_ERR BIT(3)
+
+#define MII_COMMSTAT 23
+#define MII_COMMSTAT_LINK_UP BIT(15)
+
+#define MII_GENSTAT 24
+#define MII_GENSTAT_PLL_LOCKED BIT(14)
+
+#define MII_COMMCFG 27
+#define MII_COMMCFG_AUTO_OP BIT(15)
+
+struct tja11xx_priv {
+ char *hwmon_name;
+ struct device *hwmon_dev;
+};
+
+struct tja11xx_phy_stats {
+ const char *string;
+ u8 reg;
+ u8 off;
+ u16 mask;
+};
+
+static struct tja11xx_phy_stats tja11xx_hw_stats[] = {
+ { "phy_symbol_error_count", 20, 0, GENMASK(15, 0) },
+ { "phy_polarity_detect", 25, 6, BIT(6) },
+ { "phy_open_detect", 25, 7, BIT(7) },
+ { "phy_short_detect", 25, 8, BIT(8) },
+ { "phy_rem_rcvr_count", 26, 0, GENMASK(7, 0) },
+ { "phy_loc_rcvr_count", 26, 8, GENMASK(15, 8) },
+};
+
+static int tja11xx_check(struct phy_device *phydev, u8 reg, u16 mask, u16 set)
+{
+ int i, ret;
+
+ for (i = 0; i < 200; i++) {
+ ret = phy_read(phydev, reg);
+ if (ret < 0)
+ return ret;
+
+ if ((ret & mask) == set)
+ return 0;
+
+ usleep_range(100, 150);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int phy_modify_check(struct phy_device *phydev, u8 reg,
+ u16 mask, u16 set)
+{
+ int ret;
+
+ ret = phy_modify(phydev, reg, mask, set);
+ if (ret)
+ return ret;
+
+ return tja11xx_check(phydev, reg, mask, set);
+}
+
+static int tja11xx_enable_reg_write(struct phy_device *phydev)
+{
+ return phy_set_bits(phydev, MII_ECTRL, MII_ECTRL_CONFIG_EN);
+}
+
+static int tja11xx_enable_link_control(struct phy_device *phydev)
+{
+ return phy_set_bits(phydev, MII_ECTRL, MII_ECTRL_LINK_CONTROL);
+}
+
+static int tja11xx_wakeup(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = phy_read(phydev, MII_ECTRL);
+ if (ret < 0)
+ return ret;
+
+ switch (ret & MII_ECTRL_POWER_MODE_MASK) {
+ case MII_ECTRL_POWER_MODE_NO_CHANGE:
+ break;
+ case MII_ECTRL_POWER_MODE_NORMAL:
+ ret = phy_set_bits(phydev, MII_ECTRL, MII_ECTRL_WAKE_REQUEST);
+ if (ret)
+ return ret;
+
+ ret = phy_clear_bits(phydev, MII_ECTRL, MII_ECTRL_WAKE_REQUEST);
+ if (ret)
+ return ret;
+ break;
+ case MII_ECTRL_POWER_MODE_STANDBY:
+ ret = phy_modify_check(phydev, MII_ECTRL,
+ MII_ECTRL_POWER_MODE_MASK,
+ MII_ECTRL_POWER_MODE_STANDBY);
+ if (ret)
+ return ret;
+
+ ret = phy_modify(phydev, MII_ECTRL, MII_ECTRL_POWER_MODE_MASK,
+ MII_ECTRL_POWER_MODE_NORMAL);
+ if (ret)
+ return ret;
+
+ ret = phy_modify_check(phydev, MII_GENSTAT,
+ MII_GENSTAT_PLL_LOCKED,
+ MII_GENSTAT_PLL_LOCKED);
+ if (ret)
+ return ret;
+
+ return tja11xx_enable_link_control(phydev);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int tja11xx_soft_reset(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = tja11xx_enable_reg_write(phydev);
+ if (ret)
+ return ret;
+
+ return genphy_soft_reset(phydev);
+}
+
+static int tja11xx_config_init(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = tja11xx_enable_reg_write(phydev);
+ if (ret)
+ return ret;
+
+ phydev->autoneg = AUTONEG_DISABLE;
+ phydev->speed = SPEED_100;
+ phydev->duplex = DUPLEX_FULL;
+
+ switch (phydev->phy_id & PHY_ID_MASK) {
+ case PHY_ID_TJA1100:
+ ret = phy_modify(phydev, MII_CFG1,
+ MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_MASK |
+ MII_CFG1_LED_ENABLE,
+ MII_CFG1_AUTO_OP | MII_CFG1_LED_MODE_LINKUP |
+ MII_CFG1_LED_ENABLE);
+ if (ret)
+ return ret;
+ break;
+ case PHY_ID_TJA1101:
+ ret = phy_set_bits(phydev, MII_COMMCFG, MII_COMMCFG_AUTO_OP);
+ if (ret)
+ return ret;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = phy_clear_bits(phydev, MII_CFG1, MII_CFG1_SLEEP_CONFIRM);
+ if (ret)
+ return ret;
+
+ ret = phy_modify(phydev, MII_CFG2, MII_CFG2_SLEEP_REQUEST_TO,
+ MII_CFG2_SLEEP_REQUEST_TO_16MS);
+ if (ret)
+ return ret;
+
+ ret = tja11xx_wakeup(phydev);
+ if (ret < 0)
+ return ret;
+
+ /* ACK interrupts by reading the status register */
+ ret = phy_read(phydev, MII_INTSRC);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int tja11xx_read_status(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_update_link(phydev);
+ if (ret)
+ return ret;
+
+ if (phydev->link) {
+ ret = phy_read(phydev, MII_COMMSTAT);
+ if (ret < 0)
+ return ret;
+
+ if (!(ret & MII_COMMSTAT_LINK_UP))
+ phydev->link = 0;
+ }
+
+ return 0;
+}
+
+static int tja11xx_get_sset_count(struct phy_device *phydev)
+{
+ return ARRAY_SIZE(tja11xx_hw_stats);
+}
+
+static void tja11xx_get_strings(struct phy_device *phydev, u8 *data)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tja11xx_hw_stats); i++) {
+ strncpy(data + i * ETH_GSTRING_LEN,
+ tja11xx_hw_stats[i].string, ETH_GSTRING_LEN);
+ }
+}
+
+static void tja11xx_get_stats(struct phy_device *phydev,
+ struct ethtool_stats *stats, u64 *data)
+{
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(tja11xx_hw_stats); i++) {
+ ret = phy_read(phydev, tja11xx_hw_stats[i].reg);
+ if (ret < 0)
+ data[i] = U64_MAX;
+ else {
+ data[i] = ret & tja11xx_hw_stats[i].mask;
+ data[i] >>= tja11xx_hw_stats[i].off;
+ }
+ }
+}
+
+static int tja11xx_hwmon_read(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, long *value)
+{
+ struct phy_device *phydev = dev_get_drvdata(dev);
+ int ret;
+
+ if (type == hwmon_in && attr == hwmon_in_lcrit_alarm) {
+ ret = phy_read(phydev, MII_INTSRC);
+ if (ret < 0)
+ return ret;
+
+ *value = !!(ret & MII_INTSRC_TEMP_ERR);
+ return 0;
+ }
+
+ if (type == hwmon_temp && attr == hwmon_temp_crit_alarm) {
+ ret = phy_read(phydev, MII_INTSRC);
+ if (ret < 0)
+ return ret;
+
+ *value = !!(ret & MII_INTSRC_UV_ERR);
+ return 0;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static umode_t tja11xx_hwmon_is_visible(const void *data,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ if (type == hwmon_in && attr == hwmon_in_lcrit_alarm)
+ return 0444;
+
+ if (type == hwmon_temp && attr == hwmon_temp_crit_alarm)
+ return 0444;
+
+ return 0;
+}
+
+static const struct hwmon_channel_info *tja11xx_hwmon_info[] = {
+ HWMON_CHANNEL_INFO(in, HWMON_I_LCRIT_ALARM),
+ HWMON_CHANNEL_INFO(temp, HWMON_T_CRIT_ALARM),
+ NULL
+};
+
+static const struct hwmon_ops tja11xx_hwmon_hwmon_ops = {
+ .is_visible = tja11xx_hwmon_is_visible,
+ .read = tja11xx_hwmon_read,
+};
+
+static const struct hwmon_chip_info tja11xx_hwmon_chip_info = {
+ .ops = &tja11xx_hwmon_hwmon_ops,
+ .info = tja11xx_hwmon_info,
+};
+
+static int tja11xx_probe(struct phy_device *phydev)
+{
+ struct device *dev = &phydev->mdio.dev;
+ struct tja11xx_priv *priv;
+ int i;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->hwmon_name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
+ if (!priv->hwmon_name)
+ return -ENOMEM;
+
+ for (i = 0; priv->hwmon_name[i]; i++)
+ if (hwmon_is_bad_char(priv->hwmon_name[i]))
+ priv->hwmon_name[i] = '_';
+
+ priv->hwmon_dev =
+ devm_hwmon_device_register_with_info(dev, priv->hwmon_name,
+ phydev,
+ &tja11xx_hwmon_chip_info,
+ NULL);
+
+ return PTR_ERR_OR_ZERO(priv->hwmon_dev);
+}
+
+static struct phy_driver tja11xx_driver[] = {
+ {
+ PHY_ID_MATCH_MODEL(PHY_ID_TJA1100),
+ .name = "NXP TJA1100",
+ .features = PHY_BASIC_T1_FEATURES,
+ .probe = tja11xx_probe,
+ .soft_reset = tja11xx_soft_reset,
+ .config_init = tja11xx_config_init,
+ .read_status = tja11xx_read_status,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .set_loopback = genphy_loopback,
+ /* Statistics */
+ .get_sset_count = tja11xx_get_sset_count,
+ .get_strings = tja11xx_get_strings,
+ .get_stats = tja11xx_get_stats,
+ }, {
+ PHY_ID_MATCH_MODEL(PHY_ID_TJA1101),
+ .name = "NXP TJA1101",
+ .features = PHY_BASIC_T1_FEATURES,
+ .probe = tja11xx_probe,
+ .soft_reset = tja11xx_soft_reset,
+ .config_init = tja11xx_config_init,
+ .read_status = tja11xx_read_status,
+ .suspend = genphy_suspend,
+ .resume = genphy_resume,
+ .set_loopback = genphy_loopback,
+ /* Statistics */
+ .get_sset_count = tja11xx_get_sset_count,
+ .get_strings = tja11xx_get_strings,
+ .get_stats = tja11xx_get_stats,
+ }
+};
+
+module_phy_driver(tja11xx_driver);
+
+static struct mdio_device_id __maybe_unused tja11xx_tbl[] = {
+ { PHY_ID_MATCH_MODEL(PHY_ID_TJA1100) },
+ { PHY_ID_MATCH_MODEL(PHY_ID_TJA1101) },
+ { }
+};
+
+MODULE_DEVICE_TABLE(mdio, tja11xx_tbl);
+
+MODULE_AUTHOR("Marek Vasut <marex@denx.de>");
+MODULE_DESCRIPTION("NXP TJA11xx BoardR-Reach PHY driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 3daf0214a242..16667fbac8bf 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -8,7 +8,7 @@
const char *phy_speed_to_str(int speed)
{
- BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 67,
+ BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 69,
"Enum ethtool_link_mode_bit_indices and phylib are out of sync. "
"If a speed or mode has been added please update phy_speed_to_str "
"and the PHY settings array.\n");
@@ -131,9 +131,11 @@ static const struct phy_setting settings[] = {
PHY_SETTING( 1000, FULL, 1000baseKX_Full ),
PHY_SETTING( 1000, FULL, 1000baseT_Full ),
PHY_SETTING( 1000, HALF, 1000baseT_Half ),
+ PHY_SETTING( 1000, FULL, 1000baseT1_Full ),
PHY_SETTING( 1000, FULL, 1000baseX_Full ),
/* 100M */
PHY_SETTING( 100, FULL, 100baseT_Full ),
+ PHY_SETTING( 100, FULL, 100baseT1_Full ),
PHY_SETTING( 100, HALF, 100baseT_Half ),
/* 10M */
PHY_SETTING( 10, FULL, 10baseT_Full ),
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index e8885429293a..84671d868a80 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -29,6 +29,8 @@
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#define PHY_STATE_TIME HZ
+
#define PHY_STATE_STR(_state) \
case PHY_##_state: \
return __stringify(_state); \
@@ -478,12 +480,12 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
}
EXPORT_SYMBOL(phy_mii_ioctl);
-static void phy_queue_state_machine(struct phy_device *phydev,
- unsigned int secs)
+void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies)
{
mod_delayed_work(system_power_efficient_wq, &phydev->state_queue,
- secs * HZ);
+ jiffies);
}
+EXPORT_SYMBOL(phy_queue_state_machine);
static void phy_trigger_machine(struct phy_device *phydev)
{
@@ -772,8 +774,13 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat)
if (phydev->drv->did_interrupt && !phydev->drv->did_interrupt(phydev))
return IRQ_NONE;
- /* reschedule state queue work to run as soon as possible */
- phy_trigger_machine(phydev);
+ if (phydev->drv->handle_interrupt) {
+ if (phydev->drv->handle_interrupt(phydev))
+ goto phy_err;
+ } else {
+ /* reschedule state queue work to run as soon as possible */
+ phy_trigger_machine(phydev);
+ }
if (phy_clear_interrupt(phydev))
goto phy_err;
@@ -799,10 +806,10 @@ static int phy_enable_interrupts(struct phy_device *phydev)
}
/**
- * phy_request_interrupt - request interrupt for a PHY device
+ * phy_request_interrupt - request and enable interrupt for a PHY device
* @phydev: target phy_device struct
*
- * Description: Request the interrupt for the given PHY.
+ * Description: Request and enable the interrupt for the given PHY.
* If this fails, then we set irq to PHY_POLL.
* This should only be called with a valid IRQ number.
*/
@@ -817,11 +824,31 @@ void phy_request_interrupt(struct phy_device *phydev)
phydev_warn(phydev, "Error %d requesting IRQ %d, falling back to polling\n",
err, phydev->irq);
phydev->irq = PHY_POLL;
+ } else {
+ if (phy_enable_interrupts(phydev)) {
+ phydev_warn(phydev, "Can't enable interrupt, falling back to polling\n");
+ phy_free_interrupt(phydev);
+ phydev->irq = PHY_POLL;
+ }
}
}
EXPORT_SYMBOL(phy_request_interrupt);
/**
+ * phy_free_interrupt - disable and free interrupt for a PHY device
+ * @phydev: target phy_device struct
+ *
+ * Description: Disable and free the interrupt for the given PHY.
+ * This should only be called with a valid IRQ number.
+ */
+void phy_free_interrupt(struct phy_device *phydev)
+{
+ phy_disable_interrupts(phydev);
+ free_irq(phydev->irq, phydev);
+}
+EXPORT_SYMBOL(phy_free_interrupt);
+
+/**
* phy_stop - Bring down the PHY link, and stop checking the status
* @phydev: target phy_device struct
*/
@@ -835,9 +862,6 @@ void phy_stop(struct phy_device *phydev)
mutex_lock(&phydev->lock);
- if (phy_interrupt_is_valid(phydev))
- phy_disable_interrupts(phydev);
-
phydev->state = PHY_HALTED;
mutex_unlock(&phydev->lock);
@@ -864,8 +888,6 @@ EXPORT_SYMBOL(phy_stop);
*/
void phy_start(struct phy_device *phydev)
{
- int err;
-
mutex_lock(&phydev->lock);
if (phydev->state != PHY_READY && phydev->state != PHY_HALTED) {
@@ -877,13 +899,6 @@ void phy_start(struct phy_device *phydev)
/* if phy was suspended, bring the physical link up again */
__phy_resume(phydev);
- /* make sure interrupts are enabled for the PHY */
- if (phy_interrupt_is_valid(phydev)) {
- err = phy_enable_interrupts(phydev);
- if (err < 0)
- goto out;
- }
-
phydev->state = PHY_UP;
phy_start_machine(phydev);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index dcc93a873174..2c879ba01f35 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(phy_10_100_features_array);
const int phy_basic_t1_features_array[2] = {
ETHTOOL_LINK_MODE_TP_BIT,
- ETHTOOL_LINK_MODE_100baseT_Full_BIT,
+ ETHTOOL_LINK_MODE_100baseT1_Full_BIT,
};
EXPORT_SYMBOL_GPL(phy_basic_t1_features_array);
@@ -948,6 +948,9 @@ int phy_connect_direct(struct net_device *dev, struct phy_device *phydev,
{
int rc;
+ if (!dev)
+ return -EINVAL;
+
rc = phy_attach_direct(dev, phydev, phydev->dev_flags, interface);
if (rc)
return rc;
@@ -1013,7 +1016,7 @@ void phy_disconnect(struct phy_device *phydev)
phy_stop(phydev);
if (phy_interrupt_is_valid(phydev))
- free_irq(phydev->irq, phydev);
+ phy_free_interrupt(phydev);
phydev->adjust_link = NULL;
@@ -1133,6 +1136,44 @@ void phy_attached_print(struct phy_device *phydev, const char *fmt, ...)
}
EXPORT_SYMBOL(phy_attached_print);
+static void phy_sysfs_create_links(struct phy_device *phydev)
+{
+ struct net_device *dev = phydev->attached_dev;
+ int err;
+
+ if (!dev)
+ return;
+
+ err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
+ "attached_dev");
+ if (err)
+ return;
+
+ err = sysfs_create_link_nowarn(&dev->dev.kobj,
+ &phydev->mdio.dev.kobj,
+ "phydev");
+ if (err) {
+ dev_err(&dev->dev, "could not add device link to %s err %d\n",
+ kobject_name(&phydev->mdio.dev.kobj),
+ err);
+ /* non-fatal - some net drivers can use one netdevice
+ * with more then one phy
+ */
+ }
+
+ phydev->sysfs_links = true;
+}
+
+static ssize_t
+phy_standalone_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct phy_device *phydev = to_phy_device(dev);
+
+ return sprintf(buf, "%d\n", !phydev->attached_dev);
+}
+static DEVICE_ATTR_RO(phy_standalone);
+
/**
* phy_attach_direct - attach a network device to a given PHY device pointer
* @dev: network device to attach
@@ -1151,9 +1192,9 @@ EXPORT_SYMBOL(phy_attached_print);
int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
u32 flags, phy_interface_t interface)
{
- struct module *ndev_owner = dev->dev.parent->driver->owner;
struct mii_bus *bus = phydev->mdio.bus;
struct device *d = &phydev->mdio.dev;
+ struct module *ndev_owner = NULL;
bool using_genphy = false;
int err;
@@ -1162,8 +1203,10 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
* our own module->refcnt here, otherwise we would not be able to
* unload later on.
*/
+ if (dev)
+ ndev_owner = dev->dev.parent->driver->owner;
if (ndev_owner != bus->owner && !try_module_get(bus->owner)) {
- dev_err(&dev->dev, "failed to get the bus module\n");
+ phydev_err(phydev, "failed to get the bus module\n");
return -EIO;
}
@@ -1182,7 +1225,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
}
if (!try_module_get(d->driver->owner)) {
- dev_err(&dev->dev, "failed to get the device driver module\n");
+ phydev_err(phydev, "failed to get the device driver module\n");
err = -EIO;
goto error_put_device;
}
@@ -1203,8 +1246,10 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
}
phydev->phy_link_change = phy_link_change;
- phydev->attached_dev = dev;
- dev->phydev = phydev;
+ if (dev) {
+ phydev->attached_dev = dev;
+ dev->phydev = phydev;
+ }
/* Some Ethernet drivers try to connect to a PHY device before
* calling register_netdevice() -> netdev_register_kobject() and
@@ -1216,22 +1261,13 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
*/
phydev->sysfs_links = false;
- err = sysfs_create_link(&phydev->mdio.dev.kobj, &dev->dev.kobj,
- "attached_dev");
- if (!err) {
- err = sysfs_create_link_nowarn(&dev->dev.kobj,
- &phydev->mdio.dev.kobj,
- "phydev");
- if (err) {
- dev_err(&dev->dev, "could not add device link to %s err %d\n",
- kobject_name(&phydev->mdio.dev.kobj),
- err);
- /* non-fatal - some net drivers can use one netdevice
- * with more then one phy
- */
- }
+ phy_sysfs_create_links(phydev);
- phydev->sysfs_links = true;
+ if (!phydev->attached_dev) {
+ err = sysfs_create_file(&phydev->mdio.dev.kobj,
+ &dev_attr_phy_standalone.attr);
+ if (err)
+ phydev_err(phydev, "error creating 'phy_standalone' sysfs entry\n");
}
phydev->dev_flags = flags;
@@ -1243,7 +1279,8 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev,
/* Initial carrier state is off as the phy is about to be
* (re)initialized.
*/
- netif_carrier_off(phydev->attached_dev);
+ if (dev)
+ netif_carrier_off(phydev->attached_dev);
/* Do initial configuration here, now that
* we have certain key parameters
@@ -1290,6 +1327,9 @@ struct phy_device *phy_attach(struct net_device *dev, const char *bus_id,
struct device *d;
int rc;
+ if (!dev)
+ return ERR_PTR(-EINVAL);
+
/* Search the list of PHY devices on the mdio bus for the
* PHY with the requested name
*/
@@ -1349,16 +1389,24 @@ EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g);
void phy_detach(struct phy_device *phydev)
{
struct net_device *dev = phydev->attached_dev;
- struct module *ndev_owner = dev->dev.parent->driver->owner;
+ struct module *ndev_owner = NULL;
struct mii_bus *bus;
if (phydev->sysfs_links) {
- sysfs_remove_link(&dev->dev.kobj, "phydev");
+ if (dev)
+ sysfs_remove_link(&dev->dev.kobj, "phydev");
sysfs_remove_link(&phydev->mdio.dev.kobj, "attached_dev");
}
+
+ if (!phydev->attached_dev)
+ sysfs_remove_file(&phydev->mdio.dev.kobj,
+ &dev_attr_phy_standalone.attr);
+
phy_suspend(phydev);
- phydev->attached_dev->phydev = NULL;
- phydev->attached_dev = NULL;
+ if (dev) {
+ phydev->attached_dev->phydev = NULL;
+ phydev->attached_dev = NULL;
+ }
phydev->phylink = NULL;
phy_led_triggers_unregister(phydev);
@@ -1381,6 +1429,8 @@ void phy_detach(struct phy_device *phydev)
bus = phydev->mdio.bus;
put_device(&phydev->mdio.dev);
+ if (dev)
+ ndev_owner = dev->dev.parent->driver->owner;
if (ndev_owner != bus->owner)
module_put(bus->owner);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 9044b95d2afe..b7300ea2663c 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -41,6 +41,9 @@ struct phylink {
/* private: */
struct net_device *netdev;
const struct phylink_mac_ops *ops;
+ struct phylink_config *config;
+ struct device *dev;
+ unsigned int old_link_state:1;
unsigned long phylink_disable_state; /* bitmask of disables */
struct phy_device *phydev;
@@ -69,6 +72,23 @@ struct phylink {
struct sfp_bus *sfp_bus;
};
+#define phylink_printk(level, pl, fmt, ...) \
+ do { \
+ if ((pl)->config->type == PHYLINK_NETDEV) \
+ netdev_printk(level, (pl)->netdev, fmt, ##__VA_ARGS__); \
+ else if ((pl)->config->type == PHYLINK_DEV) \
+ dev_printk(level, (pl)->dev, fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define phylink_err(pl, fmt, ...) \
+ phylink_printk(KERN_ERR, pl, fmt, ##__VA_ARGS__)
+#define phylink_warn(pl, fmt, ...) \
+ phylink_printk(KERN_WARNING, pl, fmt, ##__VA_ARGS__)
+#define phylink_info(pl, fmt, ...) \
+ phylink_printk(KERN_INFO, pl, fmt, ##__VA_ARGS__)
+#define phylink_dbg(pl, fmt, ...) \
+ phylink_printk(KERN_DEBUG, pl, fmt, ##__VA_ARGS__)
+
/**
* phylink_set_port_modes() - set the port type modes in the ethtool mask
* @mask: ethtool link mode mask
@@ -115,7 +135,7 @@ static const char *phylink_an_mode_str(unsigned int mode)
static int phylink_validate(struct phylink *pl, unsigned long *supported,
struct phylink_link_state *state)
{
- pl->ops->validate(pl->netdev, supported, state);
+ pl->ops->validate(pl->config, supported, state);
return phylink_is_empty_linkmode(supported) ? -EINVAL : 0;
}
@@ -165,7 +185,7 @@ static int phylink_parse_fixedlink(struct phylink *pl,
ret = fwnode_property_read_u32_array(fwnode, "fixed-link",
NULL, 0);
if (ret != ARRAY_SIZE(prop)) {
- netdev_err(pl->netdev, "broken fixed-link?\n");
+ phylink_err(pl, "broken fixed-link?\n");
return -EINVAL;
}
@@ -184,8 +204,8 @@ static int phylink_parse_fixedlink(struct phylink *pl,
if (pl->link_config.speed > SPEED_1000 &&
pl->link_config.duplex != DUPLEX_FULL)
- netdev_warn(pl->netdev, "fixed link specifies half duplex for %dMbps link?\n",
- pl->link_config.speed);
+ phylink_warn(pl, "fixed link specifies half duplex for %dMbps link?\n",
+ pl->link_config.speed);
bitmap_fill(pl->supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
linkmode_copy(pl->link_config.advertising, pl->supported);
@@ -198,9 +218,9 @@ static int phylink_parse_fixedlink(struct phylink *pl,
if (s) {
__set_bit(s->bit, pl->supported);
} else {
- netdev_warn(pl->netdev, "fixed link %s duplex %dMbps not recognised\n",
- pl->link_config.duplex == DUPLEX_FULL ? "full" : "half",
- pl->link_config.speed);
+ phylink_warn(pl, "fixed link %s duplex %dMbps not recognised\n",
+ pl->link_config.duplex == DUPLEX_FULL ? "full" : "half",
+ pl->link_config.speed);
}
linkmode_and(pl->link_config.advertising, pl->link_config.advertising,
@@ -225,8 +245,8 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 &&
strcmp(managed, "in-band-status") == 0) {
if (pl->link_an_mode == MLO_AN_FIXED) {
- netdev_err(pl->netdev,
- "can't use both fixed-link and in-band-status\n");
+ phylink_err(pl,
+ "can't use both fixed-link and in-band-status\n");
return -EINVAL;
}
@@ -273,17 +293,17 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
break;
default:
- netdev_err(pl->netdev,
- "incorrect link mode %s for in-band status\n",
- phy_modes(pl->link_config.interface));
+ phylink_err(pl,
+ "incorrect link mode %s for in-band status\n",
+ phy_modes(pl->link_config.interface));
return -EINVAL;
}
linkmode_copy(pl->link_config.advertising, pl->supported);
if (phylink_validate(pl, pl->supported, &pl->link_config)) {
- netdev_err(pl->netdev,
- "failed to validate link configuration for in-band status\n");
+ phylink_err(pl,
+ "failed to validate link configuration for in-band status\n");
return -EINVAL;
}
}
@@ -294,16 +314,16 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
static void phylink_mac_config(struct phylink *pl,
const struct phylink_link_state *state)
{
- netdev_dbg(pl->netdev,
- "%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n",
- __func__, phylink_an_mode_str(pl->link_an_mode),
- phy_modes(state->interface),
- phy_speed_to_str(state->speed),
- phy_duplex_to_str(state->duplex),
- __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising,
- state->pause, state->link, state->an_enabled);
-
- pl->ops->mac_config(pl->netdev, pl->link_an_mode, state);
+ phylink_dbg(pl,
+ "%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n",
+ __func__, phylink_an_mode_str(pl->link_an_mode),
+ phy_modes(state->interface),
+ phy_speed_to_str(state->speed),
+ phy_duplex_to_str(state->duplex),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising,
+ state->pause, state->link, state->an_enabled);
+
+ pl->ops->mac_config(pl->config, pl->link_an_mode, state);
}
static void phylink_mac_config_up(struct phylink *pl,
@@ -317,12 +337,11 @@ static void phylink_mac_an_restart(struct phylink *pl)
{
if (pl->link_config.an_enabled &&
phy_interface_mode_is_8023z(pl->link_config.interface))
- pl->ops->mac_an_restart(pl->netdev);
+ pl->ops->mac_an_restart(pl->config);
}
static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *state)
{
- struct net_device *ndev = pl->netdev;
linkmode_copy(state->advertising, pl->link_config.advertising);
linkmode_zero(state->lp_advertising);
@@ -334,7 +353,7 @@ static int phylink_get_mac_state(struct phylink *pl, struct phylink_link_state *
state->an_complete = 0;
state->link = 1;
- return pl->ops->mac_link_state(ndev, state);
+ return pl->ops->mac_link_state(pl->config, state);
}
/* The fixed state is... fixed except for the link state,
@@ -399,11 +418,43 @@ static const char *phylink_pause_to_str(int pause)
}
}
+static void phylink_mac_link_up(struct phylink *pl,
+ struct phylink_link_state link_state)
+{
+ struct net_device *ndev = pl->netdev;
+
+ pl->cur_interface = link_state.interface;
+ pl->ops->mac_link_up(pl->config, pl->link_an_mode,
+ pl->phy_state.interface,
+ pl->phydev);
+
+ if (ndev)
+ netif_carrier_on(ndev);
+
+ phylink_info(pl,
+ "Link is Up - %s/%s - flow control %s\n",
+ phy_speed_to_str(link_state.speed),
+ phy_duplex_to_str(link_state.duplex),
+ phylink_pause_to_str(link_state.pause));
+}
+
+static void phylink_mac_link_down(struct phylink *pl)
+{
+ struct net_device *ndev = pl->netdev;
+
+ if (ndev)
+ netif_carrier_off(ndev);
+ pl->ops->mac_link_down(pl->config, pl->link_an_mode,
+ pl->cur_interface);
+ phylink_info(pl, "Link is Down\n");
+}
+
static void phylink_resolve(struct work_struct *w)
{
struct phylink *pl = container_of(w, struct phylink, resolve);
struct phylink_link_state link_state;
struct net_device *ndev = pl->netdev;
+ int link_changed;
mutex_lock(&pl->state_mutex);
if (pl->phylink_disable_state) {
@@ -446,25 +497,17 @@ static void phylink_resolve(struct work_struct *w)
}
}
- if (link_state.link != netif_carrier_ok(ndev)) {
- if (!link_state.link) {
- netif_carrier_off(ndev);
- pl->ops->mac_link_down(ndev, pl->link_an_mode,
- pl->cur_interface);
- netdev_info(ndev, "Link is Down\n");
- } else {
- pl->cur_interface = link_state.interface;
- pl->ops->mac_link_up(ndev, pl->link_an_mode,
- pl->cur_interface, pl->phydev);
-
- netif_carrier_on(ndev);
-
- netdev_info(ndev,
- "Link is Up - %s/%s - flow control %s\n",
- phy_speed_to_str(link_state.speed),
- phy_duplex_to_str(link_state.duplex),
- phylink_pause_to_str(link_state.pause));
- }
+ if (pl->netdev)
+ link_changed = (link_state.link != netif_carrier_ok(ndev));
+ else
+ link_changed = (link_state.link != pl->old_link_state);
+
+ if (link_changed) {
+ pl->old_link_state = link_state.link;
+ if (!link_state.link)
+ phylink_mac_link_down(pl);
+ else
+ phylink_mac_link_up(pl, link_state);
}
if (!link_state.link && pl->mac_link_dropped) {
pl->mac_link_dropped = false;
@@ -516,8 +559,8 @@ static int phylink_register_sfp(struct phylink *pl,
if (ret == -ENOENT)
return 0;
- netdev_err(pl->netdev, "unable to parse \"sfp\" node: %d\n",
- ret);
+ phylink_err(pl, "unable to parse \"sfp\" node: %d\n",
+ ret);
return ret;
}
@@ -543,7 +586,7 @@ static int phylink_register_sfp(struct phylink *pl,
* Returns a pointer to a &struct phylink, or an error-pointer value. Users
* must use IS_ERR() to check for errors from this function.
*/
-struct phylink *phylink_create(struct net_device *ndev,
+struct phylink *phylink_create(struct phylink_config *config,
struct fwnode_handle *fwnode,
phy_interface_t iface,
const struct phylink_mac_ops *ops)
@@ -557,7 +600,17 @@ struct phylink *phylink_create(struct net_device *ndev,
mutex_init(&pl->state_mutex);
INIT_WORK(&pl->resolve, phylink_resolve);
- pl->netdev = ndev;
+
+ pl->config = config;
+ if (config->type == PHYLINK_NETDEV) {
+ pl->netdev = to_net_dev(config->dev);
+ } else if (config->type == PHYLINK_DEV) {
+ pl->dev = config->dev;
+ } else {
+ kfree(pl);
+ return ERR_PTR(-EINVAL);
+ }
+
pl->phy_state.interface = iface;
pl->link_interface = iface;
if (iface == PHY_INTERFACE_MODE_MOCA)
@@ -639,10 +692,10 @@ static void phylink_phy_change(struct phy_device *phydev, bool up,
phylink_run_resolve(pl);
- netdev_dbg(pl->netdev, "phy link %s %s/%s/%s\n", up ? "up" : "down",
- phy_modes(phydev->interface),
- phy_speed_to_str(phydev->speed),
- phy_duplex_to_str(phydev->duplex));
+ phylink_dbg(pl, "phy link %s %s/%s/%s\n", up ? "up" : "down",
+ phy_modes(phydev->interface),
+ phy_speed_to_str(phydev->speed),
+ phy_duplex_to_str(phydev->duplex));
}
static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
@@ -675,9 +728,9 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
phy->phylink = pl;
phy->phy_link_change = phylink_phy_change;
- netdev_info(pl->netdev,
- "PHY [%s] driver [%s]\n", dev_name(&phy->mdio.dev),
- phy->drv->name);
+ phylink_info(pl,
+ "PHY [%s] driver [%s]\n", dev_name(&phy->mdio.dev),
+ phy->drv->name);
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
@@ -690,10 +743,10 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
mutex_unlock(&pl->state_mutex);
mutex_unlock(&phy->lock);
- netdev_dbg(pl->netdev,
- "phy: setting supported %*pb advertising %*pb\n",
- __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported,
- __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising);
+ phylink_dbg(pl,
+ "phy: setting supported %*pb advertising %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, pl->supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, phy->advertising);
if (phy_interrupt_is_valid(phy))
phy_request_interrupt(phy);
@@ -871,7 +924,7 @@ void phylink_mac_change(struct phylink *pl, bool up)
if (!up)
pl->mac_link_dropped = true;
phylink_run_resolve(pl);
- netdev_dbg(pl->netdev, "mac link %s\n", up ? "up" : "down");
+ phylink_dbg(pl, "mac link %s\n", up ? "up" : "down");
}
EXPORT_SYMBOL_GPL(phylink_mac_change);
@@ -887,12 +940,13 @@ void phylink_start(struct phylink *pl)
{
ASSERT_RTNL();
- netdev_info(pl->netdev, "configuring for %s/%s link mode\n",
- phylink_an_mode_str(pl->link_an_mode),
- phy_modes(pl->link_config.interface));
+ phylink_info(pl, "configuring for %s/%s link mode\n",
+ phylink_an_mode_str(pl->link_an_mode),
+ phy_modes(pl->link_config.interface));
/* Always set the carrier off */
- netif_carrier_off(pl->netdev);
+ if (pl->netdev)
+ netif_carrier_off(pl->netdev);
/* Apply the link configuration to the MAC when starting. This allows
* a fixed-link to start with the correct parameters, and also
@@ -1237,7 +1291,8 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
switch (pl->link_an_mode) {
case MLO_AN_PHY:
/* Silently mark the carrier down, and then trigger a resolve */
- netif_carrier_off(pl->netdev);
+ if (pl->netdev)
+ netif_carrier_off(pl->netdev);
phylink_run_resolve(pl);
break;
@@ -1598,33 +1653,33 @@ static int phylink_sfp_module_insert(void *upstream,
/* Ignore errors if we're expecting a PHY to attach later */
ret = phylink_validate(pl, support, &config);
if (ret) {
- netdev_err(pl->netdev, "validation with support %*pb failed: %d\n",
- __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ phylink_err(pl, "validation with support %*pb failed: %d\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
return ret;
}
iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
if (iface == PHY_INTERFACE_MODE_NA) {
- netdev_err(pl->netdev,
- "selection of interface failed, advertisement %*pb\n",
- __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
+ phylink_err(pl,
+ "selection of interface failed, advertisement %*pb\n",
+ __ETHTOOL_LINK_MODE_MASK_NBITS, config.advertising);
return -EINVAL;
}
config.interface = iface;
ret = phylink_validate(pl, support, &config);
if (ret) {
- netdev_err(pl->netdev, "validation of %s/%s with support %*pb failed: %d\n",
- phylink_an_mode_str(MLO_AN_INBAND),
- phy_modes(config.interface),
- __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
+ phylink_err(pl, "validation of %s/%s with support %*pb failed: %d\n",
+ phylink_an_mode_str(MLO_AN_INBAND),
+ phy_modes(config.interface),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
return ret;
}
- netdev_dbg(pl->netdev, "requesting link mode %s/%s with support %*pb\n",
- phylink_an_mode_str(MLO_AN_INBAND),
- phy_modes(config.interface),
- __ETHTOOL_LINK_MODE_MASK_NBITS, support);
+ phylink_dbg(pl, "requesting link mode %s/%s with support %*pb\n",
+ phylink_an_mode_str(MLO_AN_INBAND),
+ phy_modes(config.interface),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, support);
if (phy_interface_mode_is_8023z(iface) && pl->phydev)
return -EINVAL;
@@ -1643,9 +1698,9 @@ static int phylink_sfp_module_insert(void *upstream,
changed = true;
- netdev_info(pl->netdev, "switched to %s/%s link mode\n",
- phylink_an_mode_str(MLO_AN_INBAND),
- phy_modes(config.interface));
+ phylink_info(pl, "switched to %s/%s link mode\n",
+ phylink_an_mode_str(MLO_AN_INBAND),
+ phy_modes(config.interface));
}
pl->link_port = port;
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index d4635c2178d1..554acc869c25 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/acpi.h>
#include <linux/ctype.h>
#include <linux/delay.h>
#include <linux/gpio/consumer.h>
@@ -1782,6 +1783,7 @@ static void sfp_cleanup(void *data)
static int sfp_probe(struct platform_device *pdev)
{
const struct sff_data *sff;
+ struct i2c_adapter *i2c;
struct sfp *sfp;
bool poll = false;
int irq, err, i;
@@ -1801,7 +1803,6 @@ static int sfp_probe(struct platform_device *pdev)
if (pdev->dev.of_node) {
struct device_node *node = pdev->dev.of_node;
const struct of_device_id *id;
- struct i2c_adapter *i2c;
struct device_node *np;
id = of_match_node(sfp_of_match, node);
@@ -1818,14 +1819,32 @@ static int sfp_probe(struct platform_device *pdev)
i2c = of_find_i2c_adapter_by_node(np);
of_node_put(np);
- if (!i2c)
- return -EPROBE_DEFER;
-
- err = sfp_i2c_configure(sfp, i2c);
- if (err < 0) {
- i2c_put_adapter(i2c);
- return err;
+ } else if (has_acpi_companion(&pdev->dev)) {
+ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+ struct fwnode_handle *fw = acpi_fwnode_handle(adev);
+ struct fwnode_reference_args args;
+ struct acpi_handle *acpi_handle;
+ int ret;
+
+ ret = acpi_node_get_property_reference(fw, "i2c-bus", 0, &args);
+ if (ACPI_FAILURE(ret) || !is_acpi_device_node(args.fwnode)) {
+ dev_err(&pdev->dev, "missing 'i2c-bus' property\n");
+ return -ENODEV;
}
+
+ acpi_handle = ACPI_HANDLE_FWNODE(args.fwnode);
+ i2c = i2c_acpi_find_adapter_by_handle(acpi_handle);
+ } else {
+ return -EINVAL;
+ }
+
+ if (!i2c)
+ return -EPROBE_DEFER;
+
+ err = sfp_i2c_configure(sfp, i2c);
+ if (err < 0) {
+ i2c_put_adapter(i2c);
+ return err;
}
for (i = 0; i < GPIO_MAX; i++)
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 5b07235497c6..47e40859960b 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2218,16 +2218,21 @@ static void qedf_simd_int_handler(void *cookie)
static void qedf_sync_free_irqs(struct qedf_ctx *qedf)
{
int i;
+ u16 vector_idx = 0;
+ u32 vector;
if (qedf->int_info.msix_cnt) {
for (i = 0; i < qedf->int_info.used_cnt; i++) {
- synchronize_irq(qedf->int_info.msix[i].vector);
- irq_set_affinity_hint(qedf->int_info.msix[i].vector,
- NULL);
- irq_set_affinity_notifier(qedf->int_info.msix[i].vector,
- NULL);
- free_irq(qedf->int_info.msix[i].vector,
- &qedf->fp_array[i]);
+ vector_idx = i * qedf->dev_info.common.num_hwfns +
+ qed_ops->common->get_affin_hwfn_idx(qedf->cdev);
+ QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+ "Freeing IRQ #%d vector_idx=%d.\n",
+ i, vector_idx);
+ vector = qedf->int_info.msix[vector_idx].vector;
+ synchronize_irq(vector);
+ irq_set_affinity_hint(vector, NULL);
+ irq_set_affinity_notifier(vector, NULL);
+ free_irq(vector, &qedf->fp_array[i]);
}
} else
qed_ops->common->simd_handler_clean(qedf->cdev,
@@ -2240,11 +2245,19 @@ static void qedf_sync_free_irqs(struct qedf_ctx *qedf)
static int qedf_request_msix_irq(struct qedf_ctx *qedf)
{
int i, rc, cpu;
+ u16 vector_idx = 0;
+ u32 vector;
cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < qedf->num_queues; i++) {
- rc = request_irq(qedf->int_info.msix[i].vector,
- qedf_msix_handler, 0, "qedf", &qedf->fp_array[i]);
+ vector_idx = i * qedf->dev_info.common.num_hwfns +
+ qed_ops->common->get_affin_hwfn_idx(qedf->cdev);
+ QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+ "Requesting IRQ #%d vector_idx=%d.\n",
+ i, vector_idx);
+ vector = qedf->int_info.msix[vector_idx].vector;
+ rc = request_irq(vector, qedf_msix_handler, 0, "qedf",
+ &qedf->fp_array[i]);
if (rc) {
QEDF_WARN(&(qedf->dbg_ctx), "request_irq failed.\n");
@@ -2253,8 +2266,7 @@ static int qedf_request_msix_irq(struct qedf_ctx *qedf)
}
qedf->int_info.used_cnt++;
- rc = irq_set_affinity_hint(qedf->int_info.msix[i].vector,
- get_cpu_mask(cpu));
+ rc = irq_set_affinity_hint(vector, get_cpu_mask(cpu));
cpu = cpumask_next(cpu, cpu_online_mask);
}
@@ -3211,6 +3223,11 @@ static int __qedf_probe(struct pci_dev *pdev, int mode)
goto err1;
}
+ QEDF_INFO(&qedf->dbg_ctx, QEDF_LOG_DISC,
+ "dev_info: num_hwfns=%d affin_hwfn_idx=%d.\n",
+ qedf->dev_info.common.num_hwfns,
+ qed_ops->common->get_affin_hwfn_idx(qedf->cdev));
+
/* queue allocation code should come here
* order should be
* slowpath_start
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index e5db9a9954dc..1a62ab0867ef 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -1313,13 +1313,20 @@ static void qedi_simd_int_handler(void *cookie)
static void qedi_sync_free_irqs(struct qedi_ctx *qedi)
{
int i;
+ u16 idx;
if (qedi->int_info.msix_cnt) {
for (i = 0; i < qedi->int_info.used_cnt; i++) {
- synchronize_irq(qedi->int_info.msix[i].vector);
- irq_set_affinity_hint(qedi->int_info.msix[i].vector,
+ idx = i * qedi->dev_info.common.num_hwfns +
+ qedi_ops->common->get_affin_hwfn_idx(qedi->cdev);
+
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "Freeing IRQ #%d vector_idx=%d.\n", i, idx);
+
+ synchronize_irq(qedi->int_info.msix[idx].vector);
+ irq_set_affinity_hint(qedi->int_info.msix[idx].vector,
NULL);
- free_irq(qedi->int_info.msix[i].vector,
+ free_irq(qedi->int_info.msix[idx].vector,
&qedi->fp_array[i]);
}
} else {
@@ -1334,20 +1341,28 @@ static void qedi_sync_free_irqs(struct qedi_ctx *qedi)
static int qedi_request_msix_irq(struct qedi_ctx *qedi)
{
int i, rc, cpu;
+ u16 idx;
cpu = cpumask_first(cpu_online_mask);
- for (i = 0; i < qedi->int_info.msix_cnt; i++) {
- rc = request_irq(qedi->int_info.msix[i].vector,
+ for (i = 0; i < MIN_NUM_CPUS_MSIX(qedi); i++) {
+ idx = i * qedi->dev_info.common.num_hwfns +
+ qedi_ops->common->get_affin_hwfn_idx(qedi->cdev);
+
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "dev_info: num_hwfns=%d affin_hwfn_idx=%d.\n",
+ qedi->dev_info.common.num_hwfns,
+ qedi_ops->common->get_affin_hwfn_idx(qedi->cdev));
+
+ rc = request_irq(qedi->int_info.msix[idx].vector,
qedi_msix_handler, 0, "qedi",
&qedi->fp_array[i]);
-
if (rc) {
QEDI_WARN(&qedi->dbg_ctx, "request_irq failed.\n");
qedi_sync_free_irqs(qedi);
return rc;
}
qedi->int_info.used_cnt++;
- rc = irq_set_affinity_hint(qedi->int_info.msix[i].vector,
+ rc = irq_set_affinity_hint(qedi->int_info.msix[idx].vector,
get_cpu_mask(cpu));
cpu = cpumask_next(cpu, cpu_online_mask);
}
@@ -2415,6 +2430,11 @@ static int __qedi_probe(struct pci_dev *pdev, int mode)
if (rc)
goto free_host;
+ QEDI_INFO(&qedi->dbg_ctx, QEDI_LOG_INFO,
+ "dev_info: num_hwfns=%d affin_hwfn_idx=%d.\n",
+ qedi->dev_info.common.num_hwfns,
+ qedi_ops->common->get_affin_hwfn_idx(qedi->cdev));
+
if (mode != QEDI_MODE_RECOVERY) {
rc = qedi_set_iscsi_pf_param(qedi);
if (rc) {
diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h
index 7b1656427cbe..192b79439eb7 100644
--- a/include/dt-bindings/net/ti-dp83867.h
+++ b/include/dt-bindings/net/ti-dp83867.h
@@ -56,4 +56,6 @@
#define DP83867_CLK_O_SEL_CHN_C_TCLK 0xA
#define DP83867_CLK_O_SEL_CHN_D_TCLK 0xB
#define DP83867_CLK_O_SEL_REF_CLK 0xC
+/* Special flag to indicate clock should be off */
+#define DP83867_CLK_O_SEL_OFF 0xFFFFFFFF
#endif
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 1308126fc384..e982b8913b73 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -14,6 +14,7 @@
#ifndef _LINUX_I2C_H
#define _LINUX_I2C_H
+#include <linux/acpi.h> /* for acpi_handle */
#include <linux/mod_devicetable.h>
#include <linux/device.h> /* for struct device */
#include <linux/sched.h> /* for completion */
@@ -981,6 +982,7 @@ bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
u32 i2c_acpi_find_bus_speed(struct device *dev);
struct i2c_client *i2c_acpi_new_device(struct device *dev, int index,
struct i2c_board_info *info);
+struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle);
#else
static inline bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares,
struct acpi_resource_i2c_serialbus **i2c)
@@ -996,6 +998,10 @@ static inline struct i2c_client *i2c_acpi_new_device(struct device *dev,
{
return NULL;
}
+static inline struct i2c_adapter *i2c_acpi_find_adapter_by_handle(acpi_handle handle)
+{
+ return NULL;
+}
#endif /* CONFIG_ACPI */
#endif /* _LINUX_I2C_H */
diff --git a/include/linux/if_rmnet.h b/include/linux/if_rmnet.h
new file mode 100644
index 000000000000..b4f5403383fc
--- /dev/null
+++ b/include/linux/if_rmnet.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (c) 2013-2019, The Linux Foundation. All rights reserved.
+ */
+
+#ifndef _LINUX_IF_RMNET_H_
+#define _LINUX_IF_RMNET_H_
+
+struct rmnet_map_header {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 pad_len:6;
+ u8 reserved_bit:1;
+ u8 cd_bit:1;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ u8 cd_bit:1;
+ u8 reserved_bit:1;
+ u8 pad_len:6;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ u8 mux_id;
+ __be16 pkt_len;
+} __aligned(1);
+
+struct rmnet_map_dl_csum_trailer {
+ u8 reserved1;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u8 valid:1;
+ u8 reserved2:7;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ u8 reserved2:7;
+ u8 valid:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ u16 csum_start_offset;
+ u16 csum_length;
+ __be16 csum_value;
+} __aligned(1);
+
+struct rmnet_map_ul_csum_header {
+ __be16 csum_start_offset;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ u16 csum_insert_offset:14;
+ u16 udp_ip4_ind:1;
+ u16 csum_enabled:1;
+#elif defined (__BIG_ENDIAN_BITFIELD)
+ u16 csum_enabled:1;
+ u16 udp_ip4_ind:1;
+ u16 csum_insert_offset:14;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+} __aligned(1);
+
+#endif /* !(_LINUX_IF_RMNET_H_) */
diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index 9c94b2ea789c..6649cb78de4a 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -65,8 +65,8 @@ struct ip_mc_socklist {
struct ip_sf_list {
struct ip_sf_list *sf_next;
- __be32 sf_inaddr;
unsigned long sf_count[2]; /* include/exclude counts */
+ __be32 sf_inaddr;
unsigned char sf_gsresp; /* include in g & s response? */
unsigned char sf_oldin; /* change state */
unsigned char sf_crcount; /* retrans. left to send */
diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h
index 12113e502656..3a3dc4b1f0e7 100644
--- a/include/linux/netfilter_ipv6.h
+++ b/include/linux/netfilter_ipv6.h
@@ -19,6 +19,7 @@ struct ip6_rt_info {
};
struct nf_queue_entry;
+struct nf_ct_bridge_frag_data;
/*
* Hook functions for ipv6 to allow xt_* modules to be built-in even
@@ -39,6 +40,15 @@ struct nf_ipv6_ops {
int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
+#if IS_MODULE(CONFIG_IPV6)
+ int (*br_defrag)(struct net *net, struct sk_buff *skb, u32 user);
+ int (*br_fragment)(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ struct nf_ct_bridge_frag_data *data,
+ int (*output)(struct net *, struct sock *sk,
+ const struct nf_ct_bridge_frag_data *data,
+ struct sk_buff *));
+#endif
};
#ifdef CONFIG_NETFILTER
@@ -86,6 +96,48 @@ static inline int nf_ip6_route(struct net *net, struct dst_entry **dst,
#endif
}
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+
+static inline int nf_ipv6_br_defrag(struct net *net, struct sk_buff *skb,
+ u32 user)
+{
+#if IS_MODULE(CONFIG_IPV6)
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return 1;
+
+ return v6_ops->br_defrag(net, skb, user);
+#else
+ return nf_ct_frag6_gather(net, skb, user);
+#endif
+}
+
+int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ struct nf_ct_bridge_frag_data *data,
+ int (*output)(struct net *, struct sock *sk,
+ const struct nf_ct_bridge_frag_data *data,
+ struct sk_buff *));
+
+static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ struct nf_ct_bridge_frag_data *data,
+ int (*output)(struct net *, struct sock *sk,
+ const struct nf_ct_bridge_frag_data *data,
+ struct sk_buff *))
+{
+#if IS_MODULE(CONFIG_IPV6)
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return 1;
+
+ return v6_ops->br_fragment(net, sk, skb, data, output);
+#else
+ return br_ip6_fragment(net, sk, skb, data, output);
+#endif
+}
+
int ip6_route_me_harder(struct net *net, struct sk_buff *skb);
static inline int nf_ip6_route_me_harder(struct net *net, struct sk_buff *skb)
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 073fb151b5a9..dc4b51060ebc 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -103,6 +103,7 @@ typedef enum {
PHY_INTERFACE_MODE_XAUI,
/* 10GBASE-KR, XFI, SFI - single lane 10G Serdes */
PHY_INTERFACE_MODE_10GKR,
+ PHY_INTERFACE_MODE_USXGMII,
PHY_INTERFACE_MODE_MAX,
} phy_interface_t;
@@ -178,6 +179,8 @@ static inline const char *phy_modes(phy_interface_t interface)
return "xaui";
case PHY_INTERFACE_MODE_10GKR:
return "10gbase-kr";
+ case PHY_INTERFACE_MODE_USXGMII:
+ return "usxgmii";
default:
return "unknown";
}
@@ -185,7 +188,6 @@ static inline const char *phy_modes(phy_interface_t interface)
#define PHY_INIT_TIMEOUT 100000
-#define PHY_STATE_TIME 1
#define PHY_FORCE_TIMEOUT 10
#define PHY_MAX_ADDR 32
@@ -534,6 +536,9 @@ struct phy_driver {
*/
int (*did_interrupt)(struct phy_device *phydev);
+ /* Override default interrupt handling */
+ int (*handle_interrupt)(struct phy_device *phydev);
+
/* Clears up any memory if needed */
void (*remove)(struct phy_device *phydev);
@@ -1134,6 +1139,7 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner);
int phy_drivers_register(struct phy_driver *new_driver, int n,
struct module *owner);
void phy_state_machine(struct work_struct *work);
+void phy_queue_state_machine(struct phy_device *phydev, unsigned long jiffies);
void phy_mac_interrupt(struct phy_device *phydev);
void phy_start_machine(struct phy_device *phydev);
void phy_stop_machine(struct phy_device *phydev);
@@ -1144,6 +1150,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
const struct ethtool_link_ksettings *cmd);
int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd);
void phy_request_interrupt(struct phy_device *phydev);
+void phy_free_interrupt(struct phy_device *phydev);
void phy_print_status(struct phy_device *phydev);
int phy_set_max_speed(struct phy_device *phydev, u32 max_speed);
void phy_remove_link_mode(struct phy_device *phydev, u32 link_mode);
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 6411c624f63a..0f6f65bb9d44 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -54,6 +54,21 @@ struct phylink_link_state {
unsigned int an_complete:1;
};
+enum phylink_op_type {
+ PHYLINK_NETDEV = 0,
+ PHYLINK_DEV,
+};
+
+/**
+ * struct phylink_config - PHYLINK configuration structure
+ * @dev: a pointer to a struct device associated with the MAC
+ * @type: operation type of PHYLINK instance
+ */
+struct phylink_config {
+ struct device *dev;
+ enum phylink_op_type type;
+};
+
/**
* struct phylink_mac_ops - MAC operations structure.
* @validate: Validate and update the link configuration.
@@ -66,16 +81,17 @@ struct phylink_link_state {
* The individual methods are described more fully below.
*/
struct phylink_mac_ops {
- void (*validate)(struct net_device *ndev, unsigned long *supported,
+ void (*validate)(struct phylink_config *config,
+ unsigned long *supported,
struct phylink_link_state *state);
- int (*mac_link_state)(struct net_device *ndev,
+ int (*mac_link_state)(struct phylink_config *config,
struct phylink_link_state *state);
- void (*mac_config)(struct net_device *ndev, unsigned int mode,
+ void (*mac_config)(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state);
- void (*mac_an_restart)(struct net_device *ndev);
- void (*mac_link_down)(struct net_device *ndev, unsigned int mode,
+ void (*mac_an_restart)(struct phylink_config *config);
+ void (*mac_link_down)(struct phylink_config *config, unsigned int mode,
phy_interface_t interface);
- void (*mac_link_up)(struct net_device *ndev, unsigned int mode,
+ void (*mac_link_up)(struct phylink_config *config, unsigned int mode,
phy_interface_t interface,
struct phy_device *phy);
};
@@ -83,7 +99,7 @@ struct phylink_mac_ops {
#if 0 /* For kernel-doc purposes only. */
/**
* validate - Validate and update the link configuration
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
* @supported: ethtool bitmask for supported link modes.
* @state: a pointer to a &struct phylink_link_state.
*
@@ -100,12 +116,12 @@ struct phylink_mac_ops {
* based on @state->advertising and/or @state->speed and update
* @state->interface accordingly.
*/
-void validate(struct net_device *ndev, unsigned long *supported,
+void validate(struct phylink_config *config, unsigned long *supported,
struct phylink_link_state *state);
/**
* mac_link_state() - Read the current link state from the hardware
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
* @state: a pointer to a &struct phylink_link_state.
*
* Read the current link state from the MAC, reporting the current
@@ -114,12 +130,12 @@ void validate(struct net_device *ndev, unsigned long *supported,
* negotiation completion state in @state->an_complete, and link
* up state in @state->link.
*/
-int mac_link_state(struct net_device *ndev,
+int mac_link_state(struct phylink_config *config,
struct phylink_link_state *state);
/**
* mac_config() - configure the MAC for the selected mode and state
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
* @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND.
* @state: a pointer to a &struct phylink_link_state.
*
@@ -157,18 +173,18 @@ int mac_link_state(struct net_device *ndev,
* down. This "update" behaviour is critical to avoid bouncing the
* link up status.
*/
-void mac_config(struct net_device *ndev, unsigned int mode,
+void mac_config(struct phylink_config *config, unsigned int mode,
const struct phylink_link_state *state);
/**
* mac_an_restart() - restart 802.3z BaseX autonegotiation
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
*/
-void mac_an_restart(struct net_device *ndev);
+void mac_an_restart(struct phylink_config *config);
/**
* mac_link_down() - take the link down
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
* @mode: link autonegotiation mode
* @interface: link &typedef phy_interface_t mode
*
@@ -177,12 +193,12 @@ void mac_an_restart(struct net_device *ndev);
* Energy Efficient Ethernet MAC configuration. Interface type
* selection must be done in mac_config().
*/
-void mac_link_down(struct net_device *ndev, unsigned int mode,
+void mac_link_down(struct phylink_config *config, unsigned int mode,
phy_interface_t interface);
/**
* mac_link_up() - allow the link to come up
- * @ndev: a pointer to a &struct net_device for the MAC.
+ * @config: a pointer to a &struct phylink_config.
* @mode: link autonegotiation mode
* @interface: link &typedef phy_interface_t mode
* @phy: any attached phy
@@ -193,13 +209,14 @@ void mac_link_down(struct net_device *ndev, unsigned int mode,
* phy_init_eee() and perform appropriate MAC configuration for EEE.
* Interface type selection must be done in mac_config().
*/
-void mac_link_up(struct net_device *ndev, unsigned int mode,
+void mac_link_up(struct phylink_config *config, unsigned int mode,
phy_interface_t interface,
struct phy_device *phy);
#endif
-struct phylink *phylink_create(struct net_device *, struct fwnode_handle *,
- phy_interface_t iface, const struct phylink_mac_ops *ops);
+struct phylink *phylink_create(struct phylink_config *, struct fwnode_handle *,
+ phy_interface_t iface,
+ const struct phylink_mac_ops *ops);
void phylink_destroy(struct phylink *);
int phylink_connect_phy(struct phylink *, struct phy_device *);
diff --git a/include/linux/platform_data/xilinx-ll-temac.h b/include/linux/platform_data/xilinx-ll-temac.h
index 368530f98176..f4a68136afa6 100644
--- a/include/linux/platform_data/xilinx-ll-temac.h
+++ b/include/linux/platform_data/xilinx-ll-temac.h
@@ -4,6 +4,7 @@
#include <linux/if_ether.h>
#include <linux/phy.h>
+#include <linux/spinlock.h>
struct ll_temac_platform_data {
bool txcsum; /* Enable/disable TX checksum */
@@ -21,7 +22,7 @@ struct ll_temac_platform_data {
* TEMAC IP block, the same mutex should be passed here, as
* they share the same DCR bus bridge.
*/
- struct mutex *indirect_mutex;
+ spinlock_t *indirect_lock;
/* DMA channel control setup */
u8 tx_irq_timeout; /* TX Interrupt Delay Time-out */
u8 tx_irq_count; /* TX Interrupt Coalescing Threshold Count */
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 48841e5dab90..eef02e64b422 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -907,7 +907,8 @@ struct qed_common_ops {
u32 (*sb_release)(struct qed_dev *cdev,
struct qed_sb_info *sb_info,
- u16 sb_id);
+ u16 sb_id,
+ enum qed_sb_type type);
void (*simd_handler_config)(struct qed_dev *cdev,
void *token,
@@ -1123,6 +1124,13 @@ struct qed_common_ops {
*/
int (*read_module_eeprom)(struct qed_dev *cdev,
char *buf, u8 dev_addr, u32 offset, u32 len);
+
+/**
+ * @brief get_affin_hwfn_idx
+ *
+ * @param cdev
+ */
+ u8 (*get_affin_hwfn_idx)(struct qed_dev *cdev);
};
#define MASK_FIELD(_name, _value) \
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index d15f8e4815e3..898f595ea3d6 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -670,6 +670,8 @@ struct qed_rdma_ops {
int (*ll2_set_mac_filter)(struct qed_dev *cdev,
u8 *old_mac_address, u8 *new_mac_address);
+ int (*iwarp_set_engine_affin)(struct qed_dev *cdev, bool b_reset);
+
int (*iwarp_connect)(void *rdma_cxt,
struct qed_iwarp_connect_in *iparams,
struct qed_iwarp_connect_out *oparams);
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 9f8bc06d4136..beb9a9da1699 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -352,37 +352,38 @@ static inline void rht_unlock(struct bucket_table *tbl,
static inline struct rhash_head __rcu *__rht_ptr(
struct rhash_lock_head *const *bkt)
{
- return (struct rhash_head __rcu *)((unsigned long)*bkt & ~BIT(0));
+ return (struct rhash_head __rcu *)
+ ((unsigned long)*bkt & ~BIT(0) ?:
+ (unsigned long)RHT_NULLS_MARKER(bkt));
}
/*
* Where 'bkt' is a bucket and might be locked:
- * rht_ptr() dereferences that pointer and clears the lock bit.
+ * rht_ptr_rcu() dereferences that pointer and clears the lock bit.
+ * rht_ptr() dereferences in a context where the bucket is locked.
* rht_ptr_exclusive() dereferences in a context where exclusive
* access is guaranteed, such as when destroying the table.
*/
+static inline struct rhash_head *rht_ptr_rcu(
+ struct rhash_lock_head *const *bkt)
+{
+ struct rhash_head __rcu *p = __rht_ptr(bkt);
+
+ return rcu_dereference(p);
+}
+
static inline struct rhash_head *rht_ptr(
struct rhash_lock_head *const *bkt,
struct bucket_table *tbl,
unsigned int hash)
{
- struct rhash_head __rcu *p = __rht_ptr(bkt);
-
- if (!p)
- return RHT_NULLS_MARKER(bkt);
-
- return rht_dereference_bucket_rcu(p, tbl, hash);
+ return rht_dereference_bucket(__rht_ptr(bkt), tbl, hash);
}
static inline struct rhash_head *rht_ptr_exclusive(
struct rhash_lock_head *const *bkt)
{
- struct rhash_head __rcu *p = __rht_ptr(bkt);
-
- if (!p)
- return RHT_NULLS_MARKER(bkt);
-
- return rcu_dereference_protected(p, 1);
+ return rcu_dereference_protected(__rht_ptr(bkt), 1);
}
static inline void rht_assign_locked(struct rhash_lock_head **bkt,
@@ -509,7 +510,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl,
*/
#define rht_for_each_rcu(pos, tbl, hash) \
for (({barrier(); }), \
- pos = rht_ptr(rht_bucket(tbl, hash), tbl, hash); \
+ pos = rht_ptr_rcu(rht_bucket(tbl, hash)); \
!rht_is_a_nulls(pos); \
pos = rcu_dereference_raw(pos->next))
@@ -546,8 +547,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl,
*/
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
rht_for_each_entry_rcu_from(tpos, pos, \
- rht_ptr(rht_bucket(tbl, hash), \
- tbl, hash), \
+ rht_ptr_rcu(rht_bucket(tbl, hash)), \
tbl, hash, member)
/**
@@ -603,7 +603,7 @@ restart:
hash = rht_key_hashfn(ht, tbl, key, params);
bkt = rht_bucket(tbl, hash);
do {
- rht_for_each_rcu_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) {
+ rht_for_each_rcu_from(he, rht_ptr_rcu(bkt), tbl, hash) {
if (params.obj_cmpfn ?
params.obj_cmpfn(&arg, rht_obj(ht, he)) :
rhashtable_compare(&arg, rht_obj(ht, he)))
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 1c4adfb4195a..151eb930d329 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -17,6 +17,7 @@
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/spinlock.h>
+#include <linux/workqueue.h>
#include <net/net_namespace.h>
#include <uapi/linux/devlink.h>
@@ -64,6 +65,7 @@ struct devlink_port {
enum devlink_port_type desired_type;
void *type_dev;
struct devlink_port_attrs attrs;
+ struct delayed_work type_warn_dw;
};
struct devlink_sb_pool_info {
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 685294817712..a7f36219904f 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -22,6 +22,7 @@
#include <linux/net_tstamp.h>
#include <linux/phy.h>
#include <linux/platform_data/dsa.h>
+#include <linux/phylink.h>
#include <net/devlink.h>
#include <net/switchdev.h>
@@ -193,6 +194,7 @@ struct dsa_port {
struct net_device *bridge_dev;
struct devlink_port devlink_port;
struct phylink *pl;
+ struct phylink_config pl_config;
struct work_struct xmit_work;
struct sk_buff_head xmit_queue;
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b473df5b9512..eba8465e1d86 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -103,6 +103,7 @@ struct fib_rule_notifier_info {
};
#define FRA_GENERIC_POLICY \
+ [FRA_UNSPEC] = { .strict_start_type = FRA_DPORT_RANGE + 1 }, \
[FRA_IIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_PRIORITY] = { .type = NLA_U32 }, \
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 378904ee9129..e91b79ad4e4a 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -3,19 +3,24 @@
#define __NET_FRAG_H__
#include <linux/rhashtable-types.h>
+#include <linux/completion.h>
-struct netns_frags {
+/* Per netns frag queues directory */
+struct fqdir {
/* sysctls */
long high_thresh;
long low_thresh;
int timeout;
int max_dist;
struct inet_frags *f;
+ struct net *net;
+ bool dead;
struct rhashtable rhashtable ____cacheline_aligned_in_smp;
/* Keep atomic mem on separate cachelines in structs that include it */
atomic_long_t mem ____cacheline_aligned_in_smp;
+ struct rcu_work destroy_rwork;
};
/**
@@ -24,11 +29,13 @@ struct netns_frags {
* @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
+ * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable
*/
enum {
INET_FRAG_FIRST_IN = BIT(0),
INET_FRAG_LAST_IN = BIT(1),
INET_FRAG_COMPLETE = BIT(2),
+ INET_FRAG_HASH_DEAD = BIT(3),
};
struct frag_v4_compare_key {
@@ -64,7 +71,7 @@ struct frag_v6_compare_key {
* @meat: length of received fragments so far
* @flags: fragment queue flags
* @max_size: maximum received fragment size
- * @net: namespace that this frag belongs to
+ * @fqdir: pointer to struct fqdir
* @rcu: rcu head for freeing deferall
*/
struct inet_frag_queue {
@@ -84,7 +91,7 @@ struct inet_frag_queue {
int meat;
__u8 flags;
u16 max_size;
- struct netns_frags *net;
+ struct fqdir *fqdir;
struct rcu_head rcu;
};
@@ -98,21 +105,19 @@ struct inet_frags {
struct kmem_cache *frags_cachep;
const char *frags_cache_name;
struct rhashtable_params rhash_params;
+ refcount_t refcnt;
+ struct completion completion;
};
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);
-static inline int inet_frags_init_net(struct netns_frags *nf)
-{
- atomic_long_set(&nf->mem, 0);
- return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
-}
-void inet_frags_exit_net(struct netns_frags *nf);
+int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net);
+void fqdir_exit(struct fqdir *fqdir);
void inet_frag_kill(struct inet_frag_queue *q);
void inet_frag_destroy(struct inet_frag_queue *q);
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
+struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
/* Free all skbs in the queue; return the sum of their truesizes. */
unsigned int inet_frag_rbtree_purge(struct rb_root *root);
@@ -125,19 +130,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
/* Memory Tracking Functions. */
-static inline long frag_mem_limit(const struct netns_frags *nf)
+static inline long frag_mem_limit(const struct fqdir *fqdir)
{
- return atomic_long_read(&nf->mem);
+ return atomic_long_read(&fqdir->mem);
}
-static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
+static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
{
- atomic_long_sub(val, &nf->mem);
+ atomic_long_sub(val, &fqdir->mem);
}
-static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
+static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
{
- atomic_long_add(val, &nf->mem);
+ atomic_long_add(val, &fqdir->mem);
}
/* RFC 3168 support :
diff --git a/include/net/ip.h b/include/net/ip.h
index 2d3cce7c3e8a..029cc3fd26bd 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -165,6 +165,45 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *));
+
+struct ip_fraglist_iter {
+ struct sk_buff *frag_list;
+ struct sk_buff *frag;
+ struct iphdr *iph;
+ int offset;
+ unsigned int hlen;
+};
+
+void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int hlen, struct ip_fraglist_iter *iter);
+void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter);
+
+static inline struct sk_buff *ip_fraglist_next(struct ip_fraglist_iter *iter)
+{
+ struct sk_buff *skb = iter->frag;
+
+ iter->frag = skb->next;
+ skb_mark_not_on_list(skb);
+
+ return skb;
+}
+
+struct ip_frag_state {
+ struct iphdr *iph;
+ unsigned int hlen;
+ unsigned int ll_rs;
+ unsigned int mtu;
+ unsigned int left;
+ int offset;
+ int ptr;
+ __be16 not_last_frag;
+};
+
+void ip_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int ll_rs,
+ unsigned int mtu, struct ip_frag_state *state);
+struct sk_buff *ip_frag_next(struct sk_buff *skb,
+ struct ip_frag_state *state);
+
void ip_send_check(struct iphdr *ip);
int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 525f701653ca..ebe5d65f97e0 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -131,6 +131,9 @@ struct fib6_nh {
#ifdef CONFIG_IPV6_ROUTER_PREF
unsigned long last_probe;
#endif
+
+ struct rt6_info * __percpu *rt6i_pcpu;
+ struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
};
struct fib6_info {
@@ -156,22 +159,18 @@ struct fib6_info {
struct rt6key fib6_src;
struct rt6key fib6_prefsrc;
- struct rt6_info * __percpu *rt6i_pcpu;
- struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
-
u32 fib6_metric;
u8 fib6_protocol;
u8 fib6_type;
- u8 exception_bucket_flushed:1,
- should_flush:1,
+ u8 should_flush:1,
dst_nocount:1,
dst_nopolicy:1,
dst_host:1,
fib6_destroying:1,
- unused:2;
+ unused:3;
- struct fib6_nh fib6_nh;
struct rcu_head rcu;
+ struct fib6_nh fib6_nh[0];
};
struct rt6_info {
@@ -281,7 +280,7 @@ static inline void ip6_rt_put(struct rt6_info *rt)
dst_release(&rt->dst);
}
-struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh);
void fib6_info_destroy_rcu(struct rcu_head *head);
static inline void fib6_info_hold(struct fib6_info *f6i)
@@ -444,7 +443,7 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr)
static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
{
- return f6i->fib6_nh.fib_nh_dev;
+ return f6i->fib6_nh->fib_nh_dev;
}
int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
@@ -452,6 +451,12 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct netlink_ext_ack *extack);
void fib6_nh_release(struct fib6_nh *fib6_nh);
+int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ struct netlink_ext_ack *extack);
+void fib6_rt_update(struct net *net, struct fib6_info *rt,
+ struct nl_info *info);
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int flags);
@@ -485,6 +490,7 @@ int fib6_tables_dump(struct net *net, struct notifier_block *nb);
void fib6_update_sernum(struct net *net, struct fib6_info *rt);
void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt);
+void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i);
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val);
static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 4790beaa86e0..a6ce6ea856b9 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -70,7 +70,7 @@ static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i)
{
/* the RTF_ADDRCONF flag filters out RA's */
return !(f6i->fib6_flags & RTF_ADDRCONF) &&
- f6i->fib6_nh.fib_nh_gw_family;
+ f6i->fib6_nh->fib_nh_gw_family;
}
void ip6_route_input(struct sk_buff *skb);
@@ -275,7 +275,7 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt,
static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b)
{
- struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh;
+ struct fib6_nh *nha = a->fib6_nh, *nhb = b->fib6_nh;
return nha->fib_nh_dev == nhb->fib_nh_dev &&
ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) &&
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index d0e28f4ab099..70ba0302c8c9 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -150,6 +150,7 @@ struct fib_info {
#define fib_advmss fib_metrics->metrics[RTAX_ADVMSS-1]
int fib_nhs;
bool fib_nh_is_v6;
+ bool nh_updated;
struct rcu_head rcu;
struct fib_nh fib_nh[0];
#define fib_dev fib_nh[0].fib_nh_dev
@@ -200,7 +201,8 @@ static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel)
#define FIB_TABLE_HASHSZ 2
#endif
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh);
+__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
+ unsigned char scope);
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res);
#define FIB_RES_NHC(res) ((res).nhc)
@@ -231,6 +233,7 @@ int call_fib4_notifiers(struct net *net, enum fib_event_type event_type,
int __net_init fib4_notifier_init(struct net *net);
void __net_exit fib4_notifier_exit(struct net *net);
+void fib_info_notify_update(struct net *net, struct nl_info *info);
void fib_notify(struct net *net, struct notifier_block *nb);
struct fib_table {
@@ -429,11 +432,14 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
int fib_sync_down_addr(struct net_device *dev, __be32 local);
int fib_sync_up(struct net_device *dev, unsigned char nh_flags);
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu);
+void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig);
#ifdef CONFIG_IP_ROUTE_MULTIPATH
int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
const struct sk_buff *skb, struct flow_keys *flkeys);
#endif
+int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
+ struct netlink_ext_ack *extack);
void fib_select_multipath(struct fib_result *res, int hash);
void fib_select_path(struct net *net, struct fib_result *res,
struct flowi4 *fl4, const struct sk_buff *skb);
@@ -469,6 +475,7 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
#endif
}
+void fib_flush(struct net *net);
void free_fib_info(struct fib_info *fi);
static inline void fib_info_hold(struct fib_info *fi)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index daf80863d3a5..21bb830e9679 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -154,6 +154,50 @@ struct frag_hdr {
#define IP6_MF 0x0001
#define IP6_OFFSET 0xFFF8
+struct ip6_fraglist_iter {
+ struct ipv6hdr *tmp_hdr;
+ struct sk_buff *frag_list;
+ struct sk_buff *frag;
+ int offset;
+ unsigned int hlen;
+ __be32 frag_id;
+ u8 nexthdr;
+};
+
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id,
+ struct ip6_fraglist_iter *iter);
+void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter);
+
+static inline struct sk_buff *ip6_fraglist_next(struct ip6_fraglist_iter *iter)
+{
+ struct sk_buff *skb = iter->frag;
+
+ iter->frag = skb->next;
+ skb_mark_not_on_list(skb);
+
+ return skb;
+}
+
+struct ip6_frag_state {
+ u8 *prevhdr;
+ unsigned int hlen;
+ unsigned int mtu;
+ unsigned int left;
+ int offset;
+ int ptr;
+ int hroom;
+ int troom;
+ __be32 frag_id;
+ u8 nexthdr;
+};
+
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
+ unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state);
+struct sk_buff *ip6_frag_next(struct sk_buff *skb,
+ struct ip6_frag_state *state);
+
#define IP6_REPLY_MARK(net, mark) \
((net)->ipv6.sysctl.fwmark_reflect ? (mark) : 0)
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 6c0c4fde16f8..5c93e942c50b 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -45,6 +45,11 @@ struct ipv6_stub {
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack);
void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
+ void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
+ int (*ip6_del_rt)(struct net *net, struct fib6_info *rt);
+ void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
+ struct nl_info *info);
+
void (*udpv6_encap_enable)(void);
void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 12689ddfc24c..abb4f92456e1 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -19,6 +19,7 @@
#include <net/netns/packet.h>
#include <net/netns/ipv4.h>
#include <net/netns/ipv6.h>
+#include <net/netns/nexthop.h>
#include <net/netns/ieee802154_6lowpan.h>
#include <net/netns/sctp.h>
#include <net/netns/dccp.h>
@@ -108,6 +109,7 @@ struct net {
struct netns_mib mib;
struct netns_packet packet;
struct netns_unix unx;
+ struct netns_nexthop nexthop;
struct netns_ipv4 ipv4;
#if IS_ENABLED(CONFIG_IPV6)
struct netns_ipv6 ipv6;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d2bc733a2ef1..5cb19ce454d1 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -49,6 +49,7 @@ union nf_conntrack_expect_proto {
struct nf_conntrack_net {
unsigned int users4;
unsigned int users6;
+ unsigned int users_bridge;
};
#include <linux/types.h>
diff --git a/include/net/netfilter/nf_conntrack_bridge.h b/include/net/netfilter/nf_conntrack_bridge.h
new file mode 100644
index 000000000000..9a5514d5bc51
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_bridge.h
@@ -0,0 +1,20 @@
+#ifndef NF_CONNTRACK_BRIDGE_
+#define NF_CONNTRACK_BRIDGE_
+
+struct nf_ct_bridge_info {
+ struct nf_hook_ops *ops;
+ unsigned int ops_size;
+ struct module *me;
+};
+
+void nf_ct_bridge_register(struct nf_ct_bridge_info *info);
+void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info);
+
+struct nf_ct_bridge_frag_data {
+ char mac[ETH_HLEN];
+ bool vlan_present;
+ u16 vlan_tci;
+ __be16 vlan_proto;
+};
+
+#endif
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index ae41e92251dd..de10faf2ce91 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -64,6 +64,9 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
return ret;
}
+unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo);
+
void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *proto);
diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h
index 736aeac52f56..95406e1342cb 100644
--- a/include/net/netns/ieee802154_6lowpan.h
+++ b/include/net/netns/ieee802154_6lowpan.h
@@ -16,7 +16,7 @@ struct netns_sysctl_lowpan {
struct netns_ieee802154_lowpan {
struct netns_sysctl_lowpan sysctl;
- struct netns_frags frags;
+ struct fqdir *fqdir;
};
#endif
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 7698460a3dd1..c07cee1e0c9e 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -72,7 +72,7 @@ struct netns_ipv4 {
struct inet_peer_base *peers;
struct sock * __percpu *tcp_sk;
- struct netns_frags frags;
+ struct fqdir *fqdir;
#ifdef CONFIG_NETFILTER
struct xt_table *iptable_filter;
struct xt_table *iptable_mangle;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5e61b5a8635d..022a0fd1a5a4 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -58,7 +58,7 @@ struct netns_ipv6 {
struct ipv6_devconf *devconf_all;
struct ipv6_devconf *devconf_dflt;
struct inet_peer_base *peers;
- struct netns_frags frags;
+ struct fqdir *fqdir;
#ifdef CONFIG_NETFILTER
struct xt_table *ip6table_filter;
struct xt_table *ip6table_mangle;
@@ -116,7 +116,7 @@ struct netns_ipv6 {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag {
- struct netns_frags frags;
+ struct fqdir *fqdir;
};
#endif
diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h
new file mode 100644
index 000000000000..c712ee5eebd9
--- /dev/null
+++ b/include/net/netns/nexthop.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * nexthops in net namespaces
+ */
+
+#ifndef __NETNS_NEXTHOP_H__
+#define __NETNS_NEXTHOP_H__
+
+#include <linux/rbtree.h>
+
+struct netns_nexthop {
+ struct rb_root rb_root; /* tree of nexthops by id */
+ struct hlist_head *devhash; /* nexthops by device */
+
+ unsigned int seq; /* protected by rtnl_mutex */
+ u32 last_id_allocated;
+};
+#endif
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
new file mode 100644
index 000000000000..6e1b8f53624c
--- /dev/null
+++ b/include/net/nexthop.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic nexthop implementation
+ *
+ * Copyright (c) 2017-19 Cumulus Networks
+ * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
+ */
+
+#ifndef __LINUX_NEXTHOP_H
+#define __LINUX_NEXTHOP_H
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <net/ip_fib.h>
+#include <net/ip6_fib.h>
+#include <net/netlink.h>
+
+#define NEXTHOP_VALID_USER_FLAGS RTNH_F_ONLINK
+
+struct nexthop;
+
+struct nh_config {
+ u32 nh_id;
+
+ u8 nh_family;
+ u8 nh_protocol;
+ u8 nh_blackhole;
+ u32 nh_flags;
+
+ int nh_ifindex;
+ struct net_device *dev;
+
+ union {
+ __be32 ipv4;
+ struct in6_addr ipv6;
+ } gw;
+
+ struct nlattr *nh_grp;
+ u16 nh_grp_type;
+
+ struct nlattr *nh_encap;
+ u16 nh_encap_type;
+
+ u32 nlflags;
+ struct nl_info nlinfo;
+};
+
+struct nh_info {
+ struct hlist_node dev_hash; /* entry on netns devhash */
+ struct nexthop *nh_parent;
+
+ u8 family;
+ bool reject_nh;
+
+ union {
+ struct fib_nh_common fib_nhc;
+ struct fib_nh fib_nh;
+ struct fib6_nh fib6_nh;
+ };
+};
+
+struct nh_grp_entry {
+ struct nexthop *nh;
+ u8 weight;
+ atomic_t upper_bound;
+
+ struct list_head nh_list;
+ struct nexthop *nh_parent; /* nexthop of group with this entry */
+};
+
+struct nh_group {
+ u16 num_nh;
+ bool mpath;
+ bool has_v4;
+ struct nh_grp_entry nh_entries[0];
+};
+
+struct nexthop {
+ struct rb_node rb_node; /* entry on netns rbtree */
+ struct list_head grp_list; /* nh group entries using this nh */
+ struct net *net;
+
+ u32 id;
+
+ u8 protocol; /* app managing this nh */
+ u8 nh_flags;
+ bool is_group;
+
+ refcount_t refcnt;
+ struct rcu_head rcu;
+
+ union {
+ struct nh_info __rcu *nh_info;
+ struct nh_group __rcu *nh_grp;
+ };
+};
+
+/* caller is holding rcu or rtnl; no reference taken to nexthop */
+struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
+void nexthop_free_rcu(struct rcu_head *head);
+
+static inline bool nexthop_get(struct nexthop *nh)
+{
+ return refcount_inc_not_zero(&nh->refcnt);
+}
+
+static inline void nexthop_put(struct nexthop *nh)
+{
+ if (refcount_dec_and_test(&nh->refcnt))
+ call_rcu(&nh->rcu, nexthop_free_rcu);
+}
+
+static inline bool nexthop_is_multipath(const struct nexthop *nh)
+{
+ if (nh->is_group) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ return nh_grp->mpath;
+ }
+ return false;
+}
+
+struct nexthop *nexthop_select_path(struct nexthop *nh, int hash);
+
+static inline unsigned int nexthop_num_path(const struct nexthop *nh)
+{
+ unsigned int rc = 1;
+
+ if (nexthop_is_multipath(nh)) {
+ struct nh_group *nh_grp;
+
+ nh_grp = rcu_dereference_rtnl(nh->nh_grp);
+ rc = nh_grp->num_nh;
+ } else {
+ const struct nh_info *nhi;
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ if (nhi->reject_nh)
+ rc = 0;
+ }
+
+ return rc;
+}
+
+static inline
+struct nexthop *nexthop_mpath_select(const struct nexthop *nh, int nhsel)
+{
+ const struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
+
+ /* for_nexthops macros in fib_semantics.c grabs a pointer to
+ * the nexthop before checking nhsel
+ */
+ if (nhsel > nhg->num_nh)
+ return NULL;
+
+ return nhg->nh_entries[nhsel].nh;
+}
+
+static inline
+int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh)
+{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ int i;
+
+ for (i = 0; i < nhg->num_nh; i++) {
+ struct nexthop *nhe = nhg->nh_entries[i].nh;
+ struct nh_info *nhi = rcu_dereference_rtnl(nhe->nh_info);
+ struct fib_nh_common *nhc = &nhi->fib_nhc;
+ int weight = nhg->nh_entries[i].weight;
+
+ if (fib_add_nexthop(skb, nhc, weight) < 0)
+ return -EMSGSIZE;
+ }
+
+ return 0;
+}
+
+/* called with rcu lock */
+static inline bool nexthop_is_blackhole(const struct nexthop *nh)
+{
+ const struct nh_info *nhi;
+
+ if (nexthop_is_multipath(nh)) {
+ if (nexthop_num_path(nh) > 1)
+ return false;
+ nh = nexthop_mpath_select(nh, 0);
+ if (!nh)
+ return false;
+ }
+
+ nhi = rcu_dereference_rtnl(nh->nh_info);
+ return nhi->reject_nh;
+}
+#endif
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index 314699333bec..5a9bb09f32b6 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -43,19 +43,21 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2,
(__force __u32)csum2, len);
}
+static const struct skb_checksum_ops sctp_csum_ops = {
+ .update = sctp_csum_update,
+ .combine = sctp_csum_combine,
+};
+
static inline __le32 sctp_compute_cksum(const struct sk_buff *skb,
unsigned int offset)
{
struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
- const struct skb_checksum_ops ops = {
- .update = sctp_csum_update,
- .combine = sctp_csum_combine,
- };
__le32 old = sh->checksum;
__wsum new;
sh->checksum = 0;
- new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &ops);
+ new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0,
+ &sctp_csum_ops);
sh->checksum = old;
return cpu_to_le32((__force __u32)new);
diff --git a/include/net/tc_act/tc_ctinfo.h b/include/net/tc_act/tc_ctinfo.h
new file mode 100644
index 000000000000..d6a688571672
--- /dev/null
+++ b/include/net/tc_act/tc_ctinfo.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_CTINFO_H
+#define __NET_TC_CTINFO_H
+
+#include <net/act_api.h>
+
+struct tcf_ctinfo_params {
+ struct rcu_head rcu;
+ struct net *net;
+ u32 dscpmask;
+ u32 dscpstatemask;
+ u32 cpmarkmask;
+ u16 zone;
+ u8 mode;
+ u8 dscpmaskshift;
+};
+
+struct tcf_ctinfo {
+ struct tc_action common;
+ struct tcf_ctinfo_params __rcu *params;
+ u64 stats_dscp_set;
+ u64 stats_dscp_error;
+ u64 stats_cpmark_set;
+};
+
+#define to_ctinfo(a) ((struct tcf_ctinfo *)a)
+
+#endif /* __NET_TC_CTINFO_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 985aa5db570c..0083a14fb64f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1614,7 +1614,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp);
void tcp_fastopen_destroy_cipher(struct sock *sk);
void tcp_fastopen_ctx_destroy(struct net *net);
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
- void *key, unsigned int len);
+ void *primary_key, void *backup_key,
+ unsigned int len);
void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb);
struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
@@ -1625,11 +1626,14 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie);
bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
#define TCP_FASTOPEN_KEY_LENGTH 16
+#define TCP_FASTOPEN_KEY_MAX 2
+#define TCP_FASTOPEN_KEY_BUF_LENGTH \
+ (TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX)
/* Fastopen key context */
struct tcp_fastopen_context {
- struct crypto_cipher *tfm;
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ struct crypto_cipher *tfm[TCP_FASTOPEN_KEY_MAX];
+ __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
struct rcu_head rcu;
};
@@ -1639,6 +1643,37 @@ bool tcp_fastopen_active_should_disable(struct sock *sk);
void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
+/* Caller needs to wrap with rcu_read_(un)lock() */
+static inline
+struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk)
+{
+ struct tcp_fastopen_context *ctx;
+
+ ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
+ if (!ctx)
+ ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+ return ctx;
+}
+
+static inline
+bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc,
+ const struct tcp_fastopen_cookie *orig)
+{
+ if (orig->len == TCP_FASTOPEN_COOKIE_SIZE &&
+ orig->len == foc->len &&
+ !memcmp(orig->val, foc->val, foc->len))
+ return true;
+ return false;
+}
+
+static inline
+int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx)
+{
+ if (ctx->tfm[1])
+ return 2;
+ return 1;
+}
+
/* Latencies incurred by various limits for a sender. They are
* chronograph-like stats that are mutually exclusive.
*/
diff --git a/include/trace/events/neigh.h b/include/trace/events/neigh.h
index 0bdb08557763..62bb17516713 100644
--- a/include/trace/events/neigh.h
+++ b/include/trace/events/neigh.h
@@ -20,6 +20,55 @@
{ NUD_NOARP, "noarp" }, \
{ NUD_PERMANENT, "permanent"})
+TRACE_EVENT(neigh_create,
+
+ TP_PROTO(struct neigh_table *tbl, struct net_device *dev,
+ const void *pkey, const struct neighbour *n,
+ bool exempt_from_gc),
+
+ TP_ARGS(tbl, dev, pkey, n, exempt_from_gc),
+
+ TP_STRUCT__entry(
+ __field(u32, family)
+ __dynamic_array(char, dev, IFNAMSIZ )
+ __field(int, entries)
+ __field(u8, created)
+ __field(u8, gc_exempt)
+ __array(u8, primary_key4, 4)
+ __array(u8, primary_key6, 16)
+ ),
+
+ TP_fast_assign(
+ struct in6_addr *pin6;
+ __be32 *p32;
+
+ __entry->family = tbl->family;
+ __assign_str(dev, (dev ? dev->name : "NULL"));
+ __entry->entries = atomic_read(&tbl->gc_entries);
+ __entry->created = n != NULL;
+ __entry->gc_exempt = exempt_from_gc;
+ pin6 = (struct in6_addr *)__entry->primary_key6;
+ p32 = (__be32 *)__entry->primary_key4;
+
+ if (tbl->family == AF_INET)
+ *p32 = *(__be32 *)pkey;
+ else
+ *p32 = 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tbl->family == AF_INET6) {
+ pin6 = (struct in6_addr *)__entry->primary_key6;
+ *pin6 = *(struct in6_addr *)pkey;
+ }
+#endif
+ ),
+
+ TP_printk("family %d dev %s entries %d primary_key4 %pI4 primary_key6 %pI6c created %d gc_exempt %d",
+ __entry->family, __get_str(dev), __entry->entries,
+ __entry->primary_key4, __entry->primary_key6,
+ __entry->created, __entry->gc_exempt)
+);
+
TRACE_EVENT(neigh_update,
TP_PROTO(struct neighbour *n, const u8 *lladdr, u8 new,
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 3534ce157ae9..dd06302aa93e 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1483,6 +1483,8 @@ enum ethtool_link_mode_bit_indices {
ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT = 64,
ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT = 65,
ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT = 66,
+ ETHTOOL_LINK_MODE_100baseT1_Full_BIT = 67,
+ ETHTOOL_LINK_MODE_1000baseT1_Full_BIT = 68,
/* must be last entry */
__ETHTOOL_LINK_MODE_MASK_NBITS
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
new file mode 100644
index 000000000000..7b61867e9848
--- /dev/null
+++ b/include/uapi/linux/nexthop.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_NEXTHOP_H
+#define _UAPI_LINUX_NEXTHOP_H
+
+#include <linux/types.h>
+
+struct nhmsg {
+ unsigned char nh_family;
+ unsigned char nh_scope; /* return only */
+ unsigned char nh_protocol; /* Routing protocol that installed nh */
+ unsigned char resvd;
+ unsigned int nh_flags; /* RTNH_F flags */
+};
+
+/* entry in a nexthop group */
+struct nexthop_grp {
+ __u32 id; /* nexthop id - must exist */
+ __u8 weight; /* weight of this nexthop */
+ __u8 resvd1;
+ __u16 resvd2;
+};
+
+enum {
+ NEXTHOP_GRP_TYPE_MPATH, /* default type if not specified */
+ __NEXTHOP_GRP_TYPE_MAX,
+};
+
+#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
+
+enum {
+ NHA_UNSPEC,
+ NHA_ID, /* u32; id for nexthop. id == 0 means auto-assign */
+
+ NHA_GROUP, /* array of nexthop_grp */
+ NHA_GROUP_TYPE, /* u16 one of NEXTHOP_GRP_TYPE */
+ /* if NHA_GROUP attribute is added, no other attributes can be set */
+
+ NHA_BLACKHOLE, /* flag; nexthop used to blackhole packets */
+ /* if NHA_BLACKHOLE is added, OIF, GATEWAY, ENCAP can not be set */
+
+ NHA_OIF, /* u32; nexthop device */
+ NHA_GATEWAY, /* be32 (IPv4) or in6_addr (IPv6) gw address */
+ NHA_ENCAP_TYPE, /* u16; lwt encap type */
+ NHA_ENCAP, /* lwt encap data */
+
+ /* NHA_OIF can be appended to dump request to return only
+ * nexthops using given device
+ */
+ NHA_GROUPS, /* flag; only return nexthop groups in dump */
+ NHA_MASTER, /* u32; only return nexthops with given master dev */
+
+ __NHA_MAX,
+};
+
+#define NHA_MAX (__NHA_MAX - 1)
+#endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 51a0496f78ea..a93680fc4bfa 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -105,6 +105,7 @@ enum tca_id {
TCA_ID_IFE = TCA_ACT_IFE,
TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
/* other actions go here */
+ TCA_ID_CTINFO,
__TCA_ID_MAX = 255
};
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 46399367627f..ce2a623abb75 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -157,6 +157,13 @@ enum {
RTM_GETCHAIN,
#define RTM_GETCHAIN RTM_GETCHAIN
+ RTM_NEWNEXTHOP = 104,
+#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP
+ RTM_DELNEXTHOP,
+#define RTM_DELNEXTHOP RTM_DELNEXTHOP
+ RTM_GETNEXTHOP,
+#define RTM_GETNEXTHOP RTM_GETNEXTHOP
+
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
@@ -342,6 +349,7 @@ enum rtattr_type_t {
RTA_IP_PROTO,
RTA_SPORT,
RTA_DPORT,
+ RTA_NH_ID,
__RTA_MAX
};
@@ -704,6 +712,8 @@ enum rtnetlink_groups {
#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R
RTNLGRP_IPV6_MROUTE_R,
#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R
+ RTNLGRP_NEXTHOP,
+#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP
__RTNLGRP_MAX
};
#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 86dc24a96c90..74904e9d1b72 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -283,6 +283,7 @@ enum
LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */
LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */
LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */
+ LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */
__LINUX_MIB_MAX
};
diff --git a/include/uapi/linux/tc_act/tc_ctinfo.h b/include/uapi/linux/tc_act/tc_ctinfo.h
new file mode 100644
index 000000000000..da803e05a89b
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_ctinfo.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __UAPI_TC_CTINFO_H
+#define __UAPI_TC_CTINFO_H
+
+#include <linux/types.h>
+#include <linux/pkt_cls.h>
+
+struct tc_ctinfo {
+ tc_gen;
+};
+
+enum {
+ TCA_CTINFO_UNSPEC,
+ TCA_CTINFO_PAD,
+ TCA_CTINFO_TM,
+ TCA_CTINFO_ACT,
+ TCA_CTINFO_ZONE,
+ TCA_CTINFO_PARMS_DSCP_MASK,
+ TCA_CTINFO_PARMS_DSCP_STATEMASK,
+ TCA_CTINFO_PARMS_CPMARK_MASK,
+ TCA_CTINFO_STATS_DSCP_SET,
+ TCA_CTINFO_STATS_DSCP_ERROR,
+ TCA_CTINFO_STATS_CPMARK_SET,
+ __TCA_CTINFO_MAX
+};
+
+#define TCA_CTINFO_MAX (__TCA_CTINFO_MAX - 1)
+
+enum {
+ CTINFO_MODE_DSCP = BIT(0),
+ CTINFO_MODE_CPMARK = BIT(1)
+};
+
+#endif
diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h
index 5c502fdf7a42..a1988576fa8a 100644
--- a/include/uapi/linux/unix_diag.h
+++ b/include/uapi/linux/unix_diag.h
@@ -20,6 +20,7 @@ struct unix_diag_req {
#define UDIAG_SHOW_ICONS 0x00000008 /* show pending connections */
#define UDIAG_SHOW_RQLEN 0x00000010 /* show skb receive queue len */
#define UDIAG_SHOW_MEMINFO 0x00000020 /* show memory info of a socket */
+#define UDIAG_SHOW_UID 0x00000040 /* show socket's UID */
struct unix_diag_msg {
__u8 udiag_family;
@@ -40,6 +41,7 @@ enum {
UNIX_DIAG_RQLEN,
UNIX_DIAG_MEMINFO,
UNIX_DIAG_SHUTDOWN,
+ UNIX_DIAG_UID,
__UNIX_DIAG_MAX,
};
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 013323b6dbe4..693aefad7f8a 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -56,6 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
br_switchdev_frame_unmark(skb);
BR_INPUT_SKB_CB(skb)->brdev = dev;
+ BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
skb_reset_mac_header(skb);
eth = eth_hdr(skb);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 334a8c496b50..68561741e827 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -425,6 +425,7 @@ struct net_bridge {
struct br_input_skb_cb {
struct net_device *brdev;
+ u16 frag_max_size;
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
u8 igmp;
u8 mrouters_only:1;
diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig
index c3ad90c43801..f4fb0b9b927d 100644
--- a/net/bridge/netfilter/Kconfig
+++ b/net/bridge/netfilter/Kconfig
@@ -19,6 +19,20 @@ config NF_LOG_BRIDGE
tristate "Bridge packet logging"
select NF_LOG_COMMON
+config NF_CONNTRACK_BRIDGE
+ tristate "IPv4/IPV6 bridge connection tracking support"
+ depends on NF_CONNTRACK
+ default n
+ help
+ Connection tracking keeps a record of what packets have passed
+ through your machine, in order to figure out how they are related
+ into connections. This is used to enhance packet filtering via
+ stateful policies. Enable this if you want native tracking from
+ the bridge. This provides a replacement for the `br_netfilter'
+ infrastructure.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
endif # NF_TABLES_BRIDGE
menuconfig BRIDGE_NF_EBTABLES
diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile
index 9b868861f21a..9d7767322a64 100644
--- a/net/bridge/netfilter/Makefile
+++ b/net/bridge/netfilter/Makefile
@@ -5,6 +5,9 @@
obj-$(CONFIG_NFT_BRIDGE_REJECT) += nft_reject_bridge.o
+# connection tracking
+obj-$(CONFIG_NF_CONNTRACK_BRIDGE) += nf_conntrack_bridge.o
+
# packet logging
obj-$(CONFIG_NF_LOG_BRIDGE) += nf_log_bridge.o
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
new file mode 100644
index 000000000000..b675cd7c1a82
--- /dev/null
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -0,0 +1,433 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/icmp.h>
+#include <linux/sysctl.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
+
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_tables.h>
+
+#include "../br_private.h"
+
+/* Best effort variant of ip_do_fragment which preserves geometry, unless skbuff
+ * has been linearized or cloned.
+ */
+static int nf_br_ip_fragment(struct net *net, struct sock *sk,
+ struct sk_buff *skb,
+ struct nf_ct_bridge_frag_data *data,
+ int (*output)(struct net *, struct sock *sk,
+ const struct nf_ct_bridge_frag_data *data,
+ struct sk_buff *))
+{
+ int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ unsigned int hlen, ll_rs, mtu;
+ struct ip_frag_state state;
+ struct iphdr *iph;
+ int err;
+
+ /* for offloaded checksums cleanup checksum before fragmentation */
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ (err = skb_checksum_help(skb)))
+ goto blackhole;
+
+ iph = ip_hdr(skb);
+
+ /*
+ * Setup starting values
+ */
+
+ hlen = iph->ihl * 4;
+ frag_max_size -= hlen;
+ ll_rs = LL_RESERVED_SPACE(skb->dev);
+ mtu = skb->dev->mtu;
+
+ if (skb_has_frag_list(skb)) {
+ unsigned int first_len = skb_pagelen(skb);
+ struct ip_fraglist_iter iter;
+ struct sk_buff *frag;
+
+ if (first_len - hlen > mtu ||
+ skb_headroom(skb) < ll_rs)
+ goto blackhole;
+
+ if (skb_cloned(skb))
+ goto slow_path;
+
+ skb_walk_frags(skb, frag) {
+ if (frag->len > mtu ||
+ skb_headroom(frag) < hlen + ll_rs)
+ goto blackhole;
+
+ if (skb_shared(frag))
+ goto slow_path;
+ }
+
+ ip_fraglist_init(skb, iph, hlen, &iter);
+
+ for (;;) {
+ if (iter.frag)
+ ip_fraglist_prepare(skb, &iter);
+
+ err = output(net, sk, data, skb);
+ if (err || !iter.frag)
+ break;
+
+ skb = ip_fraglist_next(&iter);
+ }
+ return err;
+ }
+slow_path:
+ /* This is a linearized skbuff, the original geometry is lost for us.
+ * This may also be a clone skbuff, we could preserve the geometry for
+ * the copies but probably not worth the effort.
+ */
+ ip_frag_init(skb, hlen, ll_rs, frag_max_size, &state);
+
+ while (state.left > 0) {
+ struct sk_buff *skb2;
+
+ skb2 = ip_frag_next(skb, &state);
+ if (IS_ERR(skb2)) {
+ err = PTR_ERR(skb2);
+ goto blackhole;
+ }
+
+ err = output(net, sk, data, skb2);
+ if (err)
+ goto blackhole;
+ }
+ consume_skb(skb);
+ return err;
+
+blackhole:
+ kfree_skb(skb);
+ return 0;
+}
+
+/* ip_defrag() expects IPCB() in place. */
+static void br_skb_cb_save(struct sk_buff *skb, struct br_input_skb_cb *cb,
+ size_t inet_skb_parm_size)
+{
+ memcpy(cb, skb->cb, sizeof(*cb));
+ memset(skb->cb, 0, inet_skb_parm_size);
+}
+
+static void br_skb_cb_restore(struct sk_buff *skb,
+ const struct br_input_skb_cb *cb,
+ u16 fragsz)
+{
+ memcpy(skb->cb, cb, sizeof(*cb));
+ BR_INPUT_SKB_CB(skb)->frag_max_size = fragsz;
+}
+
+static unsigned int nf_ct_br_defrag4(struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
+ enum ip_conntrack_info ctinfo;
+ struct br_input_skb_cb cb;
+ const struct nf_conn *ct;
+ int err;
+
+ if (!ip_is_fragment(ip_hdr(skb)))
+ return NF_ACCEPT;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct)
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+
+ br_skb_cb_save(skb, &cb, sizeof(struct inet_skb_parm));
+ local_bh_disable();
+ err = ip_defrag(state->net, skb,
+ IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
+ local_bh_enable();
+ if (!err) {
+ br_skb_cb_restore(skb, &cb, IPCB(skb)->frag_max_size);
+ skb->ignore_df = 1;
+ return NF_ACCEPT;
+ }
+
+ return NF_STOLEN;
+}
+
+static unsigned int nf_ct_br_defrag6(struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
+ enum ip_conntrack_info ctinfo;
+ struct br_input_skb_cb cb;
+ const struct nf_conn *ct;
+ int err;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct)
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+
+ br_skb_cb_save(skb, &cb, sizeof(struct inet6_skb_parm));
+
+ err = nf_ipv6_br_defrag(state->net, skb,
+ IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id);
+ /* queued */
+ if (err == -EINPROGRESS)
+ return NF_STOLEN;
+
+ br_skb_cb_restore(skb, &cb, IP6CB(skb)->frag_max_size);
+ return err == 0 ? NF_ACCEPT : NF_DROP;
+}
+
+static int nf_ct_br_ip_check(const struct sk_buff *skb)
+{
+ const struct iphdr *iph;
+ int nhoff, len;
+
+ nhoff = skb_network_offset(skb);
+ iph = ip_hdr(skb);
+ if (iph->ihl < 5 ||
+ iph->version != 4)
+ return -1;
+
+ len = ntohs(iph->tot_len);
+ if (skb->len < nhoff + len ||
+ len < (iph->ihl * 4))
+ return -1;
+
+ return 0;
+}
+
+static int nf_ct_br_ipv6_check(const struct sk_buff *skb)
+{
+ const struct ipv6hdr *hdr;
+ int nhoff, len;
+
+ nhoff = skb_network_offset(skb);
+ hdr = ipv6_hdr(skb);
+ if (hdr->version != 6)
+ return -1;
+
+ len = ntohs(hdr->payload_len) + sizeof(struct ipv6hdr) + nhoff;
+ if (skb->len < len)
+ return -1;
+
+ return 0;
+}
+
+static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_hook_state bridge_state = *state;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ u32 len;
+ int ret;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if ((ct && !nf_ct_is_template(ct)) ||
+ ctinfo == IP_CT_UNTRACKED)
+ return NF_ACCEPT;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return NF_ACCEPT;
+
+ len = ntohs(ip_hdr(skb)->tot_len);
+ if (pskb_trim_rcsum(skb, len))
+ return NF_ACCEPT;
+
+ if (nf_ct_br_ip_check(skb))
+ return NF_ACCEPT;
+
+ bridge_state.pf = NFPROTO_IPV4;
+ ret = nf_ct_br_defrag4(skb, &bridge_state);
+ break;
+ case htons(ETH_P_IPV6):
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ return NF_ACCEPT;
+
+ len = sizeof(struct ipv6hdr) + ntohs(ipv6_hdr(skb)->payload_len);
+ if (pskb_trim_rcsum(skb, len))
+ return NF_ACCEPT;
+
+ if (nf_ct_br_ipv6_check(skb))
+ return NF_ACCEPT;
+
+ bridge_state.pf = NFPROTO_IPV6;
+ ret = nf_ct_br_defrag6(skb, &bridge_state);
+ break;
+ default:
+ nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
+ return NF_ACCEPT;
+ }
+
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ return nf_conntrack_in(skb, &bridge_state);
+}
+
+static void nf_ct_bridge_frag_save(struct sk_buff *skb,
+ struct nf_ct_bridge_frag_data *data)
+{
+ if (skb_vlan_tag_present(skb)) {
+ data->vlan_present = true;
+ data->vlan_tci = skb->vlan_tci;
+ data->vlan_proto = skb->vlan_proto;
+ } else {
+ data->vlan_present = false;
+ }
+ skb_copy_from_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
+}
+
+static unsigned int
+nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state,
+ int (*output)(struct net *, struct sock *sk,
+ const struct nf_ct_bridge_frag_data *data,
+ struct sk_buff *))
+{
+ struct nf_ct_bridge_frag_data data;
+
+ if (!BR_INPUT_SKB_CB(skb)->frag_max_size)
+ return NF_ACCEPT;
+
+ nf_ct_bridge_frag_save(skb, &data);
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ nf_br_ip_fragment(state->net, state->sk, skb, &data, output);
+ break;
+ case htons(ETH_P_IPV6):
+ nf_br_ip6_fragment(state->net, state->sk, skb, &data, output);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return NF_DROP;
+ }
+
+ return NF_STOLEN;
+}
+
+/* Actually only slow path refragmentation needs this. */
+static int nf_ct_bridge_frag_restore(struct sk_buff *skb,
+ const struct nf_ct_bridge_frag_data *data)
+{
+ int err;
+
+ err = skb_cow_head(skb, ETH_HLEN);
+ if (err) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ if (data->vlan_present)
+ __vlan_hwaccel_put_tag(skb, data->vlan_proto, data->vlan_tci);
+
+ skb_copy_to_linear_data_offset(skb, -ETH_HLEN, data->mac, ETH_HLEN);
+ skb_reset_mac_header(skb);
+
+ return 0;
+}
+
+static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk,
+ const struct nf_ct_bridge_frag_data *data,
+ struct sk_buff *skb)
+{
+ int err;
+
+ err = nf_ct_bridge_frag_restore(skb, data);
+ if (err < 0)
+ return err;
+
+ return br_dev_queue_push_xmit(net, sk, skb);
+}
+
+static unsigned int nf_ct_bridge_confirm(struct sk_buff *skb)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ int protoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return nf_conntrack_confirm(skb);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ protoff = skb_network_offset(skb) + ip_hdrlen(skb);
+ break;
+ case htons(ETH_P_IPV6): {
+ unsigned char pnum = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
+
+ protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
+ &frag_off);
+ if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+ return nf_conntrack_confirm(skb);
+ }
+ break;
+ default:
+ return NF_ACCEPT;
+ }
+ return nf_confirm(skb, protoff, ct, ctinfo);
+}
+
+static unsigned int nf_ct_bridge_post(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ int ret;
+
+ ret = nf_ct_bridge_confirm(skb);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ return nf_ct_bridge_refrag(skb, state, nf_ct_bridge_refrag_post);
+}
+
+static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
+ {
+ .hook = nf_ct_bridge_pre,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = nf_ct_bridge_post,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+};
+
+static struct nf_ct_bridge_info bridge_info = {
+ .ops = nf_ct_bridge_hook_ops,
+ .ops_size = ARRAY_SIZE(nf_ct_bridge_hook_ops),
+ .me = THIS_MODULE,
+};
+
+static int __init nf_conntrack_l3proto_bridge_init(void)
+{
+ nf_ct_bridge_register(&bridge_info);
+
+ return 0;
+}
+
+static void __exit nf_conntrack_l3proto_bridge_fini(void)
+{
+ nf_ct_bridge_unregister(&bridge_info);
+}
+
+module_init(nf_conntrack_l3proto_bridge_init);
+module_exit(nf_conntrack_l3proto_bridge_fini);
+
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
+MODULE_LICENSE("GPL");
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d43bc52b8840..9716a7f382cb 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -21,6 +21,7 @@
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/refcount.h>
+#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
#include <net/netlink.h>
#include <net/genetlink.h>
@@ -5390,6 +5391,38 @@ void devlink_free(struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devlink_free);
+static void devlink_port_type_warn(struct work_struct *work)
+{
+ WARN(true, "Type was not set for devlink port.");
+}
+
+static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+{
+ /* Ignore CPU and DSA flavours. */
+ return devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU &&
+ devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA;
+}
+
+#define DEVLINK_PORT_TYPE_WARN_TIMEOUT (HZ * 30)
+
+static void devlink_port_type_warn_schedule(struct devlink_port *devlink_port)
+{
+ if (!devlink_port_type_should_warn(devlink_port))
+ return;
+ /* Schedule a work to WARN in case driver does not set port
+ * type within timeout.
+ */
+ schedule_delayed_work(&devlink_port->type_warn_dw,
+ DEVLINK_PORT_TYPE_WARN_TIMEOUT);
+}
+
+static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
+{
+ if (!devlink_port_type_should_warn(devlink_port))
+ return;
+ cancel_delayed_work_sync(&devlink_port->type_warn_dw);
+}
+
/**
* devlink_port_register - Register devlink port
*
@@ -5419,6 +5452,8 @@ int devlink_port_register(struct devlink *devlink,
list_add_tail(&devlink_port->list, &devlink->port_list);
INIT_LIST_HEAD(&devlink_port->param_list);
mutex_unlock(&devlink->lock);
+ INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
+ devlink_port_type_warn_schedule(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
return 0;
}
@@ -5433,6 +5468,7 @@ void devlink_port_unregister(struct devlink_port *devlink_port)
{
struct devlink *devlink = devlink_port->devlink;
+ devlink_port_type_warn_cancel(devlink_port);
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
mutex_lock(&devlink->lock);
list_del(&devlink_port->list);
@@ -5446,6 +5482,7 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port,
{
if (WARN_ON(!devlink_port->registered))
return;
+ devlink_port_type_warn_cancel(devlink_port);
spin_lock(&devlink_port->type_lock);
devlink_port->type = type;
devlink_port->type_dev = type_dev;
@@ -5519,6 +5556,7 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set);
void devlink_port_type_clear(struct devlink_port *devlink_port)
{
__devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL);
+ devlink_port_type_warn_schedule(devlink_port);
}
EXPORT_SYMBOL_GPL(devlink_port_type_clear);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 5ce7d47a960e..3d93e51b83e0 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -7,8 +7,7 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions)
{
struct flow_rule *rule;
- rule = kzalloc(sizeof(struct flow_rule) +
- sizeof(struct flow_action_entry) * num_actions,
+ rule = kzalloc(struct_size(rule, action.entries, num_actions),
GFP_KERNEL);
if (!rule)
return NULL;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index dfa871061f14..a5556e4d3f96 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -587,6 +587,8 @@ static struct neighbour *___neigh_create(struct neigh_table *tbl,
int error;
struct neigh_hash_table *nht;
+ trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
+
if (!n) {
rc = ERR_PTR(-ENOBUFS);
goto out;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eaad23f9c7b5..4a712a00243a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -76,6 +76,7 @@
#include <linux/highmem.h>
#include <linux/capability.h>
#include <linux/user_namespace.h>
+#include <linux/indirect_call_wrapper.h>
#include "datagram.h"
@@ -2511,7 +2512,8 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
if (copy > 0) {
if (copy > len)
copy = len;
- csum = ops->update(skb->data + offset, copy, csum);
+ csum = INDIRECT_CALL_1(ops->update, csum_partial_ext,
+ skb->data + offset, copy, csum);
if ((len -= copy) == 0)
return csum;
offset += copy;
@@ -2538,9 +2540,13 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
frag->page_offset + offset - start,
copy, p, p_off, p_len, copied) {
vaddr = kmap_atomic(p);
- csum2 = ops->update(vaddr + p_off, p_len, 0);
+ csum2 = INDIRECT_CALL_1(ops->update,
+ csum_partial_ext,
+ vaddr + p_off, p_len, 0);
kunmap_atomic(vaddr);
- csum = ops->combine(csum, csum2, pos, p_len);
+ csum = INDIRECT_CALL_1(ops->combine,
+ csum_block_add_ext, csum,
+ csum2, pos, p_len);
pos += p_len;
}
@@ -2563,7 +2569,8 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len,
copy = len;
csum2 = __skb_checksum(frag_iter, offset - start,
copy, 0, ops);
- csum = ops->combine(csum, csum2, pos, copy);
+ csum = INDIRECT_CALL_1(ops->combine, csum_block_add_ext,
+ csum, csum2, pos, copy);
if ((len -= copy) == 0)
return csum;
offset += copy;
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 3b5f434cad3f..b70befe8a3c8 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -261,7 +261,7 @@ static int dsa_port_setup(struct dsa_port *dp)
enum devlink_port_flavour flavour;
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst = ds->dst;
- int err;
+ int err = 0;
if (dp->type == DSA_PORT_TYPE_UNUSED)
return 0;
@@ -299,19 +299,15 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
case DSA_PORT_TYPE_CPU:
err = dsa_port_link_register_of(dp);
- if (err) {
+ if (err)
dev_err(ds->dev, "failed to setup link for port %d.%d\n",
ds->index, dp->index);
- return err;
- }
break;
case DSA_PORT_TYPE_DSA:
err = dsa_port_link_register_of(dp);
- if (err) {
+ if (err)
dev_err(ds->dev, "failed to setup link for port %d.%d\n",
ds->index, dp->index);
- return err;
- }
break;
case DSA_PORT_TYPE_USER:
err = dsa_slave_create(dp);
@@ -323,7 +319,10 @@ static int dsa_port_setup(struct dsa_port *dp)
break;
}
- return 0;
+ if (err)
+ devlink_port_unregister(&dp->devlink_port);
+
+ return err;
}
static void dsa_port_teardown(struct dsa_port *dp)
@@ -351,7 +350,7 @@ static void dsa_port_teardown(struct dsa_port *dp)
static int dsa_switch_setup(struct dsa_switch *ds)
{
- int err;
+ int err = 0;
/* Initialize ds->phys_mii_mask before registering the slave MDIO bus
* driver and before ops->setup() has run, since the switch drivers and
@@ -369,29 +368,41 @@ static int dsa_switch_setup(struct dsa_switch *ds)
err = devlink_register(ds->devlink, ds->dev);
if (err)
- return err;
+ goto free_devlink;
err = dsa_switch_register_notifier(ds);
if (err)
- return err;
+ goto unregister_devlink;
err = ds->ops->setup(ds);
if (err < 0)
- return err;
+ goto unregister_notifier;
if (!ds->slave_mii_bus && ds->ops->phy_read) {
ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
- if (!ds->slave_mii_bus)
- return -ENOMEM;
+ if (!ds->slave_mii_bus) {
+ err = -ENOMEM;
+ goto unregister_notifier;
+ }
dsa_slave_mii_bus_init(ds);
err = mdiobus_register(ds->slave_mii_bus);
if (err < 0)
- return err;
+ goto unregister_notifier;
}
return 0;
+
+unregister_notifier:
+ dsa_switch_unregister_notifier(ds);
+unregister_devlink:
+ devlink_unregister(ds->devlink);
+free_devlink:
+ devlink_free(ds->devlink);
+ ds->devlink = NULL;
+
+ return err;
}
static void dsa_switch_teardown(struct dsa_switch *ds)
@@ -413,8 +424,8 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
{
struct dsa_switch *ds;
struct dsa_port *dp;
- int device, port;
- int err;
+ int device, port, i;
+ int err = 0;
for (device = 0; device < DSA_MAX_SWITCHES; device++) {
ds = dst->ds[device];
@@ -423,18 +434,41 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
err = dsa_switch_setup(ds);
if (err)
- return err;
+ goto switch_teardown;
for (port = 0; port < ds->num_ports; port++) {
dp = &ds->ports[port];
err = dsa_port_setup(dp);
if (err)
- return err;
+ goto ports_teardown;
}
}
return 0;
+
+ports_teardown:
+ for (i = 0; i < port; i++)
+ dsa_port_teardown(&ds->ports[i]);
+
+ dsa_switch_teardown(ds);
+
+switch_teardown:
+ for (i = 0; i < device; i++) {
+ ds = dst->ds[i];
+ if (!ds)
+ continue;
+
+ for (port = 0; port < ds->num_ports; port++) {
+ dp = &ds->ports[port];
+
+ dsa_port_teardown(dp);
+ }
+
+ dsa_switch_teardown(ds);
+ }
+
+ return err;
}
static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
@@ -496,17 +530,24 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
err = dsa_tree_setup_switches(dst);
if (err)
- return err;
+ goto teardown_default_cpu;
err = dsa_tree_setup_master(dst);
if (err)
- return err;
+ goto teardown_switches;
dst->setup = true;
pr_info("DSA: tree %d setup\n", dst->index);
return 0;
+
+teardown_switches:
+ dsa_tree_teardown_switches(dst);
+teardown_default_cpu:
+ dsa_tree_teardown_default_cpu(dst);
+
+ return err;
}
static void dsa_tree_teardown(struct dsa_switch_tree *dst)
@@ -547,8 +588,10 @@ static int dsa_tree_add_switch(struct dsa_switch_tree *dst,
dst->ds[index] = ds;
err = dsa_tree_setup(dst);
- if (err)
- dsa_tree_remove_switch(dst, index);
+ if (err) {
+ dst->ds[index] = NULL;
+ dsa_tree_put(dst);
+ }
return err;
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 8f1222324646..418490bda3a4 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -163,6 +163,23 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
+void dsa_port_phylink_validate(struct phylink_config *config,
+ unsigned long *supported,
+ struct phylink_link_state *state);
+int dsa_port_phylink_mac_link_state(struct phylink_config *config,
+ struct phylink_link_state *state);
+void dsa_port_phylink_mac_config(struct phylink_config *config,
+ unsigned int mode,
+ const struct phylink_link_state *state);
+void dsa_port_phylink_mac_an_restart(struct phylink_config *config);
+void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface);
+void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev);
+extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
/* slave.c */
extern const struct dsa_device_ops notag_netdev_ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ed8ba9daa3ba..d74bc9df1359 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -422,6 +422,108 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
return phydev;
}
+void dsa_port_phylink_validate(struct phylink_config *config,
+ unsigned long *supported,
+ struct phylink_link_state *state)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->phylink_validate)
+ return;
+
+ ds->ops->phylink_validate(ds, dp->index, supported, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_validate);
+
+int dsa_port_phylink_mac_link_state(struct phylink_config *config,
+ struct phylink_link_state *state)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_switch *ds = dp->ds;
+
+ /* Only called for SGMII and 802.3z */
+ if (!ds->ops->phylink_mac_link_state)
+ return -EOPNOTSUPP;
+
+ return ds->ops->phylink_mac_link_state(ds, dp->index, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_state);
+
+void dsa_port_phylink_mac_config(struct phylink_config *config,
+ unsigned int mode,
+ const struct phylink_link_state *state)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->phylink_mac_config)
+ return;
+
+ ds->ops->phylink_mac_config(ds, dp->index, mode, state);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_config);
+
+void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->phylink_mac_an_restart)
+ return;
+
+ ds->ops->phylink_mac_an_restart(ds, dp->index);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_an_restart);
+
+void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct phy_device *phydev = NULL;
+ struct dsa_switch *ds = dp->ds;
+
+ if (dsa_is_user_port(ds, dp->index))
+ phydev = dp->slave->phydev;
+
+ if (!ds->ops->phylink_mac_link_down) {
+ if (ds->ops->adjust_link && phydev)
+ ds->ops->adjust_link(ds, dp->index, phydev);
+ return;
+ }
+
+ ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_down);
+
+void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->phylink_mac_link_up) {
+ if (ds->ops->adjust_link && phydev)
+ ds->ops->adjust_link(ds, dp->index, phydev);
+ return;
+ }
+
+ ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
+}
+EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_up);
+
+const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
+ .validate = dsa_port_phylink_validate,
+ .mac_link_state = dsa_port_phylink_mac_link_state,
+ .mac_config = dsa_port_phylink_mac_config,
+ .mac_an_restart = dsa_port_phylink_mac_an_restart,
+ .mac_link_down = dsa_port_phylink_mac_link_down,
+ .mac_link_up = dsa_port_phylink_mac_link_up,
+};
+
static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable)
{
struct dsa_switch *ds = dp->ds;
@@ -499,8 +601,53 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
return 0;
}
+static int dsa_port_phylink_register(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ struct device_node *port_dn = dp->dn;
+ int mode, err;
+
+ mode = of_get_phy_mode(port_dn);
+ if (mode < 0)
+ mode = PHY_INTERFACE_MODE_NA;
+
+ dp->pl_config.dev = ds->dev;
+ dp->pl_config.type = PHYLINK_DEV;
+
+ dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn),
+ mode, &dsa_port_phylink_mac_ops);
+ if (IS_ERR(dp->pl)) {
+ pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
+ return PTR_ERR(dp->pl);
+ }
+
+ err = phylink_of_phy_connect(dp->pl, port_dn, 0);
+ if (err) {
+ pr_err("could not attach to PHY: %d\n", err);
+ goto err_phy_connect;
+ }
+
+ rtnl_lock();
+ phylink_start(dp->pl);
+ rtnl_unlock();
+
+ return 0;
+
+err_phy_connect:
+ phylink_destroy(dp->pl);
+ return err;
+}
+
int dsa_port_link_register_of(struct dsa_port *dp)
{
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->adjust_link)
+ return dsa_port_phylink_register(dp);
+
+ dev_warn(ds->dev,
+ "Using legacy PHYLIB callbacks. Please migrate to PHYLINK!\n");
+
if (of_phy_is_fixed_link(dp->dn))
return dsa_port_fixed_link_register_of(dp);
else
@@ -509,6 +656,16 @@ int dsa_port_link_register_of(struct dsa_port *dp)
void dsa_port_link_unregister_of(struct dsa_port *dp)
{
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->adjust_link) {
+ rtnl_lock();
+ phylink_disconnect_phy(dp->pl);
+ rtnl_unlock();
+ phylink_destroy(dp->pl);
+ return;
+ }
+
if (of_phy_is_fixed_link(dp->dn))
of_phy_deregister_fixed_link(dp->dn);
else
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 9892ca1f6859..1e2ae9d59b88 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1164,98 +1164,6 @@ static struct device_type dsa_type = {
.name = "dsa",
};
-static void dsa_slave_phylink_validate(struct net_device *dev,
- unsigned long *supported,
- struct phylink_link_state *state)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_validate)
- return;
-
- ds->ops->phylink_validate(ds, dp->index, supported, state);
-}
-
-static int dsa_slave_phylink_mac_link_state(struct net_device *dev,
- struct phylink_link_state *state)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_switch *ds = dp->ds;
-
- /* Only called for SGMII and 802.3z */
- if (!ds->ops->phylink_mac_link_state)
- return -EOPNOTSUPP;
-
- return ds->ops->phylink_mac_link_state(ds, dp->index, state);
-}
-
-static void dsa_slave_phylink_mac_config(struct net_device *dev,
- unsigned int mode,
- const struct phylink_link_state *state)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_config)
- return;
-
- ds->ops->phylink_mac_config(ds, dp->index, mode, state);
-}
-
-static void dsa_slave_phylink_mac_an_restart(struct net_device *dev)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_an_restart)
- return;
-
- ds->ops->phylink_mac_an_restart(ds, dp->index);
-}
-
-static void dsa_slave_phylink_mac_link_down(struct net_device *dev,
- unsigned int mode,
- phy_interface_t interface)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_link_down) {
- if (ds->ops->adjust_link && dev->phydev)
- ds->ops->adjust_link(ds, dp->index, dev->phydev);
- return;
- }
-
- ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
-}
-
-static void dsa_slave_phylink_mac_link_up(struct net_device *dev,
- unsigned int mode,
- phy_interface_t interface,
- struct phy_device *phydev)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
- struct dsa_switch *ds = dp->ds;
-
- if (!ds->ops->phylink_mac_link_up) {
- if (ds->ops->adjust_link && dev->phydev)
- ds->ops->adjust_link(ds, dp->index, dev->phydev);
- return;
- }
-
- ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
-}
-
-static const struct phylink_mac_ops dsa_slave_phylink_mac_ops = {
- .validate = dsa_slave_phylink_validate,
- .mac_link_state = dsa_slave_phylink_mac_link_state,
- .mac_config = dsa_slave_phylink_mac_config,
- .mac_an_restart = dsa_slave_phylink_mac_an_restart,
- .mac_link_down = dsa_slave_phylink_mac_link_down,
- .mac_link_up = dsa_slave_phylink_mac_link_up,
-};
-
void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up)
{
const struct dsa_port *dp = dsa_to_port(ds, port);
@@ -1303,8 +1211,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
if (mode < 0)
mode = PHY_INTERFACE_MODE_NA;
- dp->pl = phylink_create(slave_dev, of_fwnode_handle(port_dn), mode,
- &dsa_slave_phylink_mac_ops);
+ dp->pl_config.dev = &slave_dev->dev;
+ dp->pl_config.type = PHYLINK_NETDEV;
+
+ dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), mode,
+ &dsa_port_phylink_mac_ops);
if (IS_ERR(dp->pl)) {
netdev_err(slave_dev,
"error creating PHYLINK: %ld\n", PTR_ERR(dp->pl));
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 4196bcd4105a..5b56f16ed86b 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -79,7 +79,7 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
key.src = *src;
key.dst = *dst;
- q = inet_frag_find(&ieee802154_lowpan->frags, &key);
+ q = inet_frag_find(ieee802154_lowpan->fqdir, &key);
if (!q)
return NULL;
@@ -139,7 +139,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
fq->q.flags |= INET_FRAG_FIRST_IN;
fq->q.meat += skb->len;
- add_frag_mem_limit(fq->q.net, skb->truesize);
+ add_frag_mem_limit(fq->q.fqdir, skb->truesize);
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
@@ -326,23 +326,18 @@ err:
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
- .data = &init_net.ieee802154_lowpan.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
- .data = &init_net.ieee802154_lowpan.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
{
.procname = "6lowpanfrag_time",
- .data = &init_net.ieee802154_lowpan.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -377,17 +372,17 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
if (table == NULL)
goto err_alloc;
- table[0].data = &ieee802154_lowpan->frags.high_thresh;
- table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
- table[1].data = &ieee802154_lowpan->frags.low_thresh;
- table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
- table[2].data = &ieee802154_lowpan->frags.timeout;
-
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
}
+ table[0].data = &ieee802154_lowpan->fqdir->high_thresh;
+ table[0].extra1 = &ieee802154_lowpan->fqdir->low_thresh;
+ table[1].data = &ieee802154_lowpan->fqdir->low_thresh;
+ table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh;
+ table[2].data = &ieee802154_lowpan->fqdir->timeout;
+
hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
if (hdr == NULL)
goto err_reg;
@@ -454,17 +449,18 @@ static int __net_init lowpan_frags_init_net(struct net *net)
net_ieee802154_lowpan(net);
int res;
- ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
- ieee802154_lowpan->frags.f = &lowpan_frags;
- res = inet_frags_init_net(&ieee802154_lowpan->frags);
+ res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net);
if (res < 0)
return res;
+
+ ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+ ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+ ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
res = lowpan_frags_ns_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&ieee802154_lowpan->frags);
+ fqdir_exit(ieee802154_lowpan->fqdir);
return res;
}
@@ -474,7 +470,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);
lowpan_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&ieee802154_lowpan->frags);
+ fqdir_exit(ieee802154_lowpan->fqdir);
}
static struct pernet_operations lowpan_frags_ops = {
@@ -544,7 +540,7 @@ err_sysctl:
void lowpan_net_frag_exit(void)
{
- inet_frags_fini(&lowpan_frags);
lowpan_frags_sysctl_unregister();
unregister_pernet_subsys(&lowpan_frags_ops);
+ inet_frags_fini(&lowpan_frags);
}
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 000a61994c8f..d57ecfaf89d4 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -14,7 +14,7 @@ obj-y := route.o inetpeer.o protocol.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
- metrics.o netlink.o
+ metrics.o netlink.o nexthop.o
obj-$(CONFIG_BPFILTER) += bpfilter/
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b298255f6fdb..76055c66326a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -192,7 +192,7 @@ int fib_unmerge(struct net *net)
return 0;
}
-static void fib_flush(struct net *net)
+void fib_flush(struct net *net)
{
int flushed = 0;
unsigned int h;
@@ -645,6 +645,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, struct rtentry *rt)
}
const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
+ [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
[RTA_DST] = { .type = NLA_U32 },
[RTA_SRC] = { .type = NLA_U32 },
[RTA_IIF] = { .type = NLA_U32 },
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index d3da6a10f86f..78648072783e 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1092,15 +1092,13 @@ out:
return err;
}
-static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh,
- struct netlink_ext_ack *extack)
+int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
+ struct netlink_ext_ack *extack)
{
- struct net *net = cfg->fc_nlinfo.nl_net;
- u32 table = cfg->fc_table;
int err;
if (nh->fib_nh_gw_family == AF_INET)
- err = fib_check_nh_v4_gw(net, nh, table, cfg->fc_scope, extack);
+ err = fib_check_nh_v4_gw(net, nh, table, scope, extack);
else if (nh->fib_nh_gw_family == AF_INET6)
err = fib_check_nh_v6_gw(net, nh, table, extack);
else
@@ -1191,11 +1189,10 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash,
fib_info_hash_free(old_laddrhash, bytes);
}
-__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
+__be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
+ unsigned char scope)
{
- nh->nh_saddr = inet_select_addr(nh->fib_nh_dev,
- nh->fib_nh_gw4,
- nh->nh_parent->fib_scope);
+ nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);
nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid);
return nh->nh_saddr;
@@ -1213,7 +1210,7 @@ __be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid))
return nh->nh_saddr;
- return fib_info_update_nh_saddr(net, nh);
+ return fib_info_update_nh_saddr(net, nh, res->fi->fib_scope);
}
static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
@@ -1377,7 +1374,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
int linkdown = 0;
change_nexthops(fi) {
- err = fib_check_nh(cfg, nexthop_nh, extack);
+ err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh,
+ cfg->fc_table, cfg->fc_scope,
+ extack);
if (err != 0)
goto failure;
if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
@@ -1393,7 +1392,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
}
change_nexthops(fi) {
- fib_info_update_nh_saddr(net, nexthop_nh);
+ fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
if (nexthop_nh->fib_nh_gw_family == AF_INET6)
fi->fib_nh_is_v6 = true;
} endfor_nexthops(fi)
@@ -1713,7 +1712,7 @@ static int call_fib_nh_notifiers(struct fib_nh *nh,
* - if the new MTU is greater than the PMTU, don't make any change
* - otherwise, unlock and set PMTU
*/
-static void nh_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
+void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
{
struct fnhe_hash_bucket *bucket;
int i;
@@ -1749,7 +1748,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
hlist_for_each_entry(nh, head, nh_hash) {
if (nh->fib_nh_dev == dev)
- nh_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
+ fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
}
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 334f723bdf80..b53ecef89d59 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1943,6 +1943,77 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
return found;
}
+/* derived from fib_trie_free */
+static void __fib_info_notify_update(struct net *net, struct fib_table *tb,
+ struct nl_info *info)
+{
+ struct trie *t = (struct trie *)tb->tb_data;
+ struct key_vector *pn = t->kv;
+ unsigned long cindex = 1;
+ struct fib_alias *fa;
+
+ for (;;) {
+ struct key_vector *n;
+
+ if (!(cindex--)) {
+ t_key pkey = pn->key;
+
+ if (IS_TRIE(pn))
+ break;
+
+ pn = node_parent(pn);
+ cindex = get_index(pkey, pn);
+ continue;
+ }
+
+ /* grab the next available node */
+ n = get_child(pn, cindex);
+ if (!n)
+ continue;
+
+ if (IS_TNODE(n)) {
+ /* record pn and cindex for leaf walking */
+ pn = n;
+ cindex = 1ul << n->bits;
+
+ continue;
+ }
+
+ hlist_for_each_entry(fa, &n->leaf, fa_list) {
+ struct fib_info *fi = fa->fa_info;
+
+ if (!fi || !fi->nh_updated || fa->tb_id != tb->tb_id)
+ continue;
+
+ rtmsg_fib(RTM_NEWROUTE, htonl(n->key), fa,
+ KEYLENGTH - fa->fa_slen, tb->tb_id,
+ info, NLM_F_REPLACE);
+
+ /* call_fib_entry_notifiers will be removed when
+ * in-kernel notifier is implemented and supported
+ * for nexthop objects
+ */
+ call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+ n->key,
+ KEYLENGTH - fa->fa_slen, fa,
+ NULL);
+ }
+ }
+}
+
+void fib_info_notify_update(struct net *net, struct nl_info *info)
+{
+ unsigned int h;
+
+ for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+ struct hlist_head *head = &net->ipv4.fib_table_hash[h];
+ struct fib_table *tb;
+
+ hlist_for_each_entry_rcu(tb, head, tb_hlist)
+ __fib_info_notify_update(net, tb, info);
+ }
+}
+
static void fib_leaf_notify(struct net *net, struct key_vector *l,
struct fib_table *tb, struct notifier_block *nb)
{
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 737808e27f8b..35e9784fab4e 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -110,48 +110,101 @@ int inet_frags_init(struct inet_frags *f)
if (!f->frags_cachep)
return -ENOMEM;
+ refcount_set(&f->refcnt, 1);
+ init_completion(&f->completion);
return 0;
}
EXPORT_SYMBOL(inet_frags_init);
void inet_frags_fini(struct inet_frags *f)
{
- /* We must wait that all inet_frag_destroy_rcu() have completed. */
- rcu_barrier();
+ if (refcount_dec_and_test(&f->refcnt))
+ complete(&f->completion);
+
+ wait_for_completion(&f->completion);
kmem_cache_destroy(f->frags_cachep);
f->frags_cachep = NULL;
}
EXPORT_SYMBOL(inet_frags_fini);
+/* called from rhashtable_free_and_destroy() at netns_frags dismantle */
static void inet_frags_free_cb(void *ptr, void *arg)
{
struct inet_frag_queue *fq = ptr;
+ int count;
- /* If we can not cancel the timer, it means this frag_queue
- * is already disappearing, we have nothing to do.
- * Otherwise, we own a refcount until the end of this function.
- */
- if (!del_timer(&fq->timer))
- return;
+ count = del_timer_sync(&fq->timer) ? 1 : 0;
spin_lock_bh(&fq->lock);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq->flags |= INET_FRAG_COMPLETE;
- refcount_dec(&fq->refcnt);
+ count++;
+ } else if (fq->flags & INET_FRAG_HASH_DEAD) {
+ count++;
}
spin_unlock_bh(&fq->lock);
- inet_frag_put(fq);
+ if (refcount_sub_and_test(count, &fq->refcnt))
+ inet_frag_destroy(fq);
}
-void inet_frags_exit_net(struct netns_frags *nf)
+static void fqdir_rwork_fn(struct work_struct *work)
{
- nf->high_thresh = 0; /* prevent creation of new frags */
+ struct fqdir *fqdir = container_of(to_rcu_work(work),
+ struct fqdir, destroy_rwork);
+ struct inet_frags *f = fqdir->f;
+
+ rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
+
+ /* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu)
+ * have completed, since they need to dereference fqdir.
+ * Would it not be nice to have kfree_rcu_barrier() ? :)
+ */
+ rcu_barrier();
+
+ if (refcount_dec_and_test(&f->refcnt))
+ complete(&f->completion);
- rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
+ kfree(fqdir);
}
-EXPORT_SYMBOL(inet_frags_exit_net);
+
+int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net)
+{
+ struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
+ int res;
+
+ if (!fqdir)
+ return -ENOMEM;
+ fqdir->f = f;
+ fqdir->net = net;
+ res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
+ if (res < 0) {
+ kfree(fqdir);
+ return res;
+ }
+ refcount_inc(&f->refcnt);
+ *fqdirp = fqdir;
+ return 0;
+}
+EXPORT_SYMBOL(fqdir_init);
+
+void fqdir_exit(struct fqdir *fqdir)
+{
+ fqdir->high_thresh = 0; /* prevent creation of new frags */
+
+ fqdir->dead = true;
+
+ /* call_rcu is supposed to provide memory barrier semantics,
+ * separating the setting of fqdir->dead with the destruction
+ * work. This implicit barrier is paired with inet_frag_kill().
+ */
+
+ INIT_RCU_WORK(&fqdir->destroy_rwork, fqdir_rwork_fn);
+ queue_rcu_work(system_wq, &fqdir->destroy_rwork);
+
+}
+EXPORT_SYMBOL(fqdir_exit);
void inet_frag_kill(struct inet_frag_queue *fq)
{
@@ -159,11 +212,23 @@ void inet_frag_kill(struct inet_frag_queue *fq)
refcount_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
- struct netns_frags *nf = fq->net;
+ struct fqdir *fqdir = fq->fqdir;
fq->flags |= INET_FRAG_COMPLETE;
- rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
- refcount_dec(&fq->refcnt);
+ rcu_read_lock();
+ /* The RCU read lock provides a memory barrier
+ * guaranteeing that if fqdir->dead is false then
+ * the hash table destruction will not start until
+ * after we unlock. Paired with inet_frags_exit_net().
+ */
+ if (!fqdir->dead) {
+ rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
+ fqdir->f->rhash_params);
+ refcount_dec(&fq->refcnt);
+ } else {
+ fq->flags |= INET_FRAG_HASH_DEAD;
+ }
+ rcu_read_unlock();
}
}
EXPORT_SYMBOL(inet_frag_kill);
@@ -172,7 +237,7 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
{
struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
rcu);
- struct inet_frags *f = q->net->f;
+ struct inet_frags *f = q->fqdir->f;
if (f->destructor)
f->destructor(q);
@@ -203,7 +268,7 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge);
void inet_frag_destroy(struct inet_frag_queue *q)
{
- struct netns_frags *nf;
+ struct fqdir *fqdir;
unsigned int sum, sum_truesize = 0;
struct inet_frags *f;
@@ -211,18 +276,18 @@ void inet_frag_destroy(struct inet_frag_queue *q)
WARN_ON(del_timer(&q->timer) != 0);
/* Release all fragment data. */
- nf = q->net;
- f = nf->f;
+ fqdir = q->fqdir;
+ f = fqdir->f;
sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
sum = sum_truesize + f->qsize;
call_rcu(&q->rcu, inet_frag_destroy_rcu);
- sub_frag_mem_limit(nf, sum);
+ sub_frag_mem_limit(fqdir, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
-static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
+static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
struct inet_frags *f,
void *arg)
{
@@ -232,9 +297,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
if (!q)
return NULL;
- q->net = nf;
+ q->fqdir = fqdir;
f->constructor(q, arg);
- add_frag_mem_limit(nf, f->qsize);
+ add_frag_mem_limit(fqdir, f->qsize);
timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
@@ -243,21 +308,21 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
return q;
}
-static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
+static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
void *arg,
struct inet_frag_queue **prev)
{
- struct inet_frags *f = nf->f;
+ struct inet_frags *f = fqdir->f;
struct inet_frag_queue *q;
- q = inet_frag_alloc(nf, f, arg);
+ q = inet_frag_alloc(fqdir, f, arg);
if (!q) {
*prev = ERR_PTR(-ENOMEM);
return NULL;
}
- mod_timer(&q->timer, jiffies + nf->timeout);
+ mod_timer(&q->timer, jiffies + fqdir->timeout);
- *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+ *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
&q->node, f->rhash_params);
if (*prev) {
q->flags |= INET_FRAG_COMPLETE;
@@ -269,18 +334,18 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
}
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
-struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
+struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
struct inet_frag_queue *fq = NULL, *prev;
- if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
+ if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
return NULL;
rcu_read_lock();
- prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
if (!prev)
- fq = inet_frag_create(nf, key, &prev);
+ fq = inet_frag_create(fqdir, key, &prev);
if (prev && !IS_ERR(prev)) {
fq = prev;
if (!refcount_inc_not_zero(&fq->refcnt))
@@ -391,7 +456,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
delta += head->truesize;
if (delta)
- add_frag_mem_limit(q->net, delta);
+ add_frag_mem_limit(q->fqdir, delta);
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
@@ -413,7 +478,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
head->truesize += clone->truesize;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(q->net, clone->truesize);
+ add_frag_mem_limit(q->fqdir, clone->truesize);
skb_shinfo(head)->frag_list = clone;
nextp = &clone->next;
} else {
@@ -466,7 +531,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
rbn = rbnext;
}
}
- sub_frag_mem_limit(q->net, head->truesize);
+ sub_frag_mem_limit(q->fqdir, head->truesize);
*nextp = NULL;
skb_mark_not_on_list(head);
@@ -494,7 +559,7 @@ struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
if (head == q->fragments_tail)
q->fragments_tail = NULL;
- sub_frag_mem_limit(q->net, head->truesize);
+ sub_frag_mem_limit(q->fqdir, head->truesize);
return head;
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index cf2b0a6a3337..1ffaec056821 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -82,15 +82,13 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
- struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
- frags);
- struct net *net = container_of(ipv4, struct net, ipv4);
+ struct net *net = q->fqdir->net;
const struct frag_v4_compare_key *key = a;
q->key.v4 = *key;
qp->ecn = 0;
- qp->peer = q->net->max_dist ?
+ qp->peer = q->fqdir->max_dist ?
inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
NULL;
}
@@ -142,7 +140,7 @@ static void ip_expire(struct timer_list *t)
int err;
qp = container_of(frag, struct ipq, q);
- net = container_of(qp->q.net, struct net, ipv4.frags);
+ net = qp->q.fqdir->net;
rcu_read_lock();
spin_lock(&qp->q.lock);
@@ -211,7 +209,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
};
struct inet_frag_queue *q;
- q = inet_frag_find(&net->ipv4.frags, &key);
+ q = inet_frag_find(net->ipv4.fqdir, &key);
if (!q)
return NULL;
@@ -222,7 +220,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
static int ip_frag_too_far(struct ipq *qp)
{
struct inet_peer *peer = qp->peer;
- unsigned int max = qp->q.net->max_dist;
+ unsigned int max = qp->q.fqdir->max_dist;
unsigned int start, end;
int rc;
@@ -236,12 +234,8 @@ static int ip_frag_too_far(struct ipq *qp)
rc = qp->q.fragments_tail && (end - start) > max;
- if (rc) {
- struct net *net;
-
- net = container_of(qp->q.net, struct net, ipv4.frags);
- __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
- }
+ if (rc)
+ __IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);
return rc;
}
@@ -250,13 +244,13 @@ static int ip_frag_reinit(struct ipq *qp)
{
unsigned int sum_truesize = 0;
- if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
+ if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
refcount_inc(&qp->q.refcnt);
return -ETIMEDOUT;
}
sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
- sub_frag_mem_limit(qp->q.net, sum_truesize);
+ sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
qp->q.flags = 0;
qp->q.len = 0;
@@ -273,7 +267,7 @@ static int ip_frag_reinit(struct ipq *qp)
/* Add new segment to existing queue. */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
- struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct net *net = qp->q.fqdir->net;
int ihl, end, flags, offset;
struct sk_buff *prev_tail;
struct net_device *dev;
@@ -352,7 +346,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
- add_frag_mem_limit(qp->q.net, skb->truesize);
+ add_frag_mem_limit(qp->q.fqdir, skb->truesize);
if (offset == 0)
qp->q.flags |= INET_FRAG_FIRST_IN;
@@ -399,7 +393,7 @@ err:
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
struct sk_buff *prev_tail, struct net_device *dev)
{
- struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
+ struct net *net = qp->q.fqdir->net;
struct iphdr *iph;
void *reasm_data;
int len, err;
@@ -544,30 +538,24 @@ static int dist_min;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
- .data = &init_net.ipv4.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
- .data = &init_net.ipv4.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra2 = &init_net.ipv4.frags.high_thresh
},
{
.procname = "ipfrag_time",
- .data = &init_net.ipv4.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "ipfrag_max_dist",
- .data = &init_net.ipv4.frags.max_dist,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
@@ -600,13 +588,13 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
if (!table)
goto err_alloc;
- table[0].data = &net->ipv4.frags.high_thresh;
- table[0].extra1 = &net->ipv4.frags.low_thresh;
- table[1].data = &net->ipv4.frags.low_thresh;
- table[1].extra2 = &net->ipv4.frags.high_thresh;
- table[2].data = &net->ipv4.frags.timeout;
- table[3].data = &net->ipv4.frags.max_dist;
}
+ table[0].data = &net->ipv4.fqdir->high_thresh;
+ table[0].extra1 = &net->ipv4.fqdir->low_thresh;
+ table[1].data = &net->ipv4.fqdir->low_thresh;
+ table[1].extra2 = &net->ipv4.fqdir->high_thresh;
+ table[2].data = &net->ipv4.fqdir->timeout;
+ table[3].data = &net->ipv4.fqdir->max_dist;
hdr = register_net_sysctl(net, "net/ipv4", table);
if (!hdr)
@@ -654,6 +642,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
{
int res;
+ res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
+ if (res < 0)
+ return res;
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -668,31 +659,27 @@ static int __net_init ipv4_frags_init_net(struct net *net)
* we will prune down to 3MB, making room for approx 8 big 64K
* fragments 8x128k.
*/
- net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
- net->ipv4.frags.low_thresh = 3 * 1024 * 1024;
+ net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
+ net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024;
/*
* Important NOTE! Fragment queue must be destroyed before MSL expires.
* RFC791 is wrong proposing to prolongate timer each fragment arrival
* by TTL.
*/
- net->ipv4.frags.timeout = IP_FRAG_TIME;
+ net->ipv4.fqdir->timeout = IP_FRAG_TIME;
- net->ipv4.frags.max_dist = 64;
- net->ipv4.frags.f = &ip4_frags;
+ net->ipv4.fqdir->max_dist = 64;
- res = inet_frags_init_net(&net->ipv4.frags);
- if (res < 0)
- return res;
res = ip4_frags_ns_ctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->ipv4.frags);
+ fqdir_exit(net->ipv4.fqdir);
return res;
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
- inet_frags_exit_net(&net->ipv4.frags);
+ fqdir_exit(net->ipv4.fqdir);
}
static struct pernet_operations ip4_frags_ops = {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8c9189a41b13..d8f2a22c6ff7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -525,9 +525,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_hash(to, from);
- /* Copy the flags to each fragment. */
- IPCB(to)->flags = IPCB(from)->flags;
-
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
@@ -561,6 +558,176 @@ static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return ip_do_fragment(net, sk, skb, output);
}
+void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph,
+ unsigned int hlen, struct ip_fraglist_iter *iter)
+{
+ unsigned int first_len = skb_pagelen(skb);
+
+ iter->frag_list = skb_shinfo(skb)->frag_list;
+ iter->frag = iter->frag_list;
+ skb_frag_list_init(skb);
+
+ iter->offset = 0;
+ iter->iph = iph;
+ iter->hlen = hlen;
+
+ skb->data_len = first_len - skb_headlen(skb);
+ skb->len = first_len;
+ iph->tot_len = htons(first_len);
+ iph->frag_off = htons(IP_MF);
+ ip_send_check(iph);
+}
+EXPORT_SYMBOL(ip_fraglist_init);
+
+static void ip_fraglist_ipcb_prepare(struct sk_buff *skb,
+ struct ip_fraglist_iter *iter)
+{
+ struct sk_buff *to = iter->frag;
+
+ /* Copy the flags to each fragment. */
+ IPCB(to)->flags = IPCB(skb)->flags;
+
+ if (iter->offset == 0)
+ ip_options_fragment(to);
+}
+
+void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter)
+{
+ unsigned int hlen = iter->hlen;
+ struct iphdr *iph = iter->iph;
+ struct sk_buff *frag;
+
+ frag = iter->frag;
+ frag->ip_summed = CHECKSUM_NONE;
+ skb_reset_transport_header(frag);
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), iph, hlen);
+ iter->iph = ip_hdr(frag);
+ iph = iter->iph;
+ iph->tot_len = htons(frag->len);
+ ip_copy_metadata(frag, skb);
+ iter->offset += skb->len - hlen;
+ iph->frag_off = htons(iter->offset >> 3);
+ if (frag->next)
+ iph->frag_off |= htons(IP_MF);
+ /* Ready, complete checksum */
+ ip_send_check(iph);
+}
+EXPORT_SYMBOL(ip_fraglist_prepare);
+
+void ip_frag_init(struct sk_buff *skb, unsigned int hlen,
+ unsigned int ll_rs, unsigned int mtu,
+ struct ip_frag_state *state)
+{
+ struct iphdr *iph = ip_hdr(skb);
+
+ state->hlen = hlen;
+ state->ll_rs = ll_rs;
+ state->mtu = mtu;
+
+ state->left = skb->len - hlen; /* Space per frame */
+ state->ptr = hlen; /* Where to start from */
+
+ state->offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
+ state->not_last_frag = iph->frag_off & htons(IP_MF);
+}
+EXPORT_SYMBOL(ip_frag_init);
+
+static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to,
+ bool first_frag, struct ip_frag_state *state)
+{
+ /* Copy the flags to each fragment. */
+ IPCB(to)->flags = IPCB(from)->flags;
+
+ if (IPCB(from)->flags & IPSKB_FRAG_PMTU)
+ state->iph->frag_off |= htons(IP_DF);
+
+ /* ANK: dirty, but effective trick. Upgrade options only if
+ * the segment to be fragmented was THE FIRST (otherwise,
+ * options are already fixed) and make it ONCE
+ * on the initial skb, so that all the following fragments
+ * will inherit fixed options.
+ */
+ if (first_frag)
+ ip_options_fragment(from);
+}
+
+struct sk_buff *ip_frag_next(struct sk_buff *skb, struct ip_frag_state *state)
+{
+ unsigned int len = state->left;
+ struct sk_buff *skb2;
+ struct iphdr *iph;
+
+ len = state->left;
+ /* IF: it doesn't fit, use 'mtu' - the data space left */
+ if (len > state->mtu)
+ len = state->mtu;
+ /* IF: we are not sending up to and including the packet end
+ then align the next start on an eight byte boundary */
+ if (len < state->left) {
+ len &= ~7;
+ }
+
+ /* Allocate buffer */
+ skb2 = alloc_skb(len + state->hlen + state->ll_rs, GFP_ATOMIC);
+ if (!skb2)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Set up data on packet
+ */
+
+ ip_copy_metadata(skb2, skb);
+ skb_reserve(skb2, state->ll_rs);
+ skb_put(skb2, len + state->hlen);
+ skb_reset_network_header(skb2);
+ skb2->transport_header = skb2->network_header + state->hlen;
+
+ /*
+ * Charge the memory for the fragment to any owner
+ * it might possess
+ */
+
+ if (skb->sk)
+ skb_set_owner_w(skb2, skb->sk);
+
+ /*
+ * Copy the packet header into the new buffer.
+ */
+
+ skb_copy_from_linear_data(skb, skb_network_header(skb2), state->hlen);
+
+ /*
+ * Copy a block of the IP datagram.
+ */
+ if (skb_copy_bits(skb, state->ptr, skb_transport_header(skb2), len))
+ BUG();
+ state->left -= len;
+
+ /*
+ * Fill in the new header fields.
+ */
+ iph = ip_hdr(skb2);
+ iph->frag_off = htons((state->offset >> 3));
+
+ /*
+ * Added AC : If we are fragmenting a fragment that's not the
+ * last fragment then keep MF on each bit
+ */
+ if (state->left > 0 || state->not_last_frag)
+ iph->frag_off |= htons(IP_MF);
+ state->ptr += len;
+ state->offset += len;
+
+ iph->tot_len = htons(len + state->hlen);
+
+ ip_send_check(iph);
+
+ return skb2;
+}
+EXPORT_SYMBOL(ip_frag_next);
+
/*
* This IP datagram is too large to be sent in one piece. Break it up into
* smaller pieces (each of size equal to IP header plus
@@ -572,12 +739,11 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct iphdr *iph;
- int ptr;
struct sk_buff *skb2;
- unsigned int mtu, hlen, left, len, ll_rs;
- int offset;
- __be16 not_last_frag;
struct rtable *rt = skb_rtable(skb);
+ unsigned int mtu, hlen, ll_rs;
+ struct ip_fraglist_iter iter;
+ struct ip_frag_state state;
int err = 0;
/* for offloaded checksums cleanup checksum before fragmentation */
@@ -642,49 +808,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
}
/* Everything is OK. Generate! */
-
- err = 0;
- offset = 0;
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
- skb->data_len = first_len - skb_headlen(skb);
- skb->len = first_len;
- iph->tot_len = htons(first_len);
- iph->frag_off = htons(IP_MF);
- ip_send_check(iph);
+ ip_fraglist_init(skb, iph, hlen, &iter);
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
- if (frag) {
- frag->ip_summed = CHECKSUM_NONE;
- skb_reset_transport_header(frag);
- __skb_push(frag, hlen);
- skb_reset_network_header(frag);
- memcpy(skb_network_header(frag), iph, hlen);
- iph = ip_hdr(frag);
- iph->tot_len = htons(frag->len);
- ip_copy_metadata(frag, skb);
- if (offset == 0)
- ip_options_fragment(frag);
- offset += skb->len - hlen;
- iph->frag_off = htons(offset>>3);
- if (frag->next)
- iph->frag_off |= htons(IP_MF);
- /* Ready, complete checksum */
- ip_send_check(iph);
+ if (iter.frag) {
+ ip_fraglist_ipcb_prepare(skb, &iter);
+ ip_fraglist_prepare(skb, &iter);
}
err = output(net, sk, skb);
if (!err)
IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
- if (err || !frag)
+ if (err || !iter.frag)
break;
- skb = frag;
- frag = skb->next;
- skb_mark_not_on_list(skb);
+ skb = ip_fraglist_next(&iter);
}
if (err == 0) {
@@ -692,7 +833,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return 0;
}
- kfree_skb_list(frag);
+ kfree_skb_list(iter.frag_list);
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
return err;
@@ -708,105 +849,29 @@ slow_path_clean:
}
slow_path:
- iph = ip_hdr(skb);
-
- left = skb->len - hlen; /* Space per frame */
- ptr = hlen; /* Where to start from */
-
/*
* Fragment the datagram.
*/
- offset = (ntohs(iph->frag_off) & IP_OFFSET) << 3;
- not_last_frag = iph->frag_off & htons(IP_MF);
+ ip_frag_init(skb, hlen, ll_rs, mtu, &state);
/*
* Keep copying data until we run out.
*/
- while (left > 0) {
- len = left;
- /* IF: it doesn't fit, use 'mtu' - the data space left */
- if (len > mtu)
- len = mtu;
- /* IF: we are not sending up to and including the packet end
- then align the next start on an eight byte boundary */
- if (len < left) {
- len &= ~7;
- }
+ while (state.left > 0) {
+ bool first_frag = (state.offset == 0);
- /* Allocate buffer */
- skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
- if (!skb2) {
- err = -ENOMEM;
+ skb2 = ip_frag_next(skb, &state);
+ if (IS_ERR(skb2)) {
+ err = PTR_ERR(skb2);
goto fail;
}
-
- /*
- * Set up data on packet
- */
-
- ip_copy_metadata(skb2, skb);
- skb_reserve(skb2, ll_rs);
- skb_put(skb2, len + hlen);
- skb_reset_network_header(skb2);
- skb2->transport_header = skb2->network_header + hlen;
-
- /*
- * Charge the memory for the fragment to any owner
- * it might possess
- */
-
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
-
- /*
- * Copy the packet header into the new buffer.
- */
-
- skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
-
- /*
- * Copy a block of the IP datagram.
- */
- if (skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
- BUG();
- left -= len;
-
- /*
- * Fill in the new header fields.
- */
- iph = ip_hdr(skb2);
- iph->frag_off = htons((offset >> 3));
-
- if (IPCB(skb)->flags & IPSKB_FRAG_PMTU)
- iph->frag_off |= htons(IP_DF);
-
- /* ANK: dirty, but effective trick. Upgrade options only if
- * the segment to be fragmented was THE FIRST (otherwise,
- * options are already fixed) and make it ONCE
- * on the initial skb, so that all the following fragments
- * will inherit fixed options.
- */
- if (offset == 0)
- ip_options_fragment(skb);
-
- /*
- * Added AC : If we are fragmenting a fragment that's not the
- * last fragment then keep MF on each bit
- */
- if (left > 0 || not_last_frag)
- iph->frag_off |= htons(IP_MF);
- ptr += len;
- offset += len;
+ ip_frag_ipcb(skb, skb2, first_frag, &state);
/*
* Put this fragment into the sending queue.
*/
- iph->tot_len = htons(len + hlen);
-
- ip_send_check(iph);
-
err = output(net, sk, skb2);
if (err)
goto fail;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
new file mode 100644
index 000000000000..1af8a329dacb
--- /dev/null
+++ b/net/ipv4/nexthop.c
@@ -0,0 +1,1479 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Generic nexthop implementation
+ *
+ * Copyright (c) 2017-19 Cumulus Networks
+ * Copyright (c) 2017-19 David Ahern <dsa@cumulusnetworks.com>
+ */
+
+#include <linux/nexthop.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <net/arp.h>
+#include <net/ipv6_stubs.h>
+#include <net/lwtunnel.h>
+#include <net/ndisc.h>
+#include <net/nexthop.h>
+#include <net/route.h>
+#include <net/sock.h>
+
+static void remove_nexthop(struct net *net, struct nexthop *nh,
+ struct nl_info *nlinfo);
+
+#define NH_DEV_HASHBITS 8
+#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
+
+static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
+ [NHA_UNSPEC] = { .strict_start_type = NHA_UNSPEC + 1 },
+ [NHA_ID] = { .type = NLA_U32 },
+ [NHA_GROUP] = { .type = NLA_BINARY },
+ [NHA_GROUP_TYPE] = { .type = NLA_U16 },
+ [NHA_BLACKHOLE] = { .type = NLA_FLAG },
+ [NHA_OIF] = { .type = NLA_U32 },
+ [NHA_GATEWAY] = { .type = NLA_BINARY },
+ [NHA_ENCAP_TYPE] = { .type = NLA_U16 },
+ [NHA_ENCAP] = { .type = NLA_NESTED },
+ [NHA_GROUPS] = { .type = NLA_FLAG },
+ [NHA_MASTER] = { .type = NLA_U32 },
+};
+
+static unsigned int nh_dev_hashfn(unsigned int val)
+{
+ unsigned int mask = NH_DEV_HASHSIZE - 1;
+
+ return (val ^
+ (val >> NH_DEV_HASHBITS) ^
+ (val >> (NH_DEV_HASHBITS * 2))) & mask;
+}
+
+static void nexthop_devhash_add(struct net *net, struct nh_info *nhi)
+{
+ struct net_device *dev = nhi->fib_nhc.nhc_dev;
+ struct hlist_head *head;
+ unsigned int hash;
+
+ WARN_ON(!dev);
+
+ hash = nh_dev_hashfn(dev->ifindex);
+ head = &net->nexthop.devhash[hash];
+ hlist_add_head(&nhi->dev_hash, head);
+}
+
+static void nexthop_free_mpath(struct nexthop *nh)
+{
+ struct nh_group *nhg;
+ int i;
+
+ nhg = rcu_dereference_raw(nh->nh_grp);
+ for (i = 0; i < nhg->num_nh; ++i)
+ WARN_ON(nhg->nh_entries[i].nh);
+
+ kfree(nhg);
+}
+
+static void nexthop_free_single(struct nexthop *nh)
+{
+ struct nh_info *nhi;
+
+ nhi = rcu_dereference_raw(nh->nh_info);
+ switch (nhi->family) {
+ case AF_INET:
+ fib_nh_release(nh->net, &nhi->fib_nh);
+ break;
+ case AF_INET6:
+ ipv6_stub->fib6_nh_release(&nhi->fib6_nh);
+ break;
+ }
+ kfree(nhi);
+}
+
+void nexthop_free_rcu(struct rcu_head *head)
+{
+ struct nexthop *nh = container_of(head, struct nexthop, rcu);
+
+ if (nh->is_group)
+ nexthop_free_mpath(nh);
+ else
+ nexthop_free_single(nh);
+
+ kfree(nh);
+}
+EXPORT_SYMBOL_GPL(nexthop_free_rcu);
+
+static struct nexthop *nexthop_alloc(void)
+{
+ struct nexthop *nh;
+
+ nh = kzalloc(sizeof(struct nexthop), GFP_KERNEL);
+ if (nh) {
+ INIT_LIST_HEAD(&nh->grp_list);
+ }
+ return nh;
+}
+
+static struct nh_group *nexthop_grp_alloc(u16 num_nh)
+{
+ size_t sz = offsetof(struct nexthop, nh_grp)
+ + sizeof(struct nh_group)
+ + sizeof(struct nh_grp_entry) * num_nh;
+ struct nh_group *nhg;
+
+ nhg = kzalloc(sz, GFP_KERNEL);
+ if (nhg)
+ nhg->num_nh = num_nh;
+
+ return nhg;
+}
+
+static void nh_base_seq_inc(struct net *net)
+{
+ while (++net->nexthop.seq == 0)
+ ;
+}
+
+/* no reference taken; rcu lock or rtnl must be held */
+struct nexthop *nexthop_find_by_id(struct net *net, u32 id)
+{
+ struct rb_node **pp, *parent = NULL, *next;
+
+ pp = &net->nexthop.rb_root.rb_node;
+ while (1) {
+ struct nexthop *nh;
+
+ next = rcu_dereference_raw(*pp);
+ if (!next)
+ break;
+ parent = next;
+
+ nh = rb_entry(parent, struct nexthop, rb_node);
+ if (id < nh->id)
+ pp = &next->rb_left;
+ else if (id > nh->id)
+ pp = &next->rb_right;
+ else
+ return nh;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(nexthop_find_by_id);
+
+/* used for auto id allocation; called with rtnl held */
+static u32 nh_find_unused_id(struct net *net)
+{
+ u32 id_start = net->nexthop.last_id_allocated;
+
+ while (1) {
+ net->nexthop.last_id_allocated++;
+ if (net->nexthop.last_id_allocated == id_start)
+ break;
+
+ if (!nexthop_find_by_id(net, net->nexthop.last_id_allocated))
+ return net->nexthop.last_id_allocated;
+ }
+ return 0;
+}
+
+static int nla_put_nh_group(struct sk_buff *skb, struct nh_group *nhg)
+{
+ struct nexthop_grp *p;
+ size_t len = nhg->num_nh * sizeof(*p);
+ struct nlattr *nla;
+ u16 group_type = 0;
+ int i;
+
+ if (nhg->mpath)
+ group_type = NEXTHOP_GRP_TYPE_MPATH;
+
+ if (nla_put_u16(skb, NHA_GROUP_TYPE, group_type))
+ goto nla_put_failure;
+
+ nla = nla_reserve(skb, NHA_GROUP, len);
+ if (!nla)
+ goto nla_put_failure;
+
+ p = nla_data(nla);
+ for (i = 0; i < nhg->num_nh; ++i) {
+ p->id = nhg->nh_entries[i].nh->id;
+ p->weight = nhg->nh_entries[i].weight - 1;
+ p += 1;
+ }
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
+ int event, u32 portid, u32 seq, unsigned int nlflags)
+{
+ struct fib6_nh *fib6_nh;
+ struct fib_nh *fib_nh;
+ struct nlmsghdr *nlh;
+ struct nh_info *nhi;
+ struct nhmsg *nhm;
+
+ nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nhm), nlflags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ nhm = nlmsg_data(nlh);
+ nhm->nh_family = AF_UNSPEC;
+ nhm->nh_flags = nh->nh_flags;
+ nhm->nh_protocol = nh->protocol;
+ nhm->nh_scope = 0;
+ nhm->resvd = 0;
+
+ if (nla_put_u32(skb, NHA_ID, nh->id))
+ goto nla_put_failure;
+
+ if (nh->is_group) {
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+
+ if (nla_put_nh_group(skb, nhg))
+ goto nla_put_failure;
+ goto out;
+ }
+
+ nhi = rtnl_dereference(nh->nh_info);
+ nhm->nh_family = nhi->family;
+ if (nhi->reject_nh) {
+ if (nla_put_flag(skb, NHA_BLACKHOLE))
+ goto nla_put_failure;
+ goto out;
+ } else {
+ const struct net_device *dev;
+
+ dev = nhi->fib_nhc.nhc_dev;
+ if (dev && nla_put_u32(skb, NHA_OIF, dev->ifindex))
+ goto nla_put_failure;
+ }
+
+ nhm->nh_scope = nhi->fib_nhc.nhc_scope;
+ switch (nhi->family) {
+ case AF_INET:
+ fib_nh = &nhi->fib_nh;
+ if (fib_nh->fib_nh_gw_family &&
+ nla_put_u32(skb, NHA_GATEWAY, fib_nh->fib_nh_gw4))
+ goto nla_put_failure;
+ break;
+
+ case AF_INET6:
+ fib6_nh = &nhi->fib6_nh;
+ if (fib6_nh->fib_nh_gw_family &&
+ nla_put_in6_addr(skb, NHA_GATEWAY, &fib6_nh->fib_nh_gw6))
+ goto nla_put_failure;
+ break;
+ }
+
+ if (nhi->fib_nhc.nhc_lwtstate &&
+ lwtunnel_fill_encap(skb, nhi->fib_nhc.nhc_lwtstate,
+ NHA_ENCAP, NHA_ENCAP_TYPE) < 0)
+ goto nla_put_failure;
+
+out:
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static size_t nh_nlmsg_size_grp(struct nexthop *nh)
+{
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+ size_t sz = sizeof(struct nexthop_grp) * nhg->num_nh;
+
+ return nla_total_size(sz) +
+ nla_total_size(2); /* NHA_GROUP_TYPE */
+}
+
+static size_t nh_nlmsg_size_single(struct nexthop *nh)
+{
+ struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+ size_t sz;
+
+ /* covers NHA_BLACKHOLE since NHA_OIF and BLACKHOLE
+ * are mutually exclusive
+ */
+ sz = nla_total_size(4); /* NHA_OIF */
+
+ switch (nhi->family) {
+ case AF_INET:
+ if (nhi->fib_nh.fib_nh_gw_family)
+ sz += nla_total_size(4); /* NHA_GATEWAY */
+ break;
+
+ case AF_INET6:
+ /* NHA_GATEWAY */
+ if (nhi->fib6_nh.fib_nh_gw_family)
+ sz += nla_total_size(sizeof(const struct in6_addr));
+ break;
+ }
+
+ if (nhi->fib_nhc.nhc_lwtstate) {
+ sz += lwtunnel_get_encap_size(nhi->fib_nhc.nhc_lwtstate);
+ sz += nla_total_size(2); /* NHA_ENCAP_TYPE */
+ }
+
+ return sz;
+}
+
+static size_t nh_nlmsg_size(struct nexthop *nh)
+{
+ size_t sz = nla_total_size(4); /* NHA_ID */
+
+ if (nh->is_group)
+ sz += nh_nlmsg_size_grp(nh);
+ else
+ sz += nh_nlmsg_size_single(nh);
+
+ return sz;
+}
+
+static void nexthop_notify(int event, struct nexthop *nh, struct nl_info *info)
+{
+ unsigned int nlflags = info->nlh ? info->nlh->nlmsg_flags : 0;
+ u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ skb = nlmsg_new(nh_nlmsg_size(nh), gfp_any());
+ if (!skb)
+ goto errout;
+
+ err = nh_fill_node(skb, nh, event, info->portid, seq, nlflags);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in nh_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_NEXTHOP,
+ info->nlh, gfp_any());
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(info->nl_net, RTNLGRP_NEXTHOP, err);
+}
+
+static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
+ struct netlink_ext_ack *extack)
+{
+ if (nh->is_group) {
+ struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
+
+ /* nested multipath (group within a group) is not
+ * supported
+ */
+ if (nhg->mpath) {
+ NL_SET_ERR_MSG(extack,
+ "Multipath group can not be a nexthop within a group");
+ return false;
+ }
+ } else {
+ struct nh_info *nhi = rtnl_dereference(nh->nh_info);
+
+ if (nhi->reject_nh && npaths > 1) {
+ NL_SET_ERR_MSG(extack,
+ "Blackhole nexthop can not be used in a group with more than 1 path");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ unsigned int len = nla_len(tb[NHA_GROUP]);
+ struct nexthop_grp *nhg;
+ unsigned int i, j;
+
+ if (len & (sizeof(struct nexthop_grp) - 1)) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid length for nexthop group attribute");
+ return -EINVAL;
+ }
+
+ /* convert len to number of nexthop ids */
+ len /= sizeof(*nhg);
+
+ nhg = nla_data(tb[NHA_GROUP]);
+ for (i = 0; i < len; ++i) {
+ if (nhg[i].resvd1 || nhg[i].resvd2) {
+ NL_SET_ERR_MSG(extack, "Reserved fields in nexthop_grp must be 0");
+ return -EINVAL;
+ }
+ if (nhg[i].weight > 254) {
+ NL_SET_ERR_MSG(extack, "Invalid value for weight");
+ return -EINVAL;
+ }
+ for (j = i + 1; j < len; ++j) {
+ if (nhg[i].id == nhg[j].id) {
+ NL_SET_ERR_MSG(extack, "Nexthop id can not be used twice in a group");
+ return -EINVAL;
+ }
+ }
+ }
+
+ nhg = nla_data(tb[NHA_GROUP]);
+ for (i = 0; i < len; ++i) {
+ struct nexthop *nh;
+
+ nh = nexthop_find_by_id(net, nhg[i].id);
+ if (!nh) {
+ NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+ return -EINVAL;
+ }
+ if (!valid_group_nh(nh, len, extack))
+ return -EINVAL;
+ }
+ for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ NL_SET_ERR_MSG(extack,
+ "No other attributes can be set in nexthop groups");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static bool ipv6_good_nh(const struct fib6_nh *nh)
+{
+ int state = NUD_REACHABLE;
+ struct neighbour *n;
+
+ rcu_read_lock_bh();
+
+ n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6);
+ if (n)
+ state = n->nud_state;
+
+ rcu_read_unlock_bh();
+
+ return !!(state & NUD_VALID);
+}
+
+static bool ipv4_good_nh(const struct fib_nh *nh)
+{
+ int state = NUD_REACHABLE;
+ struct neighbour *n;
+
+ rcu_read_lock_bh();
+
+ n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
+ (__force u32)nh->fib_nh_gw4);
+ if (n)
+ state = n->nud_state;
+
+ rcu_read_unlock_bh();
+
+ return !!(state & NUD_VALID);
+}
+
+struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
+{
+ struct nexthop *rc = NULL;
+ struct nh_group *nhg;
+ int i;
+
+ if (!nh->is_group)
+ return nh;
+
+ nhg = rcu_dereference(nh->nh_grp);
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+ struct nh_info *nhi;
+
+ if (hash > atomic_read(&nhge->upper_bound))
+ continue;
+
+ /* nexthops always check if it is good and does
+ * not rely on a sysctl for this behavior
+ */
+ nhi = rcu_dereference(nhge->nh->nh_info);
+ switch (nhi->family) {
+ case AF_INET:
+ if (ipv4_good_nh(&nhi->fib_nh))
+ return nhge->nh;
+ break;
+ case AF_INET6:
+ if (ipv6_good_nh(&nhi->fib6_nh))
+ return nhge->nh;
+ break;
+ }
+
+ if (!rc)
+ rc = nhge->nh;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(nexthop_select_path);
+
+static void nh_group_rebalance(struct nh_group *nhg)
+{
+ int total = 0;
+ int w = 0;
+ int i;
+
+ for (i = 0; i < nhg->num_nh; ++i)
+ total += nhg->nh_entries[i].weight;
+
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+ int upper_bound;
+
+ w += nhge->weight;
+ upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1;
+ atomic_set(&nhge->upper_bound, upper_bound);
+ }
+}
+
+static void remove_nh_grp_entry(struct nh_grp_entry *nhge,
+ struct nh_group *nhg,
+ struct nl_info *nlinfo)
+{
+ struct nexthop *nh = nhge->nh;
+ struct nh_grp_entry *nhges;
+ bool found = false;
+ int i;
+
+ WARN_ON(!nh);
+
+ nhges = nhg->nh_entries;
+ for (i = 0; i < nhg->num_nh; ++i) {
+ if (found) {
+ nhges[i-1].nh = nhges[i].nh;
+ nhges[i-1].weight = nhges[i].weight;
+ list_del(&nhges[i].nh_list);
+ list_add(&nhges[i-1].nh_list, &nhges[i-1].nh->grp_list);
+ } else if (nhg->nh_entries[i].nh == nh) {
+ found = true;
+ }
+ }
+
+ if (WARN_ON(!found))
+ return;
+
+ nhg->num_nh--;
+ nhg->nh_entries[nhg->num_nh].nh = NULL;
+
+ nh_group_rebalance(nhg);
+
+ nexthop_put(nh);
+
+ if (nlinfo)
+ nexthop_notify(RTM_NEWNEXTHOP, nhge->nh_parent, nlinfo);
+}
+
+static void remove_nexthop_from_groups(struct net *net, struct nexthop *nh,
+ struct nl_info *nlinfo)
+{
+ struct nh_grp_entry *nhge, *tmp;
+
+ list_for_each_entry_safe(nhge, tmp, &nh->grp_list, nh_list) {
+ struct nh_group *nhg;
+
+ list_del(&nhge->nh_list);
+ nhg = rtnl_dereference(nhge->nh_parent->nh_grp);
+ remove_nh_grp_entry(nhge, nhg, nlinfo);
+
+ /* if this group has no more entries then remove it */
+ if (!nhg->num_nh)
+ remove_nexthop(net, nhge->nh_parent, nlinfo);
+ }
+}
+
+static void remove_nexthop_group(struct nexthop *nh, struct nl_info *nlinfo)
+{
+ struct nh_group *nhg = rcu_dereference_rtnl(nh->nh_grp);
+ int i, num_nh = nhg->num_nh;
+
+ for (i = 0; i < num_nh; ++i) {
+ struct nh_grp_entry *nhge = &nhg->nh_entries[i];
+
+ if (WARN_ON(!nhge->nh))
+ continue;
+
+ list_del(&nhge->nh_list);
+ nexthop_put(nhge->nh);
+ nhge->nh = NULL;
+ nhg->num_nh--;
+ }
+}
+
+static void __remove_nexthop(struct net *net, struct nexthop *nh,
+ struct nl_info *nlinfo)
+{
+ if (nh->is_group) {
+ remove_nexthop_group(nh, nlinfo);
+ } else {
+ struct nh_info *nhi;
+
+ nhi = rtnl_dereference(nh->nh_info);
+ if (nhi->fib_nhc.nhc_dev)
+ hlist_del(&nhi->dev_hash);
+
+ remove_nexthop_from_groups(net, nh, nlinfo);
+ }
+}
+
+static void remove_nexthop(struct net *net, struct nexthop *nh,
+ struct nl_info *nlinfo)
+{
+ /* remove from the tree */
+ rb_erase(&nh->rb_node, &net->nexthop.rb_root);
+
+ if (nlinfo)
+ nexthop_notify(RTM_DELNEXTHOP, nh, nlinfo);
+
+ __remove_nexthop(net, nh, nlinfo);
+ nh_base_seq_inc(net);
+
+ nexthop_put(nh);
+}
+
+static int replace_nexthop(struct net *net, struct nexthop *old,
+ struct nexthop *new, struct netlink_ext_ack *extack)
+{
+ return -EEXIST;
+}
+
+/* called with rtnl_lock held */
+static int insert_nexthop(struct net *net, struct nexthop *new_nh,
+ struct nh_config *cfg, struct netlink_ext_ack *extack)
+{
+ struct rb_node **pp, *parent = NULL, *next;
+ struct rb_root *root = &net->nexthop.rb_root;
+ bool replace = !!(cfg->nlflags & NLM_F_REPLACE);
+ bool create = !!(cfg->nlflags & NLM_F_CREATE);
+ u32 new_id = new_nh->id;
+ int rc = -EEXIST;
+
+ pp = &root->rb_node;
+ while (1) {
+ struct nexthop *nh;
+
+ next = rtnl_dereference(*pp);
+ if (!next)
+ break;
+
+ parent = next;
+
+ nh = rb_entry(parent, struct nexthop, rb_node);
+ if (new_id < nh->id) {
+ pp = &next->rb_left;
+ } else if (new_id > nh->id) {
+ pp = &next->rb_right;
+ } else if (replace) {
+ rc = replace_nexthop(net, nh, new_nh, extack);
+ if (!rc)
+ new_nh = nh; /* send notification with old nh */
+ goto out;
+ } else {
+ /* id already exists and not a replace */
+ goto out;
+ }
+ }
+
+ if (replace && !create) {
+ NL_SET_ERR_MSG(extack, "Replace specified without create and no entry exists");
+ rc = -ENOENT;
+ goto out;
+ }
+
+ rb_link_node_rcu(&new_nh->rb_node, parent, pp);
+ rb_insert_color(&new_nh->rb_node, root);
+ rc = 0;
+out:
+ if (!rc) {
+ nh_base_seq_inc(net);
+ nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
+ }
+
+ return rc;
+}
+
+/* rtnl */
+/* remove all nexthops tied to a device being deleted */
+static void nexthop_flush_dev(struct net_device *dev)
+{
+ unsigned int hash = nh_dev_hashfn(dev->ifindex);
+ struct net *net = dev_net(dev);
+ struct hlist_head *head = &net->nexthop.devhash[hash];
+ struct hlist_node *n;
+ struct nh_info *nhi;
+
+ hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
+ if (nhi->fib_nhc.nhc_dev != dev)
+ continue;
+
+ remove_nexthop(net, nhi->nh_parent, NULL);
+ }
+}
+
+/* rtnl; called when net namespace is deleted */
+static void flush_all_nexthops(struct net *net)
+{
+ struct rb_root *root = &net->nexthop.rb_root;
+ struct rb_node *node;
+ struct nexthop *nh;
+
+ while ((node = rb_first(root))) {
+ nh = rb_entry(node, struct nexthop, rb_node);
+ remove_nexthop(net, nh, NULL);
+ cond_resched();
+ }
+}
+
+static struct nexthop *nexthop_create_group(struct net *net,
+ struct nh_config *cfg)
+{
+ struct nlattr *grps_attr = cfg->nh_grp;
+ struct nexthop_grp *entry = nla_data(grps_attr);
+ struct nh_group *nhg;
+ struct nexthop *nh;
+ int i;
+
+ nh = nexthop_alloc();
+ if (!nh)
+ return ERR_PTR(-ENOMEM);
+
+ nh->is_group = 1;
+
+ nhg = nexthop_grp_alloc(nla_len(grps_attr) / sizeof(*entry));
+ if (!nhg) {
+ kfree(nh);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ for (i = 0; i < nhg->num_nh; ++i) {
+ struct nexthop *nhe;
+ struct nh_info *nhi;
+
+ nhe = nexthop_find_by_id(net, entry[i].id);
+ if (!nexthop_get(nhe))
+ goto out_no_nh;
+
+ nhi = rtnl_dereference(nhe->nh_info);
+ if (nhi->family == AF_INET)
+ nhg->has_v4 = true;
+
+ nhg->nh_entries[i].nh = nhe;
+ nhg->nh_entries[i].weight = entry[i].weight + 1;
+ list_add(&nhg->nh_entries[i].nh_list, &nhe->grp_list);
+ nhg->nh_entries[i].nh_parent = nh;
+ }
+
+ if (cfg->nh_grp_type == NEXTHOP_GRP_TYPE_MPATH) {
+ nhg->mpath = 1;
+ nh_group_rebalance(nhg);
+ }
+
+ rcu_assign_pointer(nh->nh_grp, nhg);
+
+ return nh;
+
+out_no_nh:
+ for (; i >= 0; --i)
+ nexthop_put(nhg->nh_entries[i].nh);
+
+ kfree(nhg);
+ kfree(nh);
+
+ return ERR_PTR(-ENOENT);
+}
+
+static int nh_create_ipv4(struct net *net, struct nexthop *nh,
+ struct nh_info *nhi, struct nh_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_nh *fib_nh = &nhi->fib_nh;
+ struct fib_config fib_cfg = {
+ .fc_oif = cfg->nh_ifindex,
+ .fc_gw4 = cfg->gw.ipv4,
+ .fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
+ .fc_flags = cfg->nh_flags,
+ .fc_encap = cfg->nh_encap,
+ .fc_encap_type = cfg->nh_encap_type,
+ };
+ u32 tb_id = l3mdev_fib_table(cfg->dev);
+ int err = -EINVAL;
+
+ err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
+ if (err) {
+ fib_nh_release(net, fib_nh);
+ goto out;
+ }
+
+ /* sets nh_dev if successful */
+ err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
+ if (!err) {
+ nh->nh_flags = fib_nh->fib_nh_flags;
+ fib_info_update_nh_saddr(net, fib_nh, fib_nh->fib_nh_scope);
+ } else {
+ fib_nh_release(net, fib_nh);
+ }
+out:
+ return err;
+}
+
+static int nh_create_ipv6(struct net *net, struct nexthop *nh,
+ struct nh_info *nhi, struct nh_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_nh *fib6_nh = &nhi->fib6_nh;
+ struct fib6_config fib6_cfg = {
+ .fc_table = l3mdev_fib_table(cfg->dev),
+ .fc_ifindex = cfg->nh_ifindex,
+ .fc_gateway = cfg->gw.ipv6,
+ .fc_flags = cfg->nh_flags,
+ .fc_encap = cfg->nh_encap,
+ .fc_encap_type = cfg->nh_encap_type,
+ };
+ int err = -EINVAL;
+
+ if (!ipv6_addr_any(&cfg->gw.ipv6))
+ fib6_cfg.fc_flags |= RTF_GATEWAY;
+
+ /* sets nh_dev if successful */
+ err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL,
+ extack);
+ if (err)
+ ipv6_stub->fib6_nh_release(fib6_nh);
+ else
+ nh->nh_flags = fib6_nh->fib_nh_flags;
+
+ return err;
+}
+
+static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nh_info *nhi;
+ struct nexthop *nh;
+ int err = 0;
+
+ nh = nexthop_alloc();
+ if (!nh)
+ return ERR_PTR(-ENOMEM);
+
+ nhi = kzalloc(sizeof(*nhi), GFP_KERNEL);
+ if (!nhi) {
+ kfree(nh);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ nh->nh_flags = cfg->nh_flags;
+ nh->net = net;
+
+ nhi->nh_parent = nh;
+ nhi->family = cfg->nh_family;
+ nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
+
+ if (cfg->nh_blackhole) {
+ nhi->reject_nh = 1;
+ cfg->nh_ifindex = net->loopback_dev->ifindex;
+ }
+
+ switch (cfg->nh_family) {
+ case AF_INET:
+ err = nh_create_ipv4(net, nh, nhi, cfg, extack);
+ break;
+ case AF_INET6:
+ err = nh_create_ipv6(net, nh, nhi, cfg, extack);
+ break;
+ }
+
+ if (err) {
+ kfree(nhi);
+ kfree(nh);
+ return ERR_PTR(err);
+ }
+
+ /* add the entry to the device based hash */
+ nexthop_devhash_add(net, nhi);
+
+ rcu_assign_pointer(nh->nh_info, nhi);
+
+ return nh;
+}
+
+/* called with rtnl lock held */
+static struct nexthop *nexthop_add(struct net *net, struct nh_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nexthop *nh;
+ int err;
+
+ if (cfg->nlflags & NLM_F_REPLACE && !cfg->nh_id) {
+ NL_SET_ERR_MSG(extack, "Replace requires nexthop id");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!cfg->nh_id) {
+ cfg->nh_id = nh_find_unused_id(net);
+ if (!cfg->nh_id) {
+ NL_SET_ERR_MSG(extack, "No unused id");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ if (cfg->nh_grp)
+ nh = nexthop_create_group(net, cfg);
+ else
+ nh = nexthop_create(net, cfg, extack);
+
+ if (IS_ERR(nh))
+ return nh;
+
+ refcount_set(&nh->refcnt, 1);
+ nh->id = cfg->nh_id;
+ nh->protocol = cfg->nh_protocol;
+ nh->net = net;
+
+ err = insert_nexthop(net, nh, cfg, extack);
+ if (err) {
+ __remove_nexthop(net, nh, NULL);
+ nexthop_put(nh);
+ nh = ERR_PTR(err);
+ }
+
+ return nh;
+}
+
+static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
+ struct nlmsghdr *nlh, struct nh_config *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct nhmsg *nhm = nlmsg_data(nlh);
+ struct nlattr *tb[NHA_MAX + 1];
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ if (nhm->resvd || nhm->nh_scope) {
+ NL_SET_ERR_MSG(extack, "Invalid values in ancillary header");
+ goto out;
+ }
+ if (nhm->nh_flags & ~NEXTHOP_VALID_USER_FLAGS) {
+ NL_SET_ERR_MSG(extack, "Invalid nexthop flags in ancillary header");
+ goto out;
+ }
+
+ switch (nhm->nh_family) {
+ case AF_INET:
+ case AF_INET6:
+ break;
+ case AF_UNSPEC:
+ if (tb[NHA_GROUP])
+ break;
+ /* fallthrough */
+ default:
+ NL_SET_ERR_MSG(extack, "Invalid address family");
+ goto out;
+ }
+
+ if (tb[NHA_GROUPS] || tb[NHA_MASTER]) {
+ NL_SET_ERR_MSG(extack, "Invalid attributes in request");
+ goto out;
+ }
+
+ memset(cfg, 0, sizeof(*cfg));
+ cfg->nlflags = nlh->nlmsg_flags;
+ cfg->nlinfo.portid = NETLINK_CB(skb).portid;
+ cfg->nlinfo.nlh = nlh;
+ cfg->nlinfo.nl_net = net;
+
+ cfg->nh_family = nhm->nh_family;
+ cfg->nh_protocol = nhm->nh_protocol;
+ cfg->nh_flags = nhm->nh_flags;
+
+ if (tb[NHA_ID])
+ cfg->nh_id = nla_get_u32(tb[NHA_ID]);
+
+ if (tb[NHA_GROUP]) {
+ if (nhm->nh_family != AF_UNSPEC) {
+ NL_SET_ERR_MSG(extack, "Invalid family for group");
+ goto out;
+ }
+ cfg->nh_grp = tb[NHA_GROUP];
+
+ cfg->nh_grp_type = NEXTHOP_GRP_TYPE_MPATH;
+ if (tb[NHA_GROUP_TYPE])
+ cfg->nh_grp_type = nla_get_u16(tb[NHA_GROUP_TYPE]);
+
+ if (cfg->nh_grp_type > NEXTHOP_GRP_TYPE_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid group type");
+ goto out;
+ }
+ err = nh_check_attr_group(net, tb, extack);
+
+ /* no other attributes should be set */
+ goto out;
+ }
+
+ if (tb[NHA_BLACKHOLE]) {
+ if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
+ tb[NHA_ENCAP] || tb[NHA_ENCAP_TYPE]) {
+ NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif");
+ goto out;
+ }
+
+ cfg->nh_blackhole = 1;
+ err = 0;
+ goto out;
+ }
+
+ if (!tb[NHA_OIF]) {
+ NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops");
+ goto out;
+ }
+
+ cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
+ if (cfg->nh_ifindex)
+ cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
+
+ if (!cfg->dev) {
+ NL_SET_ERR_MSG(extack, "Invalid device index");
+ goto out;
+ } else if (!(cfg->dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ } else if (!netif_carrier_ok(cfg->dev)) {
+ NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
+ err = -ENETDOWN;
+ goto out;
+ }
+
+ err = -EINVAL;
+ if (tb[NHA_GATEWAY]) {
+ struct nlattr *gwa = tb[NHA_GATEWAY];
+
+ switch (cfg->nh_family) {
+ case AF_INET:
+ if (nla_len(gwa) != sizeof(u32)) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway");
+ goto out;
+ }
+ cfg->gw.ipv4 = nla_get_be32(gwa);
+ break;
+ case AF_INET6:
+ if (nla_len(gwa) != sizeof(struct in6_addr)) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway");
+ goto out;
+ }
+ cfg->gw.ipv6 = nla_get_in6_addr(gwa);
+ break;
+ default:
+ NL_SET_ERR_MSG(extack,
+ "Unknown address family for gateway");
+ goto out;
+ }
+ } else {
+ /* device only nexthop (no gateway) */
+ if (cfg->nh_flags & RTNH_F_ONLINK) {
+ NL_SET_ERR_MSG(extack,
+ "ONLINK flag can not be set for nexthop without a gateway");
+ goto out;
+ }
+ }
+
+ if (tb[NHA_ENCAP]) {
+ cfg->nh_encap = tb[NHA_ENCAP];
+
+ if (!tb[NHA_ENCAP_TYPE]) {
+ NL_SET_ERR_MSG(extack, "LWT encapsulation type is missing");
+ goto out;
+ }
+
+ cfg->nh_encap_type = nla_get_u16(tb[NHA_ENCAP_TYPE]);
+ err = lwtunnel_valid_encap_type(cfg->nh_encap_type, extack);
+ if (err < 0)
+ goto out;
+
+ } else if (tb[NHA_ENCAP_TYPE]) {
+ NL_SET_ERR_MSG(extack, "LWT encapsulation attribute is missing");
+ goto out;
+ }
+
+
+ err = 0;
+out:
+ return err;
+}
+
+/* rtnl */
+static int rtm_new_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nh_config cfg;
+ struct nexthop *nh;
+ int err;
+
+ err = rtm_to_nh_config(net, skb, nlh, &cfg, extack);
+ if (!err) {
+ nh = nexthop_add(net, &cfg, extack);
+ if (IS_ERR(nh))
+ err = PTR_ERR(nh);
+ }
+
+ return err;
+}
+
+static int nh_valid_get_del_req(struct nlmsghdr *nlh, u32 *id,
+ struct netlink_ext_ack *extack)
+{
+ struct nhmsg *nhm = nlmsg_data(nlh);
+ struct nlattr *tb[NHA_MAX + 1];
+ int err, i;
+
+ err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+ extack);
+ if (err < 0)
+ return err;
+
+ err = -EINVAL;
+ for (i = 0; i < __NHA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ switch (i) {
+ case NHA_ID:
+ break;
+ default:
+ NL_SET_ERR_MSG_ATTR(extack, tb[i],
+ "Unexpected attribute in request");
+ goto out;
+ }
+ }
+ if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header");
+ goto out;
+ }
+
+ if (!tb[NHA_ID]) {
+ NL_SET_ERR_MSG(extack, "Nexthop id is missing");
+ goto out;
+ }
+
+ *id = nla_get_u32(tb[NHA_ID]);
+ if (!(*id))
+ NL_SET_ERR_MSG(extack, "Invalid nexthop id");
+ else
+ err = 0;
+out:
+ return err;
+}
+
+/* rtnl */
+static int rtm_del_nexthop(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nl_info nlinfo = {
+ .nlh = nlh,
+ .nl_net = net,
+ .portid = NETLINK_CB(skb).portid,
+ };
+ struct nexthop *nh;
+ int err;
+ u32 id;
+
+ err = nh_valid_get_del_req(nlh, &id, extack);
+ if (err)
+ return err;
+
+ nh = nexthop_find_by_id(net, id);
+ if (!nh)
+ return -ENOENT;
+
+ remove_nexthop(net, nh, &nlinfo);
+
+ return 0;
+}
+
+/* rtnl */
+static int rtm_get_nexthop(struct sk_buff *in_skb, struct nlmsghdr *nlh,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(in_skb->sk);
+ struct sk_buff *skb = NULL;
+ struct nexthop *nh;
+ int err;
+ u32 id;
+
+ err = nh_valid_get_del_req(nlh, &id, extack);
+ if (err)
+ return err;
+
+ err = -ENOBUFS;
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ goto out;
+
+ err = -ENOENT;
+ nh = nexthop_find_by_id(net, id);
+ if (!nh)
+ goto errout_free;
+
+ err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP, NETLINK_CB(in_skb).portid,
+ nlh->nlmsg_seq, 0);
+ if (err < 0) {
+ WARN_ON(err == -EMSGSIZE);
+ goto errout_free;
+ }
+
+ err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
+out:
+ return err;
+errout_free:
+ kfree_skb(skb);
+ goto out;
+}
+
+static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
+ bool group_filter, u8 family)
+{
+ const struct net_device *dev;
+ const struct nh_info *nhi;
+
+ if (group_filter && !nh->is_group)
+ return true;
+
+ if (!dev_idx && !master_idx && !family)
+ return false;
+
+ if (nh->is_group)
+ return true;
+
+ nhi = rtnl_dereference(nh->nh_info);
+ if (family && nhi->family != family)
+ return true;
+
+ dev = nhi->fib_nhc.nhc_dev;
+ if (dev_idx && (!dev || dev->ifindex != dev_idx))
+ return true;
+
+ if (master_idx) {
+ struct net_device *master;
+
+ if (!dev)
+ return true;
+
+ master = netdev_master_upper_dev_get((struct net_device *)dev);
+ if (!master || master->ifindex != master_idx)
+ return true;
+ }
+
+ return false;
+}
+
+static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
+ int *master_idx, bool *group_filter,
+ struct netlink_callback *cb)
+{
+ struct netlink_ext_ack *extack = cb->extack;
+ struct nlattr *tb[NHA_MAX + 1];
+ struct nhmsg *nhm;
+ int err, i;
+ u32 idx;
+
+ err = nlmsg_parse(nlh, sizeof(*nhm), tb, NHA_MAX, rtm_nh_policy,
+ NULL);
+ if (err < 0)
+ return err;
+
+ for (i = 0; i <= NHA_MAX; ++i) {
+ if (!tb[i])
+ continue;
+
+ switch (i) {
+ case NHA_OIF:
+ idx = nla_get_u32(tb[i]);
+ if (idx > INT_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid device index");
+ return -EINVAL;
+ }
+ *dev_idx = idx;
+ break;
+ case NHA_MASTER:
+ idx = nla_get_u32(tb[i]);
+ if (idx > INT_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid master device index");
+ return -EINVAL;
+ }
+ *master_idx = idx;
+ break;
+ case NHA_GROUPS:
+ *group_filter = true;
+ break;
+ default:
+ NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
+ return -EINVAL;
+ }
+ }
+
+ nhm = nlmsg_data(nlh);
+ if (nhm->nh_protocol || nhm->resvd || nhm->nh_scope || nhm->nh_flags) {
+ NL_SET_ERR_MSG(extack, "Invalid values in header for nexthop dump request");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* rtnl */
+static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct nhmsg *nhm = nlmsg_data(cb->nlh);
+ int dev_filter_idx = 0, master_idx = 0;
+ struct net *net = sock_net(skb->sk);
+ struct rb_root *root = &net->nexthop.rb_root;
+ bool group_filter = false;
+ struct rb_node *node;
+ int idx = 0, s_idx;
+ int err;
+
+ err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
+ &group_filter, cb);
+ if (err < 0)
+ return err;
+
+ s_idx = cb->args[0];
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ struct nexthop *nh;
+
+ if (idx < s_idx)
+ goto cont;
+
+ nh = rb_entry(node, struct nexthop, rb_node);
+ if (nh_dump_filtered(nh, dev_filter_idx, master_idx,
+ group_filter, nhm->nh_family))
+ goto cont;
+
+ err = nh_fill_node(skb, nh, RTM_NEWNEXTHOP,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
+cont:
+ idx++;
+ }
+
+out:
+ err = skb->len;
+out_err:
+ cb->args[0] = idx;
+ cb->seq = net->nexthop.seq;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+
+ return err;
+}
+
+static void nexthop_sync_mtu(struct net_device *dev, u32 orig_mtu)
+{
+ unsigned int hash = nh_dev_hashfn(dev->ifindex);
+ struct net *net = dev_net(dev);
+ struct hlist_head *head = &net->nexthop.devhash[hash];
+ struct hlist_node *n;
+ struct nh_info *nhi;
+
+ hlist_for_each_entry_safe(nhi, n, head, dev_hash) {
+ if (nhi->fib_nhc.nhc_dev == dev) {
+ if (nhi->family == AF_INET)
+ fib_nhc_update_mtu(&nhi->fib_nhc, dev->mtu,
+ orig_mtu);
+ }
+ }
+}
+
+/* rtnl */
+static int nh_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct netdev_notifier_info_ext *info_ext;
+
+ switch (event) {
+ case NETDEV_DOWN:
+ case NETDEV_UNREGISTER:
+ nexthop_flush_dev(dev);
+ break;
+ case NETDEV_CHANGE:
+ if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
+ nexthop_flush_dev(dev);
+ break;
+ case NETDEV_CHANGEMTU:
+ info_ext = ptr;
+ nexthop_sync_mtu(dev, info_ext->ext.mtu);
+ rt_cache_flush(dev_net(dev));
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block nh_netdev_notifier = {
+ .notifier_call = nh_netdev_event,
+};
+
+static void __net_exit nexthop_net_exit(struct net *net)
+{
+ rtnl_lock();
+ flush_all_nexthops(net);
+ rtnl_unlock();
+ kfree(net->nexthop.devhash);
+}
+
+static int __net_init nexthop_net_init(struct net *net)
+{
+ size_t sz = sizeof(struct hlist_head) * NH_DEV_HASHSIZE;
+
+ net->nexthop.rb_root = RB_ROOT;
+ net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
+ if (!net->nexthop.devhash)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static struct pernet_operations nexthop_net_ops = {
+ .init = nexthop_net_init,
+ .exit = nexthop_net_exit,
+};
+
+static int __init nexthop_init(void)
+{
+ register_pernet_subsys(&nexthop_net_ops);
+
+ register_netdevice_notifier(&nh_netdev_notifier);
+
+ rtnl_register(PF_UNSPEC, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELNEXTHOP, rtm_del_nexthop, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETNEXTHOP, rtm_get_nexthop,
+ rtm_dump_nexthop, 0);
+
+ rtnl_register(PF_INET, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+ rtnl_register(PF_INET, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
+
+ rtnl_register(PF_INET6, RTM_NEWNEXTHOP, rtm_new_nexthop, NULL, 0);
+ rtnl_register(PF_INET6, RTM_GETNEXTHOP, NULL, rtm_dump_nexthop, 0);
+
+ return 0;
+}
+subsys_initcall(nexthop_init);
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index c3610b37bb4c..4746f963c439 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -72,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
seq_printf(seq, "FRAG: inuse %u memory %lu\n",
- atomic_read(&net->ipv4.frags.rhashtable.nelems),
- frag_mem_limit(&net->ipv4.frags));
+ atomic_read(&net->ipv4.fqdir->rhashtable.nelems),
+ frag_mem_limit(net->ipv4.fqdir));
return 0;
}
@@ -291,6 +291,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED),
SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP),
SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP),
+ SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 875867b64d6a..90f09e47198b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -277,55 +277,97 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl,
return ret;
}
+static int sscanf_key(char *buf, __le32 *key)
+{
+ u32 user_key[4];
+ int i, ret = 0;
+
+ if (sscanf(buf, "%x-%x-%x-%x", user_key, user_key + 1,
+ user_key + 2, user_key + 3) != 4) {
+ ret = -EINVAL;
+ } else {
+ for (i = 0; i < ARRAY_SIZE(user_key); i++)
+ key[i] = cpu_to_le32(user_key[i]);
+ }
+ pr_debug("proc TFO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
+ user_key[0], user_key[1], user_key[2], user_key[3], buf, ret);
+
+ return ret;
+}
+
static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
struct net *net = container_of(table->data, struct net,
ipv4.sysctl_tcp_fastopen);
- struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
- struct tcp_fastopen_context *ctxt;
- u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
- __le32 key[4];
- int ret, i;
+ /* maxlen to print the list of keys in hex (*2), with dashes
+ * separating doublewords and a comma in between keys.
+ */
+ struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
+ 2 * TCP_FASTOPEN_KEY_MAX) +
+ (TCP_FASTOPEN_KEY_MAX * 5)) };
+ struct tcp_fastopen_context *ctx;
+ u32 user_key[TCP_FASTOPEN_KEY_MAX * 4];
+ __le32 key[TCP_FASTOPEN_KEY_MAX * 4];
+ char *backup_data;
+ int ret, i = 0, off = 0, n_keys = 0;
tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
if (!tbl.data)
return -ENOMEM;
rcu_read_lock();
- ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
- if (ctxt)
- memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
- else
- memset(key, 0, sizeof(key));
+ ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
+ if (ctx) {
+ n_keys = tcp_fastopen_context_len(ctx);
+ memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys);
+ }
rcu_read_unlock();
- for (i = 0; i < ARRAY_SIZE(key); i++)
+ if (!n_keys) {
+ memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH);
+ n_keys = 1;
+ }
+
+ for (i = 0; i < n_keys * 4; i++)
user_key[i] = le32_to_cpu(key[i]);
- snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
- user_key[0], user_key[1], user_key[2], user_key[3]);
+ for (i = 0; i < n_keys; i++) {
+ off += snprintf(tbl.data + off, tbl.maxlen - off,
+ "%08x-%08x-%08x-%08x",
+ user_key[i * 4],
+ user_key[i * 4 + 1],
+ user_key[i * 4 + 2],
+ user_key[i * 4 + 3]);
+ if (i + 1 < n_keys)
+ off += snprintf(tbl.data + off, tbl.maxlen - off, ",");
+ }
+
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
if (write && ret == 0) {
- if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1,
- user_key + 2, user_key + 3) != 4) {
+ backup_data = strchr(tbl.data, ',');
+ if (backup_data) {
+ *backup_data = '\0';
+ backup_data++;
+ }
+ if (sscanf_key(tbl.data, key)) {
ret = -EINVAL;
goto bad_key;
}
-
- for (i = 0; i < ARRAY_SIZE(user_key); i++)
- key[i] = cpu_to_le32(user_key[i]);
-
+ if (backup_data) {
+ if (sscanf_key(backup_data, key + 4)) {
+ ret = -EINVAL;
+ goto bad_key;
+ }
+ }
tcp_fastopen_reset_cipher(net, NULL, key,
+ backup_data ? key + 4 : NULL,
TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
- pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
- user_key[0], user_key[1], user_key[2], user_key[3],
- (char *)tbl.data, ret);
kfree(tbl.data);
return ret;
}
@@ -933,7 +975,12 @@ static struct ctl_table ipv4_net_table[] = {
.procname = "tcp_fastopen_key",
.mode = 0600,
.data = &init_net.ipv4.sysctl_tcp_fastopen,
- .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10),
+ /* maxlen to print the list of keys in hex (*2), with dashes
+ * separating doublewords and a comma in between keys.
+ */
+ .maxlen = ((TCP_FASTOPEN_KEY_LENGTH *
+ 2 * TCP_FASTOPEN_KEY_MAX) +
+ (TCP_FASTOPEN_KEY_MAX * 5)),
.proc_handler = proc_tcp_fastopen_key,
},
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 53d61ca3ac4b..27ce13ece510 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2790,15 +2790,24 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
return err;
}
case TCP_FASTOPEN_KEY: {
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
+ __u8 *backup_key = NULL;
- if (optlen != sizeof(key))
+ /* Allow a backup key as well to facilitate key rotation
+ * First key is the active one.
+ */
+ if (optlen != TCP_FASTOPEN_KEY_LENGTH &&
+ optlen != TCP_FASTOPEN_KEY_BUF_LENGTH)
return -EINVAL;
if (copy_from_user(key, optval, optlen))
return -EFAULT;
- return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key));
+ if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH)
+ backup_key = key + TCP_FASTOPEN_KEY_LENGTH;
+
+ return tcp_fastopen_reset_cipher(net, sk, key, backup_key,
+ TCP_FASTOPEN_KEY_LENGTH);
}
default:
/* fallthru */
@@ -3452,21 +3461,23 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
return 0;
case TCP_FASTOPEN_KEY: {
- __u8 key[TCP_FASTOPEN_KEY_LENGTH];
+ __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH];
struct tcp_fastopen_context *ctx;
+ unsigned int key_len = 0;
if (get_user(len, optlen))
return -EFAULT;
rcu_read_lock();
ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx);
- if (ctx)
- memcpy(key, ctx->key, sizeof(key));
- else
- len = 0;
+ if (ctx) {
+ key_len = tcp_fastopen_context_len(ctx) *
+ TCP_FASTOPEN_KEY_LENGTH;
+ memcpy(&key[0], &ctx->key[0], key_len);
+ }
rcu_read_unlock();
- len = min_t(unsigned int, len, sizeof(key));
+ len = min_t(unsigned int, len, key_len);
if (put_user(len, optlen))
return -EFAULT;
if (copy_to_user(optval, key, len))
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 018a48477355..8e1580485c9e 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -30,14 +30,20 @@ void tcp_fastopen_init_key_once(struct net *net)
* for a valid cookie, so this is an acceptable risk.
*/
get_random_bytes(key, sizeof(key));
- tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key));
+ tcp_fastopen_reset_cipher(net, NULL, key, NULL, sizeof(key));
}
static void tcp_fastopen_ctx_free(struct rcu_head *head)
{
struct tcp_fastopen_context *ctx =
container_of(head, struct tcp_fastopen_context, rcu);
- crypto_free_cipher(ctx->tfm);
+ int i;
+
+ /* We own ctx, thus no need to hold the Fastopen-lock */
+ for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) {
+ if (ctx->tfm[i])
+ crypto_free_cipher(ctx->tfm[i]);
+ }
kfree(ctx);
}
@@ -66,33 +72,54 @@ void tcp_fastopen_ctx_destroy(struct net *net)
call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free);
}
+struct tcp_fastopen_context *tcp_fastopen_alloc_ctx(void *primary_key,
+ void *backup_key,
+ unsigned int len)
+{
+ struct tcp_fastopen_context *new_ctx;
+ void *key = primary_key;
+ int err, i;
+
+ new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL);
+ if (!new_ctx)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++)
+ new_ctx->tfm[i] = NULL;
+ for (i = 0; i < (backup_key ? 2 : 1); i++) {
+ new_ctx->tfm[i] = crypto_alloc_cipher("aes", 0, 0);
+ if (IS_ERR(new_ctx->tfm[i])) {
+ err = PTR_ERR(new_ctx->tfm[i]);
+ new_ctx->tfm[i] = NULL;
+ pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
+ goto out;
+ }
+ err = crypto_cipher_setkey(new_ctx->tfm[i], key, len);
+ if (err) {
+ pr_err("TCP: TFO cipher key error: %d\n", err);
+ goto out;
+ }
+ memcpy(&new_ctx->key[i * TCP_FASTOPEN_KEY_LENGTH], key, len);
+ key = backup_key;
+ }
+ return new_ctx;
+out:
+ tcp_fastopen_ctx_free(&new_ctx->rcu);
+ return ERR_PTR(err);
+}
+
int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk,
- void *key, unsigned int len)
+ void *primary_key, void *backup_key,
+ unsigned int len)
{
struct tcp_fastopen_context *ctx, *octx;
struct fastopen_queue *q;
- int err;
+ int err = 0;
- ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
- ctx->tfm = crypto_alloc_cipher("aes", 0, 0);
-
- if (IS_ERR(ctx->tfm)) {
- err = PTR_ERR(ctx->tfm);
-error: kfree(ctx);
- pr_err("TCP: TFO aes cipher alloc error: %d\n", err);
- return err;
+ ctx = tcp_fastopen_alloc_ctx(primary_key, backup_key, len);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
}
- err = crypto_cipher_setkey(ctx->tfm, key, len);
- if (err) {
- pr_err("TCP: TFO cipher key error: %d\n", err);
- crypto_free_cipher(ctx->tfm);
- goto error;
- }
- memcpy(ctx->key, key, len);
-
-
spin_lock(&net->ipv4.tcp_fastopen_ctx_lock);
if (sk) {
q = &inet_csk(sk)->icsk_accept_queue.fastopenq;
@@ -108,28 +135,42 @@ error: kfree(ctx);
if (octx)
call_rcu(&octx->rcu, tcp_fastopen_ctx_free);
+out:
return err;
}
-static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
- struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req,
+ struct sk_buff *syn,
+ struct crypto_cipher *tfm,
+ struct tcp_fastopen_cookie *foc)
{
- struct tcp_fastopen_context *ctx;
- bool ok = false;
-
- rcu_read_lock();
+ if (req->rsk_ops->family == AF_INET) {
+ const struct iphdr *iph = ip_hdr(syn);
+ __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx);
- if (!ctx)
- ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx);
+ crypto_cipher_encrypt_one(tfm, foc->val, (void *)path);
+ foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+ return true;
+ }
- if (ctx) {
- crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (req->rsk_ops->family == AF_INET6) {
+ const struct ipv6hdr *ip6h = ipv6_hdr(syn);
+ struct tcp_fastopen_cookie tmp;
+ struct in6_addr *buf;
+ int i;
+
+ crypto_cipher_encrypt_one(tfm, tmp.val,
+ (void *)&ip6h->saddr);
+ buf = &tmp.addr;
+ for (i = 0; i < 4; i++)
+ buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
+ crypto_cipher_encrypt_one(tfm, foc->val, (void *)buf);
foc->len = TCP_FASTOPEN_COOKIE_SIZE;
- ok = true;
+ return true;
}
- rcu_read_unlock();
- return ok;
+#endif
+ return false;
}
/* Generate the fastopen cookie by doing aes128 encryption on both
@@ -138,37 +179,20 @@ static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path,
*
* XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
*/
-static bool tcp_fastopen_cookie_gen(struct sock *sk,
+static void tcp_fastopen_cookie_gen(struct sock *sk,
struct request_sock *req,
struct sk_buff *syn,
struct tcp_fastopen_cookie *foc)
{
- if (req->rsk_ops->family == AF_INET) {
- const struct iphdr *iph = ip_hdr(syn);
-
- __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
- return __tcp_fastopen_cookie_gen(sk, path, foc);
- }
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (req->rsk_ops->family == AF_INET6) {
- const struct ipv6hdr *ip6h = ipv6_hdr(syn);
- struct tcp_fastopen_cookie tmp;
-
- if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) {
- struct in6_addr *buf = &tmp.addr;
- int i;
+ struct tcp_fastopen_context *ctx;
- for (i = 0; i < 4; i++)
- buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
- return __tcp_fastopen_cookie_gen(sk, buf, foc);
- }
- }
-#endif
- return false;
+ rcu_read_lock();
+ ctx = tcp_fastopen_get_ctx(sk);
+ if (ctx)
+ __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[0], foc);
+ rcu_read_unlock();
}
-
/* If an incoming SYN or SYNACK frame contains a payload and/or FIN,
* queue this additional data / FIN.
*/
@@ -212,6 +236,35 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
tcp_fin(sk);
}
+/* returns 0 - no key match, 1 for primary, 2 for backup */
+static int tcp_fastopen_cookie_gen_check(struct sock *sk,
+ struct request_sock *req,
+ struct sk_buff *syn,
+ struct tcp_fastopen_cookie *orig,
+ struct tcp_fastopen_cookie *valid_foc)
+{
+ struct tcp_fastopen_cookie search_foc = { .len = -1 };
+ struct tcp_fastopen_cookie *foc = valid_foc;
+ struct tcp_fastopen_context *ctx;
+ int i, ret = 0;
+
+ rcu_read_lock();
+ ctx = tcp_fastopen_get_ctx(sk);
+ if (!ctx)
+ goto out;
+ for (i = 0; i < tcp_fastopen_context_len(ctx); i++) {
+ __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[i], foc);
+ if (tcp_fastopen_cookie_match(foc, orig)) {
+ ret = i + 1;
+ goto out;
+ }
+ foc = &search_foc;
+ }
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
static struct sock *tcp_fastopen_create_child(struct sock *sk,
struct sk_buff *skb,
struct request_sock *req)
@@ -331,6 +384,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
+ int ret = 0;
if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
@@ -346,31 +400,44 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
- if (foc->len >= 0 && /* Client presents or requests a cookie */
- tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) &&
- foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
- foc->len == valid_foc.len &&
- !memcmp(foc->val, valid_foc.val, foc->len)) {
- /* Cookie is valid. Create a (full) child socket to accept
- * the data in SYN before returning a SYN-ACK to ack the
- * data. If we fail to create the socket, fall back and
- * ack the ISN only but includes the same cookie.
- *
- * Note: Data-less SYN with valid cookie is allowed to send
- * data in SYN_RECV state.
- */
+ if (foc->len == 0) {
+ /* Client requests a cookie. */
+ tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc);
+ } else if (foc->len > 0) {
+ ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc,
+ &valid_foc);
+ if (!ret) {
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
+ } else {
+ /* Cookie is valid. Create a (full) child socket to
+ * accept the data in SYN before returning a SYN-ACK to
+ * ack the data. If we fail to create the socket, fall
+ * back and ack the ISN only but includes the same
+ * cookie.
+ *
+ * Note: Data-less SYN with valid cookie is allowed to
+ * send data in SYN_RECV state.
+ */
fastopen:
- child = tcp_fastopen_create_child(sk, skb, req);
- if (child) {
- foc->len = -1;
+ child = tcp_fastopen_create_child(sk, skb, req);
+ if (child) {
+ if (ret == 2) {
+ valid_foc.exp = foc->exp;
+ *foc = valid_foc;
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVEALTKEY);
+ } else {
+ foc->len = -1;
+ }
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPFASTOPENPASSIVE);
+ return child;
+ }
NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPFASTOPENPASSIVE);
- return child;
+ LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
}
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
- } else if (foc->len > 0) /* Client presents an invalid cookie */
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-
+ }
valid_foc.exp = foc->exp;
*foc = valid_foc;
return NULL;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index b51630ddb728..6b673d4f5ca9 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2421,9 +2421,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex)
+ if (rt->fib6_nh->fib_nh_dev->ifindex != dev->ifindex)
continue;
- if (no_gw && rt->fib6_nh.fib_nh_gw_family)
+ if (no_gw && rt->fib6_nh->fib_nh_gw_family)
continue;
if ((rt->fib6_flags & flags) != flags)
continue;
@@ -6354,16 +6354,16 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
spin_lock(&ifa->lock);
if (ifa->rt) {
- struct fib6_info *rt = ifa->rt;
+ struct fib6_nh *nh = ifa->rt->fib6_nh;
int cpu;
rcu_read_lock();
ifa->rt->dst_nopolicy = val ? true : false;
- if (rt->rt6i_pcpu) {
+ if (nh->rt6i_pcpu) {
for_each_possible_cpu(cpu) {
struct rt6_info **rtp;
- rtp = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+ rtp = per_cpu_ptr(nh->rt6i_pcpu, cpu);
addrconf_set_nopolicy(*rtp, val);
}
}
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 5b1246635e02..783f3c1466da 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -183,6 +183,11 @@ static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
return -EAFNOSUPPORT;
}
+static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt)
+{
+ return -EAFNOSUPPORT;
+}
+
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
.ipv6_route_input = eafnosupport_ipv6_route_input,
@@ -192,6 +197,7 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.fib6_select_path = eafnosupport_fib6_select_path,
.ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6,
.fib6_nh_init = eafnosupport_fib6_nh_init,
+ .ip6_del_rt = eafnosupport_ip6_del_rt,
};
EXPORT_SYMBOL_GPL(ipv6_stub);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c04ae282f4e4..cc6f8d0c625a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -926,6 +926,9 @@ static const struct ipv6_stub ipv6_stub_impl = {
.ip6_mtu_from_fib6 = ip6_mtu_from_fib6,
.fib6_nh_init = fib6_nh_init,
.fib6_nh_release = fib6_nh_release,
+ .fib6_update_sernum = fib6_update_sernum_stub,
+ .fib6_rt_update = fib6_rt_update,
+ .ip6_del_rt = ip6_del_rt,
.udpv6_encap_enable = udpv6_encap_enable,
.ndisc_send_na = ndisc_send_na,
.nd_tbl = &nd_tbl,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 008421b550c6..cdfb8500ccae 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -147,20 +147,18 @@ static __be32 addr_bit_set(const void *token, int fn_bit)
addr[fn_bit >> 5];
}
-struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
+struct fib6_info *fib6_info_alloc(gfp_t gfp_flags, bool with_fib6_nh)
{
struct fib6_info *f6i;
+ size_t sz = sizeof(*f6i);
- f6i = kzalloc(sizeof(*f6i), gfp_flags);
+ if (with_fib6_nh)
+ sz += sizeof(struct fib6_nh);
+
+ f6i = kzalloc(sz, gfp_flags);
if (!f6i)
return NULL;
- f6i->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
- if (!f6i->rt6i_pcpu) {
- kfree(f6i);
- return NULL;
- }
-
INIT_LIST_HEAD(&f6i->fib6_siblings);
refcount_set(&f6i->fib6_ref, 1);
@@ -170,36 +168,11 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
void fib6_info_destroy_rcu(struct rcu_head *head)
{
struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
- struct rt6_exception_bucket *bucket;
WARN_ON(f6i->fib6_node);
- bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
- kfree(bucket);
-
- if (f6i->rt6i_pcpu) {
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct rt6_info **ppcpu_rt;
- struct rt6_info *pcpu_rt;
-
- ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
- pcpu_rt = *ppcpu_rt;
- if (pcpu_rt) {
- dst_dev_put(&pcpu_rt->dst);
- dst_release(&pcpu_rt->dst);
- *ppcpu_rt = NULL;
- }
- }
-
- free_percpu(f6i->rt6i_pcpu);
- }
-
- fib6_nh_release(&f6i->fib6_nh);
-
+ fib6_nh_release(f6i->fib6_nh);
ip_fib_metrics_put(f6i->fib6_metrics);
-
kfree(f6i);
}
EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
@@ -393,10 +366,10 @@ static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net,
return call_fib6_notifier(nb, net, event_type, &info.info);
}
-static int call_fib6_entry_notifiers(struct net *net,
- enum fib_event_type event_type,
- struct fib6_info *rt,
- struct netlink_ext_ack *extack)
+int call_fib6_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ struct netlink_ext_ack *extack)
{
struct fib6_entry_notifier_info info = {
.info.extack = extack,
@@ -899,16 +872,14 @@ insert_above:
return ln;
}
-static void fib6_drop_pcpu_from(struct fib6_info *f6i,
- const struct fib6_table *table)
+static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh,
+ const struct fib6_info *match,
+ const struct fib6_table *table)
{
int cpu;
- /* Make sure rt6_make_pcpu_route() wont add other percpu routes
- * while we are cleaning them here.
- */
- f6i->fib6_destroying = 1;
- mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+ if (!fib6_nh->rt6i_pcpu)
+ return;
/* release the reference to this fib entry from
* all of its cached pcpu routes
@@ -917,9 +888,15 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
struct rt6_info **ppcpu_rt;
struct rt6_info *pcpu_rt;
- ppcpu_rt = per_cpu_ptr(f6i->rt6i_pcpu, cpu);
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
pcpu_rt = *ppcpu_rt;
- if (pcpu_rt) {
+
+ /* only dropping the 'from' reference if the cached route
+ * is using 'match'. The cached pcpu_rt->from only changes
+ * from a fib6_info to NULL (ip6_dst_destroy); it can never
+ * change from one fib6_info reference to another
+ */
+ if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) {
struct fib6_info *from;
from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
@@ -928,13 +905,27 @@ static void fib6_drop_pcpu_from(struct fib6_info *f6i,
}
}
+static void fib6_drop_pcpu_from(struct fib6_info *f6i,
+ const struct fib6_table *table)
+{
+ struct fib6_nh *fib6_nh;
+
+ /* Make sure rt6_make_pcpu_route() wont add other percpu routes
+ * while we are cleaning them here.
+ */
+ f6i->fib6_destroying = 1;
+ mb(); /* paired with the cmpxchg() in rt6_make_pcpu_route() */
+
+ fib6_nh = f6i->fib6_nh;
+ __fib6_drop_pcpu_from(fib6_nh, f6i, table);
+}
+
static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
struct net *net)
{
struct fib6_table *table = rt->fib6_table;
- if (rt->rt6i_pcpu)
- fib6_drop_pcpu_from(rt, table);
+ fib6_drop_pcpu_from(rt, table);
if (refcount_read(&rt->fib6_ref) != 1) {
/* This route is used as dummy address holder in some split
@@ -1222,6 +1213,14 @@ void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt)
__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
}
+/* allow ipv4 to update sernum via ipv6_stub */
+void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i)
+{
+ spin_lock_bh(&f6i->fib6_table->tb6_lock);
+ fib6_update_sernum_upto_root(net, f6i);
+ spin_unlock_bh(&f6i->fib6_table->tb6_lock);
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
@@ -2306,14 +2305,14 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v)
#else
seq_puts(seq, "00000000000000000000000000000000 00 ");
#endif
- if (rt->fib6_nh.fib_nh_gw_family) {
+ if (rt->fib6_nh->fib_nh_gw_family) {
flags |= RTF_GATEWAY;
- seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
+ seq_printf(seq, "%pi6", &rt->fib6_nh->fib_nh_gw6);
} else {
seq_puts(seq, "00000000000000000000000000000000");
}
- dev = rt->fib6_nh.fib_nh_dev;
+ dev = rt->fib6_nh->fib_nh_dev;
seq_printf(seq, " %08x %08x %08x %08x %8s\n",
rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
flags, dev ? dev->name : "");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f9e43323e667..0bb6b6de7962 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -592,6 +592,170 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
+int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id,
+ struct ip6_fraglist_iter *iter)
+{
+ unsigned int first_len;
+ struct frag_hdr *fh;
+
+ /* BUILD HEADER */
+ *prevhdr = NEXTHDR_FRAGMENT;
+ iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
+ if (!iter->tmp_hdr)
+ return -ENOMEM;
+
+ iter->frag_list = skb_shinfo(skb)->frag_list;
+ iter->frag = iter->frag_list;
+ skb_frag_list_init(skb);
+
+ iter->offset = 0;
+ iter->hlen = hlen;
+ iter->frag_id = frag_id;
+ iter->nexthdr = nexthdr;
+
+ __skb_pull(skb, hlen);
+ fh = __skb_push(skb, sizeof(struct frag_hdr));
+ __skb_push(skb, hlen);
+ skb_reset_network_header(skb);
+ memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);
+
+ fh->nexthdr = nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(IP6_MF);
+ fh->identification = frag_id;
+
+ first_len = skb_pagelen(skb);
+ skb->data_len = first_len - skb_headlen(skb);
+ skb->len = first_len;
+ ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));
+
+ return 0;
+}
+EXPORT_SYMBOL(ip6_fraglist_init);
+
+void ip6_fraglist_prepare(struct sk_buff *skb,
+ struct ip6_fraglist_iter *iter)
+{
+ struct sk_buff *frag = iter->frag;
+ unsigned int hlen = iter->hlen;
+ struct frag_hdr *fh;
+
+ frag->ip_summed = CHECKSUM_NONE;
+ skb_reset_transport_header(frag);
+ fh = __skb_push(frag, sizeof(struct frag_hdr));
+ __skb_push(frag, hlen);
+ skb_reset_network_header(frag);
+ memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
+ iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
+ fh->nexthdr = iter->nexthdr;
+ fh->reserved = 0;
+ fh->frag_off = htons(iter->offset);
+ if (frag->next)
+ fh->frag_off |= htons(IP6_MF);
+ fh->identification = iter->frag_id;
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+ ip6_copy_metadata(frag, skb);
+}
+EXPORT_SYMBOL(ip6_fraglist_prepare);
+
+void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
+ unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
+ u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
+{
+ state->prevhdr = prevhdr;
+ state->nexthdr = nexthdr;
+ state->frag_id = frag_id;
+
+ state->hlen = hlen;
+ state->mtu = mtu;
+
+ state->left = skb->len - hlen; /* Space per frame */
+ state->ptr = hlen; /* Where to start from */
+
+ state->hroom = hdr_room;
+ state->troom = needed_tailroom;
+
+ state->offset = 0;
+}
+EXPORT_SYMBOL(ip6_frag_init);
+
+struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
+{
+ u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
+ struct sk_buff *frag;
+ struct frag_hdr *fh;
+ unsigned int len;
+
+ len = state->left;
+ /* IF: it doesn't fit, use 'mtu' - the data space left */
+ if (len > state->mtu)
+ len = state->mtu;
+ /* IF: we are not sending up to and including the packet end
+ then align the next start on an eight byte boundary */
+ if (len < state->left)
+ len &= ~7;
+
+ /* Allocate buffer */
+ frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
+ state->hroom + state->troom, GFP_ATOMIC);
+ if (!frag)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Set up data on packet
+ */
+
+ ip6_copy_metadata(frag, skb);
+ skb_reserve(frag, state->hroom);
+ skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
+ skb_reset_network_header(frag);
+ fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
+ frag->transport_header = (frag->network_header + state->hlen +
+ sizeof(struct frag_hdr));
+
+ /*
+ * Charge the memory for the fragment to any owner
+ * it might possess
+ */
+ if (skb->sk)
+ skb_set_owner_w(frag, skb->sk);
+
+ /*
+ * Copy the packet header into the new buffer.
+ */
+ skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);
+
+ fragnexthdr_offset = skb_network_header(frag);
+ fragnexthdr_offset += prevhdr - skb_network_header(skb);
+ *fragnexthdr_offset = NEXTHDR_FRAGMENT;
+
+ /*
+ * Build fragment header.
+ */
+ fh->nexthdr = state->nexthdr;
+ fh->reserved = 0;
+ fh->identification = state->frag_id;
+
+ /*
+ * Copy a block of the IP datagram.
+ */
+ BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
+ len));
+ state->left -= len;
+
+ fh->frag_off = htons(state->offset);
+ if (state->left > 0)
+ fh->frag_off |= htons(IP6_MF);
+ ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
+
+ state->ptr += len;
+ state->offset += len;
+
+ return frag;
+}
+EXPORT_SYMBOL(ip6_frag_next);
+
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
int (*output)(struct net *, struct sock *, struct sk_buff *))
{
@@ -599,12 +763,10 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
inet6_sk(skb->sk) : NULL;
- struct ipv6hdr *tmp_hdr;
- struct frag_hdr *fh;
- unsigned int mtu, hlen, left, len, nexthdr_offset;
- int hroom, troom;
+ struct ip6_frag_state state;
+ unsigned int mtu, hlen, nexthdr_offset;
+ int hroom, err = 0;
__be32 frag_id;
- int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
err = ip6_find_1stfragopt(skb, &prevhdr);
@@ -651,6 +813,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
hroom = LL_RESERVED_SPACE(rt->dst.dev);
if (skb_has_frag_list(skb)) {
unsigned int first_len = skb_pagelen(skb);
+ struct ip6_fraglist_iter iter;
struct sk_buff *frag2;
if (first_len - hlen > mtu ||
@@ -678,74 +841,29 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
skb->truesize -= frag->truesize;
}
- err = 0;
- offset = 0;
- /* BUILD HEADER */
-
- *prevhdr = NEXTHDR_FRAGMENT;
- tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
- if (!tmp_hdr) {
- err = -ENOMEM;
+ err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+ &iter);
+ if (err < 0)
goto fail;
- }
- frag = skb_shinfo(skb)->frag_list;
- skb_frag_list_init(skb);
-
- __skb_pull(skb, hlen);
- fh = __skb_push(skb, sizeof(struct frag_hdr));
- __skb_push(skb, hlen);
- skb_reset_network_header(skb);
- memcpy(skb_network_header(skb), tmp_hdr, hlen);
-
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(IP6_MF);
- fh->identification = frag_id;
-
- first_len = skb_pagelen(skb);
- skb->data_len = first_len - skb_headlen(skb);
- skb->len = first_len;
- ipv6_hdr(skb)->payload_len = htons(first_len -
- sizeof(struct ipv6hdr));
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
- if (frag) {
- frag->ip_summed = CHECKSUM_NONE;
- skb_reset_transport_header(frag);
- fh = __skb_push(frag, sizeof(struct frag_hdr));
- __skb_push(frag, hlen);
- skb_reset_network_header(frag);
- memcpy(skb_network_header(frag), tmp_hdr,
- hlen);
- offset += skb->len - hlen - sizeof(struct frag_hdr);
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->frag_off = htons(offset);
- if (frag->next)
- fh->frag_off |= htons(IP6_MF);
- fh->identification = frag_id;
- ipv6_hdr(frag)->payload_len =
- htons(frag->len -
- sizeof(struct ipv6hdr));
- ip6_copy_metadata(frag, skb);
- }
+ if (iter.frag)
+ ip6_fraglist_prepare(skb, &iter);
err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
- if (err || !frag)
+ if (err || !iter.frag)
break;
- skb = frag;
- frag = skb->next;
- skb_mark_not_on_list(skb);
+ skb = ip6_fraglist_next(&iter);
}
- kfree(tmp_hdr);
+ kfree(iter.tmp_hdr);
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
@@ -753,7 +871,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
return 0;
}
- kfree_skb_list(frag);
+ kfree_skb_list(iter.frag_list);
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
@@ -770,91 +888,26 @@ slow_path_clean:
}
slow_path:
- left = skb->len - hlen; /* Space per frame */
- ptr = hlen; /* Where to start from */
-
/*
* Fragment the datagram.
*/
- troom = rt->dst.dev->needed_tailroom;
+ ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
+ LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
+ &state);
/*
* Keep copying data until we run out.
*/
- while (left > 0) {
- u8 *fragnexthdr_offset;
-
- len = left;
- /* IF: it doesn't fit, use 'mtu' - the data space left */
- if (len > mtu)
- len = mtu;
- /* IF: we are not sending up to and including the packet end
- then align the next start on an eight byte boundary */
- if (len < left) {
- len &= ~7;
- }
- /* Allocate buffer */
- frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
- hroom + troom, GFP_ATOMIC);
- if (!frag) {
- err = -ENOMEM;
+ while (state.left > 0) {
+ frag = ip6_frag_next(skb, &state);
+ if (IS_ERR(frag)) {
+ err = PTR_ERR(frag);
goto fail;
}
/*
- * Set up data on packet
- */
-
- ip6_copy_metadata(frag, skb);
- skb_reserve(frag, hroom);
- skb_put(frag, len + hlen + sizeof(struct frag_hdr));
- skb_reset_network_header(frag);
- fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
- frag->transport_header = (frag->network_header + hlen +
- sizeof(struct frag_hdr));
-
- /*
- * Charge the memory for the fragment to any owner
- * it might possess
- */
- if (skb->sk)
- skb_set_owner_w(frag, skb->sk);
-
- /*
- * Copy the packet header into the new buffer.
- */
- skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
-
- fragnexthdr_offset = skb_network_header(frag);
- fragnexthdr_offset += prevhdr - skb_network_header(skb);
- *fragnexthdr_offset = NEXTHDR_FRAGMENT;
-
- /*
- * Build fragment header.
- */
- fh->nexthdr = nexthdr;
- fh->reserved = 0;
- fh->identification = frag_id;
-
- /*
- * Copy a block of the IP datagram.
- */
- BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
- len));
- left -= len;
-
- fh->frag_off = htons(offset);
- if (left > 0)
- fh->frag_off |= htons(IP6_MF);
- ipv6_hdr(frag)->payload_len = htons(frag->len -
- sizeof(struct ipv6hdr));
-
- ptr += len;
- offset += len;
-
- /*
* Put this fragment into the sending queue.
*/
err = output(net, sk, frag);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 4c8e2ea8bf19..f874dde1ee85 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1293,8 +1293,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev);
if (rt) {
- neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
- rt->fib6_nh.fib_nh_dev, NULL,
+ neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
+ rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
@@ -1323,8 +1323,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
return;
}
- neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6,
- rt->fib6_nh.fib_nh_dev, NULL,
+ neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
+ rt->fib6_nh->fib_nh_dev, NULL,
&ipv6_hdr(skb)->saddr);
if (!neigh) {
ND_PRINTK(0, err,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 1240ccd57f39..9530cc280953 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -16,6 +16,9 @@
#include <net/ip6_route.h>
#include <net/xfrm.h>
#include <net/netfilter/nf_queue.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include "../bridge/br_private.h"
int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
{
@@ -109,6 +112,122 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst,
}
EXPORT_SYMBOL_GPL(__nf_ip6_route);
+int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ struct nf_ct_bridge_frag_data *data,
+ int (*output)(struct net *, struct sock *sk,
+ const struct nf_ct_bridge_frag_data *data,
+ struct sk_buff *))
+{
+ int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
+ struct ip6_frag_state state;
+ u8 *prevhdr, nexthdr = 0;
+ unsigned int mtu, hlen;
+ int hroom, err = 0;
+ __be32 frag_id;
+
+ err = ip6_find_1stfragopt(skb, &prevhdr);
+ if (err < 0)
+ goto blackhole;
+ hlen = err;
+ nexthdr = *prevhdr;
+
+ mtu = skb->dev->mtu;
+ if (frag_max_size > mtu ||
+ frag_max_size < IPV6_MIN_MTU)
+ goto blackhole;
+
+ mtu = frag_max_size;
+ if (mtu < hlen + sizeof(struct frag_hdr) + 8)
+ goto blackhole;
+ mtu -= hlen + sizeof(struct frag_hdr);
+
+ frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
+ &ipv6_hdr(skb)->saddr);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ (err = skb_checksum_help(skb)))
+ goto blackhole;
+
+ hroom = LL_RESERVED_SPACE(skb->dev);
+ if (skb_has_frag_list(skb)) {
+ unsigned int first_len = skb_pagelen(skb);
+ struct ip6_fraglist_iter iter;
+ struct sk_buff *frag2;
+
+ if (first_len - hlen > mtu ||
+ skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
+ goto blackhole;
+
+ if (skb_cloned(skb))
+ goto slow_path;
+
+ skb_walk_frags(skb, frag2) {
+ if (frag2->len > mtu ||
+ skb_headroom(frag2) < (hlen + hroom + sizeof(struct frag_hdr)))
+ goto blackhole;
+
+ /* Partially cloned skb? */
+ if (skb_shared(frag2))
+ goto slow_path;
+ }
+
+ err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
+ &iter);
+ if (err < 0)
+ goto blackhole;
+
+ for (;;) {
+ /* Prepare header of the next frame,
+ * before previous one went down.
+ */
+ if (iter.frag)
+ ip6_fraglist_prepare(skb, &iter);
+
+ err = output(net, sk, data, skb);
+ if (err || !iter.frag)
+ break;
+
+ skb = ip6_fraglist_next(&iter);
+ }
+
+ kfree(iter.tmp_hdr);
+ if (!err)
+ return 0;
+
+ kfree_skb_list(iter.frag_list);
+ return err;
+ }
+slow_path:
+ /* This is a linearized skbuff, the original geometry is lost for us.
+ * This may also be a clone skbuff, we could preserve the geometry for
+ * the copies but probably not worth the effort.
+ */
+ ip6_frag_init(skb, hlen, mtu, skb->dev->needed_tailroom,
+ LL_RESERVED_SPACE(skb->dev), prevhdr, nexthdr, frag_id,
+ &state);
+
+ while (state.left > 0) {
+ struct sk_buff *skb2;
+
+ skb2 = ip6_frag_next(skb, &state);
+ if (IS_ERR(skb2)) {
+ err = PTR_ERR(skb2);
+ goto blackhole;
+ }
+
+ err = output(net, sk, data, skb2);
+ if (err)
+ goto blackhole;
+ }
+ consume_skb(skb);
+ return err;
+
+blackhole:
+ kfree_skb(skb);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(br_ip6_fragment);
+
static const struct nf_ipv6_ops ipv6ops = {
#if IS_MODULE(CONFIG_IPV6)
.chk_addr = ipv6_chk_addr,
@@ -119,6 +238,10 @@ static const struct nf_ipv6_ops ipv6ops = {
.route_input = ip6_route_input,
.fragment = ip6_fragment,
.reroute = nf_ip6_reroute,
+#if IS_MODULE(CONFIG_IPV6)
+ .br_defrag = nf_ct_frag6_gather,
+ .br_fragment = br_ip6_fragment,
+#endif
};
int __init ipv6_netfilter_init(void)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3de0e9b0a482..c5d59fa568d6 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -58,26 +58,21 @@ static struct inet_frags nf_frags;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
- .data = &init_net.nf_frag.frags.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_frag6_low_thresh",
- .data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
- .data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
};
@@ -93,15 +88,15 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
GFP_KERNEL);
if (table == NULL)
goto err_alloc;
-
- table[0].data = &net->nf_frag.frags.timeout;
- table[1].data = &net->nf_frag.frags.low_thresh;
- table[1].extra2 = &net->nf_frag.frags.high_thresh;
- table[2].data = &net->nf_frag.frags.high_thresh;
- table[2].extra1 = &net->nf_frag.frags.low_thresh;
- table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
}
+ table[0].data = &net->nf_frag.fqdir->timeout;
+ table[1].data = &net->nf_frag.fqdir->low_thresh;
+ table[1].extra2 = &net->nf_frag.fqdir->high_thresh;
+ table[2].data = &net->nf_frag.fqdir->high_thresh;
+ table[2].extra1 = &net->nf_frag.fqdir->low_thresh;
+ table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh;
+
hdr = register_net_sysctl(net, "net/netfilter", table);
if (hdr == NULL)
goto err_reg;
@@ -148,12 +143,10 @@ static void nf_ct_frag6_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
- struct net *net;
fq = container_of(frag, struct frag_queue, q);
- net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6frag_expire_frag_queue(net, fq);
+ ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
}
/* Creation primitives. */
@@ -169,7 +162,7 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
};
struct inet_frag_queue *q;
- q = inet_frag_find(&net->nf_frag.frags, &key);
+ q = inet_frag_find(net->nf_frag.fqdir, &key);
if (!q)
return NULL;
@@ -276,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
- add_frag_mem_limit(fq->q.net, skb->truesize);
+ add_frag_mem_limit(fq->q.fqdir, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -496,24 +489,24 @@ static int nf_ct_net_init(struct net *net)
{
int res;
- net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- net->nf_frag.frags.f = &nf_frags;
-
- res = inet_frags_init_net(&net->nf_frag.frags);
+ res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
if (res < 0)
return res;
+
+ net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
res = nf_ct_frag6_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->nf_frag.frags);
+ fqdir_exit(net->nf_frag.fqdir);
return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags);
+ fqdir_exit(net->nf_frag.fqdir);
}
static struct pernet_operations nf_ct_net_ops = {
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 2356b4af7309..0bbefc440bcd 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -48,8 +48,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
- atomic_read(&net->ipv6.frags.rhashtable.nelems),
- frag_mem_limit(&net->ipv6.frags));
+ atomic_read(&net->ipv6.fqdir->rhashtable.nelems),
+ frag_mem_limit(net->ipv6.fqdir));
return 0;
}
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1a832f5e190b..ff5b6d8de2c6 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -76,12 +76,10 @@ static void ip6_frag_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
- struct net *net;
fq = container_of(frag, struct frag_queue, q);
- net = container_of(fq->q.net, struct net, ipv6.frags);
- ip6frag_expire_frag_queue(net, fq);
+ ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
}
static struct frag_queue *
@@ -100,7 +98,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
IPV6_ADDR_LINKLOCAL)))
key.iif = 0;
- q = inet_frag_find(&net->ipv6.frags, &key);
+ q = inet_frag_find(net->ipv6.fqdir, &key);
if (!q)
return NULL;
@@ -200,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
- add_frag_mem_limit(fq->q.net, skb->truesize);
+ add_frag_mem_limit(fq->q.fqdir, skb->truesize);
fragsize = -skb_network_offset(skb) + skb->len;
if (fragsize > fq->q.max_size)
@@ -254,7 +252,7 @@ err:
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
struct sk_buff *prev_tail, struct net_device *dev)
{
- struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
+ struct net *net = fq->q.fqdir->net;
unsigned int nhoff;
void *reasm_data;
int payload_len;
@@ -401,23 +399,18 @@ static const struct inet6_protocol frag_protocol = {
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
- .data = &init_net.ipv6.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
- .data = &init_net.ipv6.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
- .extra2 = &init_net.ipv6.frags.high_thresh
},
{
.procname = "ip6frag_time",
- .data = &init_net.ipv6.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -449,12 +442,12 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
if (!table)
goto err_alloc;
- table[0].data = &net->ipv6.frags.high_thresh;
- table[0].extra1 = &net->ipv6.frags.low_thresh;
- table[1].data = &net->ipv6.frags.low_thresh;
- table[1].extra2 = &net->ipv6.frags.high_thresh;
- table[2].data = &net->ipv6.frags.timeout;
}
+ table[0].data = &net->ipv6.fqdir->high_thresh;
+ table[0].extra1 = &net->ipv6.fqdir->low_thresh;
+ table[1].data = &net->ipv6.fqdir->low_thresh;
+ table[1].extra2 = &net->ipv6.fqdir->high_thresh;
+ table[2].data = &net->ipv6.fqdir->timeout;
hdr = register_net_sysctl(net, "net/ipv6", table);
if (!hdr)
@@ -517,25 +510,24 @@ static int __net_init ipv6_frags_init_net(struct net *net)
{
int res;
- net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
- net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
- net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
- net->ipv6.frags.f = &ip6_frags;
-
- res = inet_frags_init_net(&net->ipv6.frags);
+ res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
if (res < 0)
return res;
+ net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
+ net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
+ net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
+
res = ip6_frags_ns_sysctl_register(net);
if (res < 0)
- inet_frags_exit_net(&net->ipv6.frags);
+ fqdir_exit(net->ipv6.fqdir);
return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
ip6_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&net->ipv6.frags);
+ fqdir_exit(net->ipv6.fqdir);
}
static struct pernet_operations ip6_frags_ops = {
@@ -591,8 +583,8 @@ err_protocol:
void ipv6_frag_exit(void)
{
- inet_frags_fini(&ip6_frags);
ip6_frags_sysctl_unregister();
unregister_pernet_subsys(&ip6_frags_ops);
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ inet_frags_fini(&ip6_frags);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 848e944f07df..fada5a13bcb2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -441,12 +441,12 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
+ if (fl6->mp_hash <= atomic_read(&match->fib6_nh->fib_nh_upper_bound))
goto out;
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
fib6_siblings) {
- const struct fib6_nh *nh = &sibling->fib6_nh;
+ const struct fib6_nh *nh = sibling->fib6_nh;
int nh_upper_bound;
nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
@@ -460,7 +460,7 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
out:
res->f6i = match;
- res->nh = &match->fib6_nh;
+ res->nh = match->fib6_nh;
}
/*
@@ -496,13 +496,13 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
struct fib6_nh *nh;
if (!oif && ipv6_addr_any(saddr)) {
- nh = &f6i->fib6_nh;
+ nh = f6i->fib6_nh;
if (!(nh->fib_nh_flags & RTNH_F_DEAD))
goto out;
}
for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
- nh = &spf6i->fib6_nh;
+ nh = spf6i->fib6_nh;
if (__rt6_device_match(net, nh, saddr, oif, flags)) {
res->f6i = spf6i;
goto out;
@@ -511,14 +511,14 @@ static void rt6_device_match(struct net *net, struct fib6_result *res,
if (oif && flags & RT6_LOOKUP_F_IFACE) {
res->f6i = net->ipv6.fib6_null_entry;
- nh = &res->f6i->fib6_nh;
+ nh = res->f6i->fib6_nh;
goto out;
}
- nh = &f6i->fib6_nh;
+ nh = f6i->fib6_nh;
if (nh->fib_nh_flags & RTNH_F_DEAD) {
res->f6i = net->ipv6.fib6_null_entry;
- nh = &res->f6i->fib6_nh;
+ nh = res->f6i->fib6_nh;
}
out:
res->nh = nh;
@@ -714,7 +714,7 @@ static void __find_rr_leaf(struct fib6_info *f6i_start,
if (fib6_check_expired(f6i))
continue;
- nh = &f6i->fib6_nh;
+ nh = f6i->fib6_nh;
if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
res->f6i = f6i;
res->nh = nh;
@@ -796,7 +796,7 @@ static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
out:
if (!res->f6i) {
res->f6i = net->ipv6.fib6_null_entry;
- res->nh = &res->f6i->fib6_nh;
+ res->nh = res->f6i->fib6_nh;
res->fib6_flags = res->f6i->fib6_flags;
res->fib6_type = res->f6i->fib6_type;
}
@@ -1270,7 +1270,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
{
struct rt6_info *pcpu_rt, **p;
- p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
pcpu_rt = *p;
if (pcpu_rt)
@@ -1291,7 +1291,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net,
}
dst_hold(&pcpu_rt->dst);
- p = this_cpu_ptr(res->f6i->rt6i_pcpu);
+ p = this_cpu_ptr(res->nh->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt);
BUG_ON(prev);
@@ -1461,25 +1461,74 @@ static unsigned int fib6_mtu(const struct fib6_result *res)
return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
}
+#define FIB6_EXCEPTION_BUCKET_FLUSHED 0x1UL
+
+/* used when the flushed bit is not relevant, only access to the bucket
+ * (ie., all bucket users except rt6_insert_exception);
+ *
+ * called under rcu lock; sometimes called with rt6_exception_lock held
+ */
+static
+struct rt6_exception_bucket *fib6_nh_get_excptn_bucket(const struct fib6_nh *nh,
+ spinlock_t *lock)
+{
+ struct rt6_exception_bucket *bucket;
+
+ if (lock)
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(lock));
+ else
+ bucket = rcu_dereference(nh->rt6i_exception_bucket);
+
+ /* remove bucket flushed bit if set */
+ if (bucket) {
+ unsigned long p = (unsigned long)bucket;
+
+ p &= ~FIB6_EXCEPTION_BUCKET_FLUSHED;
+ bucket = (struct rt6_exception_bucket *)p;
+ }
+
+ return bucket;
+}
+
+static bool fib6_nh_excptn_bucket_flushed(struct rt6_exception_bucket *bucket)
+{
+ unsigned long p = (unsigned long)bucket;
+
+ return !!(p & FIB6_EXCEPTION_BUCKET_FLUSHED);
+}
+
+/* called with rt6_exception_lock held */
+static void fib6_nh_excptn_bucket_set_flushed(struct fib6_nh *nh,
+ spinlock_t *lock)
+{
+ struct rt6_exception_bucket *bucket;
+ unsigned long p;
+
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(lock));
+
+ p = (unsigned long)bucket;
+ p |= FIB6_EXCEPTION_BUCKET_FLUSHED;
+ bucket = (struct rt6_exception_bucket *)p;
+ rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+}
+
static int rt6_insert_exception(struct rt6_info *nrt,
const struct fib6_result *res)
{
struct net *net = dev_net(nrt->dst.dev);
struct rt6_exception_bucket *bucket;
+ struct fib6_info *f6i = res->f6i;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *f6i = res->f6i;
+ struct fib6_nh *nh = res->nh;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
- if (f6i->exception_bucket_flushed) {
- err = -EINVAL;
- goto out;
- }
-
- bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = rcu_dereference_protected(nh->rt6i_exception_bucket,
+ lockdep_is_held(&rt6_exception_lock));
if (!bucket) {
bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
GFP_ATOMIC);
@@ -1487,7 +1536,10 @@ static int rt6_insert_exception(struct rt6_info *nrt,
err = -ENOMEM;
goto out;
}
- rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
+ rcu_assign_pointer(nh->rt6i_exception_bucket, bucket);
+ } else if (fib6_nh_excptn_bucket_flushed(bucket)) {
+ err = -EINVAL;
+ goto out;
}
#ifdef CONFIG_IPV6_SUBTREES
@@ -1542,7 +1594,7 @@ out:
return err;
}
-void rt6_flush_exceptions(struct fib6_info *rt)
+static void fib6_nh_flush_exceptions(struct fib6_nh *nh, struct fib6_info *from)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
@@ -1550,25 +1602,33 @@ void rt6_flush_exceptions(struct fib6_info *rt)
int i;
spin_lock_bh(&rt6_exception_lock);
- /* Prevent rt6_insert_exception() to recreate the bucket list */
- rt->exception_bucket_flushed = 1;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (!bucket)
goto out;
+ /* Prevent rt6_insert_exception() to recreate the bucket list */
+ if (!from)
+ fib6_nh_excptn_bucket_set_flushed(nh, &rt6_exception_lock);
+
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
- hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist)
- rt6_remove_exception(bucket, rt6_ex);
- WARN_ON_ONCE(bucket->depth);
+ hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) {
+ if (!from ||
+ rcu_access_pointer(rt6_ex->rt6i->from) == from)
+ rt6_remove_exception(bucket, rt6_ex);
+ }
+ WARN_ON_ONCE(!from && bucket->depth);
bucket++;
}
-
out:
spin_unlock_bh(&rt6_exception_lock);
}
+void rt6_flush_exceptions(struct fib6_info *f6i)
+{
+ fib6_nh_flush_exceptions(f6i->fib6_nh, f6i);
+}
+
/* Find cached rt in the hash table inside passed in rt
* Caller has to hold rcu_read_lock()
*/
@@ -1597,7 +1657,7 @@ static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
src_key = saddr;
find_ex:
#endif
- bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
+ bucket = fib6_nh_get_excptn_bucket(res->nh, NULL);
rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
@@ -1615,25 +1675,20 @@ find_ex:
}
/* Remove the passed in cached rt from the hash table that contains it */
-static int rt6_remove_exception_rt(struct rt6_info *rt)
+static int fib6_nh_remove_exception(const struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
{
+ const struct in6_addr *src_key = NULL;
struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
- struct fib6_info *from;
int err;
- from = rcu_dereference(rt->from);
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return -EINVAL;
-
- if (!rcu_access_pointer(from->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return -ENOENT;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(from->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
+
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
* and exception table is indexed by a hash of
@@ -1641,7 +1696,7 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
rt6_ex = __rt6_find_exception_spinlock(&bucket,
@@ -1658,23 +1713,29 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
return err;
}
-/* Find rt6_ex which contains the passed in rt cache and
- * refresh its stamp
- */
-static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+static int rt6_remove_exception_rt(struct rt6_info *rt)
{
- struct rt6_exception_bucket *bucket;
- struct in6_addr *src_key = NULL;
- struct rt6_exception *rt6_ex;
struct fib6_info *from;
- rcu_read_lock();
from = rcu_dereference(rt->from);
if (!from || !(rt->rt6i_flags & RTF_CACHE))
- goto unlock;
+ return -EINVAL;
- bucket = rcu_dereference(from->rt6i_exception_bucket);
+ return fib6_nh_remove_exception(from->fib6_nh,
+ from->fib6_src.plen, rt);
+}
+/* Find rt6_ex which contains the passed in rt cache and
+ * refresh its stamp
+ */
+static void fib6_nh_update_exception(const struct fib6_nh *nh, int plen,
+ const struct rt6_info *rt)
+{
+ const struct in6_addr *src_key = NULL;
+ struct rt6_exception_bucket *bucket;
+ struct rt6_exception *rt6_ex;
+
+ bucket = fib6_nh_get_excptn_bucket(nh, NULL);
#ifdef CONFIG_IPV6_SUBTREES
/* rt6i_src.plen != 0 indicates 'from' is in subtree
* and exception table is indexed by a hash of
@@ -1682,15 +1743,25 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
* Otherwise, the exception table is indexed by
* a hash of only rt6i_dst.
*/
- if (from->fib6_src.plen)
+ if (plen)
src_key = &rt->rt6i_src.addr;
#endif
- rt6_ex = __rt6_find_exception_rcu(&bucket,
- &rt->rt6i_dst.addr,
- src_key);
+ rt6_ex = __rt6_find_exception_rcu(&bucket, &rt->rt6i_dst.addr, src_key);
if (rt6_ex)
rt6_ex->stamp = jiffies;
+}
+static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
+{
+ struct fib6_info *from;
+
+ rcu_read_lock();
+
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
+ fib6_nh_update_exception(from->fib6_nh, from->fib6_src.plen, rt);
unlock:
rcu_read_unlock();
}
@@ -1718,15 +1789,13 @@ static bool rt6_mtu_change_route_allowed(struct inet6_dev *idev,
}
static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
- struct fib6_info *rt, int mtu)
+ const struct fib6_nh *nh, int mtu)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
int i;
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (!bucket)
return;
@@ -1748,21 +1817,19 @@ static void rt6_exceptions_update_pmtu(struct inet6_dev *idev,
#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE)
-static void rt6_exceptions_clean_tohost(struct fib6_info *rt,
- struct in6_addr *gateway)
+static void fib6_nh_exceptions_clean_tohost(const struct fib6_nh *nh,
+ const struct in6_addr *gateway)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return;
spin_lock_bh(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (bucket) {
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1827,23 +1894,21 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
gc_args->more++;
}
-void rt6_age_exceptions(struct fib6_info *rt,
- struct fib6_gc_args *gc_args,
- unsigned long now)
+static void fib6_nh_age_exceptions(const struct fib6_nh *nh,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
{
struct rt6_exception_bucket *bucket;
struct rt6_exception *rt6_ex;
struct hlist_node *tmp;
int i;
- if (!rcu_access_pointer(rt->rt6i_exception_bucket))
+ if (!rcu_access_pointer(nh->rt6i_exception_bucket))
return;
rcu_read_lock_bh();
spin_lock(&rt6_exception_lock);
- bucket = rcu_dereference_protected(rt->rt6i_exception_bucket,
- lockdep_is_held(&rt6_exception_lock));
-
+ bucket = fib6_nh_get_excptn_bucket(nh, &rt6_exception_lock);
if (bucket) {
for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) {
hlist_for_each_entry_safe(rt6_ex, tmp,
@@ -1858,6 +1923,13 @@ void rt6_age_exceptions(struct fib6_info *rt,
rcu_read_unlock_bh();
}
+void rt6_age_exceptions(struct fib6_info *f6i,
+ struct fib6_gc_args *gc_args,
+ unsigned long now)
+{
+ fib6_nh_age_exceptions(f6i->fib6_nh, gc_args, now);
+}
+
/* must be called with rcu lock held */
int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
struct flowi6 *fl6, struct fib6_result *res, int strict)
@@ -2384,7 +2456,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
rcu_read_unlock();
return;
}
- res.nh = &res.f6i->fib6_nh;
+ res.nh = res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
@@ -2533,7 +2605,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
restart:
for_each_fib6_node_rt_rcu(fn) {
res.f6i = rt;
- res.nh = &rt->fib6_nh;
+ res.nh = rt->fib6_nh;
if (fib6_check_expired(rt))
continue;
@@ -2557,7 +2629,7 @@ restart:
}
res.f6i = rt;
- res.nh = &rt->fib6_nh;
+ res.nh = rt->fib6_nh;
out:
if (ret) {
ip6_hold_safe(net, &ret);
@@ -3074,6 +3146,12 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
!netif_carrier_ok(dev))
fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
+ fib6_nh->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, gfp_flags);
+ if (!fib6_nh->rt6i_pcpu) {
+ err = -ENOMEM;
+ goto out;
+ }
+
err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
cfg->fc_encap_type, cfg, gfp_flags, extack);
if (err)
@@ -3098,6 +3176,38 @@ out:
void fib6_nh_release(struct fib6_nh *fib6_nh)
{
+ struct rt6_exception_bucket *bucket;
+
+ rcu_read_lock();
+
+ fib6_nh_flush_exceptions(fib6_nh, NULL);
+ bucket = fib6_nh_get_excptn_bucket(fib6_nh, NULL);
+ if (bucket) {
+ rcu_assign_pointer(fib6_nh->rt6i_exception_bucket, NULL);
+ kfree(bucket);
+ }
+
+ rcu_read_unlock();
+
+ if (fib6_nh->rt6i_pcpu) {
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct rt6_info **ppcpu_rt;
+ struct rt6_info *pcpu_rt;
+
+ ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu);
+ pcpu_rt = *ppcpu_rt;
+ if (pcpu_rt) {
+ dst_dev_put(&pcpu_rt->dst);
+ dst_release(&pcpu_rt->dst);
+ *ppcpu_rt = NULL;
+ }
+ }
+
+ free_percpu(fib6_nh->rt6i_pcpu);
+ }
+
fib_nh_common_release(&fib6_nh->nh_common);
}
@@ -3160,7 +3270,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
err = -ENOMEM;
- rt = fib6_info_alloc(gfp_flags);
+ rt = fib6_info_alloc(gfp_flags, true);
if (!rt)
goto out;
@@ -3200,7 +3310,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
rt->fib6_src.plen = cfg->fc_src_len;
#endif
- err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
+ err = fib6_nh_init(net, rt->fib6_nh, cfg, gfp_flags, extack);
if (err)
goto out;
@@ -3208,7 +3318,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
* they would result in kernel looping; promote them to reject routes
*/
addr_type = ipv6_addr_type(&cfg->fc_dst);
- if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
+ if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh->fib_nh_dev, addr_type))
rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
@@ -3326,7 +3436,7 @@ out_put:
return err;
}
-static int ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
+static int __ip6_del_cached_rt(struct rt6_info *rt, struct fib6_config *cfg)
{
int rc = -ESRCH;
@@ -3342,10 +3452,25 @@ out:
return rc;
}
+static int ip6_del_cached_rt(struct fib6_config *cfg, struct fib6_info *rt,
+ struct fib6_nh *nh)
+{
+ struct fib6_result res = {
+ .f6i = rt,
+ .nh = nh,
+ };
+ struct rt6_info *rt_cache;
+
+ rt_cache = rt6_find_cached_rt(&res, &cfg->fc_dst, &cfg->fc_src);
+ if (rt_cache)
+ return __ip6_del_cached_rt(rt_cache, cfg);
+
+ return 0;
+}
+
static int ip6_route_del(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
- struct rt6_info *rt_cache;
struct fib6_table *table;
struct fib6_info *rt;
struct fib6_node *fn;
@@ -3368,26 +3493,18 @@ static int ip6_route_del(struct fib6_config *cfg,
for_each_fib6_node_rt_rcu(fn) {
struct fib6_nh *nh;
+ nh = rt->fib6_nh;
if (cfg->fc_flags & RTF_CACHE) {
- struct fib6_result res = {
- .f6i = rt,
- };
int rc;
- rt_cache = rt6_find_cached_rt(&res,
- &cfg->fc_dst,
- &cfg->fc_src);
- if (rt_cache) {
- rc = ip6_del_cached_rt(rt_cache, cfg);
- if (rc != -ESRCH) {
- rcu_read_unlock();
- return rc;
- }
+ rc = ip6_del_cached_rt(cfg, rt, nh);
+ if (rc != -ESRCH) {
+ rcu_read_unlock();
+ return rc;
}
continue;
}
- nh = &rt->fib6_nh;
if (cfg->fc_ifindex &&
(!nh->fib_nh_dev ||
nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
@@ -3509,7 +3626,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (!res.f6i)
goto out;
- res.nh = &res.f6i->fib6_nh;
+ res.nh = res.f6i->fib6_nh;
res.fib6_flags = res.f6i->fib6_flags;
res.fib6_type = res.f6i->fib6_type;
nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
@@ -3561,12 +3678,12 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
goto out;
for_each_fib6_node_rt_rcu(fn) {
- if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
+ if (rt->fib6_nh->fib_nh_dev->ifindex != ifindex)
continue;
if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
- !rt->fib6_nh.fib_nh_gw_family)
+ !rt->fib6_nh->fib_nh_gw_family)
continue;
- if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
+ if (!ipv6_addr_equal(&rt->fib6_nh->fib_nh_gw6, gwaddr))
continue;
if (!fib6_info_hold_safe(rt))
continue;
@@ -3624,7 +3741,7 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
rcu_read_lock();
for_each_fib6_node_rt_rcu(&table->tb6_root) {
- struct fib6_nh *nh = &rt->fib6_nh;
+ struct fib6_nh *nh = rt->fib6_nh;
if (dev == nh->fib_nh_dev &&
((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
@@ -3876,7 +3993,7 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
struct net *net = ((struct arg_dev_net_ip *)arg)->net;
struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
+ if (((void *)rt->fib6_nh->fib_nh_dev == dev || !dev) &&
rt != net->ipv6.fib6_null_entry &&
ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
spin_lock_bh(&rt6_exception_lock);
@@ -3904,18 +4021,17 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)
static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
{
struct in6_addr *gateway = (struct in6_addr *)arg;
+ struct fib6_nh *nh = rt->fib6_nh;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
- rt->fib6_nh.fib_nh_gw_family &&
- ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
+ nh->fib_nh_gw_family && ipv6_addr_equal(gateway, &nh->fib_nh_gw6))
return -1;
- }
/* Further clean up cached routes in exception table.
* This is needed because cached route may have a different
* gateway than its 'parent' in the case of an ip redirect.
*/
- rt6_exceptions_clean_tohost(rt, gateway);
+ fib6_nh_exceptions_clean_tohost(nh, gateway);
return 0;
}
@@ -3955,9 +4071,9 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
static bool rt6_is_dead(const struct fib6_info *rt)
{
- if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
- (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
- ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
+ if (rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD ||
+ (rt->fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN &&
+ ip6_ignore_linkdown(rt->fib6_nh->fib_nh_dev)))
return true;
return false;
@@ -3969,11 +4085,11 @@ static int rt6_multipath_total_weight(const struct fib6_info *rt)
int total = 0;
if (!rt6_is_dead(rt))
- total += rt->fib6_nh.fib_nh_weight;
+ total += rt->fib6_nh->fib_nh_weight;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
if (!rt6_is_dead(iter))
- total += iter->fib6_nh.fib_nh_weight;
+ total += iter->fib6_nh->fib_nh_weight;
}
return total;
@@ -3984,11 +4100,11 @@ static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total)
int upper_bound = -1;
if (!rt6_is_dead(rt)) {
- *weight += rt->fib6_nh.fib_nh_weight;
+ *weight += rt->fib6_nh->fib_nh_weight;
upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
total) - 1;
}
- atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
+ atomic_set(&rt->fib6_nh->fib_nh_upper_bound, upper_bound);
}
static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
@@ -4032,8 +4148,8 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg)
struct net *net = dev_net(arg->dev);
if (rt != net->ipv6.fib6_null_entry &&
- rt->fib6_nh.fib_nh_dev == arg->dev) {
- rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
+ rt->fib6_nh->fib_nh_dev == arg->dev) {
+ rt->fib6_nh->fib_nh_flags &= ~arg->nh_flags;
fib6_update_sernum_upto_root(net, rt);
rt6_multipath_rebalance(rt);
}
@@ -4061,10 +4177,10 @@ static bool rt6_multipath_uses_dev(const struct fib6_info *rt,
{
struct fib6_info *iter;
- if (rt->fib6_nh.fib_nh_dev == dev)
+ if (rt->fib6_nh->fib_nh_dev == dev)
return true;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == dev)
+ if (iter->fib6_nh->fib_nh_dev == dev)
return true;
return false;
@@ -4085,12 +4201,12 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt,
struct fib6_info *iter;
unsigned int dead = 0;
- if (rt->fib6_nh.fib_nh_dev == down_dev ||
- rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+ if (rt->fib6_nh->fib_nh_dev == down_dev ||
+ rt->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
dead++;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == down_dev ||
- iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
+ if (iter->fib6_nh->fib_nh_dev == down_dev ||
+ iter->fib6_nh->fib_nh_flags & RTNH_F_DEAD)
dead++;
return dead;
@@ -4102,11 +4218,11 @@ static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
{
struct fib6_info *iter;
- if (rt->fib6_nh.fib_nh_dev == dev)
- rt->fib6_nh.fib_nh_flags |= nh_flags;
+ if (rt->fib6_nh->fib_nh_dev == dev)
+ rt->fib6_nh->fib_nh_flags |= nh_flags;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
- if (iter->fib6_nh.fib_nh_dev == dev)
- iter->fib6_nh.fib_nh_flags |= nh_flags;
+ if (iter->fib6_nh->fib_nh_dev == dev)
+ iter->fib6_nh->fib_nh_flags |= nh_flags;
}
/* called with write lock held for table with rt */
@@ -4121,12 +4237,12 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
switch (arg->event) {
case NETDEV_UNREGISTER:
- return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
case NETDEV_DOWN:
if (rt->should_flush)
return -1;
if (!rt->fib6_nsiblings)
- return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
+ return rt->fib6_nh->fib_nh_dev == dev ? -1 : 0;
if (rt6_multipath_uses_dev(rt, dev)) {
unsigned int count;
@@ -4142,10 +4258,10 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg)
}
return -2;
case NETDEV_CHANGE:
- if (rt->fib6_nh.fib_nh_dev != dev ||
+ if (rt->fib6_nh->fib_nh_dev != dev ||
rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
break;
- rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
+ rt->fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
rt6_multipath_rebalance(rt);
break;
}
@@ -4179,9 +4295,36 @@ void rt6_disable_ip(struct net_device *dev, unsigned long event)
struct rt6_mtu_change_arg {
struct net_device *dev;
unsigned int mtu;
+ struct fib6_info *f6i;
};
-static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
+static int fib6_nh_mtu_change(struct fib6_nh *nh, void *_arg)
+{
+ struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *)_arg;
+ struct fib6_info *f6i = arg->f6i;
+
+ /* For administrative MTU increase, there is no way to discover
+ * IPv6 PMTU increase, so PMTU increase should be updated here.
+ * Since RFC 1981 doesn't include administrative MTU increase
+ * update PMTU increase is a MUST. (i.e. jumbo frame)
+ */
+ if (nh->fib_nh_dev == arg->dev) {
+ struct inet6_dev *idev = __in6_dev_get(arg->dev);
+ u32 mtu = f6i->fib6_pmtu;
+
+ if (mtu >= arg->mtu ||
+ (mtu < arg->mtu && mtu == idev->cnf.mtu6))
+ fib6_metric_set(f6i, RTAX_MTU, arg->mtu);
+
+ spin_lock_bh(&rt6_exception_lock);
+ rt6_exceptions_update_pmtu(idev, nh, arg->mtu);
+ spin_unlock_bh(&rt6_exception_lock);
+ }
+
+ return 0;
+}
+
+static int rt6_mtu_change_route(struct fib6_info *f6i, void *p_arg)
{
struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
struct inet6_dev *idev;
@@ -4196,24 +4339,11 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
if (!idev)
return 0;
- /* For administrative MTU increase, there is no way to discover
- IPv6 PMTU increase, so PMTU increase should be updated here.
- Since RFC 1981 doesn't include administrative MTU increase
- update PMTU increase is a MUST. (i.e. jumbo frame)
- */
- if (rt->fib6_nh.fib_nh_dev == arg->dev &&
- !fib6_metric_locked(rt, RTAX_MTU)) {
- u32 mtu = rt->fib6_pmtu;
-
- if (mtu >= arg->mtu ||
- (mtu < arg->mtu && mtu == idev->cnf.mtu6))
- fib6_metric_set(rt, RTAX_MTU, arg->mtu);
+ if (fib6_metric_locked(f6i, RTAX_MTU))
+ return 0;
- spin_lock_bh(&rt6_exception_lock);
- rt6_exceptions_update_pmtu(idev, rt, arg->mtu);
- spin_unlock_bh(&rt6_exception_lock);
- }
- return 0;
+ arg->f6i = f6i;
+ return fib6_nh_mtu_change(f6i->fib6_nh, arg);
}
void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
@@ -4227,6 +4357,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)
}
static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
+ [RTA_UNSPEC] = { .strict_start_type = RTA_DPORT + 1 },
[RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
[RTA_PREFSRC] = { .len = sizeof(struct in6_addr) },
[RTA_OIF] = { .type = NLA_U32 },
@@ -4492,7 +4623,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup;
}
- rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
+ rt->fib6_nh->fib_nh_weight = rtnh->rtnh_hops + 1;
err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
rt, &r_cfg);
@@ -4659,7 +4790,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
+ NLA_ALIGN(sizeof(struct rtnexthop))
+ nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
+ + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws);
nexthop_len *= rt->fib6_nsiblings;
}
@@ -4677,7 +4808,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt)
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+ nla_total_size(1) /* RTA_PREF */
- + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
+ + lwtunnel_get_encap_size(rt->fib6_nh->fib_nh_lws)
+ nexthop_len;
}
@@ -4797,14 +4928,14 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (!mp)
goto nla_put_failure;
- if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
- rt->fib6_nh.fib_nh_weight) < 0)
+ if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common,
+ rt->fib6_nh->fib_nh_weight) < 0)
goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings) {
- if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
- sibling->fib6_nh.fib_nh_weight) < 0)
+ if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common,
+ sibling->fib6_nh->fib_nh_weight) < 0)
goto nla_put_failure;
}
@@ -4812,7 +4943,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
} else {
unsigned char nh_flags = 0;
- if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
+ if (fib_nexthop_info(skb, &rt->fib6_nh->nh_common,
&nh_flags, false) < 0)
goto nla_put_failure;
@@ -4842,7 +4973,7 @@ nla_put_failure:
static bool fib6_info_uses_dev(const struct fib6_info *f6i,
const struct net_device *dev)
{
- if (f6i->fib6_nh.fib_nh_dev == dev)
+ if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
if (f6i->fib6_nsiblings) {
@@ -4850,7 +4981,7 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
list_for_each_entry_safe(sibling, next_sibling,
&f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh.fib_nh_dev == dev)
+ if (sibling->fib6_nh->fib_nh_dev == dev)
return true;
}
}
@@ -5129,6 +5260,38 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
}
+void fib6_rt_update(struct net *net, struct fib6_info *rt,
+ struct nl_info *info)
+{
+ u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+
+ /* call_fib6_entry_notifiers will be removed when in-kernel notifier
+ * is implemented and supported for nexthop objects
+ */
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
+
+ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
+ if (!skb)
+ goto errout;
+
+ err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
+ RTM_NEWROUTE, info->portid, seq, NLM_F_REPLACE);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(skb);
+ goto errout;
+ }
+ rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
+ info->nlh, gfp_any());
+ return;
+errout:
+ if (err < 0)
+ rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
+}
+
static int ip6_route_dev_notify(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -5139,7 +5302,7 @@ static int ip6_route_dev_notify(struct notifier_block *this,
return NOTIFY_OK;
if (event == NETDEV_REGISTER) {
- net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
+ net->ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = dev;
net->ipv6.ip6_null_entry->dst.dev = dev;
net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@ -5333,11 +5496,11 @@ static int __net_init ip6_route_net_init(struct net *net)
if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
goto out_ip6_dst_ops;
- net->ipv6.fib6_null_entry = kmemdup(&fib6_null_entry_template,
- sizeof(*net->ipv6.fib6_null_entry),
- GFP_KERNEL);
+ net->ipv6.fib6_null_entry = fib6_info_alloc(GFP_KERNEL, true);
if (!net->ipv6.fib6_null_entry)
goto out_ip6_dst_entries;
+ memcpy(net->ipv6.fib6_null_entry, &fib6_null_entry_template,
+ sizeof(*net->ipv6.fib6_null_entry));
net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
sizeof(*net->ipv6.ip6_null_entry),
@@ -5474,7 +5637,7 @@ void __init ip6_route_init_special_entries(void)
/* Registering of the loopback is done before this portion of code,
* the loopback reference in rt6_info will not be taken, do it
* manually for init_net */
- init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
+ init_net.ipv6.fib6_null_entry->fib6_nh->fib_nh_dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 37bb530d848f..a0560d175a7f 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -16,6 +16,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_bridge.h>
#include <net/netfilter/nf_log.h>
#include <linux/ip.h>
@@ -120,10 +121,8 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
};
EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
-static unsigned int nf_confirm(struct sk_buff *skb,
- unsigned int protoff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
+unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
const struct nf_conn_help *help;
@@ -154,6 +153,7 @@ static unsigned int nf_confirm(struct sk_buff *skb,
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
+EXPORT_SYMBOL_GPL(nf_confirm);
static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb,
@@ -442,12 +442,14 @@ static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
return 0;
}
+static struct nf_ct_bridge_info *nf_ct_bridge_info;
+
static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
{
struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
- bool fixup_needed = false;
+ bool fixup_needed = false, retry = true;
int err = 0;
-
+retry:
mutex_lock(&nf_ct_proto_mutex);
switch (nfproto) {
@@ -487,6 +489,32 @@ static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
fixup_needed = true;
break;
#endif
+ case NFPROTO_BRIDGE:
+ if (!nf_ct_bridge_info) {
+ if (!retry) {
+ err = -EPROTO;
+ goto out_unlock;
+ }
+ mutex_unlock(&nf_ct_proto_mutex);
+ request_module("nf_conntrack_bridge");
+ retry = false;
+ goto retry;
+ }
+ if (!try_module_get(nf_ct_bridge_info->me)) {
+ err = -EPROTO;
+ goto out_unlock;
+ }
+ cnet->users_bridge++;
+ if (cnet->users_bridge > 1)
+ goto out_unlock;
+
+ err = nf_register_net_hooks(net, nf_ct_bridge_info->ops,
+ nf_ct_bridge_info->ops_size);
+ if (err)
+ cnet->users_bridge = 0;
+ else
+ fixup_needed = true;
+ break;
default:
err = -EPROTO;
break;
@@ -519,47 +547,99 @@ static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
ARRAY_SIZE(ipv6_conntrack_ops));
break;
#endif
+ case NFPROTO_BRIDGE:
+ if (!nf_ct_bridge_info)
+ break;
+ if (cnet->users_bridge && (--cnet->users_bridge == 0))
+ nf_unregister_net_hooks(net, nf_ct_bridge_info->ops,
+ nf_ct_bridge_info->ops_size);
+
+ module_put(nf_ct_bridge_info->me);
+ break;
}
-
mutex_unlock(&nf_ct_proto_mutex);
}
-int nf_ct_netns_get(struct net *net, u8 nfproto)
+static int nf_ct_netns_inet_get(struct net *net)
{
int err;
- if (nfproto == NFPROTO_INET) {
- err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
- if (err < 0)
- goto err1;
- err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
- if (err < 0)
- goto err2;
- } else {
- err = nf_ct_netns_do_get(net, nfproto);
- if (err < 0)
- goto err1;
- }
- return 0;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
+ if (err < 0)
+ goto err1;
+ err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
+ if (err < 0)
+ goto err2;
+ return err;
err2:
nf_ct_netns_put(net, NFPROTO_IPV4);
err1:
return err;
}
+
+int nf_ct_netns_get(struct net *net, u8 nfproto)
+{
+ int err;
+
+ switch (nfproto) {
+ case NFPROTO_INET:
+ err = nf_ct_netns_inet_get(net);
+ break;
+ case NFPROTO_BRIDGE:
+ err = nf_ct_netns_do_get(net, NFPROTO_BRIDGE);
+ if (err < 0)
+ return err;
+
+ err = nf_ct_netns_inet_get(net);
+ if (err < 0) {
+ nf_ct_netns_put(net, NFPROTO_BRIDGE);
+ return err;
+ }
+ break;
+ default:
+ err = nf_ct_netns_do_get(net, nfproto);
+ break;
+ }
+ return err;
+}
EXPORT_SYMBOL_GPL(nf_ct_netns_get);
void nf_ct_netns_put(struct net *net, uint8_t nfproto)
{
- if (nfproto == NFPROTO_INET) {
+ switch (nfproto) {
+ case NFPROTO_BRIDGE:
+ nf_ct_netns_do_put(net, NFPROTO_BRIDGE);
+ /* fall through */
+ case NFPROTO_INET:
nf_ct_netns_do_put(net, NFPROTO_IPV4);
nf_ct_netns_do_put(net, NFPROTO_IPV6);
- } else {
+ break;
+ default:
nf_ct_netns_do_put(net, nfproto);
+ break;
}
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
+void nf_ct_bridge_register(struct nf_ct_bridge_info *info)
+{
+ WARN_ON(nf_ct_bridge_info);
+ mutex_lock(&nf_ct_proto_mutex);
+ nf_ct_bridge_info = info;
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_register);
+
+void nf_ct_bridge_unregister(struct nf_ct_bridge_info *info)
+{
+ WARN_ON(!nf_ct_bridge_info);
+ mutex_lock(&nf_ct_proto_mutex);
+ nf_ct_bridge_info = NULL;
+ mutex_unlock(&nf_ct_proto_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_ct_bridge_unregister);
+
int nf_conntrack_proto_init(void)
{
int ret;
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2c72d95c3050..d104f7ee26c7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -877,6 +877,23 @@ config NET_ACT_CONNMARK
To compile this code as a module, choose M here: the
module will be called act_connmark.
+config NET_ACT_CTINFO
+ tristate "Netfilter Connection Mark Actions"
+ depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
+ depends on NF_CONNTRACK && NF_CONNTRACK_MARK
+ help
+ Say Y here to allow transfer of a connmark stored information.
+ Current actions transfer connmark stored DSCP into
+ ipv4/v6 diffserv and/or to transfer connmark to packet
+ mark. Both are useful for restoring egress based marks
+ back onto ingress connections for qdisc priority mapping
+ purposes.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called act_ctinfo.
+
config NET_ACT_SKBMOD
tristate "skb data modification action"
depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8a40431d7b5c..d54bfcbd7981 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -21,6 +21,7 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o
obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
+obj-$(CONFIG_NET_ACT_CTINFO) += act_ctinfo.o
obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o
obj-$(CONFIG_NET_ACT_IFE) += act_ife.o
obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
new file mode 100644
index 000000000000..e78b60e47c0f
--- /dev/null
+++ b/net/sched/act_ctinfo.c
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* net/sched/act_ctinfo.c netfilter ctinfo connmark actions
+ *
+ * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/act_api.h>
+#include <net/pkt_cls.h>
+#include <uapi/linux/tc_act/tc_ctinfo.h>
+#include <net/tc_act/tc_ctinfo.h>
+
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static struct tc_action_ops act_ctinfo_ops;
+static unsigned int ctinfo_net_id;
+
+static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+ struct tcf_ctinfo_params *cp,
+ struct sk_buff *skb, int wlen, int proto)
+{
+ u8 dscp, newdscp;
+
+ newdscp = (((ct->mark & cp->dscpmask) >> cp->dscpmaskshift) << 2) &
+ ~INET_ECN_MASK;
+
+ switch (proto) {
+ case NFPROTO_IPV4:
+ dscp = ipv4_get_dsfield(ip_hdr(skb)) & ~INET_ECN_MASK;
+ if (dscp != newdscp) {
+ if (likely(!skb_try_make_writable(skb, wlen))) {
+ ipv4_change_dsfield(ip_hdr(skb),
+ INET_ECN_MASK,
+ newdscp);
+ ca->stats_dscp_set++;
+ } else {
+ ca->stats_dscp_error++;
+ }
+ }
+ break;
+ case NFPROTO_IPV6:
+ dscp = ipv6_get_dsfield(ipv6_hdr(skb)) & ~INET_ECN_MASK;
+ if (dscp != newdscp) {
+ if (likely(!skb_try_make_writable(skb, wlen))) {
+ ipv6_change_dsfield(ipv6_hdr(skb),
+ INET_ECN_MASK,
+ newdscp);
+ ca->stats_dscp_set++;
+ } else {
+ ca->stats_dscp_error++;
+ }
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca,
+ struct tcf_ctinfo_params *cp,
+ struct sk_buff *skb)
+{
+ ca->stats_cpmark_set++;
+ skb->mark = ct->mark & cp->cpmarkmask;
+}
+
+static int tcf_ctinfo_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ const struct nf_conntrack_tuple_hash *thash = NULL;
+ struct tcf_ctinfo *ca = to_ctinfo(a);
+ struct nf_conntrack_tuple tuple;
+ struct nf_conntrack_zone zone;
+ enum ip_conntrack_info ctinfo;
+ struct tcf_ctinfo_params *cp;
+ struct nf_conn *ct;
+ int proto, wlen;
+ int action;
+
+ cp = rcu_dereference_bh(ca->params);
+
+ tcf_lastuse_update(&ca->tcf_tm);
+ bstats_update(&ca->tcf_bstats, skb);
+ action = READ_ONCE(ca->tcf_action);
+
+ wlen = skb_network_offset(skb);
+ if (tc_skb_protocol(skb) == htons(ETH_P_IP)) {
+ wlen += sizeof(struct iphdr);
+ if (!pskb_may_pull(skb, wlen))
+ goto out;
+
+ proto = NFPROTO_IPV4;
+ } else if (tc_skb_protocol(skb) == htons(ETH_P_IPV6)) {
+ wlen += sizeof(struct ipv6hdr);
+ if (!pskb_may_pull(skb, wlen))
+ goto out;
+
+ proto = NFPROTO_IPV6;
+ } else {
+ goto out;
+ }
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct) { /* look harder, usually ingress */
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
+ proto, cp->net, &tuple))
+ goto out;
+ zone.id = cp->zone;
+ zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+ thash = nf_conntrack_find_get(cp->net, &zone, &tuple);
+ if (!thash)
+ goto out;
+
+ ct = nf_ct_tuplehash_to_ctrack(thash);
+ }
+
+ if (cp->mode & CTINFO_MODE_DSCP)
+ if (!cp->dscpstatemask || (ct->mark & cp->dscpstatemask))
+ tcf_ctinfo_dscp_set(ct, ca, cp, skb, wlen, proto);
+
+ if (cp->mode & CTINFO_MODE_CPMARK)
+ tcf_ctinfo_cpmark_set(ct, ca, cp, skb);
+
+ if (thash)
+ nf_ct_put(ct);
+out:
+ return action;
+}
+
+static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = {
+ [TCA_CTINFO_ACT] = { .len = sizeof(struct
+ tc_ctinfo) },
+ [TCA_CTINFO_ZONE] = { .type = NLA_U16 },
+ [TCA_CTINFO_PARMS_DSCP_MASK] = { .type = NLA_U32 },
+ [TCA_CTINFO_PARMS_DSCP_STATEMASK] = { .type = NLA_U32 },
+ [TCA_CTINFO_PARMS_CPMARK_MASK] = { .type = NLA_U32 },
+};
+
+static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
+ struct nlattr *est, struct tc_action **a,
+ int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+ struct nlattr *tb[TCA_CTINFO_MAX + 1];
+ struct tcf_ctinfo_params *cp_new;
+ struct tcf_chain *goto_ch = NULL;
+ u32 dscpmask = 0, dscpstatemask;
+ struct tc_ctinfo *actparm;
+ struct tcf_ctinfo *ci;
+ u8 dscpmaskshift;
+ int ret = 0, err;
+
+ if (!nla)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_CTINFO_MAX, nla, ctinfo_policy, NULL);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_CTINFO_ACT])
+ return -EINVAL;
+ actparm = nla_data(tb[TCA_CTINFO_ACT]);
+
+ /* do some basic validation here before dynamically allocating things */
+ /* that we would otherwise have to clean up. */
+ if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
+ dscpmask = nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_MASK]);
+ /* need contiguous 6 bit mask */
+ dscpmaskshift = dscpmask ? __ffs(dscpmask) : 0;
+ if ((~0 & (dscpmask >> dscpmaskshift)) != 0x3f)
+ return -EINVAL;
+ dscpstatemask = tb[TCA_CTINFO_PARMS_DSCP_STATEMASK] ?
+ nla_get_u32(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) : 0;
+ /* mask & statemask must not overlap */
+ if (dscpmask & dscpstatemask)
+ return -EINVAL;
+ }
+
+ /* done the validation:now to the actual action allocation */
+ err = tcf_idr_check_alloc(tn, &actparm->index, a, bind);
+ if (!err) {
+ ret = tcf_idr_create(tn, actparm->index, est, a,
+ &act_ctinfo_ops, bind, false);
+ if (ret) {
+ tcf_idr_cleanup(tn, actparm->index);
+ return ret;
+ }
+ } else if (err > 0) {
+ if (bind) /* don't override defaults */
+ return 0;
+ if (!ovr) {
+ tcf_idr_release(*a, bind);
+ return -EEXIST;
+ }
+ } else {
+ return err;
+ }
+
+ err = tcf_action_check_ctrlact(actparm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ ci = to_ctinfo(*a);
+
+ cp_new = kzalloc(sizeof(*cp_new), GFP_KERNEL);
+ if (unlikely(!cp_new)) {
+ err = -ENOMEM;
+ goto put_chain;
+ }
+
+ cp_new->net = net;
+ cp_new->zone = tb[TCA_CTINFO_ZONE] ?
+ nla_get_u16(tb[TCA_CTINFO_ZONE]) : 0;
+ if (dscpmask) {
+ cp_new->dscpmask = dscpmask;
+ cp_new->dscpmaskshift = dscpmaskshift;
+ cp_new->dscpstatemask = dscpstatemask;
+ cp_new->mode |= CTINFO_MODE_DSCP;
+ }
+
+ if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) {
+ cp_new->cpmarkmask =
+ nla_get_u32(tb[TCA_CTINFO_PARMS_CPMARK_MASK]);
+ cp_new->mode |= CTINFO_MODE_CPMARK;
+ }
+
+ spin_lock_bh(&ci->tcf_lock);
+ goto_ch = tcf_action_set_ctrlact(*a, actparm->action, goto_ch);
+ rcu_swap_protected(ci->params, cp_new,
+ lockdep_is_held(&ci->tcf_lock));
+ spin_unlock_bh(&ci->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+ if (cp_new)
+ kfree_rcu(cp_new, rcu);
+
+ if (ret == ACT_P_CREATED)
+ tcf_idr_insert(tn, *a);
+
+ return ret;
+
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
+}
+
+static int tcf_ctinfo_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ struct tcf_ctinfo *ci = to_ctinfo(a);
+ struct tc_ctinfo opt = {
+ .index = ci->tcf_index,
+ .refcnt = refcount_read(&ci->tcf_refcnt) - ref,
+ .bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
+ };
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_ctinfo_params *cp;
+ struct tcf_t t;
+
+ spin_lock_bh(&ci->tcf_lock);
+ cp = rcu_dereference_protected(ci->params,
+ lockdep_is_held(&ci->tcf_lock));
+
+ tcf_tm_dump(&t, &ci->tcf_tm);
+ if (nla_put_64bit(skb, TCA_CTINFO_TM, sizeof(t), &t, TCA_CTINFO_PAD))
+ goto nla_put_failure;
+
+ opt.action = ci->tcf_action;
+ if (nla_put(skb, TCA_CTINFO_ACT, sizeof(opt), &opt))
+ goto nla_put_failure;
+
+ if (nla_put_u16(skb, TCA_CTINFO_ZONE, cp->zone))
+ goto nla_put_failure;
+
+ if (cp->mode & CTINFO_MODE_DSCP) {
+ if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_MASK,
+ cp->dscpmask))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CTINFO_PARMS_DSCP_STATEMASK,
+ cp->dscpstatemask))
+ goto nla_put_failure;
+ }
+
+ if (cp->mode & CTINFO_MODE_CPMARK) {
+ if (nla_put_u32(skb, TCA_CTINFO_PARMS_CPMARK_MASK,
+ cp->cpmarkmask))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_SET,
+ ci->stats_dscp_set, TCA_CTINFO_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_DSCP_ERROR,
+ ci->stats_dscp_error, TCA_CTINFO_PAD))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(skb, TCA_CTINFO_STATS_CPMARK_SET,
+ ci->stats_cpmark_set, TCA_CTINFO_PAD))
+ goto nla_put_failure;
+
+ spin_unlock_bh(&ci->tcf_lock);
+ return skb->len;
+
+nla_put_failure:
+ spin_unlock_bh(&ci->tcf_lock);
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb,
+ struct netlink_callback *cb, int type,
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+ return tcf_generic_walker(tn, skb, cb, type, ops, extack);
+}
+
+static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+ return tcf_idr_search(tn, a, index);
+}
+
+static struct tc_action_ops act_ctinfo_ops = {
+ .kind = "ctinfo",
+ .id = TCA_ID_CTINFO,
+ .owner = THIS_MODULE,
+ .act = tcf_ctinfo_act,
+ .dump = tcf_ctinfo_dump,
+ .init = tcf_ctinfo_init,
+ .walk = tcf_ctinfo_walker,
+ .lookup = tcf_ctinfo_search,
+ .size = sizeof(struct tcf_ctinfo),
+};
+
+static __net_init int ctinfo_init_net(struct net *net)
+{
+ struct tc_action_net *tn = net_generic(net, ctinfo_net_id);
+
+ return tc_action_net_init(tn, &act_ctinfo_ops);
+}
+
+static void __net_exit ctinfo_exit_net(struct list_head *net_list)
+{
+ tc_action_net_exit(net_list, ctinfo_net_id);
+}
+
+static struct pernet_operations ctinfo_net_ops = {
+ .init = ctinfo_init_net,
+ .exit_batch = ctinfo_exit_net,
+ .id = &ctinfo_net_id,
+ .size = sizeof(struct tc_action_net),
+};
+
+static int __init ctinfo_init_module(void)
+{
+ return tcf_register_action(&act_ctinfo_ops, &ctinfo_net_ops);
+}
+
+static void __exit ctinfo_cleanup_module(void)
+{
+ tcf_unregister_action(&act_ctinfo_ops, &ctinfo_net_ops);
+}
+
+module_init(ctinfo_init_module);
+module_exit(ctinfo_cleanup_module);
+MODULE_AUTHOR("Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>");
+MODULE_DESCRIPTION("Connection tracking mark actions");
+MODULE_LICENSE("GPL");
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index edfcf16e704c..dac46dfadab5 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -103,11 +103,6 @@ static const struct net_offload sctp6_offload = {
},
};
-static const struct skb_checksum_ops crc32c_csum_ops = {
- .update = sctp_csum_update,
- .combine = sctp_csum_combine,
-};
-
int __init sctp_offload_init(void)
{
int ret;
@@ -120,7 +115,7 @@ int __init sctp_offload_init(void)
if (ret)
goto ipv4;
- crc32c_csum_stub = &crc32c_csum_ops;
+ crc32c_csum_stub = &sctp_csum_ops;
return ret;
ipv4:
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c51a707260fa..9ff64f9df1f3 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -5,9 +5,11 @@
#include <linux/unix_diag.h>
#include <linux/skbuff.h>
#include <linux/module.h>
+#include <linux/uidgid.h>
#include <net/netlink.h>
#include <net/af_unix.h>
#include <net/tcp_states.h>
+#include <net/sock.h>
static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
{
@@ -111,6 +113,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql);
}
+static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb)
+{
+ uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk));
+ return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid);
+}
+
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
u32 portid, u32 seq, u32 flags, int sk_ino)
{
@@ -157,6 +165,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown))
goto out_nlmsg_trim;
+ if ((req->udiag_show & UDIAG_SHOW_UID) &&
+ sk_diag_dump_uid(sk, skb))
+ goto out_nlmsg_trim;
+
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 982a8dc49e03..063073fd4952 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -23,14 +23,14 @@
#include <net/sock.h>
#include <net/af_vsock.h>
-/* The host side's design of the feature requires 6 exact 4KB pages for
- * recv/send rings respectively -- this is suboptimal considering memory
- * consumption, however unluckily we have to live with it, before the
- * host comes up with a better design in the future.
+/* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
+ * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
+ * hosts don't have this limitation; but, keep the defaults the same for compat.
*/
#define PAGE_SIZE_4K 4096
#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
+#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16)
@@ -55,8 +55,9 @@ struct hvs_recv_buf {
};
/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
- * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated
- * buffer, because tests show there is no significant performance difference.
+ * a smaller size, i.e. HVS_SEND_BUF_SIZE, to maximize concurrency between the
+ * guest and the host processing as one VMBUS packet is the smallest processing
+ * unit.
*
* Note: the buffer can be eliminated in the future when we add new VMBus
* ringbuffer APIs that allow us to directly copy data from userspace buffer
@@ -344,9 +345,12 @@ static void hvs_open_connection(struct vmbus_channel *chan)
struct sockaddr_vm addr;
struct sock *sk, *new = NULL;
- struct vsock_sock *vnew;
- struct hvsock *hvs, *hvs_new;
+ struct vsock_sock *vnew = NULL;
+ struct hvsock *hvs = NULL;
+ struct hvsock *hvs_new = NULL;
+ int rcvbuf;
int ret;
+ int sndbuf;
if_type = &chan->offermsg.offer.if_type;
if_instance = &chan->offermsg.offer.if_instance;
@@ -388,9 +392,34 @@ static void hvs_open_connection(struct vmbus_channel *chan)
}
set_channel_read_mode(chan, HV_CALL_DIRECT);
- ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
- RINGBUFFER_HVS_RCV_SIZE, NULL, 0,
- hvs_channel_cb, conn_from_host ? new : sk);
+
+ /* Use the socket buffer sizes as hints for the VMBUS ring size. For
+ * server side sockets, 'sk' is the parent socket and thus, this will
+ * allow the child sockets to inherit the size from the parent. Keep
+ * the mins to the default value and align to page size as per VMBUS
+ * requirements.
+ * For the max, the socket core library will limit the socket buffer
+ * size that can be set by the user, but, since currently, the hv_sock
+ * VMBUS ring buffer is physically contiguous allocation, restrict it
+ * further.
+ * Older versions of hv_sock host side code cannot handle bigger VMBUS
+ * ring buffer size. Use the version number to limit the change to newer
+ * versions.
+ */
+ if (vmbus_proto_version < VERSION_WIN10_V5) {
+ sndbuf = RINGBUFFER_HVS_SND_SIZE;
+ rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
+ } else {
+ sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
+ sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
+ sndbuf = ALIGN(sndbuf, PAGE_SIZE);
+ rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
+ rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
+ rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
+ }
+
+ ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
+ conn_from_host ? new : sk);
if (ret != 0) {
if (conn_from_host) {
hvs_new->chan = NULL;
@@ -441,6 +470,7 @@ static u32 hvs_get_local_cid(void)
static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
{
struct hvsock *hvs;
+ struct sock *sk = sk_vsock(vsk);
hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
if (!hvs)
@@ -448,7 +478,8 @@ static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->trans = hvs;
hvs->vsk = vsk;
-
+ sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
+ sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
return 0;
}
@@ -644,7 +675,9 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
struct hvsock *hvs = vsk->trans;
struct vmbus_channel *chan = hvs->chan;
struct hvs_send_buf *send_buf;
- ssize_t to_write, max_writable, ret;
+ ssize_t to_write, max_writable;
+ ssize_t ret = 0;
+ ssize_t bytes_written = 0;
BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);
@@ -652,20 +685,34 @@ static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
if (!send_buf)
return -ENOMEM;
- max_writable = hvs_channel_writable_bytes(chan);
- to_write = min_t(ssize_t, len, max_writable);
- to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
-
- ret = memcpy_from_msg(send_buf->data, msg, to_write);
- if (ret < 0)
- goto out;
+ /* Reader(s) could be draining data from the channel as we write.
+ * Maximize bandwidth, by iterating until the channel is found to be
+ * full.
+ */
+ while (len) {
+ max_writable = hvs_channel_writable_bytes(chan);
+ if (!max_writable)
+ break;
+ to_write = min_t(ssize_t, len, max_writable);
+ to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
+ /* memcpy_from_msg is safe for loop as it advances the offsets
+ * within the message iterator.
+ */
+ ret = memcpy_from_msg(send_buf->data, msg, to_write);
+ if (ret < 0)
+ goto out;
- ret = hvs_send_data(hvs->chan, send_buf, to_write);
- if (ret < 0)
- goto out;
+ ret = hvs_send_data(hvs->chan, send_buf, to_write);
+ if (ret < 0)
+ goto out;
- ret = to_write;
+ bytes_written += to_write;
+ len -= to_write;
+ }
out:
+ /* If any data has been sent, return that */
+ if (bytes_written)
+ ret = bytes_written;
kfree(send_buf);
return ret;
}
diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c
index 9cec81209617..2c75d823d8e2 100644
--- a/security/selinux/nlmsgtab.c
+++ b/security/selinux/nlmsgtab.c
@@ -83,6 +83,9 @@ static const struct nlmsg_perm nlmsg_route_perms[] =
{ RTM_NEWCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
{ RTM_DELCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
{ RTM_GETCHAIN, NETLINK_ROUTE_SOCKET__NLMSG_READ },
+ { RTM_NEWNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+ { RTM_DELNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_WRITE },
+ { RTM_GETNEXTHOP, NETLINK_ROUTE_SOCKET__NLMSG_READ },
};
static const struct nlmsg_perm nlmsg_tcpdiag_perms[] =
@@ -166,7 +169,7 @@ int selinux_nlmsg_lookup(u16 sclass, u16 nlmsg_type, u32 *perm)
* structures at the top of this file with the new mappings
* before updating the BUILD_BUG_ON() macro!
*/
- BUILD_BUG_ON(RTM_MAX != (RTM_NEWCHAIN + 3));
+ BUILD_BUG_ON(RTM_MAX != (RTM_NEWNEXTHOP + 3));
err = nlmsg_perm(nlmsg_type, perm, nlmsg_route_perms,
sizeof(nlmsg_route_perms));
break;
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 6f81130605d7..4ce0bc1612f5 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -17,3 +17,7 @@ tcp_inq
tls
txring_overwrite
ip_defrag
+so_txtime
+flowlabel
+flowlabel_mgr
+tcp_fastopen_backup_key
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 1e6d14d2825c..9a275d932fd5 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -9,12 +9,15 @@ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh
+TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
+TEST_PROGS += tcp_fastopen_backup_key.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 474040448601..89f84b5118bf 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -25,3 +25,5 @@ CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_ETF=m
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index 864f865eee55..c287b90b8af8 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -4,6 +4,7 @@
# IPv4 and IPv6 onlink tests
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+VERBOSE=0
# Network interfaces
# - odd in current namespace; even in peer ns
@@ -91,10 +92,10 @@ log_test()
if [ ${rc} -eq ${expected} ]; then
nsuccess=$((nsuccess+1))
- printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ printf " TEST: %-50s [ OK ]\n" "${msg}"
else
nfail=$((nfail+1))
- printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ printf " TEST: %-50s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
@@ -121,9 +122,23 @@ log_subsection()
run_cmd()
{
- echo
- echo "COMMAND: $*"
- eval $*
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
}
get_linklocal()
@@ -451,11 +466,34 @@ run_onlink_tests()
}
################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
# main
nsuccess=0
nfail=0
+while getopts :t:pPhv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
cleanup
setup
run_onlink_tests
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
new file mode 100755
index 000000000000..76a7c4472dc3
--- /dev/null
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -0,0 +1,485 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# redirect test
+#
+# .253 +----+
+# +----| r1 |
+# | +----+
+# +----+ | |.1
+# | h1 |--------------+ | 10.1.1.0/30 2001:db8:1::0/126
+# +----+ .1 | |.2
+# 172.16.1/24 | +----+ +----+
+# 2001:db8:16:1/64 +----| r2 |-------------------| h2 |
+# .254 +----+ .254 .2 +----+
+# 172.16.2/24
+# 2001:db8:16:2/64
+#
+# Route from h1 to h2 goes through r1, eth1 - connection between r1 and r2.
+# Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent
+# from r1 to h1 telling h1 to use r2 when talking to h2.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+R1_R2_N1_IP=10.1.1.1
+R2_R1_N1_IP=10.1.1.2
+
+R1_R2_N1_IP6=2001:db8:1::1
+R2_R1_N1_IP6=2001:db8:1::2
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+H2_N2_IP=172.16.2.2
+R2_N2_IP=172.16.2.254
+H2_N2_IP6=2001:db8:16:2::2
+R2_N2_IP6=2001:db8:16:2::254
+
+VRF=red
+VRF_TABLE=1111
+
+################################################################################
+# helpers
+
+log_section()
+{
+ echo
+ echo "###########################################################################"
+ echo "$*"
+ echo "###########################################################################"
+ echo
+}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_debug()
+{
+ if [ "$VERBOSE" = "1" ]; then
+ echo "$*"
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+get_linklocal()
+{
+ local ns=$1
+ local dev=$2
+ local addr
+
+ addr=$(ip -netns $ns -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+ local ns
+
+ for ns in h1 h2 r1 r2; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+create_vrf()
+{
+ local ns=$1
+
+ ip -netns ${ns} link add ${VRF} type vrf table ${VRF_TABLE}
+ ip -netns ${ns} link set ${VRF} up
+ ip -netns ${ns} route add vrf ${VRF} unreachable default metric 8192
+ ip -netns ${ns} -6 route add vrf ${VRF} unreachable default metric 8192
+
+ ip -netns ${ns} addr add 127.0.0.1/8 dev ${VRF}
+ ip -netns ${ns} -6 addr add ::1 dev ${VRF} nodad
+
+ ip -netns ${ns} ru del pref 0
+ ip -netns ${ns} ru add pref 32765 from all lookup local
+ ip -netns ${ns} -6 ru del pref 0
+ ip -netns ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+setup()
+{
+ local ns
+
+ #
+ # create nodes as namespaces
+ #
+ for ns in h1 h2 r1 r2; do
+ ip netns add $ns
+ ip -netns $ns li set lo up
+
+ case "${ns}" in
+ h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ ;;
+ r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
+ esac
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns h1 li add eth0 type veth peer name r1h1
+ ip -netns h1 li set r1h1 netns r1 name eth0 up
+
+ ip -netns h1 li add eth1 type veth peer name r2h1
+ ip -netns h1 li set r2h1 netns r2 name eth0 up
+
+ ip -netns h2 li add eth0 type veth peer name r2h2
+ ip -netns h2 li set eth0 up
+ ip -netns h2 li set r2h2 netns r2 name eth2 up
+
+ ip -netns r1 li add eth1 type veth peer name r2r1
+ ip -netns r1 li set eth1 up
+ ip -netns r1 li set r2r1 netns r2 name eth1 up
+
+ #
+ # h1
+ #
+ if [ "${WITH_VRF}" = "yes" ]; then
+ create_vrf "h1"
+ H1_VRF_ARG="vrf ${VRF}"
+ H1_PING_ARG="-I ${VRF}"
+ else
+ H1_VRF_ARG=
+ H1_PING_ARG=
+ fi
+ ip -netns h1 li add br0 type bridge
+ if [ "${WITH_VRF}" = "yes" ]; then
+ ip -netns h1 li set br0 vrf ${VRF} up
+ else
+ ip -netns h1 li set br0 up
+ fi
+ ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns h1 li set eth0 master br0 up
+ ip -netns h1 li set eth1 master br0 up
+
+ #
+ # h2
+ #
+ ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns h2 ro add default via ${R2_N2_IP} dev eth0
+ ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0
+
+ #
+ # r1
+ #
+ ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30
+ ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
+
+ #
+ # r2
+ #
+ ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30
+ ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
+ ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24
+ ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
+
+ sleep 2
+
+ R1_LLADDR=$(get_linklocal r1 eth0)
+ if [ $? -ne 0 ]; then
+ echo "Error: Failed to get link-local address of r1's eth0"
+ exit 1
+ fi
+ log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}"
+
+ R2_LLADDR=$(get_linklocal r2 eth0)
+ if [ $? -ne 0 ]; then
+ echo "Error: Failed to get link-local address of r2's eth0"
+ exit 1
+ fi
+ log_debug "initial gateway is R2's lladdr = ${R2_LLADDR}"
+}
+
+change_h2_mtu()
+{
+ local mtu=$1
+
+ run_cmd ip -netns h2 li set eth0 mtu ${mtu}
+ run_cmd ip -netns r2 li set eth2 mtu ${mtu}
+}
+
+check_exception()
+{
+ local mtu="$1"
+ local with_redirect="$2"
+ local desc="$3"
+
+ # From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102)
+ if [ "$VERBOSE" = "1" ]; then
+ echo "Commands to check for exception:"
+ run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
+ run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
+ fi
+
+ if [ -n "${mtu}" ]; then
+ mtu=" mtu ${mtu}"
+ fi
+ if [ "$with_redirect" = "yes" ]; then
+ ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ grep -q "cache <redirected> expires [0-9]*sec${mtu}"
+ elif [ -n "${mtu}" ]; then
+ ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ grep -q "cache expires [0-9]*sec${mtu}"
+ else
+ # want to verify that neither mtu nor redirected appears in
+ # the route get output. The -v will wipe out the cache line
+ # if either are set so the last grep -q will not find a match
+ ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ grep -E -v 'mtu|redirected' | grep -q "cache"
+ fi
+ log_test $? 0 "IPv4: ${desc}"
+
+ if [ "$with_redirect" = "yes" ]; then
+ ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+ elif [ -n "${mtu}" ]; then
+ ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ grep -q "${mtu}"
+ else
+ # IPv6 is a bit harder. First strip out the match if it
+ # contains an mtu exception and then look for the first
+ # gateway - R1's lladdr
+ ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ grep -v "mtu" | grep -q "${R1_LLADDR}"
+ fi
+ log_test $? 0 "IPv6: ${desc}"
+}
+
+run_ping()
+{
+ local sz=$1
+
+ run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
+ run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
+}
+
+replace_route_legacy()
+{
+ # r1 to h2 via r2 and eth0
+ run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0
+ run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
+}
+
+reset_route_legacy()
+{
+ run_cmd ip -netns r1 ro del ${H2_N2}
+ run_cmd ip -netns r1 -6 ro del ${H2_N2_6}
+
+ run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2}
+ run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
+
+ initial_route_legacy
+}
+
+initial_route_legacy()
+{
+ # r1 to h2 via r2 and eth1
+ run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1
+ run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
+
+ # h1 to h2 via r1
+ # - IPv6 redirect only works if gateway is the LLA
+ run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
+ run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
+}
+
+check_connectivity()
+{
+ local rc
+
+ run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
+ rc=$?
+ run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
+ [ $? -ne 0 ] && rc=$?
+
+ return $rc
+}
+
+do_test()
+{
+ local ttype="$1"
+
+ eval initial_route_${ttype}
+
+ # verify connectivity
+ check_connectivity
+ if [ $? -ne 0 ]; then
+ echo "Error: Basic connectivity is broken"
+ ret=1
+ return
+ fi
+
+ # redirect exception followed by mtu
+ eval replace_route_${ttype}
+ run_ping 64
+ check_exception "" "yes" "redirect exception"
+
+ check_connectivity
+ if [ $? -ne 0 ]; then
+ echo "Error: Basic connectivity is broken after redirect"
+ ret=1
+ return
+ fi
+
+ change_h2_mtu 1300
+ run_ping 1350
+ check_exception "1300" "yes" "redirect exception plus mtu"
+
+ # remove exceptions and restore routing
+ change_h2_mtu 1500
+ eval reset_route_${ttype}
+
+ check_connectivity
+ if [ $? -ne 0 ]; then
+ echo "Error: Basic connectivity is broken after reset"
+ ret=1
+ return
+ fi
+ check_exception "" "no" "routing reset"
+
+ # MTU exception followed by redirect
+ change_h2_mtu 1300
+ run_ping 1350
+ check_exception "1300" "no" "mtu exception"
+
+ eval replace_route_${ttype}
+ run_ping 64
+ check_exception "1300" "yes" "mtu exception plus redirect"
+
+ check_connectivity
+ if [ $? -ne 0 ]; then
+ echo "Error: Basic connectivity is broken after redirect"
+ ret=1
+ return
+ fi
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ *) usage; exit 1;;
+ esac
+done
+
+trap cleanup EXIT
+
+cleanup
+WITH_VRF=no
+setup
+
+log_section "Legacy routing"
+do_test "legacy"
+
+cleanup
+log_section "Legacy routing with VRF"
+WITH_VRF=yes
+setup
+do_test "legacy"
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c
new file mode 100644
index 000000000000..a7c41375374f
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test IPV6_FLOWINFO cmsg on send and recv */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <error.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/in6.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/* uapi/glibc weirdness may leave this undefined */
+#ifndef IPV6_FLOWINFO
+#define IPV6_FLOWINFO 11
+#endif
+
+#ifndef IPV6_FLOWLABEL_MGR
+#define IPV6_FLOWLABEL_MGR 32
+#endif
+
+#define FLOWLABEL_WILDCARD ((uint32_t) -1)
+
+static const char cfg_data[] = "a";
+static uint32_t cfg_label = 1;
+
+static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel)
+{
+ char control[CMSG_SPACE(sizeof(flowlabel))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ int ret;
+
+ iov.iov_base = (char *)cfg_data;
+ iov.iov_len = sizeof(cfg_data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ if (with_flowlabel) {
+ struct cmsghdr *cm;
+
+ cm = (void *)control;
+ cm->cmsg_len = CMSG_LEN(sizeof(flowlabel));
+ cm->cmsg_level = SOL_IPV6;
+ cm->cmsg_type = IPV6_FLOWINFO;
+ *(uint32_t *)CMSG_DATA(cm) = htonl(flowlabel);
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+ }
+
+ ret = sendmsg(fd, &msg, 0);
+ if (ret == -1)
+ error(1, errno, "send");
+
+ if (with_flowlabel)
+ fprintf(stderr, "sent with label %u\n", flowlabel);
+ else
+ fprintf(stderr, "sent without label\n");
+}
+
+static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
+{
+ char control[CMSG_SPACE(sizeof(expect))];
+ char data[sizeof(cfg_data)];
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cm;
+ uint32_t flowlabel;
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ memset(control, 0, sizeof(control));
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ ret = recvmsg(fd, &msg, 0);
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+ error(1, 0, "recv: truncated");
+ if (ret != sizeof(cfg_data))
+ error(1, 0, "recv: length mismatch");
+ if (memcmp(data, cfg_data, sizeof(data)))
+ error(1, 0, "recv: data mismatch");
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (with_flowlabel) {
+ if (!cm)
+ error(1, 0, "recv: missing cmsg");
+ if (CMSG_NXTHDR(&msg, cm))
+ error(1, 0, "recv: too many cmsg");
+ if (cm->cmsg_level != SOL_IPV6 ||
+ cm->cmsg_type != IPV6_FLOWINFO)
+ error(1, 0, "recv: unexpected cmsg level or type");
+
+ flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm));
+ fprintf(stderr, "recv with label %u\n", flowlabel);
+
+ if (expect != FLOWLABEL_WILDCARD && expect != flowlabel)
+ fprintf(stderr, "recv: incorrect flowlabel %u != %u\n",
+ flowlabel, expect);
+
+ } else {
+ fprintf(stderr, "recv without label\n");
+ }
+}
+
+static bool get_autoflowlabel_enabled(void)
+{
+ int fd, ret;
+ char val;
+
+ fd = open("/proc/sys/net/ipv6/auto_flowlabels", O_RDONLY);
+ if (fd == -1)
+ error(1, errno, "open sysctl");
+
+ ret = read(fd, &val, 1);
+ if (ret == -1)
+ error(1, errno, "read sysctl");
+ if (ret == 0)
+ error(1, 0, "read sysctl: 0");
+
+ if (close(fd))
+ error(1, errno, "close sysctl");
+
+ return val == '1';
+}
+
+static void flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags)
+{
+ struct in6_flowlabel_req req = {
+ .flr_action = IPV6_FL_A_GET,
+ .flr_label = htonl(label),
+ .flr_flags = flags,
+ .flr_share = share,
+ };
+
+ /* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */
+ req.flr_dst.s6_addr[0] = 0xfd;
+ req.flr_dst.s6_addr[15] = 0x1;
+
+ if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req)))
+ error(1, errno, "setsockopt flowlabel get");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "l:")) != -1) {
+ switch (c) {
+ case 'l':
+ cfg_label = strtoul(optarg, NULL, 0);
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(8000),
+ .sin6_addr = IN6ADDR_LOOPBACK_INIT,
+ };
+ const int one = 1;
+ int fdt, fdr;
+
+ parse_opts(argc, argv);
+
+ fdt = socket(PF_INET6, SOCK_DGRAM, 0);
+ if (fdt == -1)
+ error(1, errno, "socket t");
+
+ fdr = socket(PF_INET6, SOCK_DGRAM, 0);
+ if (fdr == -1)
+ error(1, errno, "socket r");
+
+ if (connect(fdt, (void *)&addr, sizeof(addr)))
+ error(1, errno, "connect");
+ if (bind(fdr, (void *)&addr, sizeof(addr)))
+ error(1, errno, "bind");
+
+ flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE);
+
+ if (setsockopt(fdr, SOL_IPV6, IPV6_FLOWINFO, &one, sizeof(one)))
+ error(1, errno, "setsockopt flowinfo");
+
+ if (get_autoflowlabel_enabled()) {
+ fprintf(stderr, "send no label: recv auto flowlabel\n");
+ do_send(fdt, false, 0);
+ do_recv(fdr, true, FLOWLABEL_WILDCARD);
+ } else {
+ fprintf(stderr, "send no label: recv no label (auto off)\n");
+ do_send(fdt, false, 0);
+ do_recv(fdr, false, 0);
+ }
+
+ fprintf(stderr, "send label\n");
+ do_send(fdt, true, cfg_label);
+ do_recv(fdr, true, cfg_label);
+
+ if (close(fdr))
+ error(1, errno, "close r");
+ if (close(fdt))
+ error(1, errno, "close t");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh
new file mode 100755
index 000000000000..d3bc6442704e
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression tests for IPv6 flowlabels
+#
+# run in separate namespaces to avoid mgmt db conflicts betweent tests
+
+set -e
+
+echo "TEST management"
+./in_netns.sh ./ipv6_flowlabel_mgr
+
+echo "TEST datapath"
+./in_netns.sh \
+ sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=0 && ./ipv6_flowlabel -l 1'
+
+echo "TEST datapath (with auto-flowlabels)"
+./in_netns.sh \
+ sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1'
+
+echo OK. All tests passed
diff --git a/tools/testing/selftests/net/ipv6_flowlabel_mgr.c b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c
new file mode 100644
index 000000000000..af95b48acea9
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_flowlabel_mgr.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test IPV6_FLOWINFO_MGR */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/in6.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* uapi/glibc weirdness may leave this undefined */
+#ifndef IPV6_FLOWLABEL_MGR
+#define IPV6_FLOWLABEL_MGR 32
+#endif
+
+/* from net/ipv6/ip6_flowlabel.c */
+#define FL_MIN_LINGER 6
+
+#define explain(x) \
+ do { if (cfg_verbose) fprintf(stderr, " " x "\n"); } while (0)
+
+#define __expect(x) \
+ do { \
+ if (!(x)) \
+ fprintf(stderr, "[OK] " #x "\n"); \
+ else \
+ error(1, 0, "[ERR] " #x " (line %d)", __LINE__); \
+ } while (0)
+
+#define expect_pass(x) __expect(x)
+#define expect_fail(x) __expect(!(x))
+
+static bool cfg_long_running;
+static bool cfg_verbose;
+
+static int flowlabel_get(int fd, uint32_t label, uint8_t share, uint16_t flags)
+{
+ struct in6_flowlabel_req req = {
+ .flr_action = IPV6_FL_A_GET,
+ .flr_label = htonl(label),
+ .flr_flags = flags,
+ .flr_share = share,
+ };
+
+ /* do not pass IPV6_ADDR_ANY or IPV6_ADDR_MAPPED */
+ req.flr_dst.s6_addr[0] = 0xfd;
+ req.flr_dst.s6_addr[15] = 0x1;
+
+ return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));
+}
+
+static int flowlabel_put(int fd, uint32_t label)
+{
+ struct in6_flowlabel_req req = {
+ .flr_action = IPV6_FL_A_PUT,
+ .flr_label = htonl(label),
+ };
+
+ return setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR, &req, sizeof(req));
+}
+
+static void run_tests(int fd)
+{
+ int wstatus;
+ pid_t pid;
+
+ explain("cannot get non-existent label");
+ expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0));
+
+ explain("cannot put non-existent label");
+ expect_fail(flowlabel_put(fd, 1));
+
+ explain("cannot create label greater than 20 bits");
+ expect_fail(flowlabel_get(fd, 0x1FFFFF, IPV6_FL_S_ANY,
+ IPV6_FL_F_CREATE));
+
+ explain("create a new label (FL_F_CREATE)");
+ expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+ explain("can get the label (without FL_F_CREATE)");
+ expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, 0));
+ explain("can get it again with create flag set, too");
+ expect_pass(flowlabel_get(fd, 1, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+ explain("cannot get it again with the exclusive (FL_FL_EXCL) flag");
+ expect_fail(flowlabel_get(fd, 1, IPV6_FL_S_ANY,
+ IPV6_FL_F_CREATE | IPV6_FL_F_EXCL));
+ explain("can now put exactly three references");
+ expect_pass(flowlabel_put(fd, 1));
+ expect_pass(flowlabel_put(fd, 1));
+ expect_pass(flowlabel_put(fd, 1));
+ expect_fail(flowlabel_put(fd, 1));
+
+ explain("create a new exclusive label (FL_S_EXCL)");
+ expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE));
+ explain("cannot get it again in non-exclusive mode");
+ expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY, IPV6_FL_F_CREATE));
+ explain("cannot get it again in exclusive mode either");
+ expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE));
+ expect_pass(flowlabel_put(fd, 2));
+
+ if (cfg_long_running) {
+ explain("cannot reuse the label, due to linger");
+ expect_fail(flowlabel_get(fd, 2, IPV6_FL_S_ANY,
+ IPV6_FL_F_CREATE));
+ explain("after sleep, can reuse");
+ sleep(FL_MIN_LINGER * 2 + 1);
+ expect_pass(flowlabel_get(fd, 2, IPV6_FL_S_ANY,
+ IPV6_FL_F_CREATE));
+ }
+
+ explain("create a new user-private label (FL_S_USER)");
+ expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, IPV6_FL_F_CREATE));
+ explain("cannot get it again in non-exclusive mode");
+ expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_ANY, 0));
+ explain("cannot get it again in exclusive mode");
+ expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_EXCL, 0));
+ explain("can get it again in user mode");
+ expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+ explain("child process can get it too, but not after setuid(nobody)");
+ pid = fork();
+ if (pid == -1)
+ error(1, errno, "fork");
+ if (!pid) {
+ expect_pass(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+ if (setuid(USHRT_MAX))
+ fprintf(stderr, "[INFO] skip setuid child test\n");
+ else
+ expect_fail(flowlabel_get(fd, 3, IPV6_FL_S_USER, 0));
+ exit(0);
+ }
+ if (wait(&wstatus) == -1)
+ error(1, errno, "wait");
+ if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0)
+ error(1, errno, "wait: unexpected child result");
+
+ explain("create a new process-private label (FL_S_PROCESS)");
+ expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, IPV6_FL_F_CREATE));
+ explain("can get it again");
+ expect_pass(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0));
+ explain("child process cannot can get it");
+ pid = fork();
+ if (pid == -1)
+ error(1, errno, "fork");
+ if (!pid) {
+ expect_fail(flowlabel_get(fd, 4, IPV6_FL_S_PROCESS, 0));
+ exit(0);
+ }
+ if (wait(&wstatus) == -1)
+ error(1, errno, "wait");
+ if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0)
+ error(1, errno, "wait: unexpected child result");
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "lv")) != -1) {
+ switch (c) {
+ case 'l':
+ cfg_long_running = true;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int fd;
+
+ parse_opts(argc, argv);
+
+ fd = socket(PF_INET6, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ run_tests(fd);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 317dafcd605d..4a1275990d7e 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -152,10 +152,10 @@ tests="
cleanup_ipv4_exception ipv4: cleanup of cached exceptions
cleanup_ipv6_exception ipv6: cleanup of cached exceptions"
-NS_A="ns-$(mktemp -u XXXXXX)"
-NS_B="ns-$(mktemp -u XXXXXX)"
-NS_R1="ns-$(mktemp -u XXXXXX)"
-NS_R2="ns-$(mktemp -u XXXXXX)"
+NS_A="ns-A"
+NS_B="ns-B"
+NS_R1="ns-R1"
+NS_R2="ns-R2"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
ns_r1="ip netns exec ${NS_R1}"
@@ -212,7 +212,6 @@ dummy6_0_prefix="fc00:1000::"
dummy6_1_prefix="fc00:1001::"
dummy6_mask="64"
-cleanup_done=1
err_buf=
tcpdump_pids=
@@ -495,7 +494,7 @@ setup_routing() {
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
- cleanup_done=0
+ cleanup
for arg do
eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
done
@@ -519,11 +518,9 @@ cleanup() {
done
tcpdump_pids=
- [ ${cleanup_done} -eq 1 ] && return
for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
ip netns del ${n} 2> /dev/null
done
- cleanup_done=1
}
mtu() {
@@ -1136,6 +1133,9 @@ done
trap cleanup EXIT
+# start clean
+cleanup
+
for t in ${tests}; do
[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
@@ -1156,7 +1156,6 @@ for t in ${tests}; do
eval test_${name}
ret=$?
- cleanup
if [ $ret -eq 0 ]; then
printf "TEST: %-60s [ OK ]\n" "${t}"
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
new file mode 100644
index 000000000000..53f598f06647
--- /dev/null
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -0,0 +1,296 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test the SO_TXTIME API
+ *
+ * Takes two streams of { payload, delivery time }[], one input and one output.
+ * Sends the input stream and verifies arrival matches the output stream.
+ * The two streams can differ due to out-of-order delivery and drops.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/net_tstamp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+static int cfg_clockid = CLOCK_TAI;
+static bool cfg_do_ipv4;
+static bool cfg_do_ipv6;
+static uint16_t cfg_port = 8000;
+static int cfg_variance_us = 2000;
+
+static uint64_t glob_tstart;
+
+/* encode one timed transmission (of a 1B payload) */
+struct timed_send {
+ char data;
+ int64_t delay_us;
+};
+
+#define MAX_NUM_PKT 8
+static struct timed_send cfg_in[MAX_NUM_PKT];
+static struct timed_send cfg_out[MAX_NUM_PKT];
+static int cfg_num_pkt;
+
+static uint64_t gettime_ns(void)
+{
+ struct timespec ts;
+
+ if (clock_gettime(cfg_clockid, &ts))
+ error(1, errno, "gettime");
+
+ return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
+}
+
+static void do_send_one(int fdt, struct timed_send *ts)
+{
+ char control[CMSG_SPACE(sizeof(uint64_t))];
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cm;
+ uint64_t tdeliver;
+ int ret;
+
+ iov.iov_base = &ts->data;
+ iov.iov_len = 1;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ if (ts->delay_us >= 0) {
+ memset(control, 0, sizeof(control));
+ msg.msg_control = &control;
+ msg.msg_controllen = sizeof(control);
+
+ tdeliver = glob_tstart + ts->delay_us * 1000;
+
+ cm = CMSG_FIRSTHDR(&msg);
+ cm->cmsg_level = SOL_SOCKET;
+ cm->cmsg_type = SCM_TXTIME;
+ cm->cmsg_len = CMSG_LEN(sizeof(tdeliver));
+ memcpy(CMSG_DATA(cm), &tdeliver, sizeof(tdeliver));
+ }
+
+ ret = sendmsg(fdt, &msg, 0);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret == 0)
+ error(1, 0, "write: 0B");
+
+}
+
+static void do_recv_one(int fdr, struct timed_send *ts)
+{
+ int64_t tstop, texpect;
+ char rbuf[2];
+ int ret;
+
+ ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+ if (ret == -1)
+ error(1, errno, "read");
+ if (ret != 1)
+ error(1, 0, "read: %dB", ret);
+
+ tstop = (gettime_ns() - glob_tstart) / 1000;
+ texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
+
+ fprintf(stderr, "payload:%c delay:%ld expected:%ld (us)\n",
+ rbuf[0], tstop, texpect);
+
+ if (rbuf[0] != ts->data)
+ error(1, 0, "payload mismatch. expected %c", ts->data);
+
+ if (labs(tstop - texpect) > cfg_variance_us)
+ error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+}
+
+static void do_recv_verify_empty(int fdr)
+{
+ char rbuf[1];
+ int ret;
+
+ ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+ if (ret != -1 || errno != EAGAIN)
+ error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
+}
+
+static void setsockopt_txtime(int fd)
+{
+ struct sock_txtime so_txtime_val = { .clockid = cfg_clockid };
+ struct sock_txtime so_txtime_val_read = { 0 };
+ socklen_t vallen = sizeof(so_txtime_val);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+ &so_txtime_val, sizeof(so_txtime_val)))
+ error(1, errno, "setsockopt txtime");
+
+ if (getsockopt(fd, SOL_SOCKET, SO_TXTIME,
+ &so_txtime_val_read, &vallen))
+ error(1, errno, "getsockopt txtime");
+
+ if (vallen != sizeof(so_txtime_val) ||
+ memcmp(&so_txtime_val, &so_txtime_val_read, vallen))
+ error(1, 0, "getsockopt txtime: mismatch");
+}
+
+static int setup_tx(struct sockaddr *addr, socklen_t alen)
+{
+ int fd;
+
+ fd = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ if (connect(fd, addr, alen))
+ error(1, errno, "connect");
+
+ setsockopt_txtime(fd);
+
+ return fd;
+}
+
+static int setup_rx(struct sockaddr *addr, socklen_t alen)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ int fd;
+
+ fd = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ if (bind(fd, addr, alen))
+ error(1, errno, "bind");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ return fd;
+}
+
+static void do_test(struct sockaddr *addr, socklen_t alen)
+{
+ int fdt, fdr, i;
+
+ fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
+ addr->sa_family == PF_INET ? '4' : '6',
+ cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
+
+ fdt = setup_tx(addr, alen);
+ fdr = setup_rx(addr, alen);
+
+ glob_tstart = gettime_ns();
+
+ for (i = 0; i < cfg_num_pkt; i++)
+ do_send_one(fdt, &cfg_in[i]);
+ for (i = 0; i < cfg_num_pkt; i++)
+ do_recv_one(fdr, &cfg_out[i]);
+
+ do_recv_verify_empty(fdr);
+
+ if (close(fdr))
+ error(1, errno, "close r");
+ if (close(fdt))
+ error(1, errno, "close t");
+}
+
+static int parse_io(const char *optarg, struct timed_send *array)
+{
+ char *arg, *tok;
+ int aoff = 0;
+
+ arg = strdup(optarg);
+ if (!arg)
+ error(1, errno, "strdup");
+
+ while ((tok = strtok(arg, ","))) {
+ arg = NULL; /* only pass non-zero on first call */
+
+ if (aoff / 2 == MAX_NUM_PKT)
+ error(1, 0, "exceeds max pkt count (%d)", MAX_NUM_PKT);
+
+ if (aoff & 1) { /* parse delay */
+ array->delay_us = strtol(tok, NULL, 0) * 1000;
+ array++;
+ } else { /* parse character */
+ array->data = tok[0];
+ }
+
+ aoff++;
+ }
+
+ free(arg);
+
+ return aoff / 2;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c, ilen, olen;
+
+ while ((c = getopt(argc, argv, "46c:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_do_ipv4 = true;
+ break;
+ case '6':
+ cfg_do_ipv6 = true;
+ break;
+ case 'c':
+ if (!strcmp(optarg, "tai"))
+ cfg_clockid = CLOCK_TAI;
+ else if (!strcmp(optarg, "monotonic") ||
+ !strcmp(optarg, "mono"))
+ cfg_clockid = CLOCK_MONOTONIC;
+ else
+ error(1, 0, "unknown clock id %s", optarg);
+ break;
+ default:
+ error(1, 0, "parse error at %d", optind);
+ }
+ }
+
+ if (argc - optind != 2)
+ error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+
+ ilen = parse_io(argv[optind], cfg_in);
+ olen = parse_io(argv[optind + 1], cfg_out);
+ if (ilen != olen)
+ error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
+ cfg_num_pkt = ilen;
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_do_ipv6) {
+ struct sockaddr_in6 addr6 = {0};
+
+ addr6.sin6_family = AF_INET6;
+ addr6.sin6_port = htons(cfg_port);
+ addr6.sin6_addr = in6addr_loopback;
+ do_test((void *)&addr6, sizeof(addr6));
+ }
+
+ if (cfg_do_ipv4) {
+ struct sockaddr_in addr4 = {0};
+
+ addr4.sin_family = AF_INET;
+ addr4.sin_port = htons(cfg_port);
+ addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ do_test((void *)&addr4, sizeof(addr4));
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
new file mode 100755
index 000000000000..5aa519328a5b
--- /dev/null
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Regression tests for the SO_TXTIME interface
+
+# Run in network namespace
+if [[ $# -eq 0 ]]; then
+ ./in_netns.sh $0 __subprocess
+ exit $?
+fi
+
+set -e
+
+tc qdisc add dev lo root fq
+./so_txtime -4 -6 -c mono a,-1 a,-1
+./so_txtime -4 -6 -c mono a,0 a,0
+./so_txtime -4 -6 -c mono a,10 a,10
+./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
+./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
+
+if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then
+ ! ./so_txtime -4 -6 -c tai a,-1 a,-1
+ ! ./so_txtime -4 -6 -c tai a,0 a,0
+ ./so_txtime -4 -6 -c tai a,10 a,10
+ ./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
+ ./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+else
+ echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+fi
+
+echo OK. All tests passed
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
new file mode 100644
index 000000000000..58bb77d9e7e1
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Test key rotation for TFO.
+ * New keys are 'rotated' in two steps:
+ * 1) Add new key as the 'backup' key 'behind' the primary key
+ * 2) Make new key the primary by swapping the backup and primary keys
+ *
+ * The rotation is done in stages using multiple sockets bound
+ * to the same port via SO_REUSEPORT. This simulates key rotation
+ * behind say a load balancer. We verify that across the rotation
+ * there are no cases in which a cookie is not accepted by verifying
+ * that TcpExtTCPFastOpenPassiveFail remains 0.
+ */
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <unistd.h>
+#include <netinet/tcp.h>
+#include <fcntl.h>
+#include <time.h>
+
+#ifndef TCP_FASTOPEN_KEY
+#define TCP_FASTOPEN_KEY 33
+#endif
+
+#define N_LISTEN 10
+#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
+#define KEY_LENGTH 16
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+static bool do_ipv6;
+static bool do_sockopt;
+static bool do_rotate;
+static int key_len = KEY_LENGTH;
+static int rcv_fds[N_LISTEN];
+static int proc_fd;
+static const char *IP4_ADDR = "127.0.0.1";
+static const char *IP6_ADDR = "::1";
+static const int PORT = 8891;
+
+static void get_keys(int fd, uint32_t *keys)
+{
+ char buf[128];
+ int len = KEY_LENGTH * 2;
+
+ if (do_sockopt) {
+ if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len))
+ error(1, errno, "Unable to get key");
+ return;
+ }
+ lseek(proc_fd, 0, SEEK_SET);
+ if (read(proc_fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to read %s", PROC_FASTOPEN_KEY);
+ if (sscanf(buf, "%x-%x-%x-%x,%x-%x-%x-%x", keys, keys + 1, keys + 2,
+ keys + 3, keys + 4, keys + 5, keys + 6, keys + 7) != 8)
+ error(1, 0, "Unable to parse %s", PROC_FASTOPEN_KEY);
+}
+
+static void set_keys(int fd, uint32_t *keys)
+{
+ char buf[128];
+
+ if (do_sockopt) {
+ if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys,
+ key_len))
+ error(1, errno, "Unable to set key");
+ return;
+ }
+ if (do_rotate)
+ snprintf(buf, 128, "%08x-%08x-%08x-%08x,%08x-%08x-%08x-%08x",
+ keys[0], keys[1], keys[2], keys[3], keys[4], keys[5],
+ keys[6], keys[7]);
+ else
+ snprintf(buf, 128, "%08x-%08x-%08x-%08x",
+ keys[0], keys[1], keys[2], keys[3]);
+ lseek(proc_fd, 0, SEEK_SET);
+ if (write(proc_fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to write %s", PROC_FASTOPEN_KEY);
+}
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds)
+{
+ struct sockaddr_in addr4 = {0};
+ struct sockaddr_in6 addr6 = {0};
+ struct sockaddr *addr;
+ int opt = 1, i, sz;
+ int qlen = 100;
+ uint32_t keys[8];
+
+ switch (family) {
+ case AF_INET:
+ addr4.sin_family = family;
+ addr4.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4.sin_port = htons(PORT);
+ sz = sizeof(addr4);
+ addr = (struct sockaddr *)&addr4;
+ break;
+ case AF_INET6:
+ addr6.sin6_family = AF_INET6;
+ addr6.sin6_addr = in6addr_any;
+ addr6.sin6_port = htons(PORT);
+ sz = sizeof(addr6);
+ addr = (struct sockaddr *)&addr6;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ /* clang does not recognize error() above as terminating
+ * the program, so it complains that saddr, sz are
+ * not initialized when this code path is taken. Silence it.
+ */
+ return;
+ }
+ for (i = 0; i < ARRAY_SIZE(keys); i++)
+ keys[i] = rand();
+ for (i = 0; i < N_LISTEN; i++) {
+ rcv_fds[i] = socket(family, proto, 0);
+ if (rcv_fds[i] < 0)
+ error(1, errno, "failed to create receive socket");
+ if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+ if (bind(rcv_fds[i], addr, sz))
+ error(1, errno, "failed to bind receive socket");
+ if (setsockopt(rcv_fds[i], SOL_TCP, TCP_FASTOPEN, &qlen,
+ sizeof(qlen)))
+ error(1, errno, "failed to set TCP_FASTOPEN");
+ set_keys(rcv_fds[i], keys);
+ if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static int connect_and_send(int family, int proto)
+{
+ struct sockaddr_in saddr4 = {0};
+ struct sockaddr_in daddr4 = {0};
+ struct sockaddr_in6 saddr6 = {0};
+ struct sockaddr_in6 daddr6 = {0};
+ struct sockaddr *saddr, *daddr;
+ int fd, sz, ret;
+ char data[1];
+
+ switch (family) {
+ case AF_INET:
+ saddr4.sin_family = AF_INET;
+ saddr4.sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4.sin_port = 0;
+
+ daddr4.sin_family = AF_INET;
+ if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr))
+ error(1, errno, "inet_pton failed: %s", IP4_ADDR);
+ daddr4.sin_port = htons(PORT);
+
+ sz = sizeof(saddr4);
+ saddr = (struct sockaddr *)&saddr4;
+ daddr = (struct sockaddr *)&daddr4;
+ break;
+ case AF_INET6:
+ saddr6.sin6_family = AF_INET6;
+ saddr6.sin6_addr = in6addr_any;
+
+ daddr6.sin6_family = AF_INET6;
+ if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr))
+ error(1, errno, "inet_pton failed: %s", IP6_ADDR);
+ daddr6.sin6_port = htons(PORT);
+
+ sz = sizeof(saddr6);
+ saddr = (struct sockaddr *)&saddr6;
+ daddr = (struct sockaddr *)&daddr6;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ /* clang does not recognize error() above as terminating
+ * the program, so it complains that saddr, daddr, sz are
+ * not initialized when this code path is taken. Silence it.
+ */
+ return -1;
+ }
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+ if (bind(fd, saddr, sz))
+ error(1, errno, "failed to bind send socket");
+ data[0] = 'a';
+ ret = sendto(fd, data, 1, MSG_FASTOPEN, daddr, sz);
+ if (ret != 1)
+ error(1, errno, "failed to sendto");
+
+ return fd;
+}
+
+static bool is_listen_fd(int fd)
+{
+ int i;
+
+ for (i = 0; i < N_LISTEN; i++) {
+ if (rcv_fds[i] == fd)
+ return true;
+ }
+ return false;
+}
+
+static int rotate_key(int fd)
+{
+ static int iter;
+ static uint32_t new_key[4];
+ uint32_t keys[8];
+ uint32_t tmp_key[4];
+ int i;
+ int len = KEY_LENGTH * 2;
+
+ if (iter < N_LISTEN) {
+ /* first set new key as backups */
+ if (iter == 0) {
+ for (i = 0; i < ARRAY_SIZE(new_key); i++)
+ new_key[i] = rand();
+ }
+ get_keys(fd, keys);
+ memcpy(keys + 4, new_key, KEY_LENGTH);
+ set_keys(fd, keys);
+ } else {
+ /* swap the keys */
+ get_keys(fd, keys);
+ memcpy(tmp_key, keys + 4, KEY_LENGTH);
+ memcpy(keys + 4, keys, KEY_LENGTH);
+ memcpy(keys, tmp_key, KEY_LENGTH);
+ set_keys(fd, keys);
+ }
+ if (++iter >= (N_LISTEN * 2))
+ iter = 0;
+}
+
+static void run_one_test(int family)
+{
+ struct epoll_event ev;
+ int i, send_fd;
+ int n_loops = 10000;
+ int rotate_key_fd = 0;
+ int key_rotate_interval = 50;
+ int fd, epfd;
+ char buf[1];
+
+ build_rcv_fd(family, SOCK_STREAM, rcv_fds);
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ ev.events = EPOLLIN;
+ for (i = 0; i < N_LISTEN; i++) {
+ ev.data.fd = rcv_fds[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+ while (n_loops--) {
+ send_fd = connect_and_send(family, SOCK_STREAM);
+ if (do_rotate && ((n_loops % key_rotate_interval) == 0)) {
+ rotate_key(rcv_fds[rotate_key_fd]);
+ if (++rotate_key_fd >= N_LISTEN)
+ rotate_key_fd = 0;
+ }
+ while (1) {
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+ if (is_listen_fd(ev.data.fd)) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ ev.data.fd = fd;
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
+ error(1, errno, "failed epoll add");
+ continue;
+ }
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ if (i != 1)
+ error(1, errno, "failed recv data");
+ if (epoll_ctl(epfd, EPOLL_CTL_DEL, ev.data.fd, NULL))
+ error(1, errno, "failed epoll del");
+ close(ev.data.fd);
+ break;
+ }
+ close(send_fd);
+ }
+ for (i = 0; i < N_LISTEN; i++)
+ close(rcv_fds[i]);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "46sr")) != -1) {
+ switch (c) {
+ case '4':
+ do_ipv6 = false;
+ break;
+ case '6':
+ do_ipv6 = true;
+ break;
+ case 's':
+ do_sockopt = true;
+ break;
+ case 'r':
+ do_rotate = true;
+ key_len = KEY_LENGTH * 2;
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+ proc_fd = open(PROC_FASTOPEN_KEY, O_RDWR);
+ if (proc_fd < 0)
+ error(1, errno, "Unable to open %s", PROC_FASTOPEN_KEY);
+ srand(time(NULL));
+ if (do_ipv6)
+ run_one_test(AF_INET6);
+ else
+ run_one_test(AF_INET);
+ close(proc_fd);
+ fprintf(stderr, "PASS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
new file mode 100755
index 000000000000..41476399e184
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# rotate TFO keys for ipv4/ipv6 and verify that the client does
+# not present an invalid cookie.
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+ ip netns exec "${NETNS}" sysctl -w net.ipv4.tcp_fastopen=3 \
+ >/dev/null 2>&1
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+ # flush routes before each run, otherwise successive runs can
+ # initially present an old TFO cookie
+ ip netns exec "${NETNS}" ip tcp_metrics flush
+ ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1"
+ val=$(ip netns exec "${NETNS}" nstat -az | \
+ grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}')
+ if [ $val -ne 0 ]; then
+ echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero"
+ return 1
+ fi
+}
+
+do_test "-4"
+do_test "-6"
+do_test "-4"
+do_test "-6"
+do_test "-4s"
+do_test "-6s"
+do_test "-4s"
+do_test "-6s"
+do_test "-4r"
+do_test "-6r"
+do_test "-4r"
+do_test "-6r"
+do_test "-4sr"
+do_test "-6sr"
+do_test "-4sr"
+do_test "-6sr"
+echo "all tests done"
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 203302065458..b235efd55367 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -38,6 +38,7 @@ CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_VLAN=m
CONFIG_NET_ACT_BPF=m
CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_CTINFO=m
CONFIG_NET_ACT_SKBMOD=m
CONFIG_NET_ACT_IFE=m
CONFIG_NET_ACT_TUNNEL_KEY=m